def test_space_in_sample_name(self):
    """
    Smoke test: parse the "ngs_space_in_sample_name" run directory

    There are no assertions; the test passes as long as the parser
    does not raise.
    :return:
    """
    run_dir = path.join(path_to_module, "ngs_space_in_sample_name")
    sheet_path = path.join(run_dir, "SampleSheet.csv")
    relative_data_dir = parsers.miseq.Parser.get_relative_data_directory()
    data_dir = path.join(run_dir, relative_data_dir)
    data_files = parsers.common.get_file_list(data_dir)

    # Only checking that parsing completes without throwing an error
    sample_parser.parse_sample_list(
        sample_sheet_file=sheet_path,
        run_data_directory=data_dir,
        run_data_directory_file_list=data_files)
def test_valid(self):
    """
    Ensure a parsed valid directory matches the expected sample list

    Parses the "fake_ngs_data" run and compares the first returned sample
    (uploadable dict, sequence file properties and sequence file list)
    against hand-built expected values.
    :return:
    """
    directory = path.join(path_to_module, "fake_ngs_data")
    sheet_file = path.join(directory, "SampleSheet.csv")
    data_dir = path.join(
        directory, parsers.miseq.Parser.get_relative_data_directory())
    file_list = parsers.common.get_file_list(data_dir)

    # Expected sample, built by hand
    sample = model.Sample(
        "01-1111",
        "Super bug",
        1,
        {
            "Sample_Well": "01",
            "index": "AAAAAAAA",
            "Sample_Plate": "1",
            "I7_Index_ID": "N01",
            "sample_project": "6",
            "sequencer_sample_name": "01-1111",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT",
        })

    # Expected properties attached to the sample's sequence file
    sequence_file_properties = {
        'Sample_Plate': '1',
        'Sample_Well': '01',
        'I7_Index_ID': 'N01',
        'index': 'AAAAAAAA',
        'I5_Index_ID': 'S01',
        'index2': 'TTTTTTTT'
    }

    # Expected forward / reverse read files for the sample
    file_path_1 = path.join(path_to_module, "fake_ngs_data", "Data",
                            "Intensities", "BaseCalls",
                            "01-1111_S1_L001_R1_001.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_ngs_data", "Data",
                            "Intensities", "BaseCalls",
                            "01-1111_S1_L001_R2_001.fastq.gz")
    raw_file_list = [file_path_1, file_path_2]

    res = sample_parser.parse_sample_list(
        sample_sheet_file=sheet_file,
        run_data_directory=data_dir,
        run_data_directory_file_list=file_list)

    # Check sample is the same
    self.assertEqual(res[0].get_uploadable_dict(),
                     sample.get_uploadable_dict())

    # Check sequencing file is correct
    self.assertEqual(res[0].sequence_file.properties_dict,
                     sequence_file_properties)
    # list.sort() sorts in place and returns None, so comparing the results
    # of two .sort() calls always passes. Compare sorted copies instead so
    # the file lists are really checked, regardless of order.
    self.assertEqual(sorted(res[0].sequence_file.file_list),
                     sorted(raw_file_list))
def test_not_valid_pf_list(self):
    """
    The file list in the sample sheet is invalid

    Parsing the "ngs_not_valid_pf_list" run is expected to raise
    SequenceFileError.
    :return:
    """
    run_dir = path.join(path_to_module, "ngs_not_valid_pf_list")
    sheet_path = path.join(run_dir, "SampleSheet.csv")
    relative_data_dir = parsers.miseq.Parser.get_relative_data_directory()
    data_dir = path.join(run_dir, relative_data_dir)
    data_files = parsers.common.get_file_list(data_dir)

    # The return value is irrelevant here; only the raised error matters
    with self.assertRaises(SequenceFileError):
        sample_parser.parse_sample_list(
            sample_sheet_file=sheet_path,
            run_data_directory=data_dir,
            run_data_directory_file_list=data_files)
def test_not_pf_list(self):
    """
    No valid files were found with names given in sample sheet

    Parsing the "ngs_not_pf_list" run is expected to raise
    SequenceFileError.
    :return:
    """
    run_dir = path.join(path_to_module, "ngs_not_pf_list")
    sheet_path = path.join(run_dir, "SampleSheet.csv")
    relative_data_dir = parsers.miseq.Parser.get_relative_data_directory()
    data_dir = path.join(run_dir, relative_data_dir)
    data_files = parsers.common.get_file_list(data_dir)

    # The return value is irrelevant here; only the raised error matters
    with self.assertRaises(SequenceFileError):
        sample_parser.parse_sample_list(
            sample_sheet_file=sheet_path,
            run_data_directory=data_dir,
            run_data_directory_file_list=data_files)
def get_sequencing_run(sample_sheet, run_data_directory=None, run_data_directory_file_list=None):
    """
    Does local validation on the integrity of the run directory / sample sheet

    Throws a ValidationError with a validation result attached if it cannot
    make a sequencing run

    :param sample_sheet: Sample Sheet File
    :param run_data_directory: Optional: Directory (including run directory) to data files.
                               Can be provided for bypassing os calls when developing on cloud systems
    :param run_data_directory_file_list: Optional: List of files in data directory.
                                         Can be provided for bypassing os calls when developing on cloud systems
    :return: SequencingRun
    """
    # The same text is both logged and attached to the raised ValidationError
    files_message = "Errors occurred while parsing files"
    sheet_message = "Errors occurred while getting sample sheet"
    metadata_message = "Errors occurred while parsing metadata"
    build_message = "Errors occurred while building sequence run from sample sheet"

    # Resolve the data directory and its file list, unless the caller gave them
    directory_result = model.ValidationResult()
    try:
        data_directory = run_data_directory
        if data_directory is None:
            data_directory = Parser.get_full_data_directory(sample_sheet)
        data_files = run_data_directory_file_list
        if data_files is None:
            data_files = common.get_file_list(data_directory)
    except exceptions.DirectoryError as directory_error:
        directory_result.add_error(directory_error)
        logging.error(files_message)
        raise exceptions.ValidationError(files_message, directory_result)

    # Validate the sample sheet itself
    sheet_result = validation.validate_sample_sheet(sample_sheet)
    if not sheet_result.is_valid():
        logging.error(sheet_message)
        raise exceptions.ValidationError(sheet_message, sheet_result)

    # Parse the run metadata from the sample sheet
    parse_result = model.ValidationResult()
    try:
        metadata = sample_parser.parse_metadata(sample_sheet)
    except exceptions.SampleSheetError as sheet_error:
        parse_result.add_error(sheet_error)
        logging.error(metadata_message)
        raise exceptions.ValidationError(metadata_message, parse_result)

    # Build the sequencing run from the parsed samples and metadata; errors
    # here are recorded on the same result object as the metadata stage
    try:
        samples = sample_parser.parse_sample_list(
            sample_sheet, data_directory, data_files)
        built_run = common.build_sequencing_run_from_samples(samples, metadata)
    except exceptions.SequenceFileError as file_error:
        parse_result.add_error(file_error)
        logging.error(build_message)
        raise exceptions.ValidationError(build_message, parse_result)

    return built_run