def test_build_valid_with_description_field(self):
    """
    When given a valid directory whose sample sheet carries a description
    column, ensure a valid SequencingRun is built with Projects, Samples,
    etc., and that the first sample exposes the expected description
    :return:
    """
    directory = path.join(path_to_module, "iseq_with_desc_field")
    sheet_file = path.join(directory, "SampleSheet.csv")
    meta_data = sample_parser.parse_metadata(sheet_file)

    # NOTE(review): the fixture directory is named "iseq_..." but the
    # relative data directory comes from the miniseq parser -- confirm
    # this is intentional.
    data_dir = path.join(directory, parsers.miniseq.Parser.get_relative_data_directory())
    # Any "*" in the relative data directory is swapped for the literal
    # folder name "some_dir" before the files are listed.
    data_dir = data_dir.replace("*", "some_dir")
    file_list = parsers.common.get_file_list(data_dir)

    sample_list = sample_parser.parse_sample_list(
        sample_sheet_file=sheet_file,
        run_data_directory=data_dir,
        run_data_directory_file_list=file_list,
    )
    sequencing_run = parsers.common.build_sequencing_run_from_samples(sample_list, meta_data)

    # Returns a SequencingRun
    self.assertIsInstance(sequencing_run, model.SequencingRun)

    # Includes a single project (checked before indexing into the list)
    self.assertEqual(len(sequencing_run.project_list), 1)
    project = sequencing_run.project_list[0]
    # is of type Project
    self.assertIsInstance(project, model.Project)

    # Project has 3 samples
    self.assertEqual(len(project.sample_list), 3)
    first_sample = project.sample_list[0]
    # samples are of type Sample
    self.assertIsInstance(first_sample, model.Sample)
    # samples have correct description
    self.assertEqual(first_sample.description, "desc1")
    # samples have SequenceFile
    self.assertIsInstance(first_sample.sequence_file, model.SequenceFile)
def test_parse_metadata_paired_valid(self, mock_csv_reader):
    """
    Feed parse_metadata an in-memory sample sheet that lists two read
    lengths and ensure the resulting metadata describes paired end reads
    :return:
    """
    header_rows = (
        "Local Run Manager Analysis Id,4004\n"
        "Experiment Name,Some_Test_Data\n"
        "Date,2015-05-14\n"
        "Workflow,GenerateFastQWorkflow\n"
        "Description,12-34\n"
        "Chemistry,Yes\n"
    )
    # Two entries in the reads section
    read_rows = "151\n" "151\n"
    column_names = "Sample_ID,Sample_Name,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project"
    sample_rows = (
        "15-0318-4004,15-0318,N701,TAAGGCGA,S502,CTCTCTAT,203\n"
        "15-0455-4004,15-0455,N701,TAAGGCGA,S503,TATCCTCT,203\n"
        "15-0462-4004,15-0462,N701,TAAGGCGA,S505,GTAAGGAG,203\n"
    )

    # Stitch the sections together into the full sample sheet text
    sheet_text = "".join([
        "[Header]\n", header_rows, "\n",
        "[Reads]\n", read_rows, "\n",
        "[Data]\n", column_names, "\n", sample_rows,
    ])

    # wrap the text in a memory file so the csv reader can consume it
    pseudo_file = StringIO(sheet_text)

    # mock_csv_reader is presumably the patched csv reader factory (the
    # patch decorator is not visible here); its first call hands back a
    # reader over the memory file, so no real path is needed below
    mock_csv_reader.side_effect = [reader(pseudo_file)]

    metadata = sample_parser.parse_metadata(None)

    expected_values = [
        # The meta data we care about the most
        ('readLengths', "151"),
        ('layoutType', "PAIRED_END"),
        # Other meta data should also be here
        ('localrunmanager', "4004"),
        ('experimentName', "Some_Test_Data"),
        ('date', "2015-05-14"),
        ('workflow', "GenerateFastQWorkflow"),
        ('description', "12-34"),
        ('chemistry', "Yes"),
    ]
    for key, expected in expected_values:
        self.assertEqual(metadata[key], expected)
def test_parse_metadata(self):
    """
    Parse the meta data of a sample sheet stored on disk (no mocked
    files) and compare the whole result against the expected dictionary
    :return:
    """
    sample_sheet_path = path.join(path_to_module, "fake_ngs_data", "SampleSheet.csv")

    expected = {
        "localrunmanager": "4004",
        "experimentName": '1',
        "date": "10/15/2013",
        "workflow": "GenerateFastQWorkflow",
        "description": "Superbug",
        "chemistry": "Amplicon",
        "readLengths": "151",
        "layoutType": "PAIRED_END",
    }

    parsed = sample_parser.parse_metadata(sample_sheet_path)

    self.assertEqual(expected, parsed)
def get_sequencing_run(sample_sheet, run_data_directory=None, run_data_directory_file_list=None):
    """
    Build a SequencingRun from a sample sheet after validating the run
    directory and the sample sheet locally

    Each stage that fails is logged and reported as a ValidationError that
    carries a ValidationResult describing the problem

    :param sample_sheet: Sample Sheet File
    :param run_data_directory: Optional: Directory (including run directory) to data files.
                               Can be provided for bypassing os calls when developing on cloud systems
    :param run_data_directory_file_list: Optional: List of files in data directory.
                                         Can be provided for bypassing os calls when developing on cloud systems
    :return: SequencingRun
    """
    # Stage 1: work out the data directory and its file list, unless the
    # caller supplied them
    directory_result = model.ValidationResult()
    try:
        if run_data_directory is None:
            run_data_directory = Parser.get_full_data_directory(sample_sheet)
        if run_data_directory_file_list is None:
            run_data_directory_file_list = common.get_file_list(run_data_directory)
    except exceptions.DirectoryError as directory_error:
        message = "Errors occurred while parsing files"
        directory_result.add_error(directory_error)
        logging.error(message)
        raise exceptions.ValidationError(message, directory_result)

    # Stage 2: the sample sheet itself has to pass validation
    sheet_result = validation.validate_sample_sheet(sample_sheet)
    if not sheet_result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, sheet_result)

    # Stage 3: read the run meta data from the sample sheet
    # A fresh result object is shared by this stage and the next one
    build_result = model.ValidationResult()
    try:
        run_metadata = sample_parser.parse_metadata(sample_sheet)
    except exceptions.SampleSheetError as sheet_error:
        message = "Errors occurred while parsing metadata"
        build_result.add_error(sheet_error)
        logging.error(message)
        raise exceptions.ValidationError(message, build_result)

    # Stage 4: build the sequencing run from the samples and the meta data
    # NOTE: only SequenceFileError is translated into a ValidationError
    # here; any other exception type propagates unchanged
    try:
        samples = sample_parser.parse_sample_list(
            sample_sheet, run_data_directory, run_data_directory_file_list)
        sequencing_run = common.build_sequencing_run_from_samples(samples, run_metadata)
    except exceptions.SequenceFileError as file_error:
        message = "Errors occurred while building sequence run from sample sheet"
        build_result.add_error(file_error)
        logging.error(message)
        raise exceptions.ValidationError(message, build_result)

    return sequencing_run