def test_parse_metadata_extra_commas(self):
    """
    Parse the metadata of a sample sheet that contains extra commas.

    Reads the real "testValidSheetTrailingCommas" sample sheet from disk
    (no mocked file) and compares the parsed metadata with the full
    dictionary expected for that sheet.

    :return:
    """
    sample_sheet = path.join(
        path_to_module,
        "testValidSheetTrailingCommas",
        "SampleSheet.csv",
    )

    # Expected result, grouped by topic; dict comparison ignores ordering.
    expected = {
        # sheet / run identification
        "iemfileversion": "4",
        "investigatorName": "Investigator",
        "experimentName": "252",
        "description": "252",
        "date": "2015-11-12",
        # workflow settings
        "workflow": "GenerateFASTQ",
        "application": "FASTQ Only",
        "assay": "TruSeq HT",
        "chemistry": "Amplicon",
        "reversecomplement": "0",
        # adapters
        "adapter": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
        "adapterread2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
        # read layout
        "readLengths": "301",
        "layoutType": "PAIRED_END",
    }

    parsed = sample_parser.parse_metadata(sample_sheet)

    self.assertEqual(expected, parsed)
def test_build_valid(self):
    """
    When given a valid sample sheet, ensure a valid SequencingRun is built
    with Projects, Samples, etc.

    Parses the metadata of the "fake_ngs_data" sample sheet, builds a
    sequencing run from the sheet and that metadata, and then checks the
    shape of the result: one project holding three samples, where the run,
    the project, the first sample and that sample's sequence file are
    instances of the matching model classes.

    :return:
    """
    sheet_file = path.join(path_to_module, "fake_ngs_data", "SampleSheet.csv")
    meta_data = sample_parser.parse_metadata(sheet_file)

    sequencing_run = sample_parser.build_sequencing_run_from_samples(
        sheet_file, meta_data)

    # Returns a SequencingRun
    self.assertIsInstance(sequencing_run, model.SequencingRun)

    # Includes a single project. The count is asserted before indexing so
    # an empty list shows up as an assertion failure, not an IndexError.
    projects = sequencing_run.project_list
    self.assertEqual(len(projects), 1)

    # is of type Project
    project = projects[0]
    self.assertIsInstance(project, model.Project)

    # Project has 3 samples
    samples = project.sample_list
    self.assertEqual(len(samples), 3)

    # samples are of type Sample (only the first one is inspected)
    first_sample = samples[0]
    self.assertIsInstance(first_sample, model.Sample)

    # samples have SequenceFile
    self.assertIsInstance(first_sample.sequence_file, model.SequenceFile)
def test_parse_metadata(self):
    """
    Parse the metadata of the "fake_ngs_data" sample sheet.

    Uses the real file on disk instead of a mocked file and compares the
    parsed metadata with the full dictionary expected for that sheet.

    :return:
    """
    sample_sheet = path.join(path_to_module, "fake_ngs_data", "SampleSheet.csv")

    # Expected result, grouped by topic; dict comparison ignores ordering.
    expected = {
        # sheet / run identification
        "iemfileversion": "4",
        "investigatorName": "Some Guy",
        "experimentName": "1",
        "description": "Superbug",
        "date": "10/15/2013",
        # workflow settings
        "workflow": "GenerateFASTQ",
        "application": "FASTQ Only",
        "assay": "Nextera XT",
        "chemistry": "Amplicon",
        "reversecomplement": "0",
        # adapter
        "adapter": "AAAAGGGGAAAAGGGGAAA",
        # read layout
        "readLengths": "251",
        "layoutType": "PAIRED_END",
    }

    parsed = sample_parser.parse_metadata(sample_sheet)

    self.assertEqual(expected, parsed)
def test_parse_metadata_paired_valid(self, mock_csv_reader):
    """
    Parse metadata from an in-memory sample sheet with paired end reads.

    The sheet is assembled as a string whose [Reads] section holds two
    entries, wrapped in a StringIO and exposed through ``mock_csv_reader``.
    The parsed metadata must report a read length of "251", the
    "PAIRED_END" layout, and every field given in the [Header] section.

    :param mock_csv_reader: mock that hands out the csv reader over the
        in-memory sheet (presumably injected by a patch decorator that is
        not visible in this block -- confirm)
    :return:
    """
    # [Header] section: one "key,value" pair per line
    header_section = (
        "IEMFileVersion,4\n"
        "Investigator Name,Test Name\n"
        "Experiment Name,Some_Test_Data\n"
        "Date,2015-05-14\n"
        "Workflow,GenerateFASTQ\n"
        "Application,FASTQ Only\n"
        "Assay,ASDF\n"
        "Description,12-34\n"
        "Chemistry,Yes\n"
    )

    # [Reads] section: two entries
    reads_section = (
        "251\n"
        "251\n"
    )

    # [Data] section: column names followed by the sample rows
    data_columns = (
        "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,"
        "I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,"
        "Description"
    )
    data_rows = (
        "15-0318,,2015-08-05-SE,A01,N701,TAAGGCGA,S502,CTCTCTAT,203\n"
        "15-0455,,2015-08-05-SE,B01,N701,TAAGGCGA,S503,TATCCTCT,203\n"
        "15-0462,,2015-08-05-SE,C01,N701,TAAGGCGA,S505,GTAAGGAG,203\n"
    )

    # Each piece is separated by exactly one newline.
    sheet_text = "\n".join((
        "[Header]",
        header_section,
        "[Reads]",
        reads_section,
        "[Data]",
        data_columns,
        data_rows,
    ))

    # StringIO turns the text into a pseudo file the csv reader can consume;
    # the mock returns the entries of side_effect, one per call.
    in_memory_sheet = StringIO(sheet_text)
    mock_csv_reader.side_effect = [reader(in_memory_sheet)]

    # No real path is needed because the csv reader is served by the mock.
    metadata = sample_parser.parse_metadata(None)

    # The meta data we care about the most
    self.assertEqual(metadata['readLengths'], "251")
    self.assertEqual(metadata['layoutType'], "PAIRED_END")

    # Everything from the [Header] section should be carried over as well
    self.assertEqual(metadata['iemfileversion'], "4")
    self.assertEqual(metadata['investigatorName'], "Test Name")
    self.assertEqual(metadata['experimentName'], "Some_Test_Data")
    self.assertEqual(metadata['date'], "2015-05-14")
    self.assertEqual(metadata['workflow'], "GenerateFASTQ")
    self.assertEqual(metadata['application'], "FASTQ Only")
    self.assertEqual(metadata['assay'], "ASDF")
    self.assertEqual(metadata['description'], "12-34")
    self.assertEqual(metadata['chemistry'], "Yes")