def test_combinatorial_metadata_parsing(self):
    """
    Verify that combinatorial assay metadata (e.g. multiple time points per
    protocol, as used in the Skyline proteomics workflow) is parsed correctly
    from an Experiment Description file.
    """
    description_file = factory.test_file_path(
        "experiment_description/advanced.xlsx")
    study = factory.StudyFactory()
    # creating *AFTER* setup of testing database records
    cache = ExperimentDescriptionContext()
    importer = CombinatorialCreationImporter(study, self.testuser, cache)
    parser = ExperimentDescFileParser(cache, importer)
    inputs: List[CombinatorialDescriptionInput] = parser.parse_excel(
        description_file)
    first_input: CombinatorialDescriptionInput = inputs[0]
    time_type = models.MetadataType.system("Time")
    # each protocol should map to its own set of combinatorial time values
    expected = {
        self.targeted_proteomics.pk: {time_type.pk: [8.0, 24.0]},
        self.metabolomics.pk: {time_type.pk: [4.0, 6.0]},
    }
    self.assertDictEqual(
        first_input.protocol_to_combinatorial_meta_dict, expected)
def test_combinatorial_assay_creation(self):
    """
    Verify combinatorial assay creation, e.g. from combinatorial time
    values in the Skyline workflow.
    """
    study = models.Study.objects.create(name="Test")
    filepath = factory.test_file_path(
        "experiment_description/combinatorial_assays.xlsx")
    # creating *AFTER* setup of testing database records
    cache = ExperimentDescriptionContext()
    importer = CombinatorialCreationImporter(study, self.testuser, cache)
    with open(filepath, "rb") as stream:
        importer.do_import(
            stream,
            ExperimentDescriptionOptions(),
            filename="Fake Excel file",
            file_extension="xlsx",
        )
    # verify results: a single line with two assays per protocol input
    created_line = models.Line.objects.get(study_id=study.pk)
    self.assertEqual(created_line.assay_set.count(), 2)
    proteomics_assays = models.Assay.objects.filter(
        line=created_line, protocol=self.targeted_proteomics)
    observed_times = {
        assay.metadata_get(cache.assay_time_mtype)
        for assay in proteomics_assays
    }
    self.assertEqual(observed_times, {8, 24})
def test_advanced_experiment_description_xlsx(self):
    """
    Verify that an advanced Experiment Description workbook populates a
    study with the expected lines and assays, without errors or warnings.
    """
    strain, _ = models.Strain.objects.get_or_create(name="JW0111")
    # creating *AFTER* setup of testing database records
    cache = ExperimentDescriptionContext()
    cache.strains_by_pk = {strain.pk: strain}
    def_path = factory.test_file_path(
        "experiment_description/advanced.xlsx")
    study = factory.StudyFactory()
    importer = CombinatorialCreationImporter(study, self.testuser, cache)
    parser = ExperimentDescFileParser(cache, importer)
    parsed_inputs = parser.parse_excel(def_path)
    self.assertEqual(
        len(parsed_inputs), 1, "Expected a single set of parsed input")
    # TODO: following two calls should go away
    self._map_input_strains(parsed_inputs, cache, {"JBx_002078": strain})
    importer._query_related_object_context(parsed_inputs)
    creation_result = parsed_inputs[0].populate_study(
        study, importer.cache, ExperimentDescriptionOptions())
    self.assertFalse(importer.errors, "Import generated errors")
    self.assertFalse(importer.warnings, "Import generated warnings")
    # every created line carries the same description and four assays
    for created_line in creation_result.lines_created:
        self.assertEqual(created_line.description, "Description blah blah")
        self.assertEqual(created_line.assay_set.count(), 4)
def test_ed_file_parse_err_detection(self):
    """
    Tests for Experiment Description file errors that can be caught during
    parsing.  Error detection includes catching column headers that don't
    match any of:

    1) Line attributes supported by the parser (e.g. line name)
    2) Line metadata defined in the database
    3) Protocol + assay metadata defined in the database

    Also tests duplicate column header detection for each type of column
    definition implied by 1-3 above.
    """
    # NOTE: original docstring opened with a stray fourth quote (""""),
    # which embedded a literal '"' at the start of the docstring; fixed.
    # parameters don't matter for test, but need to be there
    importer = CombinatorialCreationImporter(None, None)
    file_path = factory.test_file_path(
        "experiment_description/parse_errors.xlsx")
    # creating *AFTER* setup of testing database records
    cache = ExperimentDescriptionContext()
    parser = ExperimentDescFileParser(cache, importer)
    parser.parse_excel(file_path)
    # expect to find these errors: (category, heading, occurrence detail)
    exp_errors = [
        (
            constants.BAD_FILE_CATEGORY,
            constants.INVALID_COLUMN_HEADER,
            '"T3mperature" (col B)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.DUPLICATE_LINE_METADATA,
            '"Media" (col G)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.UNMATCHED_ASSAY_COL_HEADERS_KEY,
            '"Tomperature" (col H)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.DUPLICATE_LINE_ATTR,
            '"Line Name" (col I)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.DUPLICATE_LINE_ATTR,
            '"Replicate Count" (col J)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.DUPLICATE_ASSAY_METADATA,
            '"Targeted Proteomics Time" (col K)',
        ),
        (
            constants.BAD_FILE_CATEGORY,
            constants.DUPLICATE_LINE_METADATA,
            '"Strain(s)" (col L)',
        ),
        (
            constants.BAD_GENERIC_INPUT_CATEGORY,
            constants.INVALID_REPLICATE_COUNT,
            '"X" (D2)',
        ),
        (
            constants.INVALID_FILE_VALUE_CATEGORY,
            constants.MISSING_REQUIRED_LINE_NAME,
            "A3",
        ),
        (
            constants.INVALID_FILE_VALUE_CATEGORY,
            constants.INCORRECT_TIME_FORMAT,
            '"A" (F4)',
        ),
        (
            constants.INVALID_FILE_VALUE_CATEGORY,
            constants.DUPLICATE_LINE_NAME_LITERAL,
            '"181-aceF" (A2, A4)',
        ),
    ]
    for category, heading, detail in exp_errors:
        # make sure the category shows up
        assert category in importer.errors
        # make sure the heading shows up
        category_group = importer.errors.get(category)
        assert heading in category_group
        # make sure the heading has the right details
        assert detail in category_group.get(heading)._occurrence_details