def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file):
    """Read the ``small2`` assay with ``AssayReader`` and verify the result.

    The investigation is parsed first (its reader is tested elsewhere)
    because ``AssayValidator`` is given the investigation together with the
    study and assay info objects taken from it.
    """
    # Parse and validate the investigation the assay belongs to
    investigation = InvestigationReader.from_stream(small2_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # Build the assay reader; the header is checked before any row is read
    assay_reader = AssayReader.from_stream("S1", "A1", small2_assay_file)
    assert len(assay_reader.header) == 14

    # Read the assay and validate it against its study/assay info
    assay = assay_reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()

    # File path (normalised so the check is separator-independent) and sizes
    actual_path = os.path.normpath(str(assay.file))
    assert actual_path.endswith(os.path.normpath("data/i_small2/a_small2.txt"))
    assert len(assay.header) == 14
    assert len(assay.materials) == 25
    assert len(assay.processes) == 41
    assert len(assay.arcs) == 74

    # Comment attached to one of the labeled extracts
    replicate_comment = models.Comment(name="Replicate", value="B")
    labeled_extract = assay.materials["S1-A1-0815-T1-Pro1-B-115-COL5"]
    assert labeled_extract.comments[0] == replicate_comment

    # Expected arcs, written as (tail, head) pairs of unique node names
    arc_pairs = (
        # samples -> extraction
        ("S1-sample-0815-N1", "S1-A1-extraction-2-1"),
        ("S1-sample-0815-T1", "S1-A1-extraction-2-2"),
        # extraction -> extracts
        ("S1-A1-extraction-2-1", "S1-A1-0815-N1-Pro1-COL3"),
        ("S1-A1-extraction-2-2", "S1-A1-0815-T1-Pro1-COL3"),
        # extracts -> labeling
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-1"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-2"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-3"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-4"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-5"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-6"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-7"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-8"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-9"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-10"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-11"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-12"),
        # labeling -> labeled extracts
        ("S1-A1-labeling-4-1", "S1-A1-0815-N1-Pro1-A-114-COL5"),
        ("S1-A1-labeling-4-2", "S1-A1-0815-T1-Pro1-A-115-COL5"),
        ("S1-A1-labeling-4-3", "S1-A1-0815-N1-Pro1-B-114-COL5"),
        ("S1-A1-labeling-4-4", "S1-A1-0815-T1-Pro1-B-115-COL5"),
        ("S1-A1-labeling-4-5", "S1-A1-0815-N1-Pro1-C-114-COL5"),
        ("S1-A1-labeling-4-6", "S1-A1-0815-T1-Pro1-C-115-COL5"),
        ("S1-A1-labeling-4-7", "S1-A1-0815-N1-Pro1-D-114-COL5"),
        ("S1-A1-labeling-4-8", "S1-A1-0815-T1-Pro1-D-115-COL5"),
        ("S1-A1-labeling-4-9", "S1-A1-0815-N1-Pro1-E-114-COL5"),
        ("S1-A1-labeling-4-10", "S1-A1-0815-T1-Pro1-E-115-COL5"),
        ("S1-A1-labeling-4-11", "S1-A1-0815-N1-Pro1-F-114-COL5"),
        ("S1-A1-labeling-4-12", "S1-A1-0815-T1-Pro1-F-115-COL5"),
        # labeled extracts -> chromatography
        ("S1-A1-0815-N1-Pro1-A-114-COL5", "S1-A1-chromatography-8-1"),
        ("S1-A1-0815-T1-Pro1-A-115-COL5", "S1-A1-chromatography-8-2"),
        ("S1-A1-0815-N1-Pro1-B-114-COL5", "S1-A1-chromatography-8-3"),
        ("S1-A1-0815-T1-Pro1-B-115-COL5", "S1-A1-chromatography-8-4"),
        ("S1-A1-0815-N1-Pro1-C-114-COL5", "S1-A1-chromatography-8-5"),
        ("S1-A1-0815-T1-Pro1-C-115-COL5", "S1-A1-chromatography-8-6"),
        ("S1-A1-0815-N1-Pro1-D-114-COL5", "S1-A1-chromatography-8-7"),
        ("S1-A1-0815-T1-Pro1-D-115-COL5", "S1-A1-chromatography-8-8"),
        ("S1-A1-0815-N1-Pro1-E-114-COL5", "S1-A1-chromatography-8-9"),
        ("S1-A1-0815-T1-Pro1-E-115-COL5", "S1-A1-chromatography-8-10"),
        ("S1-A1-0815-N1-Pro1-F-114-COL5", "S1-A1-chromatography-8-11"),
        ("S1-A1-0815-T1-Pro1-F-115-COL5", "S1-A1-chromatography-8-12"),
        # chromatography -> pools / mass spectrometry
        ("S1-A1-chromatography-8-1", "S1-A1-poolA-10"),
        ("S1-A1-chromatography-8-2", "S1-A1-poolA-10"),
        ("S1-A1-chromatography-8-3", "S1-A1-mass spectrometry-9-3"),
        ("S1-A1-chromatography-8-4", "S1-A1-mass spectrometry-9-4"),
        ("S1-A1-chromatography-8-5", "S1-A1-poolC-10"),
        ("S1-A1-chromatography-8-6", "S1-A1-poolC-10"),
        ("S1-A1-chromatography-8-7", "S1-A1-mass spectrometry-9-7"),
        ("S1-A1-chromatography-8-8", "S1-A1-mass spectrometry-9-8"),
        ("S1-A1-chromatography-8-9", "S1-A1-poolE-10"),
        ("S1-A1-chromatography-8-10", "S1-A1-poolE-10"),
        ("S1-A1-chromatography-8-11", "S1-A1-poolF-10"),
        ("S1-A1-chromatography-8-12", "S1-A1-poolF-10"),
        # pools / mass spectrometry -> raw spectral data files
        ("S1-A1-poolA-10", "S1-A1-poolA.raw-COL11"),
        ("S1-A1-mass spectrometry-9-3", "S1-A1-poolB.raw-COL11"),
        ("S1-A1-mass spectrometry-9-4", "S1-A1-poolB.raw-COL11"),
        ("S1-A1-poolC-10", "S1-A1-Empty Raw Spectral Data File-11-5"),
        ("S1-A1-mass spectrometry-9-7", "S1-A1-Empty Raw Spectral Data File-11-7"),
        ("S1-A1-mass spectrometry-9-8", "S1-A1-Empty Raw Spectral Data File-11-8"),
        ("S1-A1-poolE-10", "S1-A1-poolE.raw-COL11"),
        ("S1-A1-poolF-10", "S1-A1-Empty Raw Spectral Data File-11-11"),
        # raw spectral data files -> data transformation / data analysis
        ("S1-A1-poolA.raw-COL11", "S1-A1-data transformation-12-1"),
        ("S1-A1-poolB.raw-COL11", "S1-A1-data transformation-12-3"),
        ("S1-A1-Empty Raw Spectral Data File-11-5", "S1-A1-data transformation-12-5"),
        ("S1-A1-Empty Raw Spectral Data File-11-7", "S1-A1-data transformation-12-7"),
        ("S1-A1-Empty Raw Spectral Data File-11-8", "S1-A1-data transformation-12-8"),
        ("S1-A1-poolE.raw-COL11", "S1-A1-data transformation-12-9"),
        ("S1-A1-Empty Raw Spectral Data File-11-11", "S1-A1-data analysis-13"),
        # data transformation / data analysis -> derived data files
        ("S1-A1-data transformation-12-1", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-3", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-5", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-7", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-8", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-9", "S1-A1-Empty Derived Data File-14-9"),
        ("S1-A1-data analysis-13", "S1-A1-results.csv-COL14"),
    )
    expected_arcs = [models.Arc(tail, head) for tail, head in arc_pairs]

    # Arc order is not part of the check, so compare sorted sequences
    assert sorted(expected_arcs) == sorted(assay.arcs)
def test_parse_full_investigation(full_investigation_file):
    """Parse the full example investigation and compare it with expected models.

    Goes through the ontology sources, the investigation-level info,
    publications and contacts, and then both studies, asserting that
    selected entries equal hand-built ``models`` objects, including their
    comments and header lists.
    """
    # Literals and ontology terms that recur in several expected objects
    castrillo_authors = (
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG."
    )
    growth_paper_title = (
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast."
    )
    manchester_address = "Oxford Road, Manchester M13 9PT, UK"
    manchester_affiliation = (
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester"
    )
    author_role = models.OntologyTermRef(
        "author", "http://purl.obolibrary.org/obo/RoleO_0000061", "ROLEO"
    )
    instrument_term = models.OntologyTermRef(
        "instrument", "http://www.ebi.ac.uk/efo/EFO_0000548", "EFO"
    )
    transcription_profiling = models.OntologyTermRef(
        "transcription profiling", "http://purl.obolibrary.org/obo/OBI_0000424", "OBI"
    )
    dna_microarray = models.OntologyTermRef(
        "DNA microarray", "http://purl.obolibrary.org/obo/OBI_0400148", "OBI"
    )
    rate_term = models.OntologyTermRef(
        "rate", "http://purl.obolibrary.org/obo/PATO_0000161", "PATO"
    )

    # Read the investigation from the file-like object and validate it
    investigation = InvestigationReader.from_stream(full_investigation_file).read()
    InvestigationValidator(investigation).validate()
    assert investigation

    # ---- Ontology sources ----
    ontology_refs = investigation.ontology_source_refs
    assert len(ontology_refs) == 9
    ontology_headers = list(investigation_headers.ONTOLOGY_SOURCE_REF_KEYS)
    ontology_headers.append("Comment[Test]")
    expected_obi = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("Test", "4"),),
        ontology_headers,
    )
    assert ontology_refs["OBI"] == expected_obi
    expected_ncbitaxon = models.OntologyRef(
        "NCBITAXON",
        "http://data.bioontology.org/ontologies/NCBITAXON",
        "2",
        "National Center for Biotechnology Information (NCBI) Organismal "
        "Classification",
        (models.Comment("Test", "1"),),
        ontology_headers,
    )
    assert ontology_refs["NCBITAXON"] == expected_ncbitaxon

    # ---- Investigation basic info ----
    info = investigation.info
    assert info.title == (
        "Growth control of the eukaryote cell: a systems biology study "
        "in yeast"
    )
    assert info.identifier == "BII-I-1"
    assert info.submission_date == date(2007, 4, 30)
    assert info.public_release_date == date(2009, 3, 10)
    assert info.headers == [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]

    # ---- Investigation publications ----
    publications = investigation.publications
    assert len(publications) == 3
    publication_keys = investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS
    publication_headers = [
        *publication_keys[0:4],
        "Comment[Subtitle]",
        *publication_keys[4:],
    ]
    expected_first_publication = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        castrillo_authors,
        growth_paper_title,
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("Subtitle", ""),),
        publication_headers,
    )
    assert publications[0] == expected_first_publication
    expected_second_publication = models.PublicationInfo(
        "1231222",
        "",
        "Piatnochka IT.",
        "Effect of prednisolone on the cardiovascular system in complex "
        "treatment of newly detected pulmonary tuberculosis",
        models.OntologyTermRef("published", "http://www.ebi.ac.uk/efo/EFO_0001796", "EFO"),
        (models.Comment("Subtitle", "Something"),),
        publication_headers,
    )
    assert publications[1] == expected_second_publication

    # ---- Investigation contacts ----
    contacts = investigation.contacts
    assert len(contacts) == 3
    contact_headers = [
        *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
        "Comment[Investigation Person ORCID]",
        "Comment[Investigation Person REF]",
    ]
    expected_oliver = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        manchester_address,
        manchester_affiliation,
        models.OntologyTermRef("corresponding author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "12345"),
            models.Comment("Investigation Person REF", "personA"),
        ),
        contact_headers,
    )
    assert contacts[0] == expected_oliver
    expected_castrillo = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        manchester_address,
        manchester_affiliation,
        models.OntologyTermRef("author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "0987654321"),
            models.Comment("Investigation Person REF", "personB"),
        ),
        contact_headers,
    )
    assert contacts[1] == expected_castrillo
    expected_zeef = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        manchester_address,
        manchester_affiliation,
        author_role,
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        contact_headers,
    )
    assert contacts[2] == expected_zeef

    # ---- Studies ----
    assert len(investigation.studies) == 2

    # ---- Study 1 ----
    study_1 = investigation.studies[0]
    assert study_1.info.identifier == "BII-S-1"
    assert study_1.info.title == (
        "Study of the impact of changes in flux on the transcriptome, "
        "proteome, endometabolome and exometabolome of the yeast "
        "Saccharomyces cerevisiae under different nutrient limitations"
    )
    assert study_1.info.path == Path("s_BII-S-1.txt")

    # Study 1 - Design descriptors
    assert len(study_1.designs) == 2
    design_keys = investigation_headers.STUDY_DESIGN_DESCR_KEYS
    design_headers = [
        *design_keys[0:1],
        "Comment[Test1]",
        *design_keys[1:],
        "Comment[Test2]",
    ]
    expected_designs = (
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "intervention design", "http://purl.obolibrary.org/obo/OBI_0000115", "OBI"
            ),
            (models.Comment("Test1", "1"), models.Comment("Test2", "3")),
            design_headers,
        ),
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "genotyping design", "http://purl.obolibrary.org/obo/OBI_0001444", "OBI"
            ),
            (models.Comment("Test1", "2"), models.Comment("Test2", "4")),
            design_headers,
        ),
    )
    assert study_1.designs == expected_designs

    # Study 1 - Publications
    assert len(study_1.publications) == 1
    study_publication_keys = investigation_headers.STUDY_PUBLICATIONS_KEYS
    expected_study_publication = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        castrillo_authors,
        growth_paper_title,
        models.OntologyTermRef("published", "", ""),
        (models.Comment("Subtitle", "Something"),),
        [
            *study_publication_keys[0:4],
            "Comment[Subtitle]",
            *study_publication_keys[4:],
        ],
    )
    assert study_1.publications[0] == expected_study_publication

    # Study 1 - Factors
    assert len(study_1.factors) == 2
    factor_headers = [*investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsTest]"]
    expected_nutrient_factor = models.FactorInfo(
        "limiting nutrient",
        models.OntologyTermRef(
            "chemical entity", "http://purl.obolibrary.org/obo/CHEBI_24431", "CHEBI"
        ),
        (models.Comment("FactorsTest", "1"),),
        factor_headers,
    )
    assert study_1.factors["limiting nutrient"] == expected_nutrient_factor
    expected_rate_factor = models.FactorInfo(
        "rate",
        rate_term,
        (models.Comment("FactorsTest", "2"),),
        factor_headers,
    )
    assert study_1.factors["rate"] == expected_rate_factor

    # Study 1 - Assays
    assert len(study_1.assays) == 3
    assay_headers = [*investigation_headers.STUDY_ASSAYS_KEYS, "Comment[Extra Info]"]
    expected_proteome_assay = models.AssayInfo(
        models.OntologyTermRef(
            "protein expression profiling", "http://purl.obolibrary.org/obo/OBI_0000615", "OBI"
        ),
        models.OntologyTermRef(
            "mass spectrometry", "http://purl.obolibrary.org/obo/OBI_0000470", "OBI"
        ),
        "iTRAQ",
        Path("a_proteome.txt"),
        (models.Comment("Extra Info", "a"),),
        assay_headers,
    )
    assert study_1.assays[0] == expected_proteome_assay
    expected_transcriptome_assay = models.AssayInfo(
        transcription_profiling,
        dna_microarray,
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("Extra Info", "c"),),
        assay_headers,
    )
    assert study_1.assays[2] == expected_transcriptome_assay

    # Study 1 - Protocols
    assert len(study_1.protocols) == 7
    protocol_keys = investigation_headers.STUDY_PROTOCOLS_KEYS
    protocol_headers = [
        *protocol_keys[0:7],
        "Comment[Protocol Rating]",
        *protocol_keys[7:],
    ]
    growth_description = (
        "1. Biomass samples (45 ml) were taken via the sample port of the "
        "Applikon fermenters. The cells were pelleted by centrifugation for 5 "
        "min at 5000 rpm. The supernatant was removed and the RNA pellet "
        "resuspended in the residual medium to form a slurry. This was added "
        "in a dropwise manner directly into a 5 ml Teflon flask (B. Braun "
        "Biotech, Germany) containing liquid nitrogen and a 7 mm-diameter "
        "tungsten carbide ball. After allowing evaporation of the liquid "
        "nitrogen the flask was reassembled and the cells disrupted by "
        "agitation at 1500 rpm for 2 min in a Microdismembranator U (B. Braun "
        "Biotech, Germany) 2. The frozen powder was then dissolved in 1 ml of "
        "TriZol reagent (Sigma-Aldrich, UK), vortexed for 1 min, and then kept"
        " at room temperature for a further 5min. 3. Chloroform extraction was"
        " performed by addition of 0.2 ml chloroform, shaking vigorously or 15"
        " s, then 5min incubation at room temperature. 4. Following "
        "centrifugation at 12,000 rpm for 5 min, the RNA (contained in the "
        "aqueous phase) was precipitated with 0.5 vol of 2-propanol at room "
        "temperature for 15 min. 5. After further centrifugation (12,000 rpm "
        "for 10 min at 4 C) the RNA pellet was washed twice with 70 % (v/v) "
        "ethanol, briefly air-dried, and redissolved in 0.5 ml diethyl "
        "pyrocarbonate (DEPC)-treated water. 6. The single-stranded RNA was "
        "precipitated once more by addition of 0.5 ml of LiCl buffer (4 M "
        "LiCl, 20 mM Tris-HCl, pH 7.5, 10 mM EDTA), thus removing tRNA and "
        "DNA from the sample. 7. After precipitation (20 C for 1h) and "
        "centrifugation (12,000 rpm, 30 min, 4 C), the RNA was washed twice in"
        " 70 % (v/v) ethanol prior to being dissolved in a minimal volume of "
        "DEPC-treated water. 8. Total RNA quality was checked using the RNA "
        "6000 Nano Assay, and analysed on an Agilent 2100 Bioanalyser (Agilent"
        " Technologies). RNA was quantified using the Nanodrop ultra low "
        "volume spectrophotometer (Nanodrop Technologies)."
    )
    expected_growth_protocol = models.ProtocolInfo(
        "growth protocol",
        models.OntologyTermRef("growth", "", ""),
        growth_description,
        "",
        "",
        {"rate": rate_term},
        {},
        (models.Comment("Protocol Rating", "1"),),
        protocol_headers,
    )
    assert study_1.protocols["growth protocol"] == expected_growth_protocol
    expected_extraction_protocol = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef(
            "extraction", "http://purl.obolibrary.org/obo/OBI_0302884", "OBI"
        ),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "", ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {"pipette": models.ProtocolComponentInfo("pipette", instrument_term)},
        (models.Comment("Protocol Rating", "7"),),
        protocol_headers,
    )
    assert study_1.protocols["metabolite extraction"] == expected_extraction_protocol

    # Study 1 - Contacts
    assert len(study_1.contacts) == 3
    study_contact_headers = [
        *investigation_headers.STUDY_CONTACTS_KEYS,
        "Comment[Study Person REF]",
    ]
    expected_study_oliver = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        manchester_address,
        manchester_affiliation,
        models.OntologyTermRef("corresponding author", "", ""),
        (models.Comment("Study Person REF", ""),),
        study_contact_headers,
    )
    assert study_1.contacts[0] == expected_study_oliver
    expected_study_castrillo = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        manchester_address,
        manchester_affiliation,
        author_role,
        (models.Comment("Study Person REF", ""),),
        study_contact_headers,
    )
    assert study_1.contacts[1] == expected_study_castrillo
    expected_study_zeef = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        manchester_address,
        manchester_affiliation,
        author_role,
        (models.Comment("Study Person REF", ""),),
        study_contact_headers,
    )
    assert study_1.contacts[2] == expected_study_zeef

    # ---- Study 2 ----
    study_2 = investigation.studies[1]
    study_info_keys = investigation_headers.STUDY_INFO_KEYS
    expected_study_2_info = models.BasicInfo(
        Path("s_BII-S-2.txt"),
        "BII-S-2",
        "A time course analysis of transcription response in yeast treated "
        "with rapamycin, a specific inhibitor of the TORC1 complex: impact "
        "on yeast growth",
        "Comprehensive high-throughput analyses at the levels of mRNAs, "
        "proteins, and metabolites, and studies on gene expression patterns "
        "are required for systems biology studies of cell growth [4,26-29]. "
        "Although such comprehensive data sets are lacking, many studies have "
        "pointed to a central role for the target-of-rapamycin (TOR) signal "
        "transduction pathway in growth control. TOR is a serine/threonine "
        "kinase that has been conserved from yeasts to mammals; it integrates "
        "signals from nutrients or growth factors to regulate cell growth and "
        "cell-cycle progression coordinately. Although such comprehensive data "
        "sets are lacking, many studies have pointed to a central role for the "
        "target-of-rapamycin (TOR) signal transduction pathway in growth "
        "control. TOR is a serine/threonine kinase that has been conserved "
        "from yeasts to mammals; it integrates signals from nutrients or "
        "growth factors to regulate cell growth and cell-cycle progression "
        "coordinately. The effect of rapamycin were studied as follows: a "
        "culture growing at mid-exponential phase was divided into two. "
        "Rapamycin (200 ng/ml) was added to one half, and the drug's solvent "
        "to the other, as the control. Samples were taken at 0, 1, 2 and 4 h "
        "after treatment. Gene expression at the mRNA level was investigated "
        "by transcriptome analysis using Affymetrix hybridization arrays.",
        date(2007, 4, 30),
        date(2009, 3, 10),
        (
            models.Comment("Study Grant Number", ""),
            models.Comment("Study Funding Agency", ""),
            models.Comment("Manuscript Licence", "CC BY 3.0"),
            models.Comment("Experimental Metadata Licence", "CC0"),
            models.Comment("Data Repository", ""),
            models.Comment("Data Record Accession", ""),
            models.Comment("Data Record URI", ""),
            models.Comment("Supplementary Information File Name", ""),
            models.Comment("Supplementary Information File Type", ""),
            models.Comment("Supplementary File URI", ""),
            models.Comment("Subject Keywords", ""),
        ),
        [
            *study_info_keys[0:3],
            "Comment[Study Grant Number]",
            "Comment[Study Funding Agency]",
            *study_info_keys[3:],
            "Comment[Manuscript Licence]",
            "Comment[Experimental Metadata Licence]",
            "Comment[Data Repository]",
            "Comment[Data Record Accession]",
            "Comment[Data Record URI]",
            "Comment[Supplementary Information File Name]",
            "Comment[Supplementary Information File Type]",
            "Comment[Supplementary File URI]",
            "Comment[Subject Keywords]",
        ],
    )
    assert study_2.info == expected_study_2_info

    # Study 2 - Factors
    assert len(study_2.factors) == 3
    expected_time_factor = models.FactorInfo(
        "exposure time",
        models.OntologyTermRef(
            "time", "http://purl.obolibrary.org/obo/PATO_0000165", "OBI_BCGO"
        ),
        (),
        list(investigation_headers.STUDY_FACTORS_KEYS),
    )
    assert study_2.factors["exposure time"] == expected_time_factor

    # Study 2 - Assays
    assert len(study_2.assays) == 1
    expected_microarray_assay = models.AssayInfo(
        transcription_profiling,
        dna_microarray,
        "Affymetrix",
        Path("a_microarray.txt"),
        (),
        list(investigation_headers.STUDY_ASSAYS_KEYS),
    )
    assert study_2.assays[0] == expected_microarray_assay

    # Study 2 - Protocols
    assert len(study_2.protocols) == 10
    expected_nmr_protocol = models.ProtocolInfo(
        "NMR spectroscopy",
        models.OntologyTermRef(
            "NMR spectroscopy", "http://purl.obolibrary.org/obo/OBI_0000623", "OBI"
        ),
        "",
        "",
        "",
        {},
        {
            "NMR tubes": models.ProtocolComponentInfo(
                "NMR tubes", models.OntologyTermRef(None, None, None)
            ),
            "Bruker-Av600": models.ProtocolComponentInfo("Bruker-Av600", instrument_term),
        },
        (),
        list(investigation_headers.STUDY_PROTOCOLS_KEYS),
    )
    assert study_2.protocols["NMR spectroscopy"] == expected_nmr_protocol

    # Study 2 - Contacts
    assert len(study_2.contacts) == 3
    expected_study_2_castrillo = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        manchester_address,
        manchester_affiliation,
        author_role,
        (models.Comment("Study Person REF", "personB"),),
        [*investigation_headers.STUDY_CONTACTS_KEYS, "Comment[Study Person REF]"],
    )
    assert study_2.contacts[1] == expected_study_2_castrillo
def test_parse_comment_investigation(comment_investigation_file):
    """Parse the comment-heavy example investigation and check comment handling.

    For one representative entry per section, the parsed object is compared
    with a hand-built ``models`` object whose comments and header list place
    the ``Comment[...]`` entry at the expected position among the standard
    section keys.
    """
    # Read Investigation from file-like object
    reader = InvestigationReader.from_stream(comment_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Check results
    # Investigation
    assert investigation

    # Ontology sources
    assert 9 == len(investigation.ontology_source_refs)
    expected = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("OntologyComment", "TestValue01"),),
        [
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[0:2],
            "Comment[OntologyComment]",
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[2:],
        ],
    )
    assert expected == investigation.ontology_source_refs["OBI"]

    # Basic info
    assert "BII-I-1" == investigation.info.identifier
    assert "Owning Organisation URI" == investigation.info.comments[2].name
    assert "TestValue01" == investigation.info.comments[2].value
    expected_headers = [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]
    # Previously this list was built but never compared, leaving the
    # investigation-info header order unverified.
    assert expected_headers == investigation.info.headers

    # Publications
    assert 3 == len(investigation.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("InvestPubsComment", "TestValue01"),),
        [
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[0:2],
            "Comment[InvestPubsComment]",
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[2:],
        ],
    )
    assert expected == investigation.publications[0]

    # Contacts
    assert 3 == len(investigation.contacts)
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef(
            "author", "http://purl.obolibrary.org/obo/RoleO_0000061", "ROLEO"
        ),
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        [
            *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
            "Comment[Investigation Person ORCID]",
            "Comment[Investigation Person REF]",
        ],
    )
    assert expected == investigation.contacts[2]

    # Studies
    assert len(investigation.studies) == 2

    # Study 1
    study = investigation.studies[0]
    assert "BII-S-1" == study.info.identifier
    assert Path("s_BII-S-1.txt") == study.info.path
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value
    expected_headers = [
        *investigation_headers.STUDY_INFO_KEYS[0:3],
        "Comment[Study Grant Number]",
        "Comment[Study Funding Agency]",
        *investigation_headers.STUDY_INFO_KEYS[3:],
        "Comment[Manuscript Licence]",
        "Comment[Experimental Metadata Licence]",
        "Comment[Data Repository]",
        "Comment[Data Record Accession]",
        "Comment[Data Record URI]",
        "Comment[Supplementary Information File Name]",
        "Comment[Supplementary Information File Type]",
        "Comment[Supplementary File URI]",
        "Comment[Subject Keywords]",
    ]
    assert expected_headers == study.info.headers

    # Study 1 - Design descriptors
    assert 2 == len(study.designs)
    expected = models.DesignDescriptorsInfo(
        models.OntologyTermRef(
            "genotyping design", "http://purl.obolibrary.org/obo/OBI_0001444", "OBI"
        ),
        (models.Comment("DesignDescsComment", "TestValue01"),),
        [
            *investigation_headers.STUDY_DESIGN_DESCR_KEYS,
            "Comment[DesignDescsComment]",
        ],
    )
    assert expected == study.designs[1]

    # Study 1 - Publications
    assert 1 == len(study.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("published", "", ""),
        (models.Comment("StudyPubsComment", "TestValue01"),),
        [
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[0:4],
            "Comment[StudyPubsComment]",
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[4:],
        ],
    )
    assert expected == study.publications[0]

    # Study 1 - Factors
    assert 2 == len(study.factors)
    expected = models.FactorInfo(
        "rate",
        models.OntologyTermRef(
            "rate", "http://purl.obolibrary.org/obo/PATO_0000161", "PATO"
        ),
        (models.Comment("FactorsComment", "TestValue01"),),
        [*investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsComment]"],
    )
    assert expected == study.factors["rate"]

    # Study 1 - Assays
    assert 3 == len(study.assays)
    expected = models.AssayInfo(
        models.OntologyTermRef(
            "transcription profiling", "http://purl.obolibrary.org/obo/OBI_0000424", "OBI"
        ),
        models.OntologyTermRef(
            "DNA microarray", "http://purl.obolibrary.org/obo/OBI_0400148", "OBI"
        ),
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("AssaysComment", "A comment within ontology terms?"),),
        [
            *investigation_headers.STUDY_ASSAYS_KEYS[0:5],
            "Comment[AssaysComment]",
            *investigation_headers.STUDY_ASSAYS_KEYS[5:],
        ],
    )
    assert expected == study.assays[2]

    # Study 1 - Protocols
    assert 7 == len(study.protocols)
    expected = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef(
            "extraction", "http://purl.obolibrary.org/obo/OBI_0302884", "OBI"
        ),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "", ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {
            "pipette": models.ProtocolComponentInfo(
                "pipette",
                models.OntologyTermRef(
                    "instrument", "http://www.ebi.ac.uk/efo/EFO_0000548", "EFO"
                ),
            )
        },
        (models.Comment("ProtocolsComment", "TestValue01"),),
        [
            *investigation_headers.STUDY_PROTOCOLS_KEYS[0:7],
            "Comment[ProtocolsComment]",
            *investigation_headers.STUDY_PROTOCOLS_KEYS[7:],
        ],
    )
    assert expected == study.protocols["metabolite extraction"]

    # Study 1 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef(
            "author", "http://purl.obolibrary.org/obo/RoleO_0000061", "ROLEO"
        ),
        (models.Comment("Study Person REF", ""),),
        [*investigation_headers.STUDY_CONTACTS_KEYS, "Comment[Study Person REF]"],
    )
    assert expected == study.contacts[1]

    # Study 2
    study = investigation.studies[1]
    assert "BII-S-2" == study.info.identifier
    assert Path("s_BII-S-2.txt") == study.info.path
    assert "Study Grant Number" == study.info.comments[0].name
    assert "" == study.info.comments[0].value
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value
    expected_headers = [
        *investigation_headers.STUDY_INFO_KEYS[0:3],
        "Comment[Study Grant Number]",
        "Comment[Study Funding Agency]",
        *investigation_headers.STUDY_INFO_KEYS[3:],
        "Comment[Manuscript Licence]",
        "Comment[Experimental Metadata Licence]",
        "Comment[Data Repository]",
        "Comment[Data Record Accession]",
        "Comment[Data Record URI]",
        "Comment[Supplementary Information File Name]",
        "Comment[Supplementary Information File Type]",
        "Comment[Supplementary File URI]",
        "Comment[Subject Keywords]",
    ]
    assert expected_headers == study.info.headers

    # Study 2 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef(
            "author", "http://purl.obolibrary.org/obo/RoleO_0000061", "ROLEO"
        ),
        (models.Comment("Study Person REF", "personB"),),
        [*investigation_headers.STUDY_CONTACTS_KEYS, "Comment[Study Person REF]"],
    )
    assert expected == study.contacts[1]
def _build_minimal_investigation():
    """Build the ``InvestigationInfo`` with one study, one assay and two protocols."""
    # Basic study information; the two comments are declared with empty values
    study_info = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # The single assay of the study
    assay_01 = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Protocols referenced by the processes of the study and assay graphs
    protocol_01 = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    protocol_02 = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Study section tying together study info, assay and protocols
    study_01 = models.StudyInfo(
        info=study_info,
        designs=(),
        publications=(),
        factors={},
        assays=(assay_01,),
        protocols={
            protocol_01.name: protocol_01,
            protocol_02.name: protocol_02,
        },
        contacts=(),
    )

    # Ontology term source reference for the "OBI" terms used above
    onto_ref_01 = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Basic investigation information
    invest_info = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    return models.InvestigationInfo(
        ontology_source_refs={onto_ref_01.name: onto_ref_01},
        info=invest_info,
        publications=(),
        contacts=(),
        studies=(study_01,),
    )


def _build_minimal_study_graph(investigation):
    """Build the ``Study`` graph (source -> sample collection -> sample) for the first study."""
    # Create at least one source, one sample and one collection process.
    # Unique names are required for unambiguous node identification.
    source_01 = models.Material(
        type="Source Name",
        unique_name="S1-source-0815",
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    process_01 = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Arcs connect material and process nodes, referenced by their unique names
    arc_01 = models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1")
    arc_02 = models.Arc(tail="S1-sample collection-2-1", head="S1-sample-0815-N1")

    return models.Study(
        file=investigation.studies[0].info.path,
        header=None,
        materials={
            source_01.unique_name: source_01,
            sample_01.unique_name: sample_01,
        },
        processes={process_01.unique_name: process_01},
        arcs=(arc_01, arc_02),
    )


def _build_minimal_assay_graph(investigation):
    """Build the ``Assay`` graph (sample -> sequencing -> raw data files) for the first assay."""
    # Create at least one sample, one output material and one process.
    # Unique names are required for unambiguous node identification.
    # Explicit header definition per node is currently required to enable export to ISA-Tab.
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    data_file_01 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    data_file_02 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    process_01 = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Arcs connect material and process nodes, referenced by their unique names.
    # The two raw data files are chained one after the other (R1 -> R2).
    arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    return models.Assay(
        file=investigation.studies[0].assays[0].path,
        header=None,
        materials={
            sample_01.unique_name: sample_01,
            data_file_01.unique_name: data_file_01,
            data_file_02.unique_name: data_file_02,
        },
        processes={process_01.unique_name: process_01},
        arcs=arcs,
    )


def create_and_write(out_path):
    """Create an investigation with a study and assay and write to ``out_path``.

    Three files are written into the directory ``out_path``, named after the
    paths stored in the models: ``i_minimal.txt`` (investigation),
    ``s_minimal.txt`` (study) and ``a_minimal.txt`` (assay).  Each model is
    validated immediately before it is written, so files written earlier
    remain on disk if a later step raises.

    :param out_path: Directory to write into; it is not created here.
    """
    # Investigation: build, validate, write
    investigation = _build_minimal_investigation()
    InvestigationValidator(investigation).validate()
    # newline="" leaves line-ending handling to the writer
    with open(join(out_path, investigation.info.path), "wt", newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation, output_file=outputf).write()

    study_info = investigation.studies[0]
    assay_info = study_info.assays[0]

    # Study graph: build, validate, write
    study_graph_01 = _build_minimal_study_graph(investigation)
    StudyValidator(
        investigation=investigation, study_info=study_info, study=study_graph_01
    ).validate()
    with open(join(out_path, study_info.info.path), "wt", newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph_01, output_file=outputf).write()

    # Assay graph: build, validate, write
    assay_graph_01 = _build_minimal_assay_graph(investigation)
    AssayValidator(
        investigation=investigation,
        study_info=study_info,
        assay_info=assay_info,
        assay=assay_graph_01,
    ).validate()
    with open(join(out_path, assay_info.path), "wt", newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph_01, output_file=outputf).write()