def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_file):
    """Parse the minimal assay with ``AssayReader`` and compare the whole graph.

    ``AssayReader`` returns an ``Assay`` object (header, materials, processes
    and arcs), whereas ``AssayRowReader`` only yields the row-wise nodes.
    """

    def raw_data_file(unique_name, name):
        # Expected node for a "Raw Data File" column without any annotations.
        return models.Material(
            "Raw Data File",
            unique_name,
            name,
            None,
            (),
            (),
            (),
            None,
            [table_headers.RAW_DATA_FILE],
        )

    # The investigation parser is tested elsewhere; here it only supplies the
    # context objects required by the assay validator.
    investigation = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()
    # NOTE(review): ``test_study_reader_minimal_study`` expects 2 warnings from
    # the same validation of the same fixture -- confirm which count is right.
    assert len(record) == 1

    # The header is inspected before any row has been read.
    reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert len(reader.header) == 5

    # Read the full assay and validate it against its investigation entries.
    assay = reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()

    # Overall shape of the parsed assay.
    expected_suffix = os.path.normpath("data/i_minimal/a_minimal.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert len(assay.header) == 5
    assert len(assay.materials) == 3
    assert len(assay.processes) == 1
    assert len(assay.arcs) == 3

    # Unique node names, shared between the node and the arc expectations.
    sample_id = "S1-sample-0815-N1"
    sequencing_id = "S1-A1-0815-N1-DNA1-WES1-3"
    read1_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    read2_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"

    expected_materials = {
        sample_id: models.Material(
            "Sample Name",
            sample_id,
            "0815-N1",
            None,
            (),
            (),
            (),
            None,
            [table_headers.SAMPLE_NAME],
        ),
        read1_id: raw_data_file(read1_id, "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz"),
        read2_id: raw_data_file(read2_id, "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz"),
    }
    for unique_name, material in expected_materials.items():
        assert material == assay.materials[unique_name]

    expected_process = models.Process(
        "nucleic acid sequencing",
        sequencing_id,
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected_process == assay.processes[sequencing_id]

    # Arcs are compared as an ordered tuple.
    expected_arcs = (
        models.Arc(sample_id, sequencing_id),
        models.Arc(sequencing_id, read1_id),
        models.Arc(read1_id, read2_id),
    )
    assert expected_arcs == assay.arcs
def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file):
    """Parse the "small2" assay with ``AssayReader`` and compare counts, a comment and all arcs."""
    # The investigation parser is tested elsewhere; validation is run here
    # without capturing warnings.
    investigation = InvestigationReader.from_stream(small2_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # The header is inspected before any row has been read.
    reader = AssayReader.from_stream("S1", "A1", small2_assay_file)
    assert len(reader.header) == 14

    # Read the full assay and validate it against its investigation entries.
    assay = reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()

    # Overall shape of the parsed assay.
    expected_suffix = os.path.normpath("data/i_small2/a_small2.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert len(assay.header) == 14
    assert len(assay.materials) == 25
    assert len(assay.processes) == 41
    assert len(assay.arcs) == 74

    # The first comment on one labeled extract carries its replicate letter.
    replicate_comment = models.Comment(name="Replicate", value="B")
    labeled_extract = assay.materials["S1-A1-0815-T1-Pro1-B-115-COL5"]
    assert labeled_extract.comments[0] == replicate_comment

    # Expected arcs as (tail, head) pairs, grouped by the step they belong to.
    arc_pairs = [
        # Samples -> extraction -> extracts
        ("S1-sample-0815-N1", "S1-A1-extraction-2-1"),
        ("S1-sample-0815-T1", "S1-A1-extraction-2-2"),
        ("S1-A1-extraction-2-1", "S1-A1-0815-N1-Pro1-COL3"),
        ("S1-A1-extraction-2-2", "S1-A1-0815-T1-Pro1-COL3"),
        # Extracts -> labeling
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-1"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-2"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-3"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-4"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-5"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-6"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-7"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-8"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-9"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-10"),
        ("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-11"),
        ("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-12"),
        # Labeling -> labeled extracts
        ("S1-A1-labeling-4-1", "S1-A1-0815-N1-Pro1-A-114-COL5"),
        ("S1-A1-labeling-4-2", "S1-A1-0815-T1-Pro1-A-115-COL5"),
        ("S1-A1-labeling-4-3", "S1-A1-0815-N1-Pro1-B-114-COL5"),
        ("S1-A1-labeling-4-4", "S1-A1-0815-T1-Pro1-B-115-COL5"),
        ("S1-A1-labeling-4-5", "S1-A1-0815-N1-Pro1-C-114-COL5"),
        ("S1-A1-labeling-4-6", "S1-A1-0815-T1-Pro1-C-115-COL5"),
        ("S1-A1-labeling-4-7", "S1-A1-0815-N1-Pro1-D-114-COL5"),
        ("S1-A1-labeling-4-8", "S1-A1-0815-T1-Pro1-D-115-COL5"),
        ("S1-A1-labeling-4-9", "S1-A1-0815-N1-Pro1-E-114-COL5"),
        ("S1-A1-labeling-4-10", "S1-A1-0815-T1-Pro1-E-115-COL5"),
        ("S1-A1-labeling-4-11", "S1-A1-0815-N1-Pro1-F-114-COL5"),
        ("S1-A1-labeling-4-12", "S1-A1-0815-T1-Pro1-F-115-COL5"),
        # Labeled extracts -> chromatography
        ("S1-A1-0815-N1-Pro1-A-114-COL5", "S1-A1-chromatography-8-1"),
        ("S1-A1-0815-T1-Pro1-A-115-COL5", "S1-A1-chromatography-8-2"),
        ("S1-A1-0815-N1-Pro1-B-114-COL5", "S1-A1-chromatography-8-3"),
        ("S1-A1-0815-T1-Pro1-B-115-COL5", "S1-A1-chromatography-8-4"),
        ("S1-A1-0815-N1-Pro1-C-114-COL5", "S1-A1-chromatography-8-5"),
        ("S1-A1-0815-T1-Pro1-C-115-COL5", "S1-A1-chromatography-8-6"),
        ("S1-A1-0815-N1-Pro1-D-114-COL5", "S1-A1-chromatography-8-7"),
        ("S1-A1-0815-T1-Pro1-D-115-COL5", "S1-A1-chromatography-8-8"),
        ("S1-A1-0815-N1-Pro1-E-114-COL5", "S1-A1-chromatography-8-9"),
        ("S1-A1-0815-T1-Pro1-E-115-COL5", "S1-A1-chromatography-8-10"),
        ("S1-A1-0815-N1-Pro1-F-114-COL5", "S1-A1-chromatography-8-11"),
        ("S1-A1-0815-T1-Pro1-F-115-COL5", "S1-A1-chromatography-8-12"),
        # Chromatography -> pooled or per-row mass spectrometry
        ("S1-A1-chromatography-8-1", "S1-A1-poolA-10"),
        ("S1-A1-chromatography-8-2", "S1-A1-poolA-10"),
        ("S1-A1-chromatography-8-3", "S1-A1-mass spectrometry-9-3"),
        ("S1-A1-chromatography-8-4", "S1-A1-mass spectrometry-9-4"),
        ("S1-A1-chromatography-8-5", "S1-A1-poolC-10"),
        ("S1-A1-chromatography-8-6", "S1-A1-poolC-10"),
        ("S1-A1-chromatography-8-7", "S1-A1-mass spectrometry-9-7"),
        ("S1-A1-chromatography-8-8", "S1-A1-mass spectrometry-9-8"),
        ("S1-A1-chromatography-8-9", "S1-A1-poolE-10"),
        ("S1-A1-chromatography-8-10", "S1-A1-poolE-10"),
        ("S1-A1-chromatography-8-11", "S1-A1-poolF-10"),
        ("S1-A1-chromatography-8-12", "S1-A1-poolF-10"),
        # Mass spectrometry -> raw spectral data files
        ("S1-A1-poolA-10", "S1-A1-poolA.raw-COL11"),
        ("S1-A1-mass spectrometry-9-3", "S1-A1-poolB.raw-COL11"),
        ("S1-A1-mass spectrometry-9-4", "S1-A1-poolB.raw-COL11"),
        ("S1-A1-poolC-10", "S1-A1-Empty Raw Spectral Data File-11-5"),
        ("S1-A1-mass spectrometry-9-7", "S1-A1-Empty Raw Spectral Data File-11-7"),
        ("S1-A1-mass spectrometry-9-8", "S1-A1-Empty Raw Spectral Data File-11-8"),
        ("S1-A1-poolE-10", "S1-A1-poolE.raw-COL11"),
        ("S1-A1-poolF-10", "S1-A1-Empty Raw Spectral Data File-11-11"),
        # Raw spectral data files -> data transformation / data analysis
        ("S1-A1-poolA.raw-COL11", "S1-A1-data transformation-12-1"),
        ("S1-A1-poolB.raw-COL11", "S1-A1-data transformation-12-3"),
        ("S1-A1-Empty Raw Spectral Data File-11-5", "S1-A1-data transformation-12-5"),
        ("S1-A1-Empty Raw Spectral Data File-11-7", "S1-A1-data transformation-12-7"),
        ("S1-A1-Empty Raw Spectral Data File-11-8", "S1-A1-data transformation-12-8"),
        ("S1-A1-poolE.raw-COL11", "S1-A1-data transformation-12-9"),
        ("S1-A1-Empty Raw Spectral Data File-11-11", "S1-A1-data analysis-13"),
        # Data transformation / data analysis -> derived data files
        ("S1-A1-data transformation-12-1", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-3", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-5", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-7", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-8", "S1-A1-results.csv-COL14"),
        ("S1-A1-data transformation-12-9", "S1-A1-Empty Derived Data File-14-9"),
        ("S1-A1-data analysis-13", "S1-A1-results.csv-COL14"),
    ]
    expected_arcs = [models.Arc(tail, head) for tail, head in arc_pairs]

    # Both sides are sorted, so this comparison ignores arc order.
    assert sorted(expected_arcs) == sorted(assay.arcs)
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Parse the small assay with ``AssayReader`` and compare the resulting graph."""

    def bare_material(node_type, header, unique_name, name):
        # Expected material that carries nothing but its name column.
        return models.Material(
            node_type, unique_name, name, None, (), (), (), None, [header]
        )

    def bare_process(protocol_ref, unique_name, name, name_type, headers):
        # Expected process without date, performer, parameters or comments.
        return models.Process(
            protocol_ref,
            unique_name,
            name,
            name_type,
            None,
            None,
            (),
            (),
            None,
            None,
            None,
            headers,
        )

    # The investigation parser is tested elsewhere; only the validator runs
    # inside the warning capture.
    investigation = InvestigationReader.from_stream(small_investigation_file).read()
    with pytest.warns(IsaWarning) as validation_warnings:
        InvestigationValidator(investigation).validate()
    assert len(validation_warnings) == 1

    # The header is inspected before any row has been read.
    reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert len(reader.header) == 9

    # Reading and validating the assay are captured together.
    study_info = investigation.studies[0]
    with pytest.warns(IsaWarning) as assay_warnings:
        assay = reader.read()
        AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()
    assert len(assay_warnings) == 1

    # Overall shape of the parsed assay.
    expected_suffix = os.path.normpath("data/i_small/a_small.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert len(assay.header) == 9
    assert len(assay.materials) == 9
    assert len(assay.processes) == 5
    assert len(assay.arcs) == 13

    # Unique node names, shared between the node and the arc expectations.
    sample_n1 = "S1-sample-0815-N1"
    sample_t1 = "S1-sample-0815-T1"
    libprep_n1 = "S1-A1-library preparation-2-1"
    libprep_t1 = "S1-A1-library preparation-2-2"
    extract_n1 = "S1-A1-0815-N1-DNA1-COL3"
    extract_t1 = "S1-A1-0815-T1-DNA1-COL3"
    sequencing_n1 = "S1-A1-0815-N1-DNA1-WES1-5"
    sequencing_t1 = "S1-A1-0815-T1-DNA1-WES1-5"
    read1_n1 = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"
    read2_n1 = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"
    read1_t1 = "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"
    read2_t1 = "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"
    calling = "S1-A1-somatic variant calling-1-8"
    vcf = "S1-A1-0815-somatic.vcf.gz-COL9"

    # (node type, header constant, unique name, display name), checked in order.
    material_specs = (
        ("Sample Name", table_headers.SAMPLE_NAME, sample_n1, "0815-N1"),
        ("Sample Name", table_headers.SAMPLE_NAME, sample_t1, "0815-T1"),
        (
            "Raw Data File",
            table_headers.RAW_DATA_FILE,
            read1_n1,
            "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        ),
        (
            "Raw Data File",
            table_headers.RAW_DATA_FILE,
            read2_n1,
            "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        ),
        (
            "Raw Data File",
            table_headers.RAW_DATA_FILE,
            read1_t1,
            "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
        ),
        (
            "Raw Data File",
            table_headers.RAW_DATA_FILE,
            read2_t1,
            "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
        ),
        (
            "Derived Data File",
            table_headers.DERIVED_DATA_FILE,
            vcf,
            "0815-somatic.vcf.gz",
        ),
    )
    for node_type, header, unique_name, name in material_specs:
        expected_material = bare_material(node_type, header, unique_name, name)
        assert expected_material == assay.materials[unique_name]

    # (protocol, unique name, name, name type, headers), checked in order.
    process_specs = (
        ("library preparation", libprep_n1, None, None, [table_headers.PROTOCOL_REF]),
        ("library preparation", libprep_t1, None, None, [table_headers.PROTOCOL_REF]),
        (
            "nucleic acid sequencing",
            sequencing_n1,
            "0815-N1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
        (
            "nucleic acid sequencing",
            sequencing_t1,
            "0815-T1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
    )
    for protocol_ref, unique_name, name, name_type, headers in process_specs:
        expected_process = bare_process(protocol_ref, unique_name, name, name_type, headers)
        assert expected_process == assay.processes[unique_name]

    # Arcs are compared as an ordered tuple: the N1 chain (including the arc
    # into the VCF) first, then the T1 chain.
    arc_pairs = (
        (sample_n1, libprep_n1),
        (libprep_n1, extract_n1),
        (extract_n1, sequencing_n1),
        (sequencing_n1, read1_n1),
        (read1_n1, read2_n1),
        (read2_n1, calling),
        (calling, vcf),
        (sample_t1, libprep_t1),
        (libprep_t1, extract_t1),
        (extract_t1, sequencing_t1),
        (sequencing_t1, read1_t1),
        (read1_t1, read2_t1),
        (read2_t1, calling),
    )
    expected_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert expected_arcs == assay.arcs
def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_file):
    """Parse the minimal study with ``StudyReader`` and compare the whole graph.

    ``StudyReader`` returns a ``Study`` object (header, materials, processes
    and arcs), whereas ``StudyRowReader`` only yields the row-wise nodes.
    """
    # The investigation parser is tested elsewhere; only the validator runs
    # inside the warning capture.
    investigation = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()
    # NOTE(review): ``test_assay_reader_minimal_assay`` expects 1 warning from
    # the same validation of the same fixture -- confirm which count is right.
    assert len(record) == 2

    # The header is inspected before any row has been read.
    reader = StudyReader.from_stream("S1", minimal_study_file)
    assert len(reader.header) == 3

    # Read the full study and validate it against its investigation entry.
    study = reader.read()
    StudyValidator(investigation, investigation.studies[0], study).validate()

    # Overall shape of the parsed study.
    expected_suffix = os.path.normpath("data/i_minimal/s_minimal.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    assert len(study.header) == 3
    assert len(study.materials) == 2
    assert len(study.processes) == 1
    assert len(study.arcs) == 2

    # Unique node names, shared between the node and the arc expectations.
    source_id = "S1-source-0815"
    collection_id = "S1-sample collection-2-1"
    sample_id = "S1-sample-0815-N1"

    expected_source = models.Material(
        "Source Name",
        source_id,
        "0815",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SOURCE_NAME],
    )
    assert expected_source == study.materials[source_id]

    expected_sample = models.Material(
        "Sample Name",
        sample_id,
        "0815-N1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected_sample == study.materials[sample_id]

    expected_collection = models.Process(
        "sample collection",
        collection_id,
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected_collection == study.processes[collection_id]

    # Arcs are compared as an ordered tuple.
    expected_arcs = (
        models.Arc(source_id, collection_id),
        models.Arc(collection_id, sample_id),
    )
    assert expected_arcs == study.arcs
def test_study_reader_small_study(small_investigation_file, small_study_file):
    """Parse the small study with ``StudyReader`` and compare the resulting graph."""
    # Column headers expected on each kind of node. The same list object is
    # shared by every expected node of that kind.
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    def source(unique_name, name, characteristics):
        # Expected "Source Name" node with the given characteristics.
        return models.Material(
            "Source Name",
            unique_name,
            name,
            None,
            characteristics,
            (),
            (),
            None,
            headers_source,
        )

    def sample(unique_name, name, status, treatment):
        # Expected "Sample Name" node with one characteristic and one factor value.
        return models.Material(
            "Sample Name",
            unique_name,
            name,
            None,
            (models.Characteristics("status", [status], None),),
            (),
            (models.FactorValue("treatment", treatment, None),),
            None,
            headers_sample,
        )

    def collection(unique_name, instruments):
        # Expected "sample collection" process with the given instrument values.
        return models.Process(
            "sample collection",
            unique_name,
            None,
            None,
            date(2018, 2, 2),
            "John Doe",
            (models.ParameterValue("instrument", instruments, None),),
            (),
            None,
            None,
            None,
            headers_collection,
        )

    # Reading and validating the investigation are captured together here.
    with pytest.warns(IsaWarning) as record:
        investigation = InvestigationReader.from_stream(small_investigation_file).read()
        InvestigationValidator(investigation).validate()
    assert len(record) == 2

    # The header is inspected before any row has been read.
    reader = StudyReader.from_stream("S1", small_study_file)
    assert len(reader.header) == 13

    # Read the full study and validate it against its investigation entry.
    study = reader.read()
    StudyValidator(investigation, investigation.studies[0], study).validate()

    # Overall shape of the parsed study.
    expected_suffix = os.path.normpath("data/i_small/s_small.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    assert len(study.header) == 13
    assert len(study.materials) == 9
    assert len(study.processes) == 5
    assert len(study.arcs) == 10

    # Unit shared by every expected "age" characteristic.
    day_unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO",
    )
    # Fully annotated organism term plus an age value.
    fully_annotated = (
        models.Characteristics(
            name="organism",
            value=[
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                )
            ],
            unit=None,
        ),
        models.Characteristics(name="age", value=["90"], unit=day_unit),
    )
    # Organism name with empty accession and ontology name; empty age value.
    name_only = (
        models.Characteristics(
            name="organism",
            value=[models.OntologyTermRef("Mus musculus", "", "")],
            unit=None,
        ),
        models.Characteristics(name="age", value=[""], unit=day_unit),
    )
    # Organism term with every field set to ``None``; age value present.
    empty_term = (
        models.Characteristics(
            name="organism",
            value=[models.OntologyTermRef(None, None, None)],
            unit=None,
        ),
        models.Characteristics(name="age", value=["150"], unit=day_unit),
    )

    expected_materials = (
        source("S1-source-0815", "0815", fully_annotated),
        source("S1-source-0816", "0816", name_only),
        source("S1-source-0817", "0817", empty_term),
        sample("S1-sample-0815-N1", "0815-N1", "0", "yes"),
        sample("S1-sample-0815-T1", "0815-T1", "2", ""),
        sample("S1-sample-0816-T1", "0816-T1", "1", "yes"),
        sample("S1-Empty Sample Name-13-5", "", "", ""),
    )
    for material in expected_materials:
        assert material == study.materials[material.unique_name]

    expected_processes = (
        collection("S1-sample collection-9-2", ["scalpel"]),
        collection("S1-sample collection-9-3", ["scalpel type A", "scalpel type B"]),
        collection("S1-sample collection-9-4", ["scalpel"]),
    )
    for process in expected_processes:
        assert process == study.processes[process.unique_name]

    # Arcs are compared as an ordered tuple.
    arc_pairs = (
        ("S1-source-0814", "S1-sample collection-9-1"),
        ("S1-sample collection-9-1", "S1-sample-0814-N1"),
        ("S1-source-0815", "S1-sample collection-9-2"),
        ("S1-sample collection-9-2", "S1-sample-0815-N1"),
        ("S1-source-0815", "S1-sample collection-9-3"),
        ("S1-sample collection-9-3", "S1-sample-0815-T1"),
        ("S1-source-0816", "S1-sample collection-9-4"),
        ("S1-sample collection-9-4", "S1-sample-0816-T1"),
        ("S1-source-0817", "S1-sample collection-9-5"),
        ("S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
    )
    expected_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert expected_arcs == study.arcs
def create_and_write(out_path):
    """Build a minimal investigation, study and assay and write them into ``out_path``.

    Three ISA-Tab files are written, in this order: ``i_minimal.txt``,
    ``s_minimal.txt`` and ``a_minimal.txt``. Each model is validated right
    before it is written.
    """

    def make_basic_info(path, identifier, title, comments):
        # Basic info section with all optional descriptive fields left unset.
        return models.BasicInfo(
            path=path,
            identifier=identifier,
            title=title,
            description=None,
            submission_date=None,
            public_release_date=None,
            comments=comments,
            headers=[],
        )

    def make_protocol(name):
        # Protocol whose type term has the same name as the protocol itself.
        return models.ProtocolInfo(
            name=name,
            type=models.OntologyTermRef(name=name),
            description=None,
            uri=None,
            version=None,
            parameters={},
            components={},
            comments=(),
            headers=[],
        )

    def make_material(node_type, unique_name, name, header):
        # Material node without annotations. Every node gets an explicit
        # header list, which is currently required for export to ISA-Tab.
        return models.Material(
            type=node_type,
            unique_name=unique_name,
            name=name,
            extract_label=None,
            characteristics=(),
            comments=(),
            factor_values=(),
            material_type=None,
            headers=[header],
        )

    def make_process(protocol_ref, unique_name, name, name_type, headers):
        # Process node without date, performer, parameters or comments.
        return models.Process(
            protocol_ref=protocol_ref,
            unique_name=unique_name,
            name=name,
            name_type=name_type,
            date=None,
            performer=None,
            parameter_values=(),
            comments=(),
            array_design_ref=None,
            first_dimension=None,
            second_dimension=None,
            headers=headers,
        )

    # ---- Investigation -------------------------------------------------------

    # Study section: basic info, one assay, two protocols.
    study_basics = make_basic_info(
        "s_minimal.txt",
        "s_minimal",
        "Germline Study",
        (
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
    )
    exome_assay = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )
    protocols = (
        make_protocol("sample collection"),
        make_protocol("nucleic acid sequencing"),
    )
    study_section = models.StudyInfo(
        info=study_basics,
        designs=(),
        publications=(),
        factors={},
        assays=(exome_assay,),
        protocols={protocol.name: protocol for protocol in protocols},
        contacts=(),
    )

    # Remaining investigation sections: ontology source reference and basic info.
    obi_source = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )
    investigation_basics = make_basic_info(
        "i_minimal.txt", "i_minimal", "Minimal Investigation", ()
    )
    investigation = models.InvestigationInfo(
        ontology_source_refs={obi_source.name: obi_source},
        info=investigation_basics,
        publications=(),
        contacts=(),
        studies=(study_section,),
    )

    # Validate, then write the investigation as an ISA-Tab text file.
    # newline="" turns off newline translation on write.
    InvestigationValidator(investigation).validate()
    with open(join(out_path, investigation.info.path), "wt", newline="") as handle:
        InvestigationWriter.from_stream(
            investigation=investigation, output_file=handle
        ).write()

    study_info = investigation.studies[0]
    assay_info = study_info.assays[0]

    # ---- Study graph ---------------------------------------------------------

    # At least one source, one sample and one collection process. Unique names
    # are required for unambiguous node identification.
    source = make_material(
        "Source Name", "S1-source-0815", "0815", table_headers.SOURCE_NAME
    )
    study_sample = make_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    collection = make_process(
        "sample collection",
        "S1-sample collection-2-1",
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )

    # Arcs connect material and process nodes by their unique names.
    study_arcs = (
        models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1"),
        models.Arc(tail="S1-sample collection-2-1", head="S1-sample-0815-N1"),
    )
    study_graph = models.Study(
        file=study_info.info.path,
        header=None,
        materials={node.unique_name: node for node in (source, study_sample)},
        processes={collection.unique_name: collection},
        arcs=study_arcs,
    )

    # Validate, then write the study as an ISA-Tab text file.
    StudyValidator(
        investigation=investigation, study_info=study_info, study=study_graph
    ).validate()
    with open(join(out_path, study_info.info.path), "wt", newline="") as handle:
        StudyWriter.from_stream(study_or_assay=study_graph, output_file=handle).write()

    # ---- Assay graph ---------------------------------------------------------

    # At least one sample, one output material and one process. The sample
    # node is built again for the assay rather than reused from the study.
    assay_sample = make_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    read1_file = make_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    read2_file = make_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    sequencing = make_process(
        "nucleic acid sequencing",
        "S1-A1-0815-N1-DNA1-WES1-3",
        "0815-N1-DNA1-WES1",
        "Assay Name",
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Arcs connect material and process nodes by their unique names.
    assay_arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )
    assay_graph = models.Assay(
        file=assay_info.path,
        header=None,
        materials={
            node.unique_name: node for node in (assay_sample, read1_file, read2_file)
        },
        processes={sequencing.unique_name: sequencing},
        arcs=assay_arcs,
    )

    # Validate, then write the assay as an ISA-Tab text file.
    AssayValidator(
        investigation=investigation,
        study_info=study_info,
        assay_info=assay_info,
        assay=assay_graph,
    ).validate()
    with open(join(out_path, assay_info.path), "wt", newline="") as handle:
        AssayWriter.from_stream(study_or_assay=assay_graph, output_file=handle).write()