def run_warnings_caught(args):
    """Parse and validate an investigation plus all referenced study/assay files.

    Reads the investigation from ``args.input_investigation_file``, validates it,
    then parses every study and assay file referenced by it (resolved relative to
    the investigation file's directory) and cross-validates them.
    """
    # Parse the investigation and release the input handle immediately;
    # the ``name`` attribute stays usable after closing.
    investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
    args.input_investigation_file.close()

    # Validate the investigation section on its own.
    InvestigationValidator(investigation).validate()

    # Study and assay files live next to the investigation file.
    base_dir = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
    studies = {}
    assays = {}
    for s_idx, study_info in enumerate(investigation.studies):
        if study_info.info.path:
            with open(os.path.join(base_dir, study_info.info.path), "rt") as stream:
                studies[s_idx] = StudyReader.from_stream(f"S{s_idx + 1}", stream).read()
        if study_info.assays:
            assays[s_idx] = {}
            for a_idx, assay_info in enumerate(study_info.assays):
                if assay_info.path:
                    with open(os.path.join(base_dir, assay_info.path), "rt") as stream:
                        assays[s_idx][a_idx] = AssayReader.from_stream(
                            f"S{s_idx + 1}", f"A{a_idx + 1}", stream
                        ).read()

    # Cross-validate each parsed study and assay against the investigation.
    for s_idx, study_info in enumerate(investigation.studies):
        if study_info.info.path:
            StudyValidator(investigation, study_info, studies[s_idx]).validate()
            for a_idx, assay_info in enumerate(study_info.assays):
                if assay_info.path:
                    AssayValidator(
                        investigation, study_info, assay_info, assays[s_idx][a_idx]
                    ).validate()
def test_assay_reader_minimal_assay_iostring2(minimal_investigation_file, minimal_assay_file):
    """Read a minimal assay from a ``StringIO`` that carries no filename."""
    # Load investigation (its parsing is covered by dedicated tests).
    inv_buffer = io.StringIO(minimal_investigation_file.read())
    investigation = InvestigationReader.from_stream(inv_buffer).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Validation is expected to emit exactly two warnings.
    assert len(record) == 2

    # Build an assay reader on an anonymous in-memory stream and check headers.
    assay_buffer = io.StringIO(minimal_assay_file.read())
    reader = AssayReader.from_stream("S1", "A1", assay_buffer)
    assert len(reader.header) == 5

    # Parse the assay and validate it in its investigation context.
    assay = reader.read()
    AssayValidator(
        investigation, investigation.studies[0], investigation.studies[0].assays[0], assay
    ).validate()

    # Without a filename the reader falls back to the "<no file>" placeholder.
    assert str(assay.file) == os.path.normpath("<no file>")
    assert len(assay.header) == 5
    assert len(assay.materials) == 3
    assert len(assay.processes) == 1
    assert len(assay.arcs) == 3
def _parse_write_assert_assay(investigation_file, tmp_path, quote=None, normalize=False, skip=None):
    """Round-trip every assay of an investigation and compare sorted input vs. output.

    Each assay is parsed, validated, written back out under ``tmp_path``, and the
    line-sorted input and output files are asserted to be identical.  With
    ``normalize`` the writer output is re-read and re-written once so both sides
    of the comparison are writer-produced.  Assays whose path is listed in
    ``skip`` are ignored.
    """
    # Parse and validate the investigation.
    investigation = InvestigationReader.from_stream(investigation_file).read()
    InvestigationValidator(investigation).validate()
    directory = os.path.normpath(os.path.dirname(investigation_file.name))
    # Walk every assay of every study.
    for s_idx, study_info in enumerate(investigation.studies):
        for a_idx, assay_info in enumerate(study_info.assays):
            if skip and str(assay_info.path) in skip:
                continue
            # Parse and validate the assay.
            path_in = os.path.join(directory, assay_info.path)
            with open(path_in, "rt") as handle:
                assay = AssayReader.from_stream(
                    f"S{s_idx + 1}", f"A{a_idx + 1}", handle
                ).read()
            AssayValidator(investigation, study_info, assay_info, assay).validate()
            # Write the assay to a temporary file.
            path_out = tmp_path / assay_info.path
            with open(path_out, "wt", newline="") as out_handle:
                AssayWriter.from_stream(assay, out_handle, quote=quote).write()
            if normalize:
                # Re-read the freshly written assay and write it once more so the
                # comparison below pits writer output against writer output.
                path_in = path_out
                with open(path_out, "rt") as handle:
                    assay = AssayReader.from_stream(
                        f"S{s_idx + 1}", f"A{a_idx + 1}", handle
                    ).read()
                AssayValidator(investigation, study_info, assay_info, assay).validate()
                path_out = tmp_path / (assay_info.path.name + "_b")
                with open(path_out, "wt", newline="") as out_handle:
                    AssayWriter.from_stream(assay, out_handle, quote=quote).write()
            # Compare the line-sorted flavours of input and output byte-for-byte.
            path_in_s = tmp_path / (assay_info.path.name + ".in.sorted")
            path_out_s = tmp_path / (assay_info.path.name + ".out.sorted")
            assert filecmp.cmp(
                sort_file(path_in, path_in_s), sort_file(path_out, path_out_s), shallow=False
            )
def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_file):
    """Use ``AssayReader`` to read in minimal assay file.

    Using the ``AssayReader`` instead of the ``AssayRowReader`` gives us
    ``Assay`` objects instead of just the row-wise nodes.
    """
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert 1 == len(record)

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert 5 == len(reader.header)

    # Read and validate assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_minimal/a_minimal.txt"))
    assert 5 == len(assay.header)
    assert 3 == len(assay.materials)
    assert 1 == len(assay.processes)
    assert 3 == len(assay.arcs)

    # Expected sample material.  Positional arguments presumably follow the
    # models.Material constructor (type, unique_name, name, extract_label,
    # characteristics, comments, factor_values, material_type, headers) --
    # TODO confirm against the models module.
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-N1"]

    # First raw data file (R1 read)
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"]

    # Second raw data file (R2 read)
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"]

    # The single sequencing process of the minimal assay
    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-N1-DNA1-WES1-3",
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-N1-DNA1-WES1-3"]

    # Arcs: sample -> sequencing process -> R1 file -> R2 file
    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc("S1-A1-0815-N1-DNA1-WES1-3",
                   "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"),
        models.Arc(
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )
    assert expected == assay.arcs
def test_assay_reader_gelelect(gelelect_investigation_file, gelelect_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    # Parsing and validation of this file are expected to warn; capture all
    # warnings over the whole load/validate sequence.
    with pytest.warns(IsaWarning) as record:
        # Load investigation
        investigation = InvestigationReader.from_stream(
            gelelect_investigation_file).read()
        InvestigationValidator(investigation).validate()

        # Create new row reader and check read headers
        reader = AssayReader.from_stream("S1", "A1", gelelect_assay_file)
        assert 22 == len(reader.header)

        # Read assay
        assay = reader.read()
        AssayValidator(investigation, investigation.studies[0],
                       investigation.studies[0].assays[0], assay).validate()

    # Check warnings
    assert 4 == len(record)

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath(
            "data/test_gelelect/a_study01_protein_expression_profiling_gel_electrophoresis.txt"
        ))
    assert 22 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 10 == len(assay.processes)
    assert 18 == len(assay.arcs)

    # Expected image file material
    expected = models.Material(
        "Image File",
        "S1-A1-Image01.jpeg-COL19",
        "Image01.jpeg",
        None,
        (),
        (),
        (),
        None,
        [table_headers.IMAGE_FILE],
    )
    assert expected == assay.materials["S1-A1-Image01.jpeg-COL19"]

    # Expected scan process (named via the "Scan Name" column)
    expected = models.Process(
        "data collection",
        "S1-A1-Scan02-18",
        "Scan02",
        "Scan Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME],
    )
    assert expected == assay.processes["S1-A1-Scan02-18"]

    # Header layout shared by the two electrophoresis processes checked below
    header_electrophoresis = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.SECOND_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]

    # Named electrophoresis process; first/second dimension are empty term refs
    expected = models.Process(
        "electrophoresis",
        "S1-A1-Assay01-10",
        "Assay01",
        "Gel Electrophoresis Assay Name",
        None,
        None,
        (),
        (),
        None,
        models.OntologyTermRef("", "", ""),
        models.OntologyTermRef("", "", ""),
        header_electrophoresis,
    )
    assert expected == assay.processes["S1-A1-Assay01-10"]

    # Unnamed electrophoresis process with non-empty first/second dimension terms
    expected = models.Process(
        "electrophoresis",
        "S1-A1-electrophoresis-9-2",
        "",
        "Gel Electrophoresis Assay Name",
        None,
        None,
        (),
        (),
        None,
        models.OntologyTermRef("AssayX", None, None),
        models.OntologyTermRef("AssayY", None, None),
        header_electrophoresis,
    )
    assert expected == assay.processes["S1-A1-electrophoresis-9-2"]
def test_assay_reader_small2_assay(small2_investigation_file, small2_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        small2_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", small2_assay_file)
    assert 14 == len(reader.header)

    # Read assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_small2/a_small2.txt"))
    assert 14 == len(assay.header)
    assert 25 == len(assay.materials)
    assert 41 == len(assay.processes)
    assert 74 == len(assay.arcs)

    # Comments
    expected = models.Comment(name="Replicate", value="B")
    assert assay.materials["S1-A1-0815-T1-Pro1-B-115-COL5"].comments[
        0] == expected

    # Expected arcs -- the full graph, compared order-insensitively below.
    expected = (
        # samples -> extraction processes
        models.Arc("S1-sample-0815-N1", "S1-A1-extraction-2-1"),
        models.Arc("S1-sample-0815-T1", "S1-A1-extraction-2-2"),
        # extraction -> extract materials
        models.Arc("S1-A1-extraction-2-1", "S1-A1-0815-N1-Pro1-COL3"),
        models.Arc("S1-A1-extraction-2-2", "S1-A1-0815-T1-Pro1-COL3"),
        # extracts fan out into twelve labeling processes (six replicates each)
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-1"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-2"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-3"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-4"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-5"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-6"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-7"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-8"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-9"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-10"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-11"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-12"),
        # labeling -> labeled extract materials
        models.Arc("S1-A1-labeling-4-1", "S1-A1-0815-N1-Pro1-A-114-COL5"),
        models.Arc("S1-A1-labeling-4-2", "S1-A1-0815-T1-Pro1-A-115-COL5"),
        models.Arc("S1-A1-labeling-4-3", "S1-A1-0815-N1-Pro1-B-114-COL5"),
        models.Arc("S1-A1-labeling-4-4", "S1-A1-0815-T1-Pro1-B-115-COL5"),
        models.Arc("S1-A1-labeling-4-5", "S1-A1-0815-N1-Pro1-C-114-COL5"),
        models.Arc("S1-A1-labeling-4-6", "S1-A1-0815-T1-Pro1-C-115-COL5"),
        models.Arc("S1-A1-labeling-4-7", "S1-A1-0815-N1-Pro1-D-114-COL5"),
        models.Arc("S1-A1-labeling-4-8", "S1-A1-0815-T1-Pro1-D-115-COL5"),
        models.Arc("S1-A1-labeling-4-9", "S1-A1-0815-N1-Pro1-E-114-COL5"),
        models.Arc("S1-A1-labeling-4-10", "S1-A1-0815-T1-Pro1-E-115-COL5"),
        models.Arc("S1-A1-labeling-4-11", "S1-A1-0815-N1-Pro1-F-114-COL5"),
        models.Arc("S1-A1-labeling-4-12", "S1-A1-0815-T1-Pro1-F-115-COL5"),
        # labeled extracts -> chromatography processes
        models.Arc("S1-A1-0815-N1-Pro1-A-114-COL5", "S1-A1-chromatography-8-1"),
        models.Arc("S1-A1-0815-T1-Pro1-A-115-COL5", "S1-A1-chromatography-8-2"),
        models.Arc("S1-A1-0815-N1-Pro1-B-114-COL5", "S1-A1-chromatography-8-3"),
        models.Arc("S1-A1-0815-T1-Pro1-B-115-COL5", "S1-A1-chromatography-8-4"),
        models.Arc("S1-A1-0815-N1-Pro1-C-114-COL5", "S1-A1-chromatography-8-5"),
        models.Arc("S1-A1-0815-T1-Pro1-C-115-COL5", "S1-A1-chromatography-8-6"),
        models.Arc("S1-A1-0815-N1-Pro1-D-114-COL5", "S1-A1-chromatography-8-7"),
        models.Arc("S1-A1-0815-T1-Pro1-D-115-COL5", "S1-A1-chromatography-8-8"),
        models.Arc("S1-A1-0815-N1-Pro1-E-114-COL5", "S1-A1-chromatography-8-9"),
        models.Arc("S1-A1-0815-T1-Pro1-E-115-COL5", "S1-A1-chromatography-8-10"),
        models.Arc("S1-A1-0815-N1-Pro1-F-114-COL5", "S1-A1-chromatography-8-11"),
        models.Arc("S1-A1-0815-T1-Pro1-F-115-COL5", "S1-A1-chromatography-8-12"),
        # chromatography -> pooled / mass spectrometry processes
        models.Arc("S1-A1-chromatography-8-1", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-2", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-3", "S1-A1-mass spectrometry-9-3"),
        models.Arc("S1-A1-chromatography-8-4", "S1-A1-mass spectrometry-9-4"),
        models.Arc("S1-A1-chromatography-8-5", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-6", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-7", "S1-A1-mass spectrometry-9-7"),
        models.Arc("S1-A1-chromatography-8-8", "S1-A1-mass spectrometry-9-8"),
        models.Arc("S1-A1-chromatography-8-9", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-10", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-11", "S1-A1-poolF-10"),
        models.Arc("S1-A1-chromatography-8-12", "S1-A1-poolF-10"),
        # pooled / MS processes -> raw spectral data files (some rows empty)
        models.Arc("S1-A1-poolA-10", "S1-A1-poolA.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-3", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-4", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-poolC-10", "S1-A1-Empty Raw Spectral Data File-11-5"),
        models.Arc("S1-A1-mass spectrometry-9-7",
                   "S1-A1-Empty Raw Spectral Data File-11-7"),
        models.Arc("S1-A1-mass spectrometry-9-8",
                   "S1-A1-Empty Raw Spectral Data File-11-8"),
        models.Arc("S1-A1-poolE-10", "S1-A1-poolE.raw-COL11"),
        models.Arc("S1-A1-poolF-10", "S1-A1-Empty Raw Spectral Data File-11-11"),
        # raw data files -> data transformation / analysis processes
        models.Arc("S1-A1-poolA.raw-COL11", "S1-A1-data transformation-12-1"),
        models.Arc("S1-A1-poolB.raw-COL11", "S1-A1-data transformation-12-3"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-5",
                   "S1-A1-data transformation-12-5"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-7",
                   "S1-A1-data transformation-12-7"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-8",
                   "S1-A1-data transformation-12-8"),
        models.Arc("S1-A1-poolE.raw-COL11", "S1-A1-data transformation-12-9"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-11",
                   "S1-A1-data analysis-13"),
        # transformations / analysis -> derived data files
        models.Arc("S1-A1-data transformation-12-1", "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-3", "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-5", "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-7", "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-8", "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-9",
                   "S1-A1-Empty Derived Data File-14-9"),
        models.Arc("S1-A1-data analysis-13", "S1-A1-results.csv-COL14"),
    )
    # Compare order-insensitively
    assert sorted(expected) == sorted(assay.arcs)
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        small_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert 1 == len(record)

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert 9 == len(reader.header)

    # Read assay; reading/validating is expected to emit one warning
    with pytest.warns(IsaWarning) as record:
        assay = reader.read()
        AssayValidator(investigation, investigation.studies[0],
                       investigation.studies[0].assays[0], assay).validate()

    # Check warnings
    assert 1 == len(record)

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_small/a_small.txt"))
    assert 9 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 5 == len(assay.processes)
    assert 13 == len(assay.arcs)

    # Expected sample materials (normal and tumor)
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-N1"]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-T1",
        "0815-T1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-T1"]

    # Raw data files of the normal sample (R1/R2 reads)
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"]

    # Raw data files of the tumor sample (R1/R2 reads)
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"]

    # Derived data file produced by the somatic variant calling
    expected = models.Material(
        "Derived Data File",
        "S1-A1-0815-somatic.vcf.gz-COL9",
        "0815-somatic.vcf.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.DERIVED_DATA_FILE],
    )
    assert expected == assay.materials["S1-A1-0815-somatic.vcf.gz-COL9"]

    # Unnamed library preparation processes (one per sample)
    expected = models.Process(
        "library preparation",
        "S1-A1-library preparation-2-1",
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected == assay.processes["S1-A1-library preparation-2-1"]
    expected = models.Process(
        "library preparation",
        "S1-A1-library preparation-2-2",
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected == assay.processes["S1-A1-library preparation-2-2"]

    # Named sequencing processes (one per sample)
    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-N1-DNA1-WES1-5",
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-N1-DNA1-WES1-5"]
    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-T1-DNA1-WES1-5",
        "0815-T1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-T1-DNA1-WES1-5"]

    # Expected arcs: both sample chains converge on the variant calling process
    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-library preparation-2-1"),
        models.Arc("S1-A1-library preparation-2-1", "S1-A1-0815-N1-DNA1-COL3"),
        models.Arc("S1-A1-0815-N1-DNA1-COL3", "S1-A1-0815-N1-DNA1-WES1-5"),
        models.Arc("S1-A1-0815-N1-DNA1-WES1-5",
                   "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"),
        models.Arc(
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        ),
        models.Arc("S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                   "S1-A1-somatic variant calling-1-8"),
        models.Arc("S1-A1-somatic variant calling-1-8",
                   "S1-A1-0815-somatic.vcf.gz-COL9"),
        models.Arc("S1-sample-0815-T1", "S1-A1-library preparation-2-2"),
        models.Arc("S1-A1-library preparation-2-2", "S1-A1-0815-T1-DNA1-COL3"),
        models.Arc("S1-A1-0815-T1-DNA1-COL3", "S1-A1-0815-T1-DNA1-WES1-5"),
        models.Arc("S1-A1-0815-T1-DNA1-WES1-5",
                   "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"),
        models.Arc(
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        ),
        models.Arc("S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                   "S1-A1-somatic variant calling-1-8"),
    )
    assert expected == assay.arcs
def create_and_write(out_path):
    """Create an investigation with a study and assay and write to ``output_path``."""
    # Prepare one or more study sections
    # Prepare basic study information
    study_info = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # Create one or more assays
    assay_01 = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Prepare one or more protocols
    protocol_01 = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    protocol_02 = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Create study object
    study_01 = models.StudyInfo(
        info=study_info,
        designs=(),
        publications=(),
        factors={},
        assays=(assay_01, ),
        protocols={
            protocol_01.name: protocol_01,
            protocol_02.name: protocol_02
        },
        contacts=(),
    )

    # Prepare other investigation section
    # Prepare one or more ontology term source references
    onto_ref_01 = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Prepare basic investigation information
    invest_info = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    # Create investigation object
    investigation = models.InvestigationInfo(
        ontology_source_refs={onto_ref_01.name: onto_ref_01},
        info=invest_info,
        publications=(),
        contacts=(),
        studies=(study_01, ),
    )

    # Validate investigation
    InvestigationValidator(investigation).validate()

    # Write the investigation as ISA-Tab txt file
    with open(join(out_path, investigation.info.path), "wt", newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation,
                                        output_file=outputf).write()

    # Create a corresponding Study graph
    # Create at least on source, one sample and one collection process
    # Unique names are required for unambiguous node identification
    source_01 = models.Material(
        type="Source Name",
        unique_name="S1-source-0815",
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    process_01 = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arc_01 = models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1")
    arc_02 = models.Arc(tail="S1-sample collection-2-1", head="S1-sample-0815-N1")

    # Create the study graph object
    study_graph_01 = models.Study(
        file=investigation.studies[0].info.path,
        header=None,
        materials={
            source_01.unique_name: source_01,
            sample_01.unique_name: sample_01
        },
        processes={process_01.unique_name: process_01},
        arcs=(arc_01, arc_02),
    )

    # Validate study graph
    StudyValidator(investigation=investigation,
                   study_info=investigation.studies[0],
                   study=study_graph_01).validate()

    # Write the study as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].info.path), "wt",
              newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph_01,
                                output_file=outputf).write()

    # Create a corresponding Assay graph
    # Create at least on samples, one output material and one collection process
    # Unique names are required for unambiguous node identification
    # Explicit header definition per node is currently required to enable export to ISA-Tab
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    data_file_01 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    data_file_02 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    process_01 = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    # Create the assay graph object
    assay_graph_01 = models.Assay(
        file=investigation.studies[0].assays[0].path,
        header=None,
        materials={
            sample_01.unique_name: sample_01,
            data_file_01.unique_name: data_file_01,
            data_file_02.unique_name: data_file_02,
        },
        processes={process_01.unique_name: process_01},
        arcs=arcs,
    )

    # Validate assay graph
    AssayValidator(
        investigation=investigation,
        study_info=investigation.studies[0],
        assay_info=investigation.studies[0].assays[0],
        assay=assay_graph_01,
    ).validate()

    # Write the assay as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].assays[0].path), "wt",
              newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph_01,
                                output_file=outputf).write()