Example #1
0
def test_assay_reader_gelelect(gelelect_investigation_file,
                               gelelect_assay_file):
    """Parse the gel electrophoresis assay via ``AssayReader`` and verify it.

    The investigation is read and validated first, then the assay table is
    read and validated; four ``IsaWarning`` warnings are expected in total
    across these steps.  Afterwards the parsed file path, header size,
    node/arc counts and a selection of materials and processes are compared
    against hand-built models.
    """
    with pytest.warns(IsaWarning) as caught_warnings:
        # The investigation is needed by the assay validator further down
        investigation_reader = InvestigationReader.from_stream(
            gelelect_investigation_file)
        investigation = investigation_reader.read()
        InvestigationValidator(investigation).validate()

        # The header can be inspected before the table body is read
        assay_reader = AssayReader.from_stream("S1", "A1", gelelect_assay_file)
        assert 22 == len(assay_reader.header)

        # Parse the table and validate it against the first study / assay
        assay = assay_reader.read()
        study_info = investigation.studies[0]
        assay_info = study_info.assays[0]
        AssayValidator(investigation, study_info, assay_info, assay).validate()

    # Warnings collected while reading and validating
    assert 4 == len(caught_warnings)

    # File path and overall dimensions of the parsed assay
    expected_suffix = os.path.normpath(
        "data/test_gelelect/"
        "a_study01_protein_expression_profiling_gel_electrophoresis.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    for expected_size, parsed in (
            (22, assay.header),
            (9, assay.materials),
            (10, assay.processes),
            (18, assay.arcs),
    ):
        assert expected_size == len(parsed)

    # Image file material
    image_key = "S1-A1-Image01.jpeg-COL19"
    expected_image = models.Material(
        "Image File",
        image_key,
        "Image01.jpeg",
        None,
        (),
        (),
        (),
        None,
        [table_headers.IMAGE_FILE],
    )
    assert expected_image == assay.materials[image_key]

    # Data collection process identified by a scan name
    scan_key = "S1-A1-Scan02-18"
    expected_scan = models.Process(
        "data collection",
        scan_key,
        "Scan02",
        "Scan Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME],
    )
    assert expected_scan == assay.processes[scan_key]

    # Both dimension columns are followed by the same pair of term columns
    term_ref_columns = [
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    header_electrophoresis = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        *term_ref_columns,
        table_headers.SECOND_DIMENSION,
        *term_ref_columns,
    ]

    def electrophoresis(unique_name, name, first_dimension, second_dimension):
        """Build the expected electrophoresis process for one table row."""
        return models.Process(
            "electrophoresis",
            unique_name,
            name,
            "Gel Electrophoresis Assay Name",
            None,
            None,
            (),
            (),
            None,
            first_dimension,
            second_dimension,
            header_electrophoresis,
        )

    # Electrophoresis process with an assay name but empty dimensions
    named_key = "S1-A1-Assay01-10"
    expected_named = electrophoresis(
        named_key,
        "Assay01",
        models.OntologyTermRef("", "", ""),
        models.OntologyTermRef("", "", ""),
    )
    assert expected_named == assay.processes[named_key]

    # Electrophoresis process with an empty assay name but given dimensions
    unnamed_key = "S1-A1-electrophoresis-9-2"
    expected_unnamed = electrophoresis(
        unnamed_key,
        "",
        models.OntologyTermRef("AssayX", None, None),
        models.OntologyTermRef("AssayY", None, None),
    )
    assert expected_unnamed == assay.processes[unnamed_key]
Example #2
0
def test_study_reader_small_study(small_investigation_file, small_study_file):
    """Read the small study with ``StudyReader`` and compare to expectations.

    The investigation is parsed and validated first (two ``IsaWarning``
    warnings are expected there).  The study table is then read and
    validated, and its file path, header size, materials, processes and arcs
    are checked against hand-built models.
    """
    # Investigation parsing itself is tested elsewhere
    with pytest.warns(IsaWarning) as caught_warnings:
        investigation_reader = InvestigationReader.from_stream(
            small_investigation_file)
        investigation = investigation_reader.read()
        InvestigationValidator(investigation).validate()

    assert 2 == len(caught_warnings)

    # The header can be inspected before the table body is read
    study_reader = StudyReader.from_stream("S1", small_study_file)
    assert 13 == len(study_reader.header)

    # Parse the table and validate it against the first study
    study = study_reader.read()
    study_info = investigation.studies[0]
    StudyValidator(investigation, study_info, study).validate()

    # File path and overall dimensions of the parsed study
    expected_suffix = os.path.normpath("data/i_small/s_small.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    for expected_size, parsed in (
            (13, study.header),
            (9, study.materials),
            (5, study.processes),
            (10, study.arcs),
    ):
        assert expected_size == len(parsed)

    # Column headers expected on each kind of node
    term_ref_columns = [
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        *term_ref_columns,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        *term_ref_columns,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    day_unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO")

    def organism_and_age(organism, age):
        """Build the characteristics tuple expected on a source."""
        return (
            models.Characteristics(
                name="organism", value=[organism], unit=None),
            models.Characteristics(name="age", value=[age], unit=day_unit),
        )

    def source(name, characteristics):
        """Build the expected source material called ``name``."""
        return models.Material(
            "Source Name",
            "S1-source-" + name,
            name,
            None,
            characteristics,
            (),
            (),
            None,
            headers_source,
        )

    def sample(unique_name, name, status, treatment):
        """Build an expected sample material."""
        return models.Material(
            "Sample Name",
            unique_name,
            name,
            None,
            (models.Characteristics("status", [status], None), ),
            (),
            (models.FactorValue("treatment", treatment, None), ),
            None,
            headers_sample,
        )

    def sample_collection(unique_name, instruments):
        """Build an expected sample collection process."""
        return models.Process(
            "sample collection",
            unique_name,
            None,
            None,
            date(2018, 2, 2),
            "John Doe",
            (models.ParameterValue("instrument", instruments, None), ),
            (),
            None,
            None,
            None,
            headers_collection,
        )

    # Sources: fully annotated, partially annotated and unannotated organism
    expected_sources = (
        source(
            "0815",
            organism_and_age(
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession=(
                        "http://purl.bioontology.org/ontology/NCBITAXON/10090"
                    ),
                    ontology_name="NCBITAXON",
                ),
                "90",
            ),
        ),
        source(
            "0816",
            organism_and_age(
                models.OntologyTermRef("Mus musculus", "", ""), ""),
        ),
        source(
            "0817",
            organism_and_age(
                models.OntologyTermRef(None, None, None), "150"),
        ),
    )
    for expected in expected_sources:
        assert expected == study.materials["S1-source-" + expected_name(expected)] \
            if False else expected == study.materials[
                {"0815": "S1-source-0815",
                 "0816": "S1-source-0816",
                 "0817": "S1-source-0817"}[
                     ("0815", "0816", "0817")[
                         expected_sources.index(expected)]]]

    # Samples, including one with an empty sample name
    sample_rows = (
        ("S1-sample-0815-N1", "0815-N1", "0", "yes"),
        ("S1-sample-0815-T1", "0815-T1", "2", ""),
        ("S1-sample-0816-T1", "0816-T1", "1", "yes"),
        ("S1-Empty Sample Name-13-5", "", "", ""),
    )
    for unique_name, name, status, treatment in sample_rows:
        expected = sample(unique_name, name, status, treatment)
        assert expected == study.materials[unique_name]

    # Sample collection processes
    process_rows = (
        ("S1-sample collection-9-2", ["scalpel"]),
        ("S1-sample collection-9-3", ["scalpel type A", "scalpel type B"]),
        ("S1-sample collection-9-4", ["scalpel"]),
    )
    for unique_name, instruments in process_rows:
        expected = sample_collection(unique_name, instruments)
        assert expected == study.processes[unique_name]

    # Each chain source -> process -> sample contributes two arcs, in order
    chains = (
        ("S1-source-0814", "S1-sample collection-9-1", "S1-sample-0814-N1"),
        ("S1-source-0815", "S1-sample collection-9-2", "S1-sample-0815-N1"),
        ("S1-source-0815", "S1-sample collection-9-3", "S1-sample-0815-T1"),
        ("S1-source-0816", "S1-sample collection-9-4", "S1-sample-0816-T1"),
        ("S1-source-0817", "S1-sample collection-9-5",
         "S1-Empty Sample Name-13-5"),
    )
    expected_arcs = tuple(
        models.Arc(tail, head)
        for source_id, process_id, sample_id in chains
        for tail, head in ((source_id, process_id), (process_id, sample_id)))
    assert expected_arcs == study.arcs
Example #3
0
def test_study_row_reader_small_study(small_investigation_file,
                                      small_study_file):
    """Use ``StudyRowReader`` to read in small study file.

    Checks the parsed header (including the ``repr`` of the first two column
    headers), the number of rows, and the nodes of the first three rows
    against hand-built models.  ``small_investigation_file`` is requested as
    a fixture but not used by the body.
    """

    # Create new row reader and check read headers (+ string representation)
    row_reader = StudyRowReader.from_stream("S1", small_study_file)
    assert 13 == len(row_reader.header)
    rep0 = "ColumnHeader(column_type='Source Name', col_no=0, span=1)"
    rep1 = "LabeledColumnHeader(column_type='Characteristics', col_no=1, span=1, label='organism')"
    assert rep0 == repr(row_reader.header[0])
    assert rep1 == repr(row_reader.header[1])

    # Read all rows in study
    rows = list(row_reader.read())

    # Check results
    assert 5 == len(rows)
    first_row = rows[0]
    second_row = rows[1]
    third_row = rows[2]

    assert 3 == len(second_row)

    # Column headers expected on source, process and sample nodes
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO")

    # First row: the organism characteristic carries two ontology terms
    characteristics1 = (
        models.Characteristics(
            name="organism",
            value=[
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                ),
                # NCBITAXON 9606 is Homo sapiens; the name must be spelled
                # out in full to match the parsed term
                models.OntologyTermRef(
                    name="Homo sapiens",
                    accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606",
                    ontology_name="NCBITAXON",
                ),
            ],
            unit=None,
        ),
        models.Characteristics(name="age", value=["90"], unit=unit),
    )

    expected = models.Material(
        "Source Name",
        "S1-source-0814",
        "0814",
        None,
        characteristics1,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == first_row[0]

    # Second and third row share the same source
    characteristics2 = (
        models.Characteristics(
            name="organism",
            value=[
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                )
            ],
            unit=None,
        ),
        models.Characteristics(name="age", value=["90"], unit=unit),
    )

    # Second row: source -> sample collection -> sample
    expected = models.Material(
        "Source Name",
        "S1-source-0815",
        "0815",
        None,
        characteristics2,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == second_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-2",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        (models.ParameterValue("instrument", ["scalpel"], None), ),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == second_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (models.Characteristics("status", ["0"], None), ),
        (),
        (models.FactorValue("treatment", "yes", None), ),
        None,
        headers_sample,
    )
    assert expected == second_row[2]

    # Third row: same source, process with a two-valued parameter
    assert 3 == len(third_row)
    expected = models.Material(
        "Source Name",
        "S1-source-0815",
        "0815",
        None,
        characteristics2,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == third_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-3",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        (models.ParameterValue("instrument",
                               ["scalpel type A", "scalpel type B"], None), ),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == third_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-T1",
        "0815-T1",
        None,
        (models.Characteristics("status", ["2"], None), ),
        (),
        (models.FactorValue("treatment", "", None), ),
        None,
        headers_sample,
    )
    assert expected == third_row[2]
def test_parse_comment_investigation(comment_investigation_file):
    """Read an investigation containing ``Comment[...]`` fields and check it.

    The investigation is parsed with ``InvestigationReader`` and validated;
    selected entries of every section (ontology sources, basic info,
    publications, contacts and both studies) are then compared against
    hand-built models, including the comments and the header lists.
    """
    # Read Investigation from file-like object
    reader = InvestigationReader.from_stream(comment_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Check results
    # Investigation
    assert investigation

    # Ontology sources
    assert 9 == len(investigation.ontology_source_refs)
    expected = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("OntologyComment", "TestValue01"), ),
        [
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[0:2],
            "Comment[OntologyComment]",
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[2:],
        ],
    )
    assert expected == investigation.ontology_source_refs["OBI"]

    # Basic info
    assert "BII-I-1" == investigation.info.identifier
    assert "Owning Organisation URI" == investigation.info.comments[2].name
    assert "TestValue01" == investigation.info.comments[2].value

    expected_headers = [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]
    # Compare the header list, as test_parse_full_investigation does; without
    # this the list above would be built and then discarded unchecked
    assert expected_headers == investigation.info.headers

    # Publications
    assert 3 == len(investigation.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("InvestPubsComment", "TestValue01"), ),
        [
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[0:2],
            "Comment[InvestPubsComment]",
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[2:],
        ],
    )
    assert expected == investigation.publications[0]

    # Contacts
    assert 3 == len(investigation.contacts)
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        [
            *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
            "Comment[Investigation Person ORCID]",
            "Comment[Investigation Person REF]",
        ],
    )
    assert expected == investigation.contacts[2]

    # Studies
    assert 2 == len(investigation.studies)

    # Study 1
    study = investigation.studies[0]
    assert "BII-S-1" == study.info.identifier
    assert Path("s_BII-S-1.txt") == study.info.path
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value

    # Two comments sit between the study info keys, the rest follow them
    expected_headers = [
        *investigation_headers.STUDY_INFO_KEYS[0:3],
        "Comment[Study Grant Number]",
        "Comment[Study Funding Agency]",
        *investigation_headers.STUDY_INFO_KEYS[3:],
        "Comment[Manuscript Licence]",
        "Comment[Experimental Metadata Licence]",
        "Comment[Data Repository]",
        "Comment[Data Record Accession]",
        "Comment[Data Record URI]",
        "Comment[Supplementary Information File Name]",
        "Comment[Supplementary Information File Type]",
        "Comment[Supplementary File URI]",
        "Comment[Subject Keywords]",
    ]
    assert expected_headers == study.info.headers

    # Study 1 - Design descriptors
    assert 2 == len(study.designs)
    expected = models.DesignDescriptorsInfo(
        models.OntologyTermRef("genotyping design",
                               "http://purl.obolibrary.org/obo/OBI_0001444",
                               "OBI"),
        (models.Comment("DesignDescsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_DESIGN_DESCR_KEYS,
            "Comment[DesignDescsComment]"
        ],
    )
    assert expected == study.designs[1]

    # Study 1 - Publications
    assert 1 == len(study.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("published", "", ""),
        (models.Comment("StudyPubsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[0:4],
            "Comment[StudyPubsComment]",
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[4:],
        ],
    )
    assert expected == study.publications[0]

    # Study 1 - Factors
    assert 2 == len(study.factors)
    expected = models.FactorInfo(
        "rate",
        models.OntologyTermRef("rate",
                               "http://purl.obolibrary.org/obo/PATO_0000161",
                               "PATO"),
        (models.Comment("FactorsComment", "TestValue01"), ),
        [*investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsComment]"],
    )
    assert expected == study.factors["rate"]

    # Study 1 - Assays
    assert 3 == len(study.assays)
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("AssaysComment",
                        "A comment within ontology terms?"), ),
        [
            *investigation_headers.STUDY_ASSAYS_KEYS[0:5],
            "Comment[AssaysComment]",
            *investigation_headers.STUDY_ASSAYS_KEYS[5:],
        ],
    )
    assert expected == study.assays[2]

    # Study 1 - Protocols
    assert 7 == len(study.protocols)
    expected = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef("extraction",
                               "http://purl.obolibrary.org/obo/OBI_0302884",
                               "OBI"),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "",
                                                      ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {
            "pipette":
            models.ProtocolComponentInfo(
                "pipette",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            )
        },
        (models.Comment("ProtocolsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_PROTOCOLS_KEYS[0:7],
            "Comment[ProtocolsComment]",
            *investigation_headers.STUDY_PROTOCOLS_KEYS[7:],
        ],
    )
    assert expected == study.protocols["metabolite extraction"]

    # Study 1 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", ""), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]

    # Study 2
    study = investigation.studies[1]
    assert "BII-S-2" == study.info.identifier
    assert Path("s_BII-S-2.txt") == study.info.path
    assert "Study Grant Number" == study.info.comments[0].name
    assert "" == study.info.comments[0].value
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value

    expected_headers = [
        *investigation_headers.STUDY_INFO_KEYS[0:3],
        "Comment[Study Grant Number]",
        "Comment[Study Funding Agency]",
        *investigation_headers.STUDY_INFO_KEYS[3:],
        "Comment[Manuscript Licence]",
        "Comment[Experimental Metadata Licence]",
        "Comment[Data Repository]",
        "Comment[Data Record Accession]",
        "Comment[Data Record URI]",
        "Comment[Supplementary Information File Name]",
        "Comment[Supplementary Information File Type]",
        "Comment[Supplementary File URI]",
        "Comment[Subject Keywords]",
    ]
    assert expected_headers == study.info.headers

    # Study 2 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", "personB"), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]
def test_parse_full_investigation(full_investigation_file):
    """Read the full example investigation and compare it to expected models.

    The stream is parsed with ``InvestigationReader`` and the result is run
    through ``InvestigationValidator``. Afterwards the ontology sources,
    basic info, publications and contacts of the investigation as well as
    selected entries of both studies are compared against hand-written
    expected ``models`` objects, including their ``headers`` lists.

    ``full_investigation_file`` is presumably a pytest fixture providing an
    open file-like object of the example investigation -- confirm against
    the test configuration.
    """
    # Read Investigation from file-like object
    reader = InvestigationReader.from_stream(full_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Check results
    # Investigation
    assert investigation

    # Ontology sources
    # Two of the nine sources are compared in full; both are expected to
    # carry one extra ``Comment[Test]`` column after the standard keys.
    assert 9 == len(investigation.ontology_source_refs)
    expected_headers = [
        *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS, "Comment[Test]"
    ]
    expected = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("Test", "4"), ),
        expected_headers,
    )
    assert expected == investigation.ontology_source_refs["OBI"]
    expected = models.OntologyRef(
        "NCBITAXON",
        "http://data.bioontology.org/ontologies/NCBITAXON",
        "2",
        ("National Center for Biotechnology Information (NCBI) Organismal "
         "Classification"),
        (models.Comment("Test", "1"), ),
        expected_headers,
    )
    assert expected == investigation.ontology_source_refs["NCBITAXON"]

    # Basic info
    assert ("Growth control of the eukaryote cell: a systems biology study "
            "in yeast") == investigation.info.title
    assert "BII-I-1" == investigation.info.identifier
    assert date(2007, 4, 30) == investigation.info.submission_date
    assert date(2009, 3, 10) == investigation.info.public_release_date

    # All investigation-level comment columns are expected after the
    # standard info keys.
    expected_headers = [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]
    assert expected_headers == investigation.info.headers

    # Publications
    # The first two of the three publications are compared in full.
    # ``Comment[Subtitle]`` is expected between the fourth and fifth
    # standard publication key.
    assert 3 == len(investigation.publications)
    expected_headers = [
        *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[0:4],
        "Comment[Subtitle]",
        *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[4:],
    ]
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("Subtitle", ""), ),
        expected_headers,
    )
    assert expected == investigation.publications[0]
    expected = models.PublicationInfo(
        "1231222",
        "",
        "Piatnochka IT.",
        "Effect of prednisolone on the cardiovascular system in complex "
        "treatment of newly detected pulmonary tuberculosis",
        models.OntologyTermRef("published",
                               "http://www.ebi.ac.uk/efo/EFO_0001796", "EFO"),
        (models.Comment("Subtitle", "Something"), ),
        expected_headers,
    )
    assert expected == investigation.publications[1]

    # Contacts
    # All three investigation contacts are compared in full; each carries
    # an ORCID and a REF comment after the standard contact keys.
    assert 3 == len(investigation.contacts)
    expected_headers = [
        *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
        "Comment[Investigation Person ORCID]",
        "Comment[Investigation Person REF]",
    ]
    expected = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("corresponding author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "12345"),
            models.Comment("Investigation Person REF", "personA"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[0]
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "0987654321"),
            models.Comment("Investigation Person REF", "personB"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[1]
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[2]

    # Studies
    assert len(investigation.studies) == 2

    # Study 1
    study = investigation.studies[0]
    assert "BII-S-1" == study.info.identifier
    assert ("Study of the impact of changes in flux on the transcriptome, "
            "proteome, endometabolome and exometabolome of the yeast "
            "Saccharomyces cerevisiae under different nutrient limitations"
            ) == study.info.title
    assert Path("s_BII-S-1.txt") == study.info.path

    # Study 1 - Design descriptors
    # ``Comment[Test1]`` is expected right after the first standard key,
    # ``Comment[Test2]`` after the remaining standard keys.
    assert 2 == len(study.designs)
    expected_headers = [
        *investigation_headers.STUDY_DESIGN_DESCR_KEYS[0:1],
        "Comment[Test1]",
        *investigation_headers.STUDY_DESIGN_DESCR_KEYS[1:],
        "Comment[Test2]",
    ]
    expected = (
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "intervention design",
                "http://purl.obolibrary.org/obo/OBI_0000115", "OBI"),
            (models.Comment("Test1", "1"), models.Comment("Test2", "3")),
            expected_headers,
        ),
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "genotyping design",
                "http://purl.obolibrary.org/obo/OBI_0001444", "OBI"),
            (models.Comment("Test1", "2"), models.Comment("Test2", "4")),
            expected_headers,
        ),
    )
    assert expected == study.designs

    # Study 1 - Publications
    assert 1 == len(study.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("published", "", ""),
        (models.Comment("Subtitle", "Something"), ),
        [
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[0:4],
            "Comment[Subtitle]",
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[4:],
        ],
    )
    assert expected == study.publications[0]

    # Study 1 - Factors
    # Factors are looked up by name rather than by position.
    assert 2 == len(study.factors)
    expected_headers = [
        *investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsTest]"
    ]
    expected = models.FactorInfo(
        "limiting nutrient",
        models.OntologyTermRef("chemical entity",
                               "http://purl.obolibrary.org/obo/CHEBI_24431",
                               "CHEBI"),
        (models.Comment("FactorsTest", "1"), ),
        expected_headers,
    )
    assert expected == study.factors["limiting nutrient"]
    expected = models.FactorInfo(
        "rate",
        models.OntologyTermRef("rate",
                               "http://purl.obolibrary.org/obo/PATO_0000161",
                               "PATO"),
        (models.Comment("FactorsTest", "2"), ),
        expected_headers,
    )
    assert expected == study.factors["rate"]

    # Study 1 - Assays
    # Only the first and the last of the three assays are compared in full.
    assert 3 == len(study.assays)
    expected_headers = [
        *investigation_headers.STUDY_ASSAYS_KEYS, "Comment[Extra Info]"
    ]
    expected = models.AssayInfo(
        models.OntologyTermRef("protein expression profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000615",
                               "OBI"),
        models.OntologyTermRef("mass spectrometry",
                               "http://purl.obolibrary.org/obo/OBI_0000470",
                               "OBI"),
        "iTRAQ",
        Path("a_proteome.txt"),
        (models.Comment("Extra Info", "a"), ),
        expected_headers,
    )
    assert expected == study.assays[0]
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("Extra Info", "c"), ),
        expected_headers,
    )
    assert expected == study.assays[2]

    # Study 1 - Protocols
    # Two of the seven protocols are compared in full, looked up by name.
    # ``Comment[Protocol Rating]`` is expected after the first seven
    # standard protocol keys.
    assert 7 == len(study.protocols)
    expected_headers = [
        *investigation_headers.STUDY_PROTOCOLS_KEYS[0:7],
        "Comment[Protocol Rating]",
        *investigation_headers.STUDY_PROTOCOLS_KEYS[7:],
    ]
    expected = models.ProtocolInfo(
        "growth protocol",
        models.OntologyTermRef("growth", "", ""),
        "1. Biomass samples (45 ml) were taken via the sample port of the "
        "Applikon fermenters. The cells were pelleted by centrifugation for 5 "
        "min at 5000 rpm. The supernatant was removed and the RNA pellet "
        "resuspended in the residual medium to form a slurry. This was added "
        "in a dropwise manner directly into a 5 ml Teflon flask (B. Braun "
        "Biotech, Germany) containing liquid nitrogen and a 7 mm-diameter "
        "tungsten carbide ball. After allowing evaporation of the liquid "
        "nitrogen the flask was reassembled and the cells disrupted by "
        "agitation at 1500 rpm for 2 min in a Microdismembranator U (B. Braun "
        "Biotech, Germany) 2. The frozen powder was then dissolved in 1 ml of "
        "TriZol reagent (Sigma-Aldrich, UK), vortexed for 1 min, and then kept"
        " at room temperature for a further 5min. 3. Chloroform extraction was"
        " performed by addition of 0.2 ml chloroform, shaking vigorously or 15"
        " s, then 5min incubation at room temperature. 4. Following "
        "centrifugation at 12,000 rpm for 5 min, the RNA (contained in the "
        "aqueous phase) was precipitated with 0.5 vol of 2-propanol at room "
        "temperature for 15 min. 5. After further centrifugation (12,000 rpm "
        "for 10 min at 4 C) the RNA pellet was washed twice with 70 % (v/v) "
        "ethanol, briefly air-dried, and redissolved in 0.5 ml diethyl "
        "pyrocarbonate (DEPC)-treated water. 6. The single-stranded RNA was "
        "precipitated once more by addition of 0.5 ml of LiCl buffer (4 M "
        "LiCl, 20 mM Tris-HCl, pH 7.5, 10 mM EDTA), thus removing tRNA and "
        "DNA from the sample. 7. After precipitation (20 C for 1h) and "
        "centrifugation (12,000 rpm, 30 min, 4 C), the RNA was washed twice in"
        " 70 % (v/v) ethanol prior to being dissolved in a minimal volume of "
        "DEPC-treated water. 8. Total RNA quality was checked using the RNA "
        "6000 Nano Assay, and analysed on an Agilent 2100 Bioanalyser (Agilent"
        " Technologies). RNA was quantified using the Nanodrop ultra low "
        "volume spectrophotometer (Nanodrop Technologies).",
        "",
        "",
        {
            "rate":
            models.OntologyTermRef(
                "rate", "http://purl.obolibrary.org/obo/PATO_0000161", "PATO")
        },
        {},
        (models.Comment("Protocol Rating", "1"), ),
        expected_headers,
    )
    assert expected == study.protocols["growth protocol"]
    expected = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef("extraction",
                               "http://purl.obolibrary.org/obo/OBI_0302884",
                               "OBI"),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "",
                                                      ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {
            "pipette":
            models.ProtocolComponentInfo(
                "pipette",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            )
        },
        (models.Comment("Protocol Rating", "7"), ),
        expected_headers,
    )
    assert expected == study.protocols["metabolite extraction"]

    # Study 1 - Contacts
    # All three study contacts are compared in full; their
    # ``Study Person REF`` comment is expected to be empty.
    assert 3 == len(study.contacts)
    expected_headers = [
        *investigation_headers.STUDY_CONTACTS_KEYS, "Comment[Study Person REF]"
    ]
    expected = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("corresponding author", "", ""),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[0]
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[1]
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[2]

    # Study 2
    # The complete basic info object (path, identifier, title, description,
    # dates, comments and headers) is compared in one go.
    study = investigation.studies[1]
    expected = models.BasicInfo(
        Path("s_BII-S-2.txt"),
        "BII-S-2",
        "A time course analysis of transcription response in yeast treated "
        "with rapamycin, a specific inhibitor of the TORC1 complex: impact "
        "on yeast growth",
        "Comprehensive high-throughput analyses at the levels of mRNAs, "
        "proteins, and metabolites, and studies on gene expression patterns "
        "are required for systems biology studies of cell growth [4,26-29]. "
        "Although such comprehensive data sets are lacking, many studies have "
        "pointed to a central role for the target-of-rapamycin (TOR) signal "
        "transduction pathway in growth control. TOR is a serine/threonine "
        "kinase that has been conserved from yeasts to mammals; it integrates "
        "signals from nutrients or growth factors to regulate cell growth and "
        "cell-cycle progression coordinately. Although such comprehensive data "
        "sets are lacking, many studies have pointed to a central role for the "
        "target-of-rapamycin (TOR) signal transduction pathway in growth "
        "control. TOR is a serine/threonine kinase that has been conserved "
        "from yeasts to mammals; it integrates signals from nutrients or "
        "growth factors to regulate cell growth and cell-cycle progression "
        "coordinately. The effect of rapamycin were studied as follows: a "
        "culture growing at mid-exponential phase was divided into two. "
        "Rapamycin (200 ng/ml) was added to one half, and the drug's solvent "
        "to the other, as the control. Samples were taken at 0, 1, 2 and 4 h "
        "after treatment. Gene expression at the mRNA level was investigated "
        "by transcriptome analysis using Affymetrix hybridization arrays.",
        date(2007, 4, 30),
        date(2009, 3, 10),
        (
            models.Comment("Study Grant Number", ""),
            models.Comment("Study Funding Agency", ""),
            models.Comment("Manuscript Licence", "CC BY 3.0"),
            models.Comment("Experimental Metadata Licence", "CC0"),
            models.Comment("Data Repository", ""),
            models.Comment("Data Record Accession", ""),
            models.Comment("Data Record URI", ""),
            models.Comment("Supplementary Information File Name", ""),
            models.Comment("Supplementary Information File Type", ""),
            models.Comment("Supplementary File URI", ""),
            models.Comment("Subject Keywords", ""),
        ),
        [
            # Two comment columns sit after the first three standard keys,
            # the remaining ones follow the rest of the standard keys.
            *investigation_headers.STUDY_INFO_KEYS[0:3],
            "Comment[Study Grant Number]",
            "Comment[Study Funding Agency]",
            *investigation_headers.STUDY_INFO_KEYS[3:],
            "Comment[Manuscript Licence]",
            "Comment[Experimental Metadata Licence]",
            "Comment[Data Repository]",
            "Comment[Data Record Accession]",
            "Comment[Data Record URI]",
            "Comment[Supplementary Information File Name]",
            "Comment[Supplementary Information File Type]",
            "Comment[Supplementary File URI]",
            "Comment[Subject Keywords]",
        ],
    )
    assert expected == study.info

    # Study 2 - Factors
    # Only "exposure time" of the three factors is compared in full; no
    # comment columns are expected here.
    assert 3 == len(study.factors)
    expected = models.FactorInfo(
        "exposure time",
        models.OntologyTermRef("time",
                               "http://purl.obolibrary.org/obo/PATO_0000165",
                               "OBI_BCGO"),
        (),
        [*investigation_headers.STUDY_FACTORS_KEYS],
    )
    assert expected == study.factors["exposure time"]

    # Study 2 - Assays
    assert 1 == len(study.assays)
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_microarray.txt"),
        (),
        [*investigation_headers.STUDY_ASSAYS_KEYS],
    )
    assert expected == study.assays[0]

    # Study 2 - Protocols
    # Only "NMR spectroscopy" of the ten protocols is compared in full.
    assert 10 == len(study.protocols)
    expected = models.ProtocolInfo(
        "NMR spectroscopy",
        models.OntologyTermRef("NMR spectroscopy",
                               "http://purl.obolibrary.org/obo/OBI_0000623",
                               "OBI"),
        "",
        "",
        "",
        {},
        {
            # The "NMR tubes" component is expected with an all-``None``
            # type term, unlike the other component.
            "NMR tubes":
            models.ProtocolComponentInfo(
                "NMR tubes", models.OntologyTermRef(None, None, None)),
            "Bruker-Av600":
            models.ProtocolComponentInfo(
                "Bruker-Av600",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            ),
        },
        (),
        [*investigation_headers.STUDY_PROTOCOLS_KEYS],
    )
    assert expected == study.protocols["NMR spectroscopy"]

    # Study 2 - Contacts
    # Only the second of the three contacts is compared in full; here the
    # ``Study Person REF`` comment is expected to hold "personB".
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", "personB"), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]
Example #6
0
def create_and_write(out_path):
    """Build a minimal investigation, study and assay and write them to ``out_path``.

    The investigation (``i_minimal.txt``), its single study
    (``s_minimal.txt``) and that study's single assay (``a_minimal.txt``)
    are assembled from ``models`` objects. Each of the three parts is
    validated immediately before it is written as an ISA-Tab text file
    into the directory ``out_path``.
    """

    def _open_output(file_name):
        # All three files are opened identically: text mode, ``newline=""``,
        # located inside ``out_path``.
        return open(join(out_path, file_name), "wt", newline="")

    def _plain_protocol(protocol_name):
        # Protocol without any details; its type term reuses the protocol name.
        return models.ProtocolInfo(
            name=protocol_name,
            type=models.OntologyTermRef(name=protocol_name),
            description=None,
            uri=None,
            version=None,
            parameters={},
            components={},
            comments=(),
            headers=[],
        )

    def _plain_material(node_type, node_id, label, header):
        # Material node without annotations. An explicit header per node is
        # currently required to enable export to ISA-Tab.
        return models.Material(
            type=node_type,
            unique_name=node_id,
            name=label,
            extract_label=None,
            characteristics=(),
            comments=(),
            factor_values=(),
            material_type=None,
            headers=[header],
        )

    def _plain_process(protocol_name, node_id, label, label_type, headers):
        # Process node without date, performer, parameters or comments.
        return models.Process(
            protocol_ref=protocol_name,
            unique_name=node_id,
            name=label,
            name_type=label_type,
            date=None,
            performer=None,
            parameter_values=(),
            comments=(),
            array_design_ref=None,
            first_dimension=None,
            second_dimension=None,
            headers=headers,
        )

    # ---- Investigation ----------------------------------------------------

    # Study section: basic information first ...
    study_basics = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # ... then the single assay of the study ...
    exome_assay = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # ... and the two protocols referenced by the graphs further down.
    protocols = (
        _plain_protocol("sample collection"),
        _plain_protocol("nucleic acid sequencing"),
    )

    minimal_study = models.StudyInfo(
        info=study_basics,
        designs=(),
        publications=(),
        factors={},
        assays=(exome_assay, ),
        protocols={protocol.name: protocol for protocol in protocols},
        contacts=(),
    )

    # Remaining investigation sections: ontology source reference ...
    obi_ref = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # ... and the basic investigation information.
    investigation_basics = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    investigation = models.InvestigationInfo(
        ontology_source_refs={obi_ref.name: obi_ref},
        info=investigation_basics,
        publications=(),
        contacts=(),
        studies=(minimal_study, ),
    )

    # Validate, then write the investigation as ISA-Tab txt file
    InvestigationValidator(investigation).validate()
    with _open_output(investigation.info.path) as handle:
        investigation_writer = InvestigationWriter.from_stream(
            investigation=investigation, output_file=handle)
        investigation_writer.write()

    # Shortcuts to the entries the graphs below belong to
    study_entry = investigation.studies[0]
    assay_entry = study_entry.assays[0]

    # ---- Study graph ------------------------------------------------------

    # At least one source, one sample and one collection process are needed.
    # Nodes are identified unambiguously by their unique names, which are
    # also what the arcs refer to.
    source_id = "S1-source-0815"
    sample_id = "S1-sample-0815-N1"
    collection_id = "S1-sample collection-2-1"

    source = _plain_material("Source Name", source_id, "0815",
                             table_headers.SOURCE_NAME)
    sample = _plain_material("Sample Name", sample_id, "0815-N1",
                             table_headers.SAMPLE_NAME)
    collection = _plain_process("sample collection", collection_id, None,
                                None, [table_headers.PROTOCOL_REF])

    study_graph = models.Study(
        file=study_entry.info.path,
        header=None,
        materials={node.unique_name: node for node in (source, sample)},
        processes={collection.unique_name: collection},
        # source -> collection -> sample
        arcs=(
            models.Arc(tail=source_id, head=collection_id),
            models.Arc(tail=collection_id, head=sample_id),
        ),
    )

    # Validate, then write the study as ISA-Tab txt file
    StudyValidator(investigation=investigation,
                   study_info=study_entry,
                   study=study_graph).validate()
    with _open_output(study_entry.info.path) as handle:
        study_writer = StudyWriter.from_stream(study_or_assay=study_graph,
                                               output_file=handle)
        study_writer.write()

    # ---- Assay graph ------------------------------------------------------

    # At least one sample, one output material and one process are needed.
    read1_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    read2_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"
    sequencing_id = "S1-A1-0815-N1-DNA1-WES1-3"

    assay_sample = _plain_material("Sample Name", sample_id, "0815-N1",
                                   table_headers.SAMPLE_NAME)
    read1_file = _plain_material("Raw Data File", read1_id,
                                 "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
                                 table_headers.RAW_DATA_FILE)
    read2_file = _plain_material("Raw Data File", read2_id,
                                 "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
                                 table_headers.RAW_DATA_FILE)
    sequencing = _plain_process(
        "nucleic acid sequencing",
        sequencing_id,
        "0815-N1-DNA1-WES1",
        "Assay Name",
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    assay_nodes = (assay_sample, read1_file, read2_file)
    assay_graph = models.Assay(
        file=assay_entry.path,
        header=None,
        materials={node.unique_name: node for node in assay_nodes},
        processes={sequencing.unique_name: sequencing},
        # sample -> sequencing -> first data file -> second data file
        arcs=(
            models.Arc(tail=sample_id, head=sequencing_id),
            models.Arc(tail=sequencing_id, head=read1_id),
            models.Arc(tail=read1_id, head=read2_id),
        ),
    )

    # Validate, then write the assay as ISA-Tab txt file
    AssayValidator(
        investigation=investigation,
        study_info=study_entry,
        assay_info=assay_entry,
        assay=assay_graph,
    ).validate()
    with _open_output(assay_entry.path) as handle:
        assay_writer = AssayWriter.from_stream(study_or_assay=assay_graph,
                                               output_file=handle)
        assay_writer.write()