コード例 #1
0
ファイル: test_parse_study.py プロジェクト: bihealth/altamisa
def test_study_row_reader_minimal_study(minimal_investigation_file,
                                        minimal_study_file):
    """Check the row-wise nodes ``StudyRowReader`` yields for the minimal study.

    The file is expected to expose three header columns and exactly one row
    made of a source, a "sample collection" process and a sample.
    ``minimal_investigation_file`` is requested as a fixture but is not used
    in the body.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    # The header is inspected before any row has been read.
    study_rows = StudyRowReader.from_stream("S1", minimal_study_file)
    assert 3 == len(study_rows.header)

    # Materialise every row the reader yields.
    parsed = list(study_rows.read())
    assert 1 == len(parsed)

    row = parsed[0]
    assert 3 == len(row)

    want_source = plain_material(
        "Source Name", "S1-source-0815", "0815", [table_headers.SOURCE_NAME])
    assert want_source == row[0]

    want_collection = models.Process(
        "sample collection", "S1-sample collection-2-1",
        None, None, None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF],
    )
    assert want_collection == row[1]

    want_sample = plain_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1",
        [table_headers.SAMPLE_NAME])
    assert want_sample == row[2]
コード例 #2
0
def test_assay_reader_minimal_assay(minimal_investigation_file,
                                    minimal_assay_file):
    """Read the minimal assay with ``AssayReader`` and check the result.

    In contrast to ``AssayRowReader`` (which yields rows of nodes), the
    object returned by ``AssayReader.read()`` is checked here through its
    ``file``, ``header``, ``materials``, ``processes`` and ``arcs``
    attributes.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    # Unique node names, bound once so materials, processes and arcs agree.
    sample_id = "S1-sample-0815-N1"
    sequencing_id = "S1-A1-0815-N1-DNA1-WES1-3"
    fastq_r1_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    fastq_r2_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"

    # The investigation is only a prerequisite for assay validation here.
    isa = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(isa).validate()
    # Validation of the investigation must emit exactly one warning.
    assert 1 == len(caught)

    # Build the assay reader; the header is inspected before reading.
    assay_reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert 5 == len(assay_reader.header)

    # Parse the assay and validate it against the first study's first assay.
    assay = assay_reader.read()
    first_study = isa.studies[0]
    AssayValidator(isa, first_study, first_study.assays[0], assay).validate()

    # Path comparison is normalised so it works with either separator style.
    expected_suffix = os.path.normpath("data/i_minimal/a_minimal.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert 5 == len(assay.header)
    assert 3 == len(assay.materials)
    assert 1 == len(assay.processes)
    assert 3 == len(assay.arcs)

    want_sample = plain_material(
        "Sample Name", sample_id, "0815-N1", [table_headers.SAMPLE_NAME])
    assert want_sample == assay.materials[sample_id]

    want_r1 = plain_material(
        "Raw Data File", fastq_r1_id,
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want_r1 == assay.materials[fastq_r1_id]

    want_r2 = plain_material(
        "Raw Data File", fastq_r2_id,
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want_r2 == assay.materials[fastq_r2_id]

    want_sequencing = models.Process(
        "nucleic acid sequencing", sequencing_id,
        "0815-N1-DNA1-WES1", "Assay Name",
        None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert want_sequencing == assay.processes[sequencing_id]

    # Arcs are listed as (tail, head) pairs in the expected order.
    arc_pairs = (
        (sample_id, sequencing_id),
        (sequencing_id, fastq_r1_id),
        (fastq_r1_id, fastq_r2_id),
    )
    want_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert want_arcs == assay.arcs
コード例 #3
0
def test_assay_reader_gelelect(gelelect_investigation_file,
                               gelelect_assay_file):
    """Read the gel electrophoresis assay with ``AssayReader``.

    Loading and validating the investigation together with reading and
    validating the assay is expected to emit four ``IsaWarning`` in total.
    Afterwards one image file material and three processes are compared
    against fully spelled-out expectations.
    """

    def process(protocol, unique_name, name, name_type, term_a, term_b,
                headers):
        # Only the fields that differ between the expected processes are
        # parameters; every other positional field keeps its empty value.
        return models.Process(protocol, unique_name, name, name_type,
                              None, None, (), (), None, term_a, term_b,
                              headers)

    # Everything that may warn runs inside a single recording context.
    with pytest.warns(IsaWarning) as caught:
        isa = InvestigationReader.from_stream(
            gelelect_investigation_file).read()
        InvestigationValidator(isa).validate()

        # Build the assay reader; the header is inspected before reading.
        assay_reader = AssayReader.from_stream(
            "S1", "A1", gelelect_assay_file)
        assert 22 == len(assay_reader.header)

        # Parse the assay and validate it against the first study's
        # first assay.
        assay = assay_reader.read()
        first_study = isa.studies[0]
        AssayValidator(isa, first_study, first_study.assays[0],
                       assay).validate()

    assert 4 == len(caught)

    # Path comparison is normalised so it works with either separator style.
    expected_suffix = os.path.normpath(
        "data/test_gelelect/a_study01_protein_expression_profiling_gel_electrophoresis.txt"
    )
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert 22 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 10 == len(assay.processes)
    assert 18 == len(assay.arcs)

    image_id = "S1-A1-Image01.jpeg-COL19"
    want_image = models.Material(
        "Image File", image_id, "Image01.jpeg",
        None, (), (), (), None,
        [table_headers.IMAGE_FILE],
    )
    assert want_image == assay.materials[image_id]

    scan_id = "S1-A1-Scan02-18"
    want_scan = process(
        "data collection", scan_id, "Scan02", "Scan Name", None, None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME])
    assert want_scan == assay.processes[scan_id]

    # Both electrophoresis processes are expected to carry the same headers.
    electrophoresis_headers = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.SECOND_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]

    named_id = "S1-A1-Assay01-10"
    want_named = process(
        "electrophoresis", named_id, "Assay01",
        "Gel Electrophoresis Assay Name",
        models.OntologyTermRef("", "", ""),
        models.OntologyTermRef("", "", ""),
        electrophoresis_headers)
    assert want_named == assay.processes[named_id]

    # This process has an empty name; its unique name is built from the
    # protocol instead.
    unnamed_id = "S1-A1-electrophoresis-9-2"
    want_unnamed = process(
        "electrophoresis", unnamed_id, "",
        "Gel Electrophoresis Assay Name",
        models.OntologyTermRef("AssayX", None, None),
        models.OntologyTermRef("AssayY", None, None),
        electrophoresis_headers)
    assert want_unnamed == assay.processes[unnamed_id]
コード例 #4
0
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Read the small assay with ``AssayReader`` and check the parsed graph.

    Validating the investigation is expected to emit one ``IsaWarning``, and
    reading plus validating the assay another one.  Seven materials, four
    processes and the complete arc tuple are then compared against
    spelled-out expectations.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    def plain_process(protocol, unique_name, name, name_type, headers):
        # Same idea as ``plain_material`` for processes.
        return models.Process(protocol, unique_name, name, name_type,
                              None, None, (), (), None, None, None, headers)

    # Unique node names, bound once so materials, processes and arcs agree.
    sample_n1 = "S1-sample-0815-N1"
    sample_t1 = "S1-sample-0815-T1"
    libprep_n1 = "S1-A1-library preparation-2-1"
    libprep_t1 = "S1-A1-library preparation-2-2"
    library_n1 = "S1-A1-0815-N1-DNA1-COL3"
    library_t1 = "S1-A1-0815-T1-DNA1-COL3"
    sequencing_n1 = "S1-A1-0815-N1-DNA1-WES1-5"
    sequencing_t1 = "S1-A1-0815-T1-DNA1-WES1-5"
    r1_n1 = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"
    r2_n1 = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"
    r1_t1 = "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"
    r2_t1 = "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"
    calling = "S1-A1-somatic variant calling-1-8"
    vcf = "S1-A1-0815-somatic.vcf.gz-COL9"

    # The investigation is only a prerequisite for assay validation here.
    isa = InvestigationReader.from_stream(small_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(isa).validate()
    assert 1 == len(caught)

    # Build the assay reader; the header is inspected before reading.
    assay_reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert 9 == len(assay_reader.header)

    # Reading and validating the assay together must emit one warning.
    with pytest.warns(IsaWarning) as caught:
        assay = assay_reader.read()
        first_study = isa.studies[0]
        AssayValidator(isa, first_study, first_study.assays[0],
                       assay).validate()
    assert 1 == len(caught)

    # Path comparison is normalised so it works with either separator style.
    expected_suffix = os.path.normpath("data/i_small/a_small.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert 9 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 5 == len(assay.processes)
    assert 13 == len(assay.arcs)

    # --- materials ---------------------------------------------------------
    want = plain_material(
        "Sample Name", sample_n1, "0815-N1", [table_headers.SAMPLE_NAME])
    assert want == assay.materials[sample_n1]

    want = plain_material(
        "Sample Name", sample_t1, "0815-T1", [table_headers.SAMPLE_NAME])
    assert want == assay.materials[sample_t1]

    want = plain_material(
        "Raw Data File", r1_n1, "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want == assay.materials[r1_n1]

    want = plain_material(
        "Raw Data File", r2_n1, "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want == assay.materials[r2_n1]

    want = plain_material(
        "Raw Data File", r1_t1, "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want == assay.materials[r1_t1]

    want = plain_material(
        "Raw Data File", r2_t1, "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want == assay.materials[r2_t1]

    want = plain_material(
        "Derived Data File", vcf, "0815-somatic.vcf.gz",
        [table_headers.DERIVED_DATA_FILE])
    assert want == assay.materials[vcf]

    # --- processes ---------------------------------------------------------
    want = plain_process(
        "library preparation", libprep_n1, None, None,
        [table_headers.PROTOCOL_REF])
    assert want == assay.processes[libprep_n1]

    want = plain_process(
        "library preparation", libprep_t1, None, None,
        [table_headers.PROTOCOL_REF])
    assert want == assay.processes[libprep_t1]

    want = plain_process(
        "nucleic acid sequencing", sequencing_n1,
        "0815-N1-DNA1-WES1", "Assay Name",
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME])
    assert want == assay.processes[sequencing_n1]

    want = plain_process(
        "nucleic acid sequencing", sequencing_t1,
        "0815-T1-DNA1-WES1", "Assay Name",
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME])
    assert want == assay.processes[sequencing_t1]

    # --- arcs --------------------------------------------------------------
    # (tail, head) pairs in the expected order; both sample chains end in
    # the same variant calling process.
    arc_pairs = (
        (sample_n1, libprep_n1),
        (libprep_n1, library_n1),
        (library_n1, sequencing_n1),
        (sequencing_n1, r1_n1),
        (r1_n1, r2_n1),
        (r2_n1, calling),
        (calling, vcf),
        (sample_t1, libprep_t1),
        (libprep_t1, library_t1),
        (library_t1, sequencing_t1),
        (sequencing_t1, r1_t1),
        (r1_t1, r2_t1),
        (r2_t1, calling),
    )
    want_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert want_arcs == assay.arcs
コード例 #5
0
def test_assay_row_reader_small_assay(small_investigation_file,
                                      small_assay_file):
    """Check the row-wise nodes ``AssayRowReader`` yields for the small assay.

    The file is expected to expose nine header columns and two rows of eight
    nodes each.  Both rows are expected to end in the same variant calling
    process and derived data file.  ``small_investigation_file`` is requested
    as a fixture but is not used in the body.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    def plain_process(protocol, unique_name, name, name_type, headers):
        # Same idea as ``plain_material`` for processes.
        return models.Process(protocol, unique_name, name, name_type,
                              None, None, (), (), None, None, None, headers)

    def variant_calling():
        # Process node expected at position 6 of both rows.
        return plain_process(
            "Unknown", "S1-A1-somatic variant calling-1-8",
            "somatic variant calling-1", "Data Transformation Name",
            [table_headers.DATA_TRANSFORMATION_NAME])

    def somatic_vcf():
        # Derived data file expected at position 7 of both rows.
        return plain_material(
            "Derived Data File", "S1-A1-0815-somatic.vcf.gz-COL9",
            "0815-somatic.vcf.gz", [table_headers.DERIVED_DATA_FILE])

    # The header is inspected before any row has been read.
    assay_rows = AssayRowReader.from_stream("S1", "A1", small_assay_file)
    assert 9 == len(assay_rows.header)

    # Materialise every row the reader yields.
    parsed = list(assay_rows.read())
    assert 2 == len(parsed)
    row_n1, row_t1 = parsed[0], parsed[1]

    # Expected nodes of the first row, in column order.
    want_n1 = [
        plain_material("Sample Name", "S1-sample-0815-N1", "0815-N1",
                       [table_headers.SAMPLE_NAME]),
        plain_process("library preparation", "S1-A1-library preparation-2-1",
                      None, None, [table_headers.PROTOCOL_REF]),
        plain_material("Library Name", "S1-A1-0815-N1-DNA1-COL3",
                       "0815-N1-DNA1", [table_headers.LIBRARY_NAME]),
        plain_process("nucleic acid sequencing", "S1-A1-0815-N1-DNA1-WES1-5",
                      "0815-N1-DNA1-WES1", "Assay Name",
                      [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME]),
        plain_material("Raw Data File",
                       "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
                       "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
                       [table_headers.RAW_DATA_FILE]),
        plain_material("Raw Data File",
                       "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                       "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
                       [table_headers.RAW_DATA_FILE]),
        variant_calling(),
        somatic_vcf(),
    ]

    # Expected nodes of the second row, in column order.
    want_t1 = [
        plain_material("Sample Name", "S1-sample-0815-T1", "0815-T1",
                       [table_headers.SAMPLE_NAME]),
        plain_process("library preparation", "S1-A1-library preparation-2-2",
                      None, None, [table_headers.PROTOCOL_REF]),
        plain_material("Library Name", "S1-A1-0815-T1-DNA1-COL3",
                       "0815-T1-DNA1", [table_headers.LIBRARY_NAME]),
        plain_process("nucleic acid sequencing", "S1-A1-0815-T1-DNA1-WES1-5",
                      "0815-T1-DNA1-WES1", "Assay Name",
                      [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME]),
        plain_material("Raw Data File",
                       "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
                       "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
                       [table_headers.RAW_DATA_FILE]),
        plain_material("Raw Data File",
                       "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                       "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
                       [table_headers.RAW_DATA_FILE]),
        variant_calling(),
        somatic_vcf(),
    ]

    # Check each row's length first, then its nodes position by position.
    for actual_row, wanted_nodes in ((row_n1, want_n1), (row_t1, want_t1)):
        assert 8 == len(actual_row)
        for position, wanted in enumerate(wanted_nodes):
            assert wanted == actual_row[position]
コード例 #6
0
def test_assay_row_reader_minimal_assay(minimal_investigation_file,
                                        minimal_assay_file):
    """Check the row-wise nodes ``AssayRowReader`` yields for the minimal assay.

    The file is expected to expose five header columns and exactly one row
    made of a sample, a sequencing process and two raw data files.
    ``minimal_investigation_file`` is requested as a fixture but is not used
    in the body.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    # The header is inspected before any row has been read.
    assay_rows = AssayRowReader.from_stream("S1", "A1", minimal_assay_file)
    assert 5 == len(assay_rows.header)

    # Materialise every row the reader yields.
    parsed = list(assay_rows.read())
    assert 1 == len(parsed)

    row = parsed[0]
    assert 4 == len(row)

    want_sample = plain_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1",
        [table_headers.SAMPLE_NAME])
    assert want_sample == row[0]

    want_sequencing = models.Process(
        "nucleic acid sequencing", "S1-A1-0815-N1-DNA1-WES1-3",
        "0815-N1-DNA1-WES1", "Assay Name",
        None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert want_sequencing == row[1]

    want_r1 = plain_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want_r1 == row[2]

    want_r2 = plain_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        [table_headers.RAW_DATA_FILE])
    assert want_r2 == row[3]
コード例 #7
0
ファイル: test_parse_study.py プロジェクト: bihealth/altamisa
def test_study_reader_minimal_study(minimal_investigation_file,
                                    minimal_study_file):
    """Read the minimal study with ``StudyReader`` and check the result.

    In contrast to ``StudyRowReader`` (which yields rows of nodes), the
    object returned by ``StudyReader.read()`` is checked here through its
    ``file``, ``header``, ``materials``, ``processes`` and ``arcs``
    attributes.
    """

    def plain_material(kind, unique_name, name, headers):
        # Only the varying fields are parameters; the remaining positional
        # fields keep the empty values (None / ()) used throughout this test.
        return models.Material(kind, unique_name, name, None, (), (), (),
                               None, headers)

    # Unique node names, bound once so materials, processes and arcs agree.
    source_id = "S1-source-0815"
    collection_id = "S1-sample collection-2-1"
    sample_id = "S1-sample-0815-N1"

    # The investigation is only a prerequisite for study validation here.
    isa = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(isa).validate()
    # NOTE(review): test_assay_reader_minimal_assay in this file expects a
    # single warning from what looks like the same validation call -- confirm
    # which count matches the current validator.
    assert 2 == len(caught)

    # Build the study reader; the header is inspected before reading.
    study_reader = StudyReader.from_stream("S1", minimal_study_file)
    assert 3 == len(study_reader.header)

    # Parse the study and validate it against the investigation's first study.
    study = study_reader.read()
    StudyValidator(isa, isa.studies[0], study).validate()

    # Path comparison is normalised so it works with either separator style.
    expected_suffix = os.path.normpath("data/i_minimal/s_minimal.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    assert 3 == len(study.header)
    assert 2 == len(study.materials)
    assert 1 == len(study.processes)
    assert 2 == len(study.arcs)

    want_source = plain_material(
        "Source Name", source_id, "0815", [table_headers.SOURCE_NAME])
    assert want_source == study.materials[source_id]

    want_sample = plain_material(
        "Sample Name", sample_id, "0815-N1", [table_headers.SAMPLE_NAME])
    assert want_sample == study.materials[sample_id]

    want_collection = models.Process(
        "sample collection", collection_id,
        None, None, None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF],
    )
    assert want_collection == study.processes[collection_id]

    # Arcs are listed as (tail, head) pairs in the expected order.
    arc_pairs = (
        (source_id, collection_id),
        (collection_id, sample_id),
    )
    want_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert want_arcs == study.arcs
コード例 #8
0
ファイル: test_parse_study.py プロジェクト: bihealth/altamisa
def test_study_reader_small_study(small_investigation_file, small_study_file):
    """Read the small study with ``StudyReader`` and check the parsed graph.

    Reading and validating the investigation is expected to emit two
    ``IsaWarning``.  Three sources, four samples, three "sample collection"
    processes and the complete arc tuple are then compared against
    spelled-out expectations, including characteristics, factor values and
    parameter values.
    """
    # Reading and validation share one recording context on purpose.
    with pytest.warns(IsaWarning) as caught:
        isa = InvestigationReader.from_stream(small_investigation_file).read()
        InvestigationValidator(isa).validate()
    assert 2 == len(caught)

    # Build the study reader; the header is inspected before reading.
    study_reader = StudyReader.from_stream("S1", small_study_file)
    assert 13 == len(study_reader.header)

    # Parse the study and validate it against the investigation's first study.
    study = study_reader.read()
    StudyValidator(isa, isa.studies[0], study).validate()

    # Path comparison is normalised so it works with either separator style.
    expected_suffix = os.path.normpath("data/i_small/s_small.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    assert 13 == len(study.header)
    assert 9 == len(study.materials)
    assert 5 == len(study.processes)
    assert 10 == len(study.arcs)

    # Header lists expected on each node type.
    source_headers = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    collection_headers = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    sample_headers = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    # Unit shared by every expected "age" characteristic.
    day = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO",
    )

    def organism_and_age(organism, age):
        # Characteristics pair expected on each source.
        return (
            models.Characteristics(
                name="organism", value=[organism], unit=None),
            models.Characteristics(name="age", value=[age], unit=day),
        )

    def source(unique_name, name, characteristics):
        return models.Material("Source Name", unique_name, name, None,
                               characteristics, (), (), None, source_headers)

    def sample(unique_name, name, status, treatment):
        return models.Material(
            "Sample Name", unique_name, name, None,
            (models.Characteristics("status", [status], None), ),
            (),
            (models.FactorValue("treatment", treatment, None), ),
            None,
            sample_headers,
        )

    def collection(unique_name, instruments):
        return models.Process(
            "sample collection", unique_name, None, None,
            date(2018, 2, 2), "John Doe",
            (models.ParameterValue("instrument", instruments, None), ),
            (), None, None, None,
            collection_headers,
        )

    # --- sources -----------------------------------------------------------
    # Fully annotated organism term.
    want = source("S1-source-0815", "0815", organism_and_age(
        models.OntologyTermRef(
            name="Mus musculus",
            accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
            ontology_name="NCBITAXON",
        ),
        "90"))
    assert want == study.materials["S1-source-0815"]

    # Organism name only, with empty accession and ontology, and empty age.
    want = source("S1-source-0816", "0816", organism_and_age(
        models.OntologyTermRef("Mus musculus", "", ""), ""))
    assert want == study.materials["S1-source-0816"]

    # Organism term with all fields set to None.
    want = source("S1-source-0817", "0817", organism_and_age(
        models.OntologyTermRef(None, None, None), "150"))
    assert want == study.materials["S1-source-0817"]

    # --- samples -----------------------------------------------------------
    want = sample("S1-sample-0815-N1", "0815-N1", "0", "yes")
    assert want == study.materials["S1-sample-0815-N1"]

    want = sample("S1-sample-0815-T1", "0815-T1", "2", "")
    assert want == study.materials["S1-sample-0815-T1"]

    want = sample("S1-sample-0816-T1", "0816-T1", "1", "yes")
    assert want == study.materials["S1-sample-0816-T1"]

    # The sample with an empty name is stored under a placeholder key.
    want = sample("S1-Empty Sample Name-13-5", "", "", "")
    assert want == study.materials["S1-Empty Sample Name-13-5"]

    # --- processes ---------------------------------------------------------
    want = collection("S1-sample collection-9-2", ["scalpel"])
    assert want == study.processes["S1-sample collection-9-2"]

    want = collection("S1-sample collection-9-3",
                      ["scalpel type A", "scalpel type B"])
    assert want == study.processes["S1-sample collection-9-3"]

    want = collection("S1-sample collection-9-4", ["scalpel"])
    assert want == study.processes["S1-sample collection-9-4"]

    # --- arcs --------------------------------------------------------------
    # (tail, head) pairs in the expected order.
    arc_pairs = (
        ("S1-source-0814", "S1-sample collection-9-1"),
        ("S1-sample collection-9-1", "S1-sample-0814-N1"),
        ("S1-source-0815", "S1-sample collection-9-2"),
        ("S1-sample collection-9-2", "S1-sample-0815-N1"),
        ("S1-source-0815", "S1-sample collection-9-3"),
        ("S1-sample collection-9-3", "S1-sample-0815-T1"),
        ("S1-source-0816", "S1-sample collection-9-4"),
        ("S1-sample collection-9-4", "S1-sample-0816-T1"),
        ("S1-source-0817", "S1-sample collection-9-5"),
        ("S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
    )
    want_arcs = tuple(models.Arc(tail, head) for tail, head in arc_pairs)
    assert want_arcs == study.arcs
コード例 #9
0
ファイル: test_parse_study.py プロジェクト: bihealth/altamisa
def test_study_row_reader_small_study(small_investigation_file,
                                      small_study_file):
    """Use ``StudyRowReader`` to read in small study file.

    Checks the parsed header (its length and the ``repr`` of the first two
    columns), the number of rows, the source node of the first row, and all
    three nodes (source, process, sample) of the second and third rows.

    ``small_investigation_file`` is requested as a fixture but not read here.
    """

    # Create new row reader and check read headers (+ string representation)
    row_reader = StudyRowReader.from_stream("S1", small_study_file)
    assert 13 == len(row_reader.header)
    rep0 = "ColumnHeader(column_type='Source Name', col_no=0, span=1)"
    rep1 = "LabeledColumnHeader(column_type='Characteristics', col_no=1, span=1, label='organism')"
    assert rep0 == repr(row_reader.header[0])
    assert rep1 == repr(row_reader.header[1])

    # Read all rows in study
    rows = list(row_reader.read())

    # Check results
    assert 5 == len(rows)
    first_row = rows[0]
    second_row = rows[1]
    third_row = rows[2]

    assert 3 == len(second_row)

    # Column headers each node type is expected to carry
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    # Building blocks shared by the expected source nodes below
    unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO")
    age = models.Characteristics(name="age", value=["90"], unit=unit)
    mus_musculus = models.OntologyTermRef(
        name="Mus musculus",
        accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
        ontology_name="NCBITAXON",
    )
    # NCBITAXON 9606 is "Homo sapiens"; the name must be spelled out in full
    # to match what is read from the study file.
    homo_sapiens = models.OntologyTermRef(
        name="Homo sapiens",
        accession="http://purl.bioontology.org/ontology/NCBITAXON/9606",
        ontology_name="NCBITAXON",
    )

    # First row: source 0814 carries two organism terms
    characteristics1 = (
        models.Characteristics(
            name="organism",
            value=[mus_musculus, homo_sapiens],
            unit=None,
        ),
        age,
    )

    expected = models.Material(
        "Source Name",
        "S1-source-0814",
        "0814",
        None,
        characteristics1,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == first_row[0]

    # Second and third row: both start at source 0815 (single organism term)
    characteristics2 = (
        models.Characteristics(
            name="organism",
            value=[mus_musculus],
            unit=None,
        ),
        age,
    )

    expected_source_0815 = models.Material(
        "Source Name",
        "S1-source-0815",
        "0815",
        None,
        characteristics2,
        (),
        (),
        None,
        headers_source,
    )
    assert expected_source_0815 == second_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-2",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        (models.ParameterValue("instrument", ["scalpel"], None), ),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == second_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (models.Characteristics("status", ["0"], None), ),
        (),
        (models.FactorValue("treatment", "yes", None), ),
        None,
        headers_sample,
    )
    assert expected == second_row[2]

    assert 3 == len(third_row)
    assert expected_source_0815 == third_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-3",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        # The instrument parameter of this row holds two values
        (models.ParameterValue("instrument",
                               ["scalpel type A", "scalpel type B"], None), ),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == third_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-T1",
        "0815-T1",
        None,
        (models.Characteristics("status", ["2"], None), ),
        (),
        # Expected factor value for this row is the empty string
        (models.FactorValue("treatment", "", None), ),
        None,
        headers_sample,
    )
    assert expected == third_row[2]
コード例 #10
0
def _build_minimal_investigation():
    """Return the investigation model: one study with one assay and two protocols."""

    # Prepare one or more study sections
    # Prepare basic study information
    study_info = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # Create one or more assays
    assay_01 = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Prepare one or more protocols
    protocol_01 = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    protocol_02 = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Create study object
    study_01 = models.StudyInfo(
        info=study_info,
        designs=(),
        publications=(),
        factors={},
        assays=(assay_01, ),
        protocols={
            protocol_01.name: protocol_01,
            protocol_02.name: protocol_02
        },
        contacts=(),
    )

    # Prepare other investigation section
    # Prepare one or more ontology term source references
    onto_ref_01 = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Prepare basic investigation information
    invest_info = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    # Create investigation object
    return models.InvestigationInfo(
        ontology_source_refs={onto_ref_01.name: onto_ref_01},
        info=invest_info,
        publications=(),
        contacts=(),
        studies=(study_01, ),
    )


def _build_minimal_study_graph(study_path):
    """Return the study graph (source -> sample collection -> sample) for ``study_path``."""

    # Create at least one source, one sample and one collection process
    # Unique names are required for unambiguous node identification
    source_01 = models.Material(
        type="Source Name",
        unique_name="S1-source-0815",
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )

    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )

    process_01 = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arc_01 = models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1")
    arc_02 = models.Arc(tail="S1-sample collection-2-1",
                        head="S1-sample-0815-N1")

    # Create the study graph object
    return models.Study(
        file=study_path,
        header=None,
        materials={
            source_01.unique_name: source_01,
            sample_01.unique_name: sample_01
        },
        processes={process_01.unique_name: process_01},
        arcs=(arc_01, arc_02),
    )


def _build_minimal_assay_graph(assay_path):
    """Return the assay graph (sample -> sequencing -> two raw data files) for ``assay_path``."""

    # Create at least one sample, one output material and one collection process
    # Unique names are required for unambiguous node identification
    # Explicit header definition per node is currently required to enable export to ISA-Tab
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )

    data_file_01 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )

    data_file_02 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )

    process_01 = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    # The two raw data files are chained: process -> R1 file -> R2 file
    arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    # Create the assay graph object
    return models.Assay(
        file=assay_path,
        header=None,
        materials={
            sample_01.unique_name: sample_01,
            data_file_01.unique_name: data_file_01,
            data_file_02.unique_name: data_file_02,
        },
        processes={process_01.unique_name: process_01},
        arcs=arcs,
    )


def create_and_write(out_path):
    """Create an investigation with a study and assay and write to ``out_path``.

    Three files are written into the directory ``out_path``, named after the
    paths stored in the models: ``i_minimal.txt`` (investigation),
    ``s_minimal.txt`` (study) and ``a_minimal.txt`` (assay). Each model is
    validated immediately before it is written, so a file written earlier
    stays on disk if a later step fails.

    :param out_path: Directory the ISA-Tab files are written to; it is joined
        with each file name and is not created here.
    :return: ``None``
    """
    investigation = _build_minimal_investigation()
    study_info = investigation.studies[0]
    assay_info = study_info.assays[0]

    # Validate investigation
    InvestigationValidator(investigation).validate()

    # Write the investigation as ISA-Tab txt file
    # (newline="" disables newline translation on write, so line endings
    # reach the file exactly as the writer emits them)
    with open(join(out_path, investigation.info.path), "wt",
              newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation,
                                        output_file=outputf).write()

    # Create a corresponding Study graph
    study_graph_01 = _build_minimal_study_graph(study_info.info.path)

    # Validate study graph
    StudyValidator(investigation=investigation,
                   study_info=study_info,
                   study=study_graph_01).validate()

    # Write the study as ISA-Tab txt file
    with open(join(out_path, study_info.info.path), "wt",
              newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph_01,
                                output_file=outputf).write()

    # Create a corresponding Assay graph
    assay_graph_01 = _build_minimal_assay_graph(assay_info.path)

    # Validate assay graph
    AssayValidator(
        investigation=investigation,
        study_info=study_info,
        assay_info=assay_info,
        assay=assay_graph_01,
    ).validate()

    # Write the assay as ISA-Tab txt file
    with open(join(out_path, assay_info.path), "wt",
              newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph_01,
                                output_file=outputf).write()