def test_write_BII_I_1_investigation(BII_I_1_investigation_file, tmp_path):
    """Check that the BII-I-1 investigation survives a write/read/write cycle.

    The fixture stream is parsed and validated (exactly one parse warning is
    expected), written to a temporary file, parsed again from that file and
    written a second time. The contents of both written files must match.
    """
    # Parse and validate the fixture while recording every IsaWarning
    with pytest.warns(IsaWarning) as record:
        parsed = InvestigationReader.from_stream(BII_I_1_investigation_file).read()
        InvestigationValidator(parsed).validate()
    # The single expected warning reports the skipped empty ontology source
    assert len(record) == 1
    only_warning = record[0]
    assert only_warning.category == ParseIsatabWarning
    assert str(only_warning.message) == "Skipping empty ontology source: , , , "
    # First pass: serialize what was parsed from the fixture
    first_copy = tmp_path / "i_investigation.txt"
    with open(first_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(parsed, out_stream, lineterminator="\n")
        writer.write()
    # Second pass: parse the file just written and serialize it again
    with open(first_copy, "rt") as in_stream:
        reparsed = InvestigationReader.from_stream(in_stream).read()
    second_copy = tmp_path / "i_investigation_2.txt"
    with open(second_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(reparsed, out_stream, lineterminator="\n")
        writer.write()
    # Full content comparison (shallow=False) of the two written files
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
def test_write_full2_investigation(full2_investigation_file, tmp_path):
    """Check warnings and written output for the "full2" investigation fixture.

    Reading plus validating must raise exactly three warnings with the
    categories and messages listed below, in that order. The investigation is
    then written to a temporary file whose contents must match the fixture.
    """
    # Parse and validate the fixture while recording every IsaWarning
    with pytest.warns(IsaWarning) as record:
        parsed = InvestigationReader.from_stream(full2_investigation_file).read()
        InvestigationValidator(parsed).validate()
    assert len(record) == 3
    # Expected (category, message) per recorded warning, in emission order
    expected = (
        (
            CriticalIsaValidationWarning,
            "Study with incomplete minimal information (ID and path):\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            ModerateIsaValidationWarning,
            "Study without title:\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            CriticalIsaValidationWarning,
            "Assay with incomplete minimal information (path, measurement and technology type):\n"
            "Path:\t\n"
            "Measurement Type:\tmetabolite profiling\n"
            "Technology Type:\tmass spectrometry\n"
            "Technology Platform:\tLC-MS/MS",
        ),
    )
    for position, (category, message) in enumerate(expected):
        assert record[position].category == category
        assert str(record[position].message) == message
    # Serialize the investigation to a temporary file
    written = tmp_path / "i_fullinvest2.txt"
    with open(written, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(parsed, out_stream, lineterminator="\n")
        writer.write()
    # Full content comparison (shallow=False) against the fixture file
    assert filecmp.cmp(full2_investigation_file.name, written, shallow=False)
Example #3
0
    def _perform_update(self, isa, ped_donors):
        """Update the ISA-tab data from ``ped_donors`` and hand the result to the diff helper.

        The investigation, studies and assays of ``isa`` are traversed with a
        ``SheetUpdateVisitor``; donors whose name the visitor did not record in
        ``seen_source_names`` are then appended via ``isa_germline_append_donors``.
        Everything is serialized into in-memory buffers and passed, file by
        file, to ``overwrite_helper``. Writing is disabled when
        ``self.config.dry_run`` is set.
        """
        # Walk the investigation, studies and assays with the updating visitor.
        donor_map = self._build_donor_map(ped_donors)
        visitor = SheetUpdateVisitor(donor_map, self.config)
        iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
        iwalker.run(visitor)
        investigation, studies, assays = iwalker.build_evolved()

        # Donors not encountered during the traversal still need their records.
        unseen_donors = tuple(
            donor for donor in donor_map.values() if donor.name not in visitor.seen_source_names
        )
        studies, assays = isa_germline_append_donors(
            studies, assays, unseen_donors, tuple(visitor.seen_sample_names), self.config
        )
        new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

        # Serialize into string buffers: investigation first, then studies, then assays.
        # NOTE(review): the investigation written here is ``isa.investigation``, not the
        # evolved ``new_isa.investigation`` -- confirm this is intentional.
        investigation_buf = io.StringIO()
        InvestigationWriter.from_stream(isa.investigation, investigation_buf).write()
        pending = []  # (file name, buffer) pairs in the order they will be emitted
        for name, study in new_isa.studies.items():
            buf = io.StringIO()
            StudyWriter.from_stream(study, buf).write()
            pending.append((name, buf))
        for name, assay in new_isa.assays.items():
            buf = io.StringIO()
            AssayWriter.from_stream(assay, buf).write()
            pending.append((name, buf))

        def _emit(target, buf):
            # Single place for the diff-helper options shared by all output files.
            overwrite_helper(
                target,
                buf.getvalue(),
                do_write=not self.config.dry_run,
                show_diff=True,
                show_diff_side_by_side=self.config.show_diff_side_by_side,
                answer_yes=self.config.yes,
            )

        # Study and assay files are placed next to the investigation file.
        i_path = pathlib.Path(self.config.input_investigation_file)
        _emit(i_path, investigation_buf)
        for name, buf in pending:
            _emit(i_path.parent / name, buf)
Example #4
0
    def _perform_update(self, isa, annotation_map, header_map):
        """Apply annotation updates to the ISA-tab data and hand the result to the diff helper.

        The investigation, studies and assays of ``isa`` are traversed with a
        ``SheetUpdateVisitor`` built from ``annotation_map``, ``header_map`` and
        the ``force_update`` / ``target_study`` / ``target_assay`` settings of
        ``self.config``. The evolved data is serialized into in-memory buffers
        and passed, file by file, to ``overwrite_helper``. Writing is disabled
        when ``self.config.dry_run`` is set.
        """
        # Walk the investigation, studies and assays with the updating visitor.
        config = self.config
        visitor = SheetUpdateVisitor(
            annotation_map,
            header_map,
            config.force_update,
            config.target_study,
            config.target_assay,
        )
        iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
        iwalker.run(visitor)
        investigation, studies, assays = iwalker.build_evolved()

        new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

        # Serialize into string buffers: investigation first, then studies, then assays.
        # NOTE(review): the investigation written here is ``isa.investigation``, not the
        # evolved ``new_isa.investigation`` -- confirm this is intentional.
        investigation_buf = io.StringIO()
        InvestigationWriter.from_stream(isa.investigation, investigation_buf).write()
        pending = []  # (file name, buffer) pairs in the order they will be emitted
        for name, study in new_isa.studies.items():
            buf = io.StringIO()
            StudyWriter.from_stream(study, buf).write()
            pending.append((name, buf))
        for name, assay in new_isa.assays.items():
            buf = io.StringIO()
            AssayWriter.from_stream(assay, buf).write()
            pending.append((name, buf))

        def _emit(target, buf):
            # Single place for the diff-helper options shared by all output files.
            overwrite_helper(
                target,
                buf.getvalue(),
                do_write=not self.config.dry_run,
                show_diff=True,
                show_diff_side_by_side=self.config.show_diff_side_by_side,
                answer_yes=self.config.yes,
            )

        # Study and assay files are placed next to the investigation file.
        i_path = pathlib.Path(self.config.input_investigation_file)
        _emit(i_path, investigation_buf)
        for name, buf in pending:
            _emit(i_path.parent / name, buf)
def test_write_comment_investigation(comment_investigation_file, tmp_path):
    """Check that the "comments" investigation fixture is written back unchanged.

    The fixture is parsed, validated and serialized to a temporary file whose
    contents must match the fixture file.
    """
    # Parse and validate the fixture stream
    reader = InvestigationReader.from_stream(comment_investigation_file)
    parsed = reader.read()
    InvestigationValidator(parsed).validate()
    # Serialize the investigation to a temporary file
    written = tmp_path / "i_comments.txt"
    with open(written, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(parsed, out_stream, lineterminator="\n")
        writer.write()
    # Full content comparison (shallow=False) against the fixture file
    assert filecmp.cmp(comment_investigation_file.name, written, shallow=False)
Example #6
0
def run_writing(args, path_out, investigation, studies, assays):
    """Write the investigation and its studies and assays as ISA-Tab.

    When ``args.output_investigation_file.name`` is ``"<stdout>"`` everything
    is written to that already-open stream. Otherwise the investigation goes to
    the file of that name, and each study/assay with a non-empty path goes to
    that path below ``path_out``.

    ``studies`` is indexed by study position and ``assays`` by study position
    and then assay position, matching ``investigation.studies``.
    """
    target = args.output_investigation_file
    to_stdout = target.name == "<stdout>"

    # Investigation
    if to_stdout:
        InvestigationWriter.from_stream(investigation, target, quote=args.quotes).write()
    else:
        with open(target.name, "wt", newline="") as handle:
            InvestigationWriter.from_stream(investigation, handle, quote=args.quotes).write()

    # Studies, each followed by its assays; entries without a path are skipped
    for study_idx, study_info in enumerate(investigation.studies):
        study_path = study_info.info.path

        if to_stdout:
            if study_path:
                StudyWriter.from_stream(studies[study_idx], target, quote=args.quotes).write()
            for assay_idx, assay_info in enumerate(study_info.assays):
                if not assay_info.path:
                    continue
                assay = assays[study_idx][assay_idx]
                AssayWriter.from_stream(assay, target, quote=args.quotes).write()
            continue

        if study_path:
            with open(os.path.join(path_out, study_path), "wt", newline="") as handle:
                StudyWriter.from_stream(studies[study_idx], handle, quote=args.quotes).write()
        for assay_idx, assay_info in enumerate(study_info.assays):
            if not assay_info.path:
                continue
            with open(os.path.join(path_out, assay_info.path), "wt", newline="") as handle:
                assay = assays[study_idx][assay_idx]
                AssayWriter.from_stream(assay, handle, quote=args.quotes).write()
def test_write_minimal_investigation(minimal_investigation_file, tmp_path):
    """Check warnings and written output for the minimal investigation fixture.

    Reading plus validating must raise one warning, writing must raise six
    (the fourth of which is checked in detail), and the written file must have
    the same contents as the fixture file.
    """
    # Parse and validate the fixture while recording every IsaWarning
    with pytest.warns(IsaWarning) as read_warnings:
        parsed = InvestigationReader.from_stream(minimal_investigation_file).read()
        InvestigationValidator(parsed).validate()
    assert len(read_warnings) == 1
    # Serialize to a temporary file, again recording the warnings
    written = tmp_path / "i_minimal.txt"
    with pytest.warns(IsaWarning) as write_warnings, open(written, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(parsed, out_stream, lineterminator="\n")
        writer.write()
    assert len(write_warnings) == 6
    # Only the fourth write warning is inspected in detail
    fourth = write_warnings[3]
    assert fourth.category == WriteIsatabWarning
    assert str(fourth.message) == (
        "No reference headers available for section STUDY PUBLICATIONS. Applying default order."
    )
    # Full content comparison (shallow=False) against the fixture file
    assert filecmp.cmp(minimal_investigation_file.name, written, shallow=False)
def test_write_full_investigation(full_investigation_file, tmp_path):
    """Check that the full investigation fixture survives a write/read/write cycle.

    The fixture is parsed and validated, written to a temporary file, parsed
    again from that file and written a second time. The contents of both
    written files must match.
    """
    # Parse and validate the fixture stream
    parsed = InvestigationReader.from_stream(full_investigation_file).read()
    InvestigationValidator(parsed).validate()
    # First pass: serialize what was parsed from the fixture
    first_copy = tmp_path / "i_fullinvest.txt"
    with open(first_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(parsed, out_stream, lineterminator="\n")
        writer.write()
    # Second pass: parse the file just written and serialize it again
    with open(first_copy, "rt") as in_stream:
        reparsed = InvestigationReader.from_stream(in_stream).read()
    second_copy = tmp_path / "i_fullinvest_2.txt"
    with open(second_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(reparsed, out_stream, lineterminator="\n")
        writer.write()
    # Full content comparison (shallow=False) of the two written files
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
def test_write_assays_investigation(assays_investigation_file, tmp_path):
    """Check warning counts and round-trip stability for the assays fixture.

    Each validation must raise five warnings and each write twelve, both for
    the investigation parsed from the fixture and for the one parsed back from
    the first written file. The contents of both written files must match.
    """

    def validate_expecting_five(investigation):
        # Validation of this fixture is expected to raise exactly five warnings
        with pytest.warns(IsaWarning) as record:
            InvestigationValidator(investigation).validate()
        assert len(record) == 5

    def write_expecting_twelve(investigation, target):
        # Writing this fixture is expected to raise exactly twelve warnings
        with pytest.warns(IsaWarning) as record, open(target, "wt") as out_stream:
            writer = InvestigationWriter.from_stream(
                investigation, out_stream, lineterminator="\n"
            )
            writer.write()
        assert len(record) == 12

    # First pass: fixture -> validated investigation -> first temporary file
    parsed = InvestigationReader.from_stream(assays_investigation_file).read()
    validate_expecting_five(parsed)
    first_copy = tmp_path / "i_assays.txt"
    write_expecting_twelve(parsed, first_copy)
    # Second pass: first temporary file -> validated investigation -> second file
    with open(first_copy, "rt") as in_stream:
        reparsed = InvestigationReader.from_stream(in_stream).read()
    validate_expecting_five(reparsed)
    second_copy = tmp_path / "i_assays_2.txt"
    write_expecting_twelve(reparsed, second_copy)
    # Full content comparison (shallow=False) of the two written files
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
Example #10
0
def create_and_write(out_path):
    """Build a minimal investigation with one study and one assay and write it as ISA-Tab.

    Three files are written below ``out_path``: the investigation
    (``i_minimal.txt``), the study (``s_minimal.txt``) and the assay
    (``a_minimal.txt``). The investigation, the study graph and the assay graph
    are each validated right before they are written.
    """
    # ------------------------------------------------------------------
    # Investigation file content
    # ------------------------------------------------------------------

    # Basic information of the single study
    study_basics = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # The single assay registered for the study
    exome_assay_info = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Protocols referenced by the processes of the study and assay graphs
    collection_protocol = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    sequencing_protocol = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Study section of the investigation
    study_section = models.StudyInfo(
        info=study_basics,
        designs=(),
        publications=(),
        factors={},
        assays=(exome_assay_info,),
        protocols={
            collection_protocol.name: collection_protocol,
            sequencing_protocol.name: sequencing_protocol,
        },
        contacts=(),
    )

    # Ontology source reference used by the assay's term references
    obi_source = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Basic information of the investigation itself
    investigation_basics = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    investigation = models.InvestigationInfo(
        ontology_source_refs={obi_source.name: obi_source},
        info=investigation_basics,
        publications=(),
        contacts=(),
        studies=(study_section,),
    )

    # Validate, then write the investigation file
    InvestigationValidator(investigation).validate()
    with open(join(out_path, investigation.info.path), "wt", newline="") as handle:
        InvestigationWriter.from_stream(investigation=investigation, output_file=handle).write()

    # Entries the study and assay graphs below are attached to
    study_entry = investigation.studies[0]
    assay_entry = study_entry.assays[0]

    # ------------------------------------------------------------------
    # Study graph: source -> sample collection -> sample
    # ------------------------------------------------------------------

    # Unique names identify the nodes; the arcs refer to nodes by these names
    source_uid = "S1-source-0815"
    sample_uid = "S1-sample-0815-N1"
    collection_uid = "S1-sample collection-2-1"

    source = models.Material(
        type="Source Name",
        unique_name=source_uid,
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )
    study_sample = models.Material(
        type="Sample Name",
        unique_name=sample_uid,
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    collection = models.Process(
        protocol_ref="sample collection",
        unique_name=collection_uid,
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    study_graph = models.Study(
        file=study_entry.info.path,
        header=None,
        materials={
            source.unique_name: source,
            study_sample.unique_name: study_sample,
        },
        processes={collection.unique_name: collection},
        arcs=(
            models.Arc(tail=source_uid, head=collection_uid),
            models.Arc(tail=collection_uid, head=sample_uid),
        ),
    )

    # Validate, then write the study file
    StudyValidator(
        investigation=investigation,
        study_info=study_entry,
        study=study_graph,
    ).validate()
    with open(join(out_path, study_entry.info.path), "wt", newline="") as handle:
        StudyWriter.from_stream(study_or_assay=study_graph, output_file=handle).write()

    # ------------------------------------------------------------------
    # Assay graph: sample -> sequencing -> raw data file R1 -> raw data file R2
    # ------------------------------------------------------------------

    # The original example notes that explicit per-node headers are currently
    # required to enable export to ISA-Tab.
    sequencing_uid = "S1-A1-0815-N1-DNA1-WES1-3"
    read1_uid = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    read2_uid = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"

    assay_sample = models.Material(
        type="Sample Name",
        unique_name=sample_uid,
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    read1_file = models.Material(
        type="Raw Data File",
        unique_name=read1_uid,
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    read2_file = models.Material(
        type="Raw Data File",
        unique_name=read2_uid,
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    sequencing = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name=sequencing_uid,
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    assay_graph = models.Assay(
        file=assay_entry.path,
        header=None,
        materials={
            assay_sample.unique_name: assay_sample,
            read1_file.unique_name: read1_file,
            read2_file.unique_name: read2_file,
        },
        processes={sequencing.unique_name: sequencing},
        arcs=(
            models.Arc(tail=sample_uid, head=sequencing_uid),
            models.Arc(tail=sequencing_uid, head=read1_uid),
            models.Arc(tail=read1_uid, head=read2_uid),
        ),
    )

    # Validate, then write the assay file
    AssayValidator(
        investigation=investigation,
        study_info=study_entry,
        assay_info=assay_entry,
        assay=assay_graph,
    ).validate()
    with open(join(out_path, assay_entry.path), "wt", newline="") as handle:
        AssayWriter.from_stream(study_or_assay=assay_graph, output_file=handle).write()