def _perform_update(self, isa, ped_donors):
    # Traverse investigation, studies, assays, potentially updating the nodes.
    donor_map = self._build_donor_map(ped_donors)
    visitor = SheetUpdateVisitor(donor_map, self.config)
    iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
    iwalker.run(visitor)
    investigation, studies, assays = iwalker.build_evolved()

    # Add records to study and assay for donors not seen so far.
    todo_ped_donors = [
        donor for donor in donor_map.values() if donor.name not in visitor.seen_source_names
    ]
    studies, assays = isa_germline_append_donors(
        studies,
        assays,
        tuple(todo_ped_donors),
        tuple(visitor.seen_sample_names),
        self.config,
    )
    new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

    # Write ISA-tab into string buffers.
    io_investigation = io.StringIO()
    InvestigationWriter.from_stream(isa.investigation, io_investigation).write()
    ios_studies = {}
    for name, study in new_isa.studies.items():
        ios_studies[name] = io.StringIO()
        StudyWriter.from_stream(study, ios_studies[name]).write()
    ios_assays = {}
    for name, assay in new_isa.assays.items():
        ios_assays[name] = io.StringIO()
        AssayWriter.from_stream(assay, ios_assays[name]).write()

    # Write out updated ISA-tab files using the diff helper.
    i_path = pathlib.Path(self.config.input_investigation_file)
    overwrite_helper(
        i_path,
        io_investigation.getvalue(),
        do_write=not self.config.dry_run,
        show_diff=True,
        show_diff_side_by_side=self.config.show_diff_side_by_side,
        answer_yes=self.config.yes,
    )
    for filename, ios_study in ios_studies.items():
        overwrite_helper(
            i_path.parent / filename,
            ios_study.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
    for filename, ios_assay in ios_assays.items():
        overwrite_helper(
            i_path.parent / filename,
            ios_assay.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
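
# The ``_build_donor_map`` helper called above is not shown in this snippet. A minimal
# sketch of what it could look like (hypothetical implementation, assuming each pedigree
# donor carries a ``name`` attribute and later entries override earlier ones of the same
# name):
def _build_donor_map(self, ped_donors):
    """Hypothetical sketch: map donor name to donor record for quick lookup."""
    return {donor.name: donor for donor in ped_donors}
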
def _perform_update(self, isa, annotation_map, header_map):
    # Traverse investigation, studies, assays, potentially updating the nodes.
    visitor = SheetUpdateVisitor(
        annotation_map,
        header_map,
        self.config.force_update,
        self.config.target_study,
        self.config.target_assay,
    )
    iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
    iwalker.run(visitor)
    investigation, studies, assays = iwalker.build_evolved()
    new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

    # Write ISA-tab into string buffers.
    io_investigation = io.StringIO()
    InvestigationWriter.from_stream(isa.investigation, io_investigation).write()
    ios_studies = {}
    for name, study in new_isa.studies.items():
        ios_studies[name] = io.StringIO()
        StudyWriter.from_stream(study, ios_studies[name]).write()
    ios_assays = {}
    for name, assay in new_isa.assays.items():
        ios_assays[name] = io.StringIO()
        AssayWriter.from_stream(assay, ios_assays[name]).write()

    # Write out updated ISA-tab files using the diff helper.
    i_path = pathlib.Path(self.config.input_investigation_file)
    overwrite_helper(
        i_path,
        io_investigation.getvalue(),
        do_write=not self.config.dry_run,
        show_diff=True,
        show_diff_side_by_side=self.config.show_diff_side_by_side,
        answer_yes=self.config.yes,
    )
    for filename, ios_study in ios_studies.items():
        overwrite_helper(
            i_path.parent / filename,
            ios_study.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
    for filename, ios_assay in ios_assays.items():
        overwrite_helper(
            i_path.parent / filename,
            ios_assay.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
def _parse_write_assert_assay(investigation_file, tmp_path, quote=None, normalize=False, skip=None):
    # Load investigation
    investigation = InvestigationReader.from_stream(investigation_file).read()
    InvestigationValidator(investigation).validate()
    directory = os.path.normpath(os.path.dirname(investigation_file.name))
    # Iterate assays
    for s, study_info in enumerate(investigation.studies):
        for a, assay_info in enumerate(study_info.assays):
            if skip and str(assay_info.path) in skip:
                continue
            # Load assay
            path_in = os.path.join(directory, assay_info.path)
            with open(path_in, "rt") as inputf:
                assay = AssayReader.from_stream(
                    "S{}".format(s + 1), "A{}".format(a + 1), inputf
                ).read()
            AssayValidator(investigation, study_info, assay_info, assay).validate()
            # Write assay to temporary file
            path_out = tmp_path / assay_info.path
            with open(path_out, "wt", newline="") as file:
                AssayWriter.from_stream(assay, file, quote=quote).write()
            if normalize:
                # Read and write assay again
                path_in = path_out
                with open(path_out, "rt") as inputf:
                    assay = AssayReader.from_stream(
                        "S{}".format(s + 1), "A{}".format(a + 1), inputf
                    ).read()
                AssayValidator(investigation, study_info, assay_info, assay).validate()
                path_out = tmp_path / (assay_info.path.name + "_b")
                with open(path_out, "wt", newline="") as file:
                    AssayWriter.from_stream(assay, file, quote=quote).write()
            # Sort and compare input and output
            path_in_s = tmp_path / (assay_info.path.name + ".in.sorted")
            path_out_s = tmp_path / (assay_info.path.name + ".out.sorted")
            assert filecmp.cmp(
                sort_file(path_in, path_in_s), sort_file(path_out, path_out_s), shallow=False
            )
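
# The test helper above relies on a ``sort_file`` utility that is not shown in this
# snippet. A minimal sketch of what such a helper could look like (hypothetical
# implementation, assuming it writes the lines of ``path_in`` to ``path_out`` in sorted
# order and returns the output path so the sorted files can be compared):
def sort_file(path_in, path_out):
    """Hypothetical sketch: sort the lines of ``path_in`` into ``path_out``."""
    with open(path_in, "rt") as inputf:
        lines = sorted(inputf.readlines())
    with open(path_out, "wt", newline="") as outputf:
        outputf.writelines(lines)
    return path_out
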
def run_writing(args, path_out, investigation, studies, assays):
    # Write investigation
    if args.output_investigation_file.name == "<stdout>":
        InvestigationWriter.from_stream(
            investigation, args.output_investigation_file, quote=args.quotes
        ).write()
    else:
        with open(args.output_investigation_file.name, "wt", newline="") as outputf:
            InvestigationWriter.from_stream(investigation, outputf, quote=args.quotes).write()
    # Write studies and assays
    for s, study_info in enumerate(investigation.studies):
        if args.output_investigation_file.name == "<stdout>":
            if study_info.info.path:
                StudyWriter.from_stream(
                    studies[s], args.output_investigation_file, quote=args.quotes
                ).write()
            for a, assay_info in enumerate(study_info.assays):
                if assay_info.path:
                    AssayWriter.from_stream(
                        assays[s][a], args.output_investigation_file, quote=args.quotes
                    ).write()
        else:
            if study_info.info.path:
                with open(os.path.join(path_out, study_info.info.path), "wt", newline="") as outputf:
                    StudyWriter.from_stream(studies[s], outputf, quote=args.quotes).write()
            for a, assay_info in enumerate(study_info.assays):
                if assay_info.path:
                    with open(os.path.join(path_out, assay_info.path), "wt", newline="") as outputf:
                        AssayWriter.from_stream(assays[s][a], outputf, quote=args.quotes).write()
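
# Illustrative call of ``run_writing`` (a hypothetical setup, not the tool's actual CLI
# wiring; it assumes ``investigation``, ``studies`` and ``assays`` were obtained from a
# previous parsing step and that the parsed arguments expose ``output_investigation_file``
# as an open file object and ``quotes`` as the quoting character):
import argparse
import sys

args = argparse.Namespace(output_investigation_file=sys.stdout, quotes=None)
# ``sys.stdout.name`` is "<stdout>", so all tables are written to standard output.
run_writing(args, ".", investigation, studies, assays)
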
def create_and_write(out_path):
    """Create an investigation with a study and assay and write to ``out_path``."""

    # Prepare one or more study sections
    # Prepare basic study information
    study_info = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # Create one or more assays
    assay_01 = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Prepare one or more protocols
    protocol_01 = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    protocol_02 = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Create study object
    study_01 = models.StudyInfo(
        info=study_info,
        designs=(),
        publications=(),
        factors={},
        assays=(assay_01,),
        protocols={protocol_01.name: protocol_01, protocol_02.name: protocol_02},
        contacts=(),
    )

    # Prepare other investigation section
    # Prepare one or more ontology term source references
    onto_ref_01 = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Prepare basic investigation information
    invest_info = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    # Create investigation object
    investigation = models.InvestigationInfo(
        ontology_source_refs={onto_ref_01.name: onto_ref_01},
        info=invest_info,
        publications=(),
        contacts=(),
        studies=(study_01,),
    )

    # Validate investigation
    InvestigationValidator(investigation).validate()

    # Write the investigation as ISA-Tab txt file
    with open(join(out_path, investigation.info.path), "wt", newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation, output_file=outputf).write()

    # Create a corresponding Study graph
    # Create at least one source, one sample and one collection process
    # Unique names are required for unambiguous node identification
    source_01 = models.Material(
        type="Source Name",
        unique_name="S1-source-0815",
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    process_01 = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arc_01 = models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1")
    arc_02 = models.Arc(tail="S1-sample collection-2-1", head="S1-sample-0815-N1")

    # Create the study graph object
    study_graph_01 = models.Study(
        file=investigation.studies[0].info.path,
        header=None,
        materials={source_01.unique_name: source_01, sample_01.unique_name: sample_01},
        processes={process_01.unique_name: process_01},
        arcs=(arc_01, arc_02),
    )

    # Validate study graph
    StudyValidator(
        investigation=investigation, study_info=investigation.studies[0], study=study_graph_01
    ).validate()

    # Write the study as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].info.path), "wt", newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph_01, output_file=outputf).write()

    # Create a corresponding Assay graph
    # Create at least one sample, one output material and one sequencing process
    # Unique names are required for unambiguous node identification
    # Explicit header definition per node is currently required to enable export to ISA-Tab
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )
    data_file_01 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    data_file_02 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )
    process_01 = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    # Create the assay graph object
    assay_graph_01 = models.Assay(
        file=investigation.studies[0].assays[0].path,
        header=None,
        materials={
            sample_01.unique_name: sample_01,
            data_file_01.unique_name: data_file_01,
            data_file_02.unique_name: data_file_02,
        },
        processes={process_01.unique_name: process_01},
        arcs=arcs,
    )

    # Validate assay graph
    AssayValidator(
        investigation=investigation,
        study_info=investigation.studies[0],
        assay_info=investigation.studies[0].assays[0],
        assay=assay_graph_01,
    ).validate()

    # Write the assay as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].assays[0].path), "wt", newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph_01, output_file=outputf).write()
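
# Minimal driver for the example above (a sketch, assuming the imports used throughout,
# i.e. the altamisa models, validators and writers plus ``from os.path import join``);
# it writes the minimal investigation, study and assay into a temporary directory and
# lists the generated files.
if __name__ == "__main__":
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as out_path:
        create_and_write(out_path)
        print("Created:", sorted(os.listdir(out_path)))
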