Example #1
# Imports assumed from ontobio:
from ontobio.io.assocwriter import GafWriter

def validate_assocs(ont, file, outfile, p, args):
    # `p` is an ontobio parser (e.g. GafParser); `ont` and `args` are unused here.
    gafwriter = GafWriter(file=outfile)

    with open(file) as gafsource:
        associations = p.association_generator(file=gafsource)
        for assoc in associations:
            gafwriter.write_assoc(assoc)
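A minimal usage sketch for Example #1 (assumed, not from the original source): construct a GafParser, then stream an input GAF through the validator. The file names are placeholders.

from ontobio.io import assocparser
from ontobio.io.gafparser import GafParser

parser = GafParser(config=assocparser.AssocParserConfig())
with open("validated.gaf", "w") as out:
    validate_assocs(ont=None, file="input.gaf", outfile=out, p=parser, args=None)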
Example #2
# Assumed imports for this example: os, json, click, plus ontobio's
# assocparser module, GafParser, and GafWriter.
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown"):
    split_source = os.path.split(source_gaf)[0]
    # Associations filtered out by evidence (here: IEA) are diverted to <dataset>_noiea.gaf.
    filtered_associations = open(os.path.join(split_source, "{}_noiea.gaf".format(dataset)), "w")

    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint
    )
    validated_gaf_path = os.path.join(split_source, "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile)

    click.echo("Validating source GAF: {}".format(source_gaf))
    parser = GafParser(config=config, group=group, dataset=dataset)
    # First pass: count lines so the progress bar has a known length.
    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)

    # Second pass: parse, validate, and re-serialize each association.
    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)

    outfile.close()
    filtered_associations.close()

    # The parser accumulates a validation report; write it out as Markdown and JSON.
    with open(os.path.join(split_source, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(split_source, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_associations.name]
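A hedged usage sketch for Example #2: load a GO graph with ontobio's OntologyFactory (an assumption about how ontology_graph is produced; the dataset name and file paths are placeholders), then unpack the two returned paths.

from ontobio import OntologyFactory

go_graph = OntologyFactory().create("go.json")  # placeholder ontology handle
valid_gaf, noiea_gaf = produce_gaf("mgi", "mgi.gaf", go_graph, group="mgi")
print(valid_gaf)   # <source dir>/mgi_valid.gaf
print(noiea_gaf)   # <source dir>/mgi_noiea.gaf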
Example #3
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False,
                group="unknown", rule_metadata=None, goref_metadata=None,
                db_entities=None, group_idspace=None, format="gaf",
                suppress_rule_reporting_tags=None, annotation_inferences=None):
    # A None sentinel avoids sharing one mutable default list across calls.
    if suppress_rule_reporting_tags is None:
        suppress_rule_reporting_tags = []
    filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")

    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint,
        rule_metadata=rule_metadata,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_idspace,
        suppress_rule_reporting_tags=suppress_rule_reporting_tags,
        annotation_inferences=annotation_inferences
    )
    split_source = os.path.split(source_gaf)[0]
    validated_gaf_path = os.path.join(split_source, "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile, source=dataset)

    click.echo("Validating source {}: {}".format(format, source_gaf))
    parser = create_parser(config, group, dataset, format)
    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)

    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)

    outfile.close()
    filtered_associations.close()

    with open(os.path.join(split_source, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(split_source, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_associations.name]
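create_parser is not shown in this listing. A minimal sketch of the dispatch it presumably performs, using ontobio's GafParser and GpadParser; the real helper in the source repository may differ.

from ontobio.io.gafparser import GafParser
from ontobio.io.gpadparser import GpadParser

def create_parser(config, group, dataset, format="gaf"):
    # Hypothetical dispatch: choose the parser class by annotation format.
    if format == "gpad":
        return GpadParser(config=config, group=group, dataset=dataset)
    return GafParser(config=config, group=group, dataset=dataset)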
Example #4
def infer(ontology_path, target, gaf):
    ontology_graph = ontology(ontology_path)

    writer = GafWriter(file=target)
    assoc_generator = gafparser_generator(ontology_graph, gaf)
    line_count = 0
    for association in assoc_generator:
        # Skip all non-"enables" annotations
        if association["relation"]["id"] != ENABLES:
            continue

        inferred_associations = materialize_inferences(ontology_graph,
                                                       association)
        if len(inferred_associations) > 0:
            click.echo("Materialized {} associations".format(
                len(inferred_associations)))

        for inferred in inferred_associations:
            writer.write_assoc(inferred)

        line_count += 1
        if line_count % 100 == 0:
            # Note: only "enables" annotations reach this counter.
            click.echo("Processed {} lines".format(line_count))
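The helpers in Example #4 (ontology, gafparser_generator, ENABLES, materialize_inferences) are defined elsewhere in its source script. Hedged sketches of the first three, assuming ontobio; materialize_inferences is not reconstructed here.

from ontobio import OntologyFactory
from ontobio.io import assocparser
from ontobio.io.gafparser import GafParser

ENABLES = "RO:0002327"  # assumed RO id for the "enables" relation

def ontology(path):
    # Load an ontology graph (e.g. a go.json obograph file) via ontobio.
    return OntologyFactory().create(path)

def gafparser_generator(ontology_graph, gaf_path):
    # Stream parsed associations from a GAF, validating against the ontology.
    parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))
    return parser.association_generator(file=open(gaf_path))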
Example #5
def produce_gaf(dataset,
                source_gaf,
                ontology_graph,
                gpipath=None,
                paint=False,
                group="unknown",
                rule_metadata=None,
                goref_metadata=None,
                db_entities=None,
                group_idspace=None,
                format="gaf",
                suppress_rule_reporting_tags=None,
                annotation_inferences=None,
                group_metadata=None,
                extensions_constraints=None,
                rule_contexts=None):
    # None sentinels avoid sharing mutable default lists across calls.
    if suppress_rule_reporting_tags is None:
        suppress_rule_reporting_tags = []
    if rule_contexts is None:
        rule_contexts = []
    filtered_associations = open(
        os.path.join(
            os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        filter_out_evidence=["IEA"],
        filtered_evidence_file=filtered_associations,
        gpi_authority_path=gpipath,
        paint=paint,
        rule_metadata=rule_metadata,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_idspace,
        suppress_rule_reporting_tags=suppress_rule_reporting_tags,
        annotation_inferences=annotation_inferences,
        group_metadata=group_metadata,
        extensions_constraints=extensions_constraints,
        rule_contexts=rule_contexts)
    logger.info("Producing {}".format(source_gaf))
    # logger.info("AssocParserConfig used: {}".format(config))
    split_source = os.path.split(source_gaf)[0]
    validated_gaf_path = os.path.join(split_source,
                                      "{}_valid.gaf".format(dataset))
    outfile = open(validated_gaf_path, "w")
    gafwriter = GafWriter(file=outfile, source=dataset)

    click.echo("Validating source {}: {}".format(format, source_gaf))
    parser = create_parser(config, group, dataset, format)
    with open(source_gaf) as sg:
        lines = sum(1 for line in sg)

    with open(source_gaf) as gaf:
        with click.progressbar(iterable=parser.association_generator(file=gaf),
                               length=lines) as associations:
            for assoc in associations:
                gafwriter.write_assoc(assoc)

    outfile.close()
    filtered_associations.close()

    report_markdown_path = os.path.join(
        os.path.split(source_gaf)[0], "{}.report.md".format(dataset))
    logger.info(
        "About to write markdown report to {}".format(report_markdown_path))
    with open(report_markdown_path, "w") as report_md:
        logger.info("Opened for writing {}".format(report_markdown_path))
        report_md.write(parser.report.to_markdown())

    logger.info("markdown {} written out".format(report_markdown_path))
    logger.info("Markdown current stack:")
    if logger.getEffectiveLevel() == logging.INFO:
        traceback.print_stack()

    report_json_path = os.path.join(
        os.path.split(source_gaf)[0], "{}.report.json".format(dataset))
    logger.info("About to write json report to {}".format(report_json_path))
    report_data = parser.report.to_report_json()  # build the report dict once; reused below
    with open(report_json_path, "w") as report_json:
        logger.info("Opened for writing {}".format(report_json_path))
        report_json.write(json.dumps(report_data, indent=4))

    logger.info("json {} written out".format(report_json_path))
    logger.info("gorule-13 first 10 messages: {}".format(
        json.dumps(report_data["messages"].get("gorule-0000013", [])[:10],
                   indent=4)))
    logger.info("json current Stack:")
    if logger.getEffectiveLevel() == logging.INFO:
        traceback.print_stack()

    return [validated_gaf_path, filtered_associations.name]
Example #6
    def find_iea_iba_matches(self, filename):
        # Assumes the enclosing class provides self.parser (a GAF parser),
        # self.ontology, self.all_annots, and self.all_promoted_annots.
        all_bp_ev_counts = {}
        grouped_annots = {}
        leftover_annots = []
        log(filename)
        mod_annots = self.parser.parse(filename, skipheader=True)
        self.all_annots = self.all_annots + mod_annots
        for a in mod_annots:
            aspect = a["aspect"]
            if aspect == "P" or aspect == "F":
                grouped_annots, using_annot = file_away(grouped_annots, a)
                if aspect == "P":
                    if not using_annot:
                        leftover_annots.append(a)
                    evidence_code = a["evidence"]["type"]
                    if evidence_code not in all_bp_ev_counts:
                        all_bp_ev_counts[evidence_code] = 1
                    else:
                        all_bp_ev_counts[evidence_code] += 1

        dismissed_annots = []
        # match_rows = []
        base_f = os.path.basename(filename)
        match_outfile = base_f + "_matches.tsv"
        # `args` is assumed to be module-level parsed CLI options.
        if args.match_output_suffix:
            match_outfile = "{}.{}.tsv".format(base_f,
                                               args.match_output_suffix)
        with open(match_outfile, 'w') as mof:
            writer = csv.writer(mof, delimiter="\t")
            for ec in grouped_annots:
                match_rows = []
                ### For each evidence code, count unique annots that have with-matches (flatten the dict)
                log("BP {} withs count: {}".format(
                    ec,
                    len(
                        match_aspect(
                            flatten_with_dict(grouped_annots[ec],
                                              uniqify=True), 'P'))))
                ### Loop through with_value annots and segregate BPs from MFs; if a set
                ### has both BPs and MFs, that with_value set gets written out.
                for with_value in grouped_annots[ec]:
                    bp_annots = match_aspect(grouped_annots[ec][with_value],
                                             'P')
                    mf_annots = match_aspect(grouped_annots[ec][with_value],
                                             'F')
                    if len(bp_annots) < 1:
                        grouped_annots[ec][with_value] = []  # Empty this key's list
                    elif len(mf_annots) < 1:
                        dismissed_annots = dismissed_annots + bp_annots  # Cleanup (uniqify, remove annots promoted elsewhere) later
                        grouped_annots[ec][with_value] = []  # Empty this key's list
                    else:  # Continue on promoting
                        for a in bp_annots:
                            gene_id = a["subject"]["id"]
                            gene_id_bits = gene_id.split(":")
                            id_ns = gene_id_bits[0]
                            local_id = gene_id_bits[-1]  # avoid shadowing the builtin id()
                            # Find 'with-matched' MF annotations to same gene product
                            gene_mf_annots = annots_by_subject(
                                mf_annots, gene_id)
                            if len(gene_mf_annots) == 0:
                                # Should probably add this BP annot back to unused list
                                if a not in leftover_annots:
                                    leftover_annots.append(a)
                                continue

                            gene_symbol = a["subject"]["label"]
                            relation = first_qualifier(a)
                            bp_term = a["object"]["id"]
                            bp_term_label = self.ontology.label(bp_term)
                            bp_evidence_code = a["evidence"]["type"]
                            bp_reference = ",".join(
                                a["evidence"]["has_supporting_reference"])
                            bp_assigned_by = a["provided_by"]
                            for mfa in gene_mf_annots:
                                mf_term = mfa["object"]["id"]
                                mf_term_label = self.ontology.label(mf_term)
                                mf_evidence_code = mfa["evidence"]["type"]
                                mf_reference = ",".join(
                                    mfa["evidence"]
                                    ["has_supporting_reference"])
                                mf_assigned_by = mfa["provided_by"]
                                out_fields = [
                                    with_value, id_ns, local_id, gene_symbol,
                                    relation, bp_term, bp_term_label,
                                    bp_evidence_code, bp_reference,
                                    bp_assigned_by, mf_term, mf_term_label,
                                    mf_evidence_code, mf_reference,
                                    mf_assigned_by
                                ]
                                match_rows.append(out_fields)
                match_rows.sort(key=lambda k: k[2])
                for mr in match_rows:
                    writer.writerow(mr)
        # print("Total:", len(all_annots))
        # print("Leftovers:", len(leftover_annots))

        all_promoted_annots = []
        for ev in grouped_annots:
            promoted_bp_annots = match_aspect(
                flatten_with_dict(grouped_annots[ev], uniqify=True), 'P')
            all_promoted_annots = all_promoted_annots + promoted_bp_annots
            log("{} {} BP annotations inputted".format(all_bp_ev_counts[ev],
                                                       ev))
            # 5000 IEA BP annotations ‘involved in’
            log("{} {} BP annotations ‘involved in’".format(
                len(promoted_bp_annots), ev))
        # self.all_promoted_annots[filename] = all_promoted_annots
        self.all_promoted_annots = self.all_promoted_annots + all_promoted_annots

        ### Cleanup leftovers
        ## Note: `not in` on these lists is a linear scan, so this loop is
        ## quadratic and very slow on large inputs.
        for da in dismissed_annots:
            if da not in leftover_annots and da not in all_promoted_annots:
                leftover_annots.append(da)

        # print("Leftovers:", len(leftover_annots))
        log("Leftovers: {}".format(len(leftover_annots)))
        outfile = base_f + "_leftovers.gaf"
        if args.leftover_output:
            outfile = "{}.{}_leftovers.gaf".format(base_f,
                                                   args.leftover_output)
        with open(outfile, "w") as lf:
            gaf_writer = GafWriter(lf)
            for a in leftover_annots:
                gaf_writer.write_assoc(a)
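Example #6 leans on helpers defined elsewhere in its script (log, file_away, match_aspect, flatten_with_dict, annots_by_subject, first_qualifier). Hedged sketches of the simpler lookups, inferred only from how they are called above; the originals may differ.

def match_aspect(annots, aspect):
    # Keep only annotations in the given GO aspect ("P", "F", or "C").
    return [a for a in annots if a["aspect"] == aspect]

def annots_by_subject(annots, subject_id):
    # Keep only annotations whose subject (gene product) matches subject_id.
    return [a for a in annots if a["subject"]["id"] == subject_id]

def first_qualifier(annot):
    # Return the annotation's first qualifier, or None if it has none.
    quals = annot.get("qualifiers", [])
    return quals[0] if quals else None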