Ejemplo n.º 1
0
def test_obsolete_term_repair_withfrom():
    """Obsolete GO terms in the with/from column are repaired to their replacements."""
    gpad_fields = ["ZFIN",
                   "ZFIN:ZDB-GENE-980526-362",
                   "acts_upstream_of_or_within",
                   "GO:0007155",
                   "PMID:15494018",
                   "ECO:0000305",
                   "GO:0005913|GO:1,GO:4|ZFIN:ZDB-MRPHLNO-010101-1,MGI:1232453",
                   "",
                   "20041026",
                   "ZFIN",
                   "",
                   "contributor=GOC:zfin_curators|model-state=production|noctua-model-id=gomodel:ZFIN_ZDB-GENE-980526-362"
                   ]
    ontology = OntologyFactory().create(ALT_ID_ONT)
    parser = GpadParser(
        config=assocparser.AssocParserConfig(ontology=ontology,
                                             rule_set=assocparser.RuleSet.ALL))
    parse_result = parser.parse_line("\t".join(gpad_fields))
    association = parse_result.associations[0]
    # GO:0005913 should be repaired to its replacement term, GO:00005912
    expected = [
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')]),
        # repaired test GO elements
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='2'),
                                 Curie(namespace='GO', identity='3')]),
        # non GO elements stay the same, could be obsolete or not
        ConjunctiveSet(elements=[Curie(namespace='ZFIN', identity='ZDB-MRPHLNO-010101-1'),
                                 Curie(namespace='MGI', identity='1232453')]),
    ]
    assert association.evidence.with_support_from == expected
Ejemplo n.º 2
0
def test_aspect_fill_for_obsolete_terms():
    """Null aspect on an obsolete term is filled from its replacement.

    GO:4 is obsolete and has no aspect (hasOBONamespace) in the
    obsolete.json ontology; GO:3 is its replacement term.  Note that
    GPAD lines contain no aspect data, so the parser must derive it.
    """
    gpad_fields = [
        "MGI",
        "MGI:105128",
        "involved_in",
        "GO:4",
        "PMID:25901318",
        "ECO:0000314",
        "",
        "",
        "20190517",
        "MGI",
        "",
        "contributor=http://orcid.org/0000-0002-9796-7693|model-state=production|noctua-model-id=gomodel:5c4605cc00004132"
    ]
    ontology = OntologyFactory().create(ALT_ID_ONT)
    parser = GpadParser(
        config=assocparser.AssocParserConfig(ontology=ontology,
                                             rule_set=assocparser.RuleSet.ALL))
    assoc = parser.parse_line("\t".join(gpad_fields)).associations[0]

    # GO:4 should be repaired to its replacement term, GO:3
    assert assoc.object.id == Curie("GO", "3")
    # Aspect should not be empty
    assert assoc.aspect == 'P'
def test_parse():
    """Parse a GPAD file with group metadata configured and print the report.

    Fix: both file handles were opened without ever being closed; use
    context managers so they are released deterministically.
    """
    with open("tests/resources/mgi.dataset.yaml") as metadata_fh:
        group_metadata = yaml.load(metadata_fh, Loader=yaml.FullLoader)
    p = GpadParser(config=assocparser.AssocParserConfig(group_metadata=group_metadata))
    test_gpad_file = "tests/resources/mgi.test.gpad"
    with open(test_gpad_file, "r") as gpad_fh:
        results = p.parse(gpad_fh)
    print(p.report.to_markdown())
Ejemplo n.º 4
0
def test_skim_gpad():
    """Skim a GPAD file and sanity-check each (subject, name, object) tuple.

    Fix: the input file handle was opened without being closed; use a
    context manager so it is released deterministically.
    """
    p = GpadParser()
    p.config.ecomap = EcoMap()
    with open(POMBASE_GPAD, "r") as gpad_fh:
        results = p.skim(gpad_fh)
    assert len(results) == 1984
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:') or s.startswith('PR:')
Ejemplo n.º 5
0
def test_invalid_goid_in_gpad():
    """Parsing a GPAD against a GAF-derived ontology subset produces many errors.

    Note: this ontology is a subset of GO extracted using the GAF, not GPAD.
    Fix: the input file handle was opened without being closed; use a
    context manager so it is released deterministically.
    """
    p = GpadParser()
    p.config.ontology = OntologyFactory().create(ONT)
    with open(POMBASE_GPAD, "r") as gpad_fh:
        results = p.parse(gpad_fh, skipheader=True)

    # we expect errors since ONT is not tuned for the GPAD file
    assert len(p.report.messages) > 500
    print(p.report.to_markdown())
def filter_rule_validate_lines(annots, assoc_filter):
    """Return only the split GPAD lines that pass ``assoc_filter``.

    Each split line is re-joined and parsed into an ontobio association
    object so the standard FilterRule validation can be applied to it.
    """
    parser = GpadParser()
    kept = []
    for fields in annots:
        parse_result = parser.parse_line("\t".join(fields))
        if not parse_result.associations:
            continue
        # Right now, GpadParser only returns 0 or 1 associations
        assoc = extract_properties(parse_result.associations[0])
        if "annotation_properties" in assoc:
            fields.append(assoc["annotation_properties"])
        if assoc_filter.validate_line(assoc):
            kept.append(fields)
    return kept
Ejemplo n.º 7
0
    def create_from_file(self, file=None, fmt='gaf', skim=True, **args):
        """
        Creates an association set from a file.

        Arguments
        ---------
        file : str or file
            input file or filename
        fmt : str
            name of format, one of 'gaf', 'gpad' or 'hpoa'
        skim : bool
            if True use the fast skim parse, otherwise do a full parse

        Raises
        ------
        ValueError
            if fmt is not a recognized format

        """
        if fmt == 'gaf':
            p = GafParser()
        elif fmt == 'gpad':
            p = GpadParser()
        elif fmt == 'hpoa':
            p = HpoaParser()
        else:
            # Previously this only logged the error and then crashed with
            # AttributeError on a None parser; fail fast and explicitly.
            logging.error("Format not recognized: {}".format(fmt))
            raise ValueError("Format not recognized: {}".format(fmt))

        logging.info("Parsing {} with {}/{}".format(file, fmt, p))
        if skim:
            results = p.skim(file)
            return self.create_from_tuples(results, **args)
        else:
            assocs = p.parse(file, skipheader=True)
            return self.create_from_assocs(assocs, **args)
Ejemplo n.º 8
0
def test_unmapped_eco_to_gaf_codes():
    # By default, ECO codes in GPAD need to be convertible to an ECO GAF code (e.g. IDA, ISO)
    fields = [
        "MGI",
        "MGI:88276",
        "is_active_in",
        "GO:0098831",
        "PMID:8909549",
        "ECO:0000164",
        "",
        "",
        "20180711",
        "SynGO",
        "part_of(UBERON:0000956)",
        ""
    ]
    parser = GpadParser(config=assocparser.AssocParserConfig())

    def association_count():
        # Re-parse the (possibly mutated) fields and count associations
        return len(parser.parse_line("\t".join(fields)).associations)

    assert association_count() == 0
    assert parser.report.messages[0]["type"] == parser.report.UNKNOWN_EVIDENCE_CLASS

    parser.config.allow_unmapped_eco = True
    assert association_count() == 1

    parser.config.allow_unmapped_eco = False
    fields[5] = "ECO:0000314"  # maps to IDA
    assert association_count() == 1

    fields[5] = "ECO:0006003"  # indirectly maps to IDA via gaf-eco-mapping-derived.txt
    assert association_count() == 1
Ejemplo n.º 9
0
def test_convert_gaf_to_gpad():
    """Round-trip: parse a GAF file, write it as GPAD, and re-parse the result."""
    gaf_parser = GafParser()
    gaf_parser.config.ecomap = EcoMap()
    gpad_writer = GpadWriter()
    gpad_parser = GpadParser()
    convert(POMBASE, gaf_parser, gpad_writer, gpad_parser)
Ejemplo n.º 10
0
def test_skim():
    """Skim a GPAD file and print the resulting tuples.

    Fix: the input file handle was opened without being closed; use a
    context manager so it is released deterministically.
    """
    p = GpadParser()
    with open(POMBASE, "r") as gpad_fh:
        results = p.skim(gpad_fh)
    print(str(results))
Ejemplo n.º 11
0
def test_parse():
    """Fully parse a GPAD file and print each resulting association.

    Fix: the input file handle was opened without being closed; use a
    context manager so it is released deterministically.
    """
    p = GpadParser()
    with open(POMBASE, "r") as gpad_fh:
        results = p.parse(gpad_fh)
    for r in results:
        print(str(r))
Ejemplo n.º 12
0
import logging
from typing import List
from ontobio.io import assocparser
from ontobio.io.gpadparser import GpadParser
from ontobio.model.association import GoAssociation, Date
from ontobio.rdfgen.gocamgen import errors
from ontobio import ecomap

logger = logging.getLogger(__name__)

# Shared ECO translation table; the ECO class for the IPI evidence code is
# resolved once at import time.
ecomapping = ecomap.EcoMap()
ipi_eco = ecomapping.coderef_to_ecoclass("IPI")

# Module-level parser instance, reused to avoid repeated construction cost.
GPAD_PARSER = GpadParser()
BINDING_ROOT = "GO:0005488"  # binding
IPI_ECO_CODE = ipi_eco


class GoAssocWithFrom:
    """
    Separate with/from column values into
    header vs line arrangement.
    Used for explicit placement in
    annotation assertions.
    """
    def __init__(self, header=None, line=None):
        # None defaults avoid the shared-mutable-default-argument pitfall
        if header is None:
            header = []
        if line is None:
            line = []
        self.header = sorted(header)
        # Bug fix: `line` was accepted and defaulted but never stored on the
        # instance, so the line arrangement was silently lost.  Sorted for
        # symmetry with `header` — confirm against callers if order matters.
        self.line = sorted(line)
Ejemplo n.º 13
0
                        match_score = 4
                        if sorted(r.upper() for r in source['evidence']['has_supporting_reference']) == \
                                sorted(r.upper() for r in target['evidence']['has_supporting_reference']):
                            match_score = 5
    return match_score


if __name__ == '__main__':
    print("Starting comparison ")
    parser = argparse.ArgumentParser()
    parser.add_argument('-g1', '--gpad_file1', help="Filepath of GPAD file 1", required=True)
    parser.add_argument('-g2', '--gpad_file2', help="Filepath of GPAD file 2", required=True)
    args = parser.parse_args()

    gpad_parser = GpadParser()
    assocs1 = gpad_parser.parse(args.gpad_file1, skipheader=True)
    assocs2 = gpad_parser.parse(args.gpad_file2, skipheader=True)

    # Fix: the tallies were incremented without ever being initialized,
    # which raises NameError on the first increment.
    exact_matches = 0
    close_matches = 0
    no_matches = 0
    input_lines = 0

    # Fix: the output file was opened and never closed; the context
    # manager also flushes it if the comparison loop raises.
    with open("compare.txt", "w") as f:
        for a in assocs1:
            match = is_assoc_in_list(a, assocs2)
            # Plain == instead of calling __eq__ directly
            if match == "exact match":
                exact_matches = exact_matches + 1
            elif match == "close match":
                close_matches = close_matches + 1
            elif match == "no match":
                no_matches = no_matches + 1
            f.write(match + "\t" + a["source_line"])
            input_lines = input_lines + 1
Ejemplo n.º 14
0
 def __init__(self, gpad_file, filter_rule: FilterRule):
     """Parse *gpad_file* into associations with extracted properties and
     build the filter used to validate them."""
     parsed = GpadParser().parse(gpad_file, skipheader=True)
     self.assocs = extract_properties_from_assocs(parsed)
     self.assoc_filter = AssocFilter(filter_rule)
Ejemplo n.º 15
0
def test_parse_gpad():
    """The PomBase GPAD file should parse cleanly with GpadParser."""
    gpad_parser = GpadParser()
    parse_with(POMBASE_GPAD, gpad_parser)
Ejemplo n.º 16
0
def main():
    """
    Wrapper for Assoc Parsing.

    Builds the CLI, constructs an ontology and a parser configuration,
    then dispatches to the subcommand's handler function.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
                                                 By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
                                                 """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r',
                        '--resource',
                        type=str,
                        required=False,
                        help='Name of ontology')
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        required=False,
        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F',
                        '--format',
                        type=str,
                        required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-m',
                        '--messagefile',
                        type=str,
                        required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument(
        "--filter-out",
        nargs="+",
        required=False,
        default=[],
        metavar="EVIDENCE",
        help=
        "List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP"
    )
    parser.add_argument("--filtered-file",
                        required=False,
                        default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument(
        '-T',
        '--taxon',
        nargs='*',
        required=False,
        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix',
                        nargs='*',
                        required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix',
                        nargs='*',
                        required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t',
                          '--to',
                          type=str,
                          required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p',
                          '--properties',
                          nargs='*',
                          type=str,
                          required=False,
                          help='Properties')
    parser_n.add_argument('-s',
                          '--subset',
                          type=str,
                          required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # Fix: when no subcommand is given, set_defaults never runs and
    # args.function does not exist; exit with a usage message instead of
    # crashing with AttributeError.
    if not hasattr(args, 'function'):
        parser.error("a subcommand is required (validate, filter, convert, map2slim)")
    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    # set configuration
    filtered_evidence_file = open(args.filtered_file,
                                  "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file)
    p = None
    fmt = None
    if args.format is None:
        fmt = 'gaf'
    else:
        fmt = args.format.lower()

    # TODO: use a factory
    if fmt == 'gaf':
        p = GafParser()
    elif fmt == 'gpad':
        p = GpadParser()
    elif fmt == 'hpoa':
        p = HpoaParser()
    elif fmt == "gpi":
        p = entityparser.GpiParser()
        func = validate_entity
    else:
        # Fix: an unrecognized format previously left p as None and
        # crashed below with AttributeError; fail fast and explicitly.
        raise ValueError("Unrecognized assoc format: {}".format(fmt))

    p.config = config

    outfh = None
    if args.outfile is not None:
        two_mb = 2097152
        outfh = open(args.outfile, "w", buffering=two_mb)
    func(ont, args.file, outfh, p, args)
    if filtered_evidence_file:
        filtered_evidence_file.close()

    if outfh is not None:
        outfh.close()
    if args.messagefile is not None:
        mfh = open(args.messagefile, "w")
        mfh.write(p.report.to_markdown())
        mfh.close()
    else:
        print(p.report.to_markdown())
Ejemplo n.º 17
0
def main():
    """
    Wrapper for Assoc Parsing.

    Builds the CLI, optionally loads an ontology and Gaferencer
    inferences, constructs the parser configuration, and dispatches to
    the subcommand's handler function.
    """
    parser = argparse.ArgumentParser(description='Wrapper for obographs assocmodel library'
                                                 """
                                                 By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
                                                 """,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument("--report-md", type=str, required=False, dest="report_md",
                        help="Path to report markdown file")
    parser.add_argument("--report-json", type=str, required=False, dest="report_json",
                        help="Path to report JSON file")
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[], metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None, metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument("-I", "--gaferencer-file", type=argparse.FileType('r'), required=False,
                        help="Output from Gaferencer run on a set of GAF annotations")
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument("--allow_paint", required=False, action="store_const", const=True,
                        help="Allow IBAs in parser")
    parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
                        help="GPI file")
    parser.add_argument("-l", "--rule", action="append", required=None, default=[], dest="rule_set",
                        help="Set of rules to be run. Default is no rules to be run, with the exception \
                            of gorule-0000027 and gorule-0000020. See command line documentation in the \
                                ontobio project or readthedocs for more information")


    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True, choices=["GAF", "GPAD", "gaf", "gpad"],
                          help='Format to convert to')
    parser_n.add_argument("-n", "--format-version", dest="version", type=str, required=False, default=None,
                          help="Version for the file format. GAF default is 2.1, GPAD default is 1.2")

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str, default=['subClassOf', 'BFO:0000050'],
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # Ontology Factory
    ont = None
    if args.resource is not None:
        ofactory = OntologyFactory()
        logging.info("Creating ont object from: {} {}".format(args.resource, ofactory))
        ont = ofactory.create(args.resource)
        logging.info("ont: {}".format(ont))


    # Fix: when no subcommand is given, set_defaults never runs and
    # args.function does not exist; exit with a usage message instead of
    # crashing with AttributeError.
    if not hasattr(args, 'function'):
        parser.error("a subcommand is required (validate, filter, convert, map2slim)")
    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    gaferences = None
    if args.gaferencer_file:
        gaferences = gaference.build_annotation_inferences(json.load(args.gaferencer_file))

    rule_set = args.rule_set
    if rule_set == ["all"]:
        rule_set = assocparser.RuleSet.ALL

    # set configuration
    filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file,
        annotation_inferences=gaferences,
        paint=args.allow_paint,
        gpi_authority_path=args.gpi,
        rule_set=rule_set
    )
    p = None
    fmt = None
    if args.format is None:
        fmt = 'gaf'
    else:
        fmt = args.format.lower()

    # TODO: use a factory
    if fmt == 'gaf':
        p = GafParser(config=config, dataset=args.file)
    elif fmt == 'gpad':
        p = GpadParser(config=config)
    elif fmt == 'hpoa':
        p = HpoaParser(config=config)
    elif fmt == "gpi":
        p = entityparser.GpiParser()
        func = validate_entity
    else:
        # Fix: an unrecognized format previously left p as None and
        # crashed below; fail fast and explicitly.
        raise ValueError("Unrecognized assoc format: {}".format(fmt))

    outfh = None
    if args.outfile is not None:
        two_mb = 2097152
        outfh = open(args.outfile, "w", buffering=two_mb)
    func(ont, args.file, outfh, p, args)
    if filtered_evidence_file:
        filtered_evidence_file.close()

    if outfh is not None:
        outfh.close()

    if args.report_md is not None:
        report_md = open(args.report_md, "w")
        report_md.write(p.report.to_markdown())
        report_md.close()
    if args.report_json is not None:
        report_json = open(args.report_json, "w")
        report_json.write(json.dumps(p.report.to_report_json(), indent=4))
        report_json.close()
    if not (args.report_md or args.report_json):
        print(p.report.to_markdown())
Ejemplo n.º 18
0
def create_parser(config, group, dataset, format="gaf"):
    """Build the annotation parser for the requested format.

    Only "gpad" and "gaf" are supported here; anything other than
    "gpad" falls back to the GAF parser.
    """
    parser_cls = GpadParser if format == "gpad" else GafParser
    return parser_cls(config=config, group=group, dataset=dataset)
Ejemplo n.º 19
0
        for fname in os.listdir(args.dir):
            # print("Loading file:", fname)
            nono_in_fname = False
            for nono in d:
                if nono in fname:
                    nono_in_fname = True
            if fname.endswith(".tsv") or nono_in_fname:
                continue
            # filenames.append(args.dir + fname)
            filter_name = get_filter_name(fname)
            filenames[args.dir + fname] = get_filter_rule(filter_name)
            # data = data + GafParser().parse(fname, skipheader=True)

    # all_dict = {}
    extensions_mapper = ExtensionsMapper()
    gpad_parser = GpadParser()
    print("Creating extension dictionary...")
    ext_dict = {}
    ext_dict['F'] = {}
    ext_dict['P'] = {}
    ext_dict['C'] = {}
    for fname in filenames:
        with open(fname) as f:
            data = []
            print("Loading file:", fname)
            for l in f.readlines():
                if not l.startswith("!"):
                    parts = l.split("\t")
                    # if parts[15] != "" and parts[6] in acceptable_evidence_codes:
                    data.append(parts)
            print("# of GPAD lines in file:", len(data))