Esempio n. 1
0
    def __init__(self, config=None, group="unknown", dataset="unknown"):
        """
        Set up the configuration, the report and the optional GPI lookup table.

        Arguments:
        ---------

        config : an AssocParserConfig object; a default-constructed one is
            used when None is given
        group : forwarded to the Report as ``group``
        dataset : forwarded to the Report as ``dataset``

        When ``config.gpi_authority_path`` is set, that file is parsed with a
        GpiParser and ``self.gpi`` becomes a dict keyed by entity id; otherwise
        ``self.gpi`` stays None.
        """
        self.config = assocparser.AssocParserConfig() if config is None else config
        self.report = assocparser.Report(group=group, dataset=dataset)
        self.gpi = None

        authority_path = self.config.gpi_authority_path
        if authority_path is not None:
            self.gpi = {}
            gpi_parser = entityparser.GpiParser()
            with open(authority_path) as authority_file:
                for record in gpi_parser.parse(file=authority_file):
                    # Keep only the fields needed for lookups; synonyms are
                    # passed through entitywriter.stringify before storing.
                    summary = {
                        "symbol": record["label"],
                        "name": record["full_name"],
                        "synonyms": entitywriter.stringify(record["synonyms"]),
                        "type": record["type"]
                    }
                    self.gpi[record["id"]] = summary

                print("Loaded {} entities from {}".format(len(self.gpi), authority_path))
Esempio n. 2
0
    def load_from_file(BioEntities, path: str):
        """
        Read the GPI file at `path` and wrap the parsed entities.

        Every line is handed to ``GpiParser.parse_line``; each entity it yields
        is turned into a ``Subject`` and stored under its id as a ``Curie``.
        Any exception raised while reading or parsing is logged as an error
        and loading stops, so the result holds whatever was collected up to
        that point.

        Returns ``BioEntities(entities)`` where ``entities`` is the collected
        dict. (``BioEntities`` is presumably the class itself when this is
        used as a classmethod -- confirm against the decorator.)
        """
        collected = {}  # type: Dict[Curie, Subject]
        try:
            line_parser = entityparser.GpiParser()
            with open(path) as gpi_file:
                for raw_line in gpi_file:
                    _, parsed_entities = line_parser.parse_line(raw_line)
                    for parsed in parsed_entities:
                        # the parsed id is expected to be a well-formed curie
                        subject_id = Curie.from_str(parsed["id"])
                        collected[subject_id] = Subject(
                            subject_id,
                            parsed["label"],
                            parsed["full_name"],
                            parsed["synonyms"],
                            parsed["type"],
                            Curie.from_str(parsed["taxon"]["id"]))
        except Exception as err:
            logger.error("Failed to read GPI file: {}".format(str(err)))

        return BioEntities(collected)
Esempio n. 3
0
    def load_from_file(BioEntities, path: str):
        """
        Read the GPI file at `path` and wrap the parsed entity subjects.

        Every line is handed to ``GpiParser.line_as_entity_subject``; lines for
        which it returns None are skipped, and each returned entity is stored
        under its ``id`` attribute. Any exception raised while reading or
        parsing is logged as an error and loading stops, so the result holds
        whatever was collected up to that point.

        Returns ``BioEntities(entities)`` where ``entities`` is the collected
        dict. (``BioEntities`` is presumably the class itself when this is
        used as a classmethod -- confirm against the decorator.)
        """
        collected = {}  # type: Dict[Curie, Subject]
        print("loading from {}".format(path))
        try:
            line_parser = entityparser.GpiParser()
            with open(path) as gpi_file:
                for raw_line in gpi_file:
                    subjects = line_parser.line_as_entity_subject(raw_line)
                    if subjects is not None:
                        for subject in subjects:
                            collected[subject.id] = subject
        except Exception as err:
            logger.error("Failed to read GPI file: {}".format(str(err)))

        return BioEntities(collected)
Esempio n. 4
0
def main():
    """
    Wrapper for Assoc Parsing

    Parses the command line, creates the ontology named by ``--resource``,
    builds an ``AssocParserConfig`` and a parser for the requested format,
    runs the selected sub-command function as
    ``func(ont, args.file, outfh, parser, args)`` and finally emits the
    parser's markdown report (to ``--messagefile`` when given, otherwise to
    stdout).

    Exits through ``parser.error`` (usage message, status 2) when:

    * ``--format`` is not one of gaf, gpad, hpoa or gpi;
    * no sub-command was given and the format is not gpi (gpi always runs
      ``validate_entity`` regardless of the sub-command).

    The output file and the filtered-evidence file are closed even if the
    sub-command raises.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
                                                 By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
                                                 """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-m', '--messagefile', type=str, required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[], metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None, metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str, required=False,
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # Validate the requested format and sub-command before doing any work, so
    # a typo produces a usage error instead of an AttributeError later on.
    supported_formats = ('gaf', 'gpad', 'hpoa', 'gpi')
    fmt = 'gaf' if args.format is None else args.format.lower()
    if fmt not in supported_formats:
        parser.error("Unsupported format '{}': expected one of {}".format(
            args.format, ", ".join(supported_formats)))

    # Sub-parsers are optional for argparse, so `function` may be absent.
    func = getattr(args, "function", None)
    if fmt == "gpi":
        # GPI files hold entities, not associations: always validate entities.
        func = validate_entity
    if func is None:
        parser.error("a sub-command is required (validate, filter, convert or map2slim)")

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    filtered_evidence_file = None
    outfh = None
    try:
        # set configuration
        if args.filtered_file:
            filtered_evidence_file = open(args.filtered_file, "w")
        config = assocparser.AssocParserConfig(
            valid_taxa=args.taxon,
            ontology=ont,
            class_idspaces=args.object_prefix,
            entity_idspaces=args.subject_prefix,
            filter_out_evidence=args.filter_out,
            filtered_evidence_file=filtered_evidence_file)

        # TODO: use a factory
        if fmt == 'gaf':
            p = GafParser()
        elif fmt == 'gpad':
            p = GpadParser()
        elif fmt == 'hpoa':
            p = HpoaParser()
        else:  # fmt == "gpi", guaranteed by the check above
            p = entityparser.GpiParser()

        p.config = config

        if args.outfile is not None:
            two_mb = 2097152
            outfh = open(args.outfile, "w", buffering=two_mb)
        func(ont, args.file, outfh, p, args)
    finally:
        # Release the file handles even when the sub-command fails.
        if filtered_evidence_file is not None:
            filtered_evidence_file.close()
        if outfh is not None:
            outfh.close()

    if args.messagefile is not None:
        with open(args.messagefile, "w") as mfh:
            mfh.write(p.report.to_markdown())
    else:
        print(p.report.to_markdown())
def main():
    """
    Wrapper for Assoc Parsing

    Parses the command line, optionally creates the ontology named by
    ``--resource``, loads gaferencer inferences when ``--gaferencer-file`` is
    given, builds an ``AssocParserConfig`` and a parser for the requested
    format, runs the selected sub-command function as
    ``func(ont, args.file, outfh, parser, args)`` and finally emits the
    parser's report: markdown to ``--report-md``, JSON to ``--report-json``,
    or markdown on stdout when neither is given.

    Exits through ``parser.error`` (usage message, status 2) when:

    * ``--format`` is not one of gaf, gpad, hpoa or gpi;
    * no sub-command was given and the format is not gpi (gpi always runs
      ``validate_entity`` regardless of the sub-command).

    The output file and the filtered-evidence file are closed even if the
    sub-command raises.
    """
    parser = argparse.ArgumentParser(description='Wrapper for obographs assocmodel library'
                                                 """
                                                 By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
                                                 """,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument("--report-md", type=str, required=False, dest="report_md",
                        help="Path to report markdown file")
    parser.add_argument("--report-json", type=str, required=False, dest="report_json",
                        help="Path to report JSON file")
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[], metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None, metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument("-I", "--gaferencer-file", type=argparse.FileType('r'), required=False,
                        help="Output from Gaferencer run on a set of GAF annotations")
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument("--allow_paint", required=False, action="store_const", const=True,
                        help="Allow IBAs in parser")
    parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
                        help="GPI file")
    # `required` takes a boolean; the option is optional.
    parser.add_argument("-l", "--rule", action="append", required=False, default=[], dest="rule_set",
                        help="Set of rules to be run. Default is no rules to be run, with the exception \
                            of gorule-0000027 and gorule-0000020. See command line documentation in the \
                                ontobio project or readthedocs for more information")


    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True, choices=["GAF", "GPAD", "gaf", "gpad"],
                          help='Format to convert to')
    parser_n.add_argument("-n", "--format-version", dest="version", type=str, required=False, default=None,
                          help="Version for the file format. GAF default is 2.1, GPAD default is 1.2")

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str, default=['subClassOf', 'BFO:0000050'],
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # Validate the requested format and sub-command before doing any work, so
    # a typo produces a usage error instead of a crash on a None parser later.
    supported_formats = ('gaf', 'gpad', 'hpoa', 'gpi')
    fmt = 'gaf' if args.format is None else args.format.lower()
    if fmt not in supported_formats:
        parser.error("Unsupported format '{}': expected one of {}".format(
            args.format, ", ".join(supported_formats)))

    # Sub-parsers are optional for argparse, so `function` may be absent.
    func = getattr(args, "function", None)
    if fmt == "gpi":
        # GPI files hold entities, not associations: always validate entities.
        func = validate_entity
    if func is None:
        parser.error("a sub-command is required (validate, filter, convert or map2slim)")

    # Ontology Factory
    ont = None
    if args.resource is not None:
        ofactory = OntologyFactory()
        logging.info("Creating ont object from: {} {}".format(args.resource, ofactory))
        ont = ofactory.create(args.resource)
        logging.info("ont: {}".format(ont))

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    gaferences = None
    if args.gaferencer_file:
        gaferences = gaference.build_annotation_inferences(json.load(args.gaferencer_file))

    # A lone "all" selects every rule; any other list is passed through as is.
    rule_set = args.rule_set
    if rule_set == ["all"]:
        rule_set = assocparser.RuleSet.ALL

    filtered_evidence_file = None
    outfh = None
    try:
        # set configuration
        if args.filtered_file:
            filtered_evidence_file = open(args.filtered_file, "w")
        config = assocparser.AssocParserConfig(
            valid_taxa=args.taxon,
            ontology=ont,
            class_idspaces=args.object_prefix,
            entity_idspaces=args.subject_prefix,
            filter_out_evidence=args.filter_out,
            filtered_evidence_file=filtered_evidence_file,
            annotation_inferences=gaferences,
            paint=args.allow_paint,
            gpi_authority_path=args.gpi,
            rule_set=rule_set
        )

        # TODO: use a factory
        if fmt == 'gaf':
            p = GafParser(config=config, dataset=args.file)
        elif fmt == 'gpad':
            p = GpadParser(config=config)
        elif fmt == 'hpoa':
            p = HpoaParser(config=config)
        else:  # fmt == "gpi", guaranteed by the check above
            p = entityparser.GpiParser()

        if args.outfile is not None:
            two_mb = 2097152
            outfh = open(args.outfile, "w", buffering=two_mb)
        func(ont, args.file, outfh, p, args)
    finally:
        # Release the file handles even when the sub-command fails.
        if filtered_evidence_file is not None:
            filtered_evidence_file.close()
        if outfh is not None:
            outfh.close()

    if args.report_md is not None:
        with open(args.report_md, "w") as report_md:
            report_md.write(p.report.to_markdown())
    if args.report_json is not None:
        with open(args.report_json, "w") as report_json:
            report_json.write(json.dumps(p.report.to_report_json(), indent=4))
    # Fall back to stdout only when no report file was requested.
    if not (args.report_md or args.report_json):
        print(p.report.to_markdown())