def __init__(self, config=None, group="unknown", dataset="unknown"):
    """
    Initialize the parser with an optional configuration and report labels.

    Arguments:
    ---------
    config : a AssocParserConfig object
        Parser configuration; a default AssocParserConfig is created when
        none is supplied.
    group : str
        Group name used to label the validation Report.
    dataset : str
        Dataset name used to label the validation Report.
    """
    if config is None:
        config = assocparser.AssocParserConfig()
    self.config = config
    self.report = assocparser.Report(group=group, dataset=dataset)
    # Optional entity lookup built from a GPI "authority" file; stays None
    # when no gpi_authority_path is configured.
    self.gpi = None
    if self.config.gpi_authority_path is not None:
        self.gpi = dict()
        parser = entityparser.GpiParser()
        with open(self.config.gpi_authority_path) as gpi_f:
            entities = parser.parse(file=gpi_f)
            for entity in entities:
                # Map entity id -> basic metadata used during parsing.
                self.gpi[entity["id"]] = {
                    "symbol": entity["label"],
                    "name": entity["full_name"],
                    "synonyms": entitywriter.stringify(entity["synonyms"]),
                    "type": entity["type"]
                }
            # len(self.gpi) instead of len(self.gpi.keys()): same value, idiomatic.
            print("Loaded {} entities from {}".format(len(self.gpi), self.config.gpi_authority_path))
def load_from_file(BioEntities, path: str):
    """
    Read a GPI file at `path` and wrap its entities in a BioEntities collection.

    Each GPI line is parsed into raw entity dicts; every entity is converted
    into a Subject keyed by its Curie id. On any read/parse failure the error
    is logged and whatever was collected so far is returned.
    """
    entities = dict()  # type: Dict[Curie, Subject]
    try:
        parser = entityparser.GpiParser()
        with open(path) as gpi_file:
            for raw_line in gpi_file:
                _, parsed = parser.parse_line(raw_line)
                for raw in parsed:
                    # The parser guarantees "id" is a well-formed curie string.
                    curie = Curie.from_str(raw["id"])
                    subject = Subject(
                        curie,
                        raw["label"],
                        raw["full_name"],
                        raw["synonyms"],
                        raw["type"],
                        Curie.from_str(raw["taxon"]["id"]))
                    entities[curie] = subject
    except Exception as e:
        logger.error("Failed to read GPI file: {}".format(str(e)))
    return BioEntities(entities)
def load_from_file(BioEntities, path: str):
    """
    Read a GPI file at `path` and wrap its entities in a BioEntities collection.

    Lines are converted directly to Subject objects via
    `GpiParser.line_as_entity_subject`; lines that yield None are skipped.
    On any read/parse failure the error is logged and whatever was collected
    so far is returned.
    """
    entities = dict()  # type: Dict[Curie, Subject]
    # Was a bare print() left over from debugging; use the module logger so
    # library callers control visibility (matches the except branch below).
    logger.info("loading from {}".format(path))
    try:
        gpi_parser = entityparser.GpiParser()
        with open(path) as gpi:
            for line in gpi:
                ents = gpi_parser.line_as_entity_subject(line)
                if ents is None:
                    # Comment/header or unparseable line: nothing to index.
                    continue
                for entity in ents:
                    entity_id = entity.id
                    entities[entity_id] = entity
    except Exception as e:
        logger.error("Failed to read GPI file: {}".format(str(e)))
    return BioEntities(entities)
def main():
    """
    Wrapper for Assoc Parsing

    CLI entry point: builds the argument parser, configures logging from
    verbosity, constructs an ontology and an AssocParserConfig, selects a
    parser by --format, runs the chosen subcommand, then writes the report.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
        By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    # Global options shared by all subcommands.
    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-m', '--messagefile', type=str, required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[],
                        metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    # Subcommands; each binds its handler via set_defaults(function=...).
    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str, required=False,
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    # Map -v count to a logging level (0 -> WARNING, 1 -> INFO, 2+ -> DEBUG).
    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # NOTE(review): if no subcommand is given, args has no 'function'
    # attribute and this raises AttributeError — confirm intended behavior.
    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    # set configuration
    filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file)
    p = None
    fmt = None
    if args.format is None:
        fmt = 'gaf'
    else:
        fmt = args.format.lower()

    # TODO: use a factory
    if fmt == 'gaf':
        p = GafParser()
    elif fmt == 'gpad':
        p = GpadParser()
    elif fmt == 'hpoa':
        p = HpoaParser()
    elif fmt == "gpi":
        # GPI files go through the entity parser and the entity validator.
        p = entityparser.GpiParser()
        func = validate_entity
    p.config = config

    outfh = None
    if args.outfile is not None:
        # 2 MB write buffer for large outputs.
        two_mb = 2097152
        outfh = open(args.outfile, "w", buffering=two_mb)
    func(ont, args.file, outfh, p, args)

    if filtered_evidence_file:
        filtered_evidence_file.close()

    if outfh is not None:
        outfh.close()

    # Emit the parser report: to the message file when given, else stdout.
    if args.messagefile is not None:
        mfh = open(args.messagefile, "w")
        mfh.write(p.report.to_markdown())
        mfh.close()
    else:
        print(p.report.to_markdown())
def main():
    """
    Wrapper for Assoc Parsing

    CLI entry point: builds the argument parser, configures logging from
    verbosity, optionally loads an ontology and Gaferencer inferences,
    constructs an AssocParserConfig, selects a parser by --format, runs the
    chosen subcommand, then writes markdown/JSON reports as requested.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """
        By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    # Global options shared by all subcommands.
    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--file', type=str, required=False,
                        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument("--report-md", type=str, required=False, dest="report_md",
                        help="Path to report markdown file")
    parser.add_argument("--report-json", type=str, required=False, dest="report_json",
                        help="Path to report JSON file")
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument("--filter-out", nargs="+", required=False, default=[],
                        metavar="EVIDENCE",
                        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument('-T', '--taxon', nargs='*', required=False,
                        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument("-I", "--gaferencer-file", type=argparse.FileType('r'), required=False,
                        help="Output from Gaferencer run on a set of GAF annotations")
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument("--allow_paint", required=False, action="store_const", const=True,
                        help="Allow IBAs in parser")
    parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
                        help="GPI file")
    # NOTE(review): required=None looks like a typo for required=False;
    # argparse treats None as falsy so behavior matches — confirm intent.
    parser.add_argument("-l", "--rule", action="append", required=None, default=[],
                        dest="rule_set",
                        help="Set of rules to be run. Default is no rules to be run, with the exception \
of gorule-0000027 and gorule-0000020. See command line documentation in the \
ontobio project or readthedocs for more information")

    # Subcommands; each binds its handler via set_defaults(function=...).
    subparsers = parser.add_subparsers(dest='subcommand', help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True,
                          choices=["GAF", "GPAD", "gaf", "gpad"],
                          help='Format to convert to')
    parser_n.add_argument("-n", "--format-version", dest="version", type=str,
                          required=False, default=None,
                          help="Version for the file format. GAF default is 2.1, GPAD default is 1.2")

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str,
                          default=['subClassOf', 'BFO:0000050'],
                          help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    # Map -v count to a logging level (0 -> WARNING, 1 -> INFO, 2+ -> DEBUG).
    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # Ontology Factory (ontology is optional here, unlike older versions).
    ont = None
    if args.resource is not None:
        ofactory = OntologyFactory()
        logging.info("Creating ont object from: {} {}".format(args.resource, ofactory))
        ont = ofactory.create(args.resource)
        logging.info("ont: {}".format(ont))

    # NOTE(review): if no subcommand is given, args has no 'function'
    # attribute and this raises AttributeError — confirm intended behavior.
    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    # Optional precomputed annotation inferences from a Gaferencer JSON dump.
    gaferences = None
    if args.gaferencer_file:
        gaferences = gaference.build_annotation_inferences(json.load(args.gaferencer_file))

    # "-l all" expands to the full rule set.
    rule_set = args.rule_set
    if rule_set == ["all"]:
        rule_set = assocparser.RuleSet.ALL

    # set configuration
    filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file,
        annotation_inferences=gaferences,
        paint=args.allow_paint,
        gpi_authority_path=args.gpi,
        rule_set=rule_set
    )
    p = None
    fmt = None
    if args.format is None:
        fmt = 'gaf'
    else:
        fmt = args.format.lower()

    # TODO: use a factory
    if fmt == 'gaf':
        p = GafParser(config=config, dataset=args.file)
    elif fmt == 'gpad':
        p = GpadParser(config=config)
    elif fmt == 'hpoa':
        p = HpoaParser(config=config)
    elif fmt == "gpi":
        # GPI files go through the entity parser and the entity validator.
        # NOTE(review): unlike the branches above, no config is passed here.
        p = entityparser.GpiParser()
        func = validate_entity

    outfh = None
    if args.outfile is not None:
        # 2 MB write buffer for large outputs.
        two_mb = 2097152
        outfh = open(args.outfile, "w", buffering=two_mb)
    func(ont, args.file, outfh, p, args)

    if filtered_evidence_file:
        filtered_evidence_file.close()

    if outfh is not None:
        outfh.close()

    # Emit reports: markdown and/or JSON files when requested; stdout when
    # neither report path was given.
    if args.report_md is not None:
        report_md = open(args.report_md, "w")
        report_md.write(p.report.to_markdown())
        report_md.close()
    if args.report_json is not None:
        report_json = open(args.report_json, "w")
        report_json.write(json.dumps(p.report.to_report_json(), indent=4))
        report_json.close()
    if not (args.report_md or args.report_json):
        print(p.report.to_markdown())