def translate_file_to_ontology(handle, **args):
    """Load an ontology from a local file, dispatching on the file extension.

    ``.json`` files are parsed directly as obograph documents; ``.ttl`` files
    go through the RDF mapper; everything else (``.obo``, ``.owl``, or any
    other suffix, best-effort) is converted to obograph JSON by an external
    ``owltools`` run, cached under /tmp keyed by a hash of the handle.
    """
    if handle.endswith(".json"):
        graph = obograph_util.convert_json_file(handle, **args)
        return Ontology(handle=handle, payload=graph)

    if handle.endswith(".ttl"):
        # Deferred import: rdflib machinery is only needed for turtle input.
        from ontobio.sparql.rdf2nx import RdfMapper
        logging.info("RdfMapper: {}".format(args))
        mapper = RdfMapper(**args)
        return mapper.convert(handle, 'ttl')

    # Fall through to owltools conversion for everything else.
    if not (handle.endswith(".obo") or handle.endswith(".owl")):
        logging.info(
            "Attempting to parse non obo or owl file with owltools: " + handle)
    digest = hashlib.sha256(handle.encode()).hexdigest()
    logging.info(" encoded: " + str(digest))
    cache_path = '/tmp/' + digest
    if os.path.isfile(cache_path):
        logging.info("using cached file: " + cache_path)
    else:
        proc = subprocess.run(
            ['owltools', handle, '-o', '-f', 'json', cache_path], check=True)
        logging.info(proc)
    graph = obograph_util.convert_json_file(cache_path, **args)
    return Ontology(handle=handle, payload=graph)
def create_ontology(handle=None, **args):
    """Instantiate an :class:`Ontology`, choosing a load strategy from *handle*.

    Supported handle forms, in order of precedence:
    ``a+b+...`` composites, local file paths, ``obo:`` PURLs, ``wdq:``
    Wikidata, ``scigraph:`` remote SciGraph, ``http:`` URLs (converted via
    owltools), and finally a remote SPARQL endpoint as the fallback.
    """
    ont = None
    logging.info("Determining strategy to load '{}' into memory...".format(handle))

    def _owltools_to_json(source, target):
        # Run owltools to produce obograph JSON at `target`, unless a
        # previously converted file is already cached there.
        if not os.path.isfile(target):
            proc = subprocess.run(
                ['owltools', source, '-o', '-f', 'json', target], check=True)
            logging.info(proc)
        else:
            logging.info("using cached file: " + target)

    # Composite handle: load every part, then merge the others into the last.
    if handle.find("+") > -1:
        parts = handle.split("+")
        loaded = [create_ontology(part) for part in parts]
        ont = loaded.pop()
        ont.merge(loaded)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        ont = translate_file_to_ontology(handle, **args)
    elif handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if handle.find(".") == -1:
            handle += '.owl'
        cache_path = '/tmp/' + handle
        _owltools_to_json(
            handle.replace("obo:", "http://purl.obolibrary.org/obo/"),
            cache_path)
        graph = obograph_util.convert_json_file(cache_path)
        ont = Ontology(handle=handle, payload=graph)
    elif handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        ont = EagerWikidataOntology(handle=handle)
    elif handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        ont = RemoteScigraphOntology(handle=handle)
    elif handle.startswith("http:"):
        logging.info("Fetching from Web PURL: " + handle)
        digest = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: " + str(digest))
        cache_path = '/tmp/' + digest
        _owltools_to_json(handle, cache_path)
        graph = obograph_util.convert_json_file(cache_path)
        ont = Ontology(handle=handle, payload=graph)
    else:
        logging.info("Fetching from SPARQL")
        ont = EagerRemoteSparqlOntology(handle=handle)
    return ont
def rdfgraph_to_ontol(rg):
    """
    Return an Ontology object from an rdflib graph object

    Status: Incomplete
    """
    dg = networkx.MultiDiGraph()
    from rdflib.namespace import RDF
    label_map = {}
    # Walk every owl:Class subject, recording labels and subClassOf edges.
    for cls in rg.subjects(RDF.type, OWL.Class):
        cls_id = contract_uri_wrap(cls)
        logger.info("C={}".format(cls_id))
        for lit in rg.objects(cls, RDFS.label):
            label_map[cls_id] = lit.value
            dg.add_node(cls_id, label=lit.value)
        for parent in rg.objects(cls, RDFS.subClassOf):
            # todo - blank nodes
            dg.add_edge(contract_uri_wrap(parent), cls_id, pred='subClassOf')
    logger.info("G={}".format(dg))
    payload = {
        'graph': dg,
    }
    return Ontology(handle='wd', payload=payload)
def convert(self, filename=None, format='ttl'):
    """Build an Ontology from the mapper's RDF triples.

    If *filename* is given, it is parsed first (in the given *format*)
    before the triples are translated onto a fresh MultiDiGraph.
    """
    if filename is not None:
        self.parse_rdf(filename=filename, format=format)
    ontology = Ontology(graph=networkx.MultiDiGraph())
    self.add_triples(ontology)
    return ontology
def process_rdfgraph(self, rg, ont=None):
    """
    Transform a skos terminology expressed in an rdf graph into an
    Ontology object

    Arguments
    ---------
    rg: rdflib.Graph
        graph object

    Returns
    -------
    Ontology
    """
    # TODO: ontology metadata
    if ont is None:
        ont = Ontology()

    # The ontology id is taken from the (first) ConceptScheme, if any.
    schemes = list(rg.subjects(RDF.type, SKOS.ConceptScheme))
    if len(schemes) == 0:
        logging.warning("No ConceptScheme")
    else:
        ont.id = self._uri2id(schemes[0])

    # Index every scheme referenced by any concept.
    subset_map = {}
    for concept in rg.subjects(RDF.type, SKOS.Concept):
        for scheme in self._get_schemes(rg, concept):
            subset_map[self._uri2id(scheme)] = scheme

    for concept in sorted(list(rg.subjects(RDF.type, SKOS.Concept))):
        concept_uri = str(concept)
        node_id = self._uri2id(concept)
        logging.info("ADDING: {}".format(node_id))
        ont.add_node(node_id, self._get_label(rg, concept))
        # Definitions: only those matching the configured language.
        for defn in rg.objects(concept, SKOS.definition):
            if defn.language == self.lang:
                ont.add_text_definition(
                    TextDefinition(node_id, escape_value(defn.value)))
        # broader -> is_a-style parents; related -> typed parents.
        for parent in rg.objects(concept, SKOS.broader):
            ont.add_parent(node_id, self._uri2id(parent))
        for rel in rg.objects(concept, SKOS.related):
            ont.add_parent(node_id, self._uri2id(rel),
                           self._uri2id(SKOS.related))
        for match in rg.objects(concept, SKOS.exactMatch):
            ont.add_xref(node_id, self._uri2id(match))
        for alt in rg.objects(concept, SKOS.altLabel):
            ont.add_synonym(Synonym(node_id, val=self._uri2id(alt)))
        for scheme in self._get_schemes(rg, concept):
            ont.add_to_subset(node_id, self._uri2id(scheme))
    return ont
def __init__(self, wsmap=None, config=None):
    """
    Arguments
    ---------
    wsmap: dict
        maps words to normalized synonyms. Defaults to default_wsmap().
    config: dict
        A configuration conforming to LexicalMapConfigSchema
    """
    # Fix: the previous default `wsmap=default_wsmap()` was evaluated once
    # at function-definition time, so all instances shared (and could
    # mutate) the same dict. Build a fresh default per instance instead.
    if wsmap is None:
        wsmap = default_wsmap()
    # maps label or syn value to Synonym object
    self.lmap = {}
    # maps node id to synonym objects
    self.smap = {}
    self.wsmap = wsmap
    # Fix: raw string — '\W' in a non-raw literal is an invalid escape
    # sequence (DeprecationWarning/SyntaxWarning on modern Pythons).
    self.npattern = re.compile(r'[\W_]+')
    self.exclude_obsolete = True
    self.ontology_pairs = None
    self.id_to_ontology_map = defaultdict(list)
    self.merged_ontology = Ontology()
    self.config = config if config is not None else {}
    self.stats = {}
def create_ontology_from_obograph(og):
    """Wrap an already-parsed obograph JSON object in an Ontology."""
    payload = obograph_util.convert_json_object(og)
    return Ontology(handle=None, payload=payload)
def subontology(self, nodes, **args):
    """Return a new Ontology over the induced subgraph of *nodes*.

    A ``None`` node list is treated as the empty list.
    """
    node_list = [] if nodes is None else nodes
    sub = self.subgraph(node_list, **args)
    return Ontology(graph=sub)
def subontology(self, nodes=None, **args):
    """Return a new Ontology over the induced subgraph of *nodes*.

    Fix: the previous default `nodes=[]` was a mutable default argument,
    shared across every call; use a ``None`` sentinel instead (this also
    makes an explicit ``nodes=None`` behave like "no nodes" rather than
    passing ``None`` straight through to ``subgraph``).
    """
    if nodes is None:
        nodes = []
    g = self.subgraph(nodes, **args)
    ont = Ontology(graph=g)
    return ont
def main():
    """
    Wrapper for OGR
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for ontobio lexical mapping'
        """
        Lexically maps one or more ontologies. Ontologies can be local or remote,
        any input handle can be specified, see docs for more details on handles.

        If multiple ontologies are specified, then each ontology in the list is compared against the first one.

        If a single ontology is specified, then all pairs in that ontology will be compared

        Output format to be documented - see lexmap.py for the various scoring attributes for now.
        """,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-o', '--outfile', type=str, nargs='*', default=[], required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False, default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l', '--labels', type=str,
                        help='If set, then include node labels in results. DEPRECATED')
    parser.add_argument('-s', '--scoring', default='sim', type=str,
                        help='Score weighting scheme. Default=sim')
    parser.add_argument('-P', '--prefix', type=str, required=False,
                        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-c', '--config', type=str, required=False,
                        help='lexmap configuration file (yaml). See schema for details')
    parser.add_argument('-X', '--xref_weights', type=str, required=False,
                        help='csv of curated per-xref weights')
    parser.add_argument('-u', '--unmapped', type=str, required=False,
                        help='File to export unmapped nodes to')
    parser.add_argument('-A', '--all-by-all', dest='all_by_all', action='store_true',
                        help='compare all ontologies against all.')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument('ontologies', nargs='*',
                        help='one or more ontologies to be aligned. Any input handle can be specified')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [filter_by_prefix(factory.create(h)) for h in args.ontologies]

    config = {}
    if args.config is not None:
        # Fix: yaml.load without an explicit Loader is unsafe/deprecated;
        # use safe_load, and a context manager so the file is always closed.
        with open(args.config, 'r') as f:
            config = yaml.safe_load(f)

    if args.xref_weights is not None:
        if 'xref_weights' not in config:
            config['xref_weights'] = []
        xws = config['xref_weights']
        df = pd.read_csv(args.xref_weights)
        df = df.fillna(0.0)
        for _, row in df.iterrows():
            w = float(row['weight'])
            WA = np.array((0.0, 0.0, 0.0, 0.0))
            # The original if/else branches were equivalent (for w < 0,
            # abs(w) == -w), so they collapse to a single assignment.
            WA[2] = w
            WA[3] = -w
            xws.append({'left': row['left'], 'right': row['right'], 'weights': WA})

    logging.info("ALL: {}".format(args.all_by_all))
    lexmap = LexicalMapEngine(config=config)

    if len(onts) == 0:
        # Fix: `ValueException` does not exist; raising it was a NameError.
        raise ValueError("must pass one or more ontologies")
    else:
        logging.info("Indexing ontologies: {}".format(onts))
        for ont in onts:
            lexmap.index_ontology(ont)

    oid0 = onts[0].id
    pairs = [(oid0, oid0)]
    if len(onts) > 1:
        if args.all_by_all:
            logging.info("All vs ALL: {}".format(onts))
            pairs = []
            for i in onts:
                for j in onts:
                    if i.id < j.id:
                        pairs.append((i.id, j.id))
        else:
            logging.info("All vs first in list: {}".format(oid0))
            pairs = [(oid0, ont.id) for ont in onts[1:]]
    logging.info("Comparing the following pairs of ontologies: {}".format(pairs))
    lexmap.ontology_pairs = pairs
    mo = Ontology()
    mo.merge(onts)
    g = lexmap.get_xref_graph()

    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(lexmap, g, mo, args)

    if args.unmapped:
        udf = lexmap.unmapped_dataframe(g)
        udf.to_csv(args.unmapped, sep="\t", index=False)
def main():
    """
    Wrapper for OGR
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for ontobio lexical mapping'
        """ ... """,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False, default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l', '--labels', type=str,
                        help='If set, then include node labels in results')
    parser.add_argument(
        '-P', '--prefix', type=str, required=False,
        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument('ontologies', nargs='*')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [factory.create(h) for h in args.ontologies]

    lexmap = LexicalMapEngine()
    if len(onts) == 0:
        # Fix: `ValueException` does not exist; raising it was a NameError.
        raise ValueError("must pass one or more ontologies")
    else:
        for ont in onts:
            lexmap.index_ontology(ont)

    # Merge all inputs into one ontology before emitting the xref graph.
    mo = Ontology()
    mo.merge(onts)
    g = lexmap.get_xref_graph()
    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(g, mo, args)