Code example #1
# Assumed imports for these snippets (module paths follow the ontobio package layout):
import hashlib
import logging
import os
import subprocess

from ontobio import obograph_util
from ontobio.ontol import Ontology

def translate_file_to_ontology(handle, **args):
    if handle.endswith(".json"):
        g = obograph_util.convert_json_file(handle, **args)
        return Ontology(handle=handle, payload=g)
    elif handle.endswith(".ttl"):
        from ontobio.sparql.rdf2nx import RdfMapper
        logging.info("RdfMapper: {}".format(args))
        m = RdfMapper(**args)
        return m.convert(handle, 'ttl')
    else:
        if not (handle.endswith(".obo") or handle.endswith(".owl")):
            logging.info(
                "Attempting to parse non obo or owl file with owltools: " +
                handle)
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        logging.info(" encoded: " + str(encoded))
        fn = '/tmp/' + encoded
        if not os.path.isfile(fn):
            cmd = ['owltools', handle, '-o', '-f', 'json', fn]
            cp = subprocess.run(cmd, check=True)
            logging.info(cp)
        else:
            logging.info("using cached file: " + fn)
        g = obograph_util.convert_json_file(fn, **args)
        return Ontology(handle=handle, payload=g)
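
A minimal usage sketch, assuming the imports above and that owltools is on the PATH for non-JSON inputs; the file names are hypothetical:

ont = translate_file_to_ontology("go.json")   # parsed directly as obograph JSON
ont = translate_file_to_ontology("envo.obo")  # converted via owltools; the JSON output is cached under /tmp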
Code example #2
def create_ontology(handle=None, **args):
    ont = None
    logging.info("Determining strategy to load '{}' into memory...".format(handle))

    if handle.find("+") > -1:
        handles = handle.split("+")
        onts = [create_ontology(ont) for ont in handles]
        ont = onts.pop()
        ont.merge(onts)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logging.info("Fetching obograph-json file from filesystem")
        ont = translate_file_to_ontology(handle, **args)
    elif handle.startswith("obo:"):
        logging.info("Fetching from OBO PURL")
        if handle.find(".") == -1:
            handle += '.owl'
        fn = '/tmp/' + handle
        if not os.path.isfile(fn):
            url = handle.replace("obo:", "http://purl.obolibrary.org/obo/")
            cmd = ['owltools', url, '-o', '-f', 'json', fn]
            cp = subprocess.run(cmd, check=True)
            logging.info(cp)
        else:
            logging.info("using cached file: " + fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    elif handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logging.info("Fetching from Wikidata")
        ont = EagerWikidataOntology(handle=handle)
    elif handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logging.info("Fetching from SciGraph")
        ont = RemoteScigraphOntology(handle=handle)
    elif handle.startswith("http:"):
        logging.info("Fetching from Web PURL: "+handle)
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        #encoded = binascii.hexlify(bytes(handle, 'utf-8'))
        #base64.b64encode(bytes(handle, 'utf-8'))
        logging.info(" encoded: "+str(encoded))
        fn = '/tmp/'+encoded
        if not os.path.isfile(fn):
            cmd = ['owltools',handle,'-o','-f','json',fn]
            cp = subprocess.run(cmd, check=True)
            logging.info(cp)
        else:
            logging.info("using cached file: "+fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    else:
        logging.info("Fetching from SPARQL")
        ont = EagerRemoteSparqlOntology(handle=handle)
    return ont
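
The handle string selects the loading strategy; a hedged sketch with illustrative handles:

ont = create_ontology("obo:pato")              # fetched from the OBO PURL, converted via owltools, cached under /tmp
ont = create_ontology("my-ontology.json")      # local obograph-json file
merged = create_ontology("obo:pato+obo:envo")  # '+' loads each handle and merges the results into one Ontology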
Code example #3
File: rdflib_bridge.py  Project: valearna/ontobio
def rdfgraph_to_ontol(rg):
    """
    Return an Ontology object from an rdflib graph object

    Status: Incomplete
    """
    # OWL and RDFS are used below as well, so import all three namespaces
    from rdflib.namespace import OWL, RDF, RDFS
    digraph = networkx.MultiDiGraph()
    label_map = {}
    for c in rg.subjects(RDF.type, OWL.Class):
        cid = contract_uri_wrap(c)
        logger.info("C={}".format(cid))
        for lit in rg.objects(c, RDFS.label):
            label_map[cid] = lit.value
            digraph.add_node(cid, label=lit.value)
        for s in rg.objects(c, RDFS.subClassOf):
            # todo - blank nodes
            sid = contract_uri_wrap(s)
            digraph.add_edge(sid, cid, pred='subClassOf')

    logger.info("G={}".format(digraph))
    payload = {
        'graph': digraph,
        #'xref_graph': xref_graph,
        #'graphdoc': obographdoc,
        #'logical_definitions': logical_definitions
    }

    ont = Ontology(handle='wd', payload=payload)
    return ont
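
A minimal sketch of driving this function with a hand-built rdflib graph; the class IRIs are made up:

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import OWL, RDF, RDFS

rg = Graph()
parent = URIRef("http://purl.obolibrary.org/obo/EX_0000001")  # hypothetical IRIs
child = URIRef("http://purl.obolibrary.org/obo/EX_0000002")
for cls, lbl in [(parent, "parent term"), (child, "child term")]:
    rg.add((cls, RDF.type, OWL.Class))
    rg.add((cls, RDFS.label, Literal(lbl)))
rg.add((child, RDFS.subClassOf, parent))

ont = rdfgraph_to_ontol(rg)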
Code example #4
    def convert(self, filename=None, format='ttl'):
        if filename is not None:
            self.parse_rdf(filename=filename, format=format)
        g = networkx.MultiDiGraph()
        ont = Ontology(graph=g)
        self.add_triples(ont)
        return ont
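
A hedged usage sketch; RdfMapper's constructor options are not shown in these snippets, so it is instantiated with defaults and the file name is hypothetical:

from ontobio.sparql.rdf2nx import RdfMapper

m = RdfMapper()
ont = m.convert("terms.ttl", format='ttl')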
Code example #5
    def process_rdfgraph(self, rg, ont=None):
        """
        Transform a skos terminology expressed in an rdf graph into an Ontology object

        Arguments
        ---------
        rg: rdflib.Graph
            graph object

        Returns
        -------
        Ontology
        """
        # TODO: ontology metadata
        if ont is None:
            ont = Ontology()
            subjs = list(rg.subjects(RDF.type, SKOS.ConceptScheme))
            if len(subjs) == 0:
                logging.warning("No ConceptScheme")
            else:
                ont.id = self._uri2id(subjs[0])
            
        subset_map = {}
        for concept in rg.subjects(RDF.type, SKOS.Concept):
            for s in self._get_schemes(rg, concept):
                subset_map[self._uri2id(s)] = s
                
        for concept in sorted(list(rg.subjects(RDF.type, SKOS.Concept))):
            id = self._uri2id(concept)
            logging.info("ADDING: {}".format(id))
            ont.add_node(id, self._get_label(rg, concept))

            for defn in rg.objects(concept, SKOS.definition):
                if defn.language == self.lang:
                    td = TextDefinition(id, escape_value(defn.value))
                    ont.add_text_definition(td)

            for s in rg.objects(concept, SKOS.broader):
                ont.add_parent(id, self._uri2id(s))

            for s in rg.objects(concept, SKOS.related):
                ont.add_parent(id, self._uri2id(s), self._uri2id(SKOS.related))

            for m in rg.objects(concept, SKOS.exactMatch):
                ont.add_xref(id, self._uri2id(m))

            for m in rg.objects(concept, SKOS.altLabel):
                syn = Synonym(id, val=self._uri2id(m))
                ont.add_synonym(syn)

            for s in self._get_schemes(rg, concept):
                ont.add_to_subset(id, self._uri2id(s))

        return ont
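
A sketch of feeding this a small SKOS file. The owning class is not visible in the snippet, so SkosParser below is a placeholder name for whatever class defines process_rdfgraph:

from rdflib import Graph

rg = Graph()
rg.parse("thesaurus.ttl", format="ttl")  # hypothetical SKOS terminology

processor = SkosParser()                 # placeholder class name
ont = processor.process_rdfgraph(rg)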
Code example #6
File: lexmap.py  Project: meftaul/ontobio
    def __init__(self, wsmap=default_wsmap(), config=None):
        """
        Arguments
        ---------
        wsmap: dict
            maps words to normalized synonyms.
        config: dict
            A configuration conforming to LexicalMapConfigSchema
        """
        # maps label or syn value to Synonym object
        self.lmap = {}
        # maps node id to synonym objects
        self.smap = {}
        self.wsmap = wsmap
        self.npattern = re.compile(r'[\W_]+')
        self.exclude_obsolete = True
        self.ontology_pairs = None
        self.id_to_ontology_map = defaultdict(list)
        self.merged_ontology = Ontology()
        self.config = config if config is not None else {}
        self.stats = {}
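
Code example #10 below shows this engine in context; in miniature, assuming ont1 and ont2 are Ontology objects loaded elsewhere:

lexmap = LexicalMapEngine()              # default wsmap, empty config
lexmap.index_ontology(ont1)
lexmap.index_ontology(ont2)
lexmap.ontology_pairs = [(ont1.id, ont2.id)]
xg = lexmap.get_xref_graph()             # graph of candidate cross-references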
Code example #7
def create_ontology_from_obograph(og):
    g = obograph_util.convert_json_object(og)
    return Ontology(handle=None, payload=g)
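
A sketch, assuming og is an obograph document already parsed from JSON; the file name is hypothetical:

import json

with open("go.json") as f:
    og = json.load(f)
ont = create_ontology_from_obograph(og)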
Code example #8
File: scigraph_ontology.py  Project: valearna/ontobio
    def subontology(self, nodes, **args):
        if nodes is None:
            nodes = []
        g = self.subgraph(nodes, **args)
        ont = Ontology(graph=g)
        return ont
Code example #9
    def subontology(self, nodes=None, **args):
        # default to an empty node list inside the body; a mutable [] default
        # would be shared across calls
        if nodes is None:
            nodes = []
        g = self.subgraph(nodes, **args)
        ont = Ontology(graph=g)
        return ont
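
Both variants behave the same once called; a usage sketch with hypothetical node IDs:

sub = ont.subontology(['GO:0008150', 'GO:0003674'])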
Code example #10
File: ontobio-lexmap.py  Project: putmantime/ontobio
def main():
    """
    Wrapper for OGR
    """
    parser = argparse.ArgumentParser(description='Wrapper for ontobio lexical mapping'
                                                 """
                                                 Lexically maps one or more ontologies. Ontologies can be local or remote,
                                                 any input handle can be specified, see docs for more details on handles.

                                                 If multiple ontologies are specified, then each ontology in the list is compared against the first one.

                                                 If a single ontology is specified, then all pairs within that ontology will be compared.

                                                 Output format to be documented - see lexmap.py for the various scoring attributes for now.
                                                 """,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-o', '--outfile', type=str, nargs='*', default=[], required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False, default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l', '--labels', type=str,
                        help='If set, then include node labels in results. DEPRECATED')
    parser.add_argument('-s', '--scoring', default='sim', type=str,
                        help='Score weighting scheme. Default=sim')
    parser.add_argument('-P', '--prefix', type=str, required=False,
                        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-c', '--config', type=str, required=False,
                        help='lexmap configuration file (yaml). See schema for details')
    parser.add_argument('-X', '--xref_weights', type=str, required=False,
                        help='csv of curated per-xref weights')
    parser.add_argument('-u', '--unmapped', type=str, required=False,
                        help='File to export unmapped nodes to')
    parser.add_argument('-A', '--all-by-all', dest='all_by_all', action='store_true',
                        help='compare all ontologies against all.')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    parser.add_argument('ontologies', nargs='*',
                        help='one or more ontologies to be aligned. Any input handle can be specified')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)
        
    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [filter_by_prefix(factory.create(h)) for h in args.ontologies]

    
    config = {}
    if args.config is not None:
        with open(args.config) as f:
            config = yaml.safe_load(f)

    if args.xref_weights is not None:
        if 'xref_weights' not in config:
            config['xref_weights'] = []
        xws = config['xref_weights']
        df = pd.read_csv(args.xref_weights)
        df = df.fillna(0.0)
        for _, row in df.iterrows():
            w = float(row['weight'])
            WA = np.array((0.0, 0.0, 0.0, 0.0))
            # note: both branches assign WA[2] = w and WA[3] = -w,
            # since abs(w) == -w when w < 0
            if w < 0:
                WA[2] = w
                WA[3] = abs(w)
            else:
                WA[2] = w
                WA[3] = -w
            xws.append({'left': row['left'],
                        'right': row['right'],
                        'weights': WA})
        
    logging.info("ALL: {}".format(args.all_by_all))
    
    lexmap = LexicalMapEngine(config=config)
    if len(onts) == 0:
        raise ValueError("must pass one or more ontologies")
    else:
        logging.info("Indexing ontologies: {}".format(onts))
        for ont in onts:
            lexmap.index_ontology(ont)
        oid0 = onts[0].id
        pairs = [(oid0, oid0)]
        if len(onts) > 1:
            if args.all_by_all:
                logging.info("All vs ALL: {}".format(onts))
                pairs = []
                for i in onts:
                    for j in onts:
                        if i.id < j.id:
                            pairs.append((i.id, j.id))
            else:
                logging.info("All vs first in list: {}".format(oid0))
                pairs = [(oid0, ont.id) for ont in onts[1:]]
        logging.info("Comparing the following pairs of ontologies: {}".format(pairs))
        lexmap.ontology_pairs = pairs
    mo = Ontology()
    mo.merge(onts)
    
    g = lexmap.get_xref_graph()
    
    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(lexmap, g, mo, args)

        
    if args.unmapped:
        udf = lexmap.unmapped_dataframe(g)
        udf.to_csv(args.unmapped, sep="\t", index=False)
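
Hypothetical invocations of this script; handles and file names are illustrative:

ontobio-lexmap.py -o mappings.tsv obo:pato obo:envo
ontobio-lexmap.py -A -X weights.csv -c lexmap-config.yaml obo:hp obo:mp obo:zp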
Code example #11
def main():
    """
    Wrapper for OGR
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for ontobio lexical mapping'
        """
                                                 ...
                                                 """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l',
                        '--labels',
                        type=str,
                        help='If set, then include node labels in results')
    parser.add_argument(
        '-P',
        '--prefix',
        type=str,
        required=False,
        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    parser.add_argument('ontologies', nargs='*')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    onts = [factory.create(h) for h in args.ontologies]

    lexmap = LexicalMapEngine()
    if len(onts) == 0:
        raise ValueError("must pass one or more ontologies")
    else:
        for ont in onts:
            lexmap.index_ontology(ont)

    mo = Ontology()
    mo.merge(onts)

    g = lexmap.get_xref_graph()

    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(g, mo, args)