Beispiel #1
0
 def all_synonyms(self, include_label=False):
     logging.debug("Fetching all syns...")
     # TODO: include_label in cache
     if self.all_synonyms_cache == None:
         syntups = fetchall_syns(self.graph_name)
         syns = [Synonym(t[0],pred=t[1], val=t[2]) for t in syntups]
         for syn in syns:
             self.add_synonym(syn)
         if include_label:
             lsyns = [Synonym(x, pred='label', val=self.label(x)) for x in self.nodes()]
             syns = syns + lsyns
         self.all_synonyms_cache = syns # TODO: check if still used
     return self.all_synonyms_cache
Beispiel #2
0
    def index_ontology(self, ont):
        """
        Adds an ontology to the index

        This iterates through all labels and synonyms in the ontology, creating an index
        """
        self.merged_ontology.merge([ont])
        syns = ont.all_synonyms(include_label=True)

        include_id = self._is_meaningful_ids()
        logging.info("Include IDs as synonyms: {}".format(include_id))
        if include_id:
            for n in ont.nodes():
                v = n
                # Get fragment
                if v.startswith('http'):
                    v = re.sub('.*/', '', v)
                    v = re.sub('.*#', '', v)
                syns.append(Synonym(n, val=v, pred='label'))

        logging.info("Indexing {} syns in {}".format(len(syns), ont))
        logging.info("Distinct lexical values: {}".format(len(
            self.lmap.keys())))
        for syn in syns:
            self.index_synonym(syn, ont)
        for nid in ont.nodes():
            self.id_to_ontology_map[nid].append(ont)
Beispiel #3
0
    def index_synonym(self, syn, ont):
        """
        Index a synonym

        Typically not called from outside this object; called by `index_ontology`
        """
        if not syn.val:
            if syn.pred == 'label':
                if not self._is_meaningful_ids():
                    if not ont.is_obsolete(syn.class_id):
                        pass
                        #logging.error('Use meaningful ids if label not present: {}'.format(syn))
            else:
                logging.warning("Incomplete syn: {}".format(syn))
            return
        if self.exclude_obsolete and ont.is_obsolete(syn.class_id):
            return

        syn.ontology = ont
        prefix, _ = ont.prefix_fragment(syn.class_id)

        v = syn.val

        caps_match = re.match('[A-Z]+', v)
        if caps_match:
            # if > 75% of length is caps, assume abbreviation
            if caps_match.span()[1] >= len(v) / 3:
                syn.is_abbreviation(True)

        # chebi 'synonyms' are often not real synonyms
        # https://github.com/ebi-chebi/ChEBI/issues/3294
        if not re.match('.*[a-zA-Z]', v):
            if prefix != 'CHEBI':
                logging.warning('Ignoring suspicous synonym: {}'.format(syn))
            return

        v = self._standardize_label(v)

        # TODO: do this once ahead of time
        wsmap = {}
        for w, s in self.wsmap.items():
            wsmap[w] = s
        for ss in self._get_config_val(prefix, 'synsets', []):
            # TODO: weights
            wsmap[ss['synonym']] = ss['word']
        nv = self._normalize_label(v, wsmap)

        self._index_synonym_val(syn, v)
        nweight = self._get_config_val(prefix, 'normalized_form_confidence',
                                       0.8)
        if nweight > 0 and not syn.is_abbreviation():
            if nv != v:
                nsyn = Synonym(syn.class_id,
                               val=syn.val,
                               pred=syn.pred,
                               lextype=syn.lextype,
                               ontology=ont,
                               confidence=syn.confidence * nweight)
                self._index_synonym_val(nsyn, nv)
Beispiel #4
0
    def process_rdfgraph(self, rg, ont=None):
        """
        Transform a skos terminology expressed in an rdf graph into an Ontology object

        Arguments
        ---------
        rg: rdflib.Graph
            graph object

        Returns
        -------
        Ontology
        """
        # TODO: ontology metadata
        if ont is None:
            ont = Ontology()
            subjs = list(rg.subjects(RDF.type, SKOS.ConceptScheme))
            if len(subjs) == 0:
                logging.warning("No ConceptScheme")
            else:
                ont.id = self._uri2id(subjs[0])
            
        subset_map = {}
        for concept in rg.subjects(RDF.type, SKOS.Concept):
            for s in self._get_schemes(rg, concept):
                subset_map[self._uri2id(s)] = s
                
        for concept in sorted(list(rg.subjects(RDF.type, SKOS.Concept))):
            concept_uri = str(concept)
            id=self._uri2id(concept)
            logging.info("ADDING: {}".format(id))
            ont.add_node(id, self._get_label(rg,concept))
                    
            for defn in rg.objects(concept, SKOS.definition):
                if (defn.language == self.lang):
                    td = TextDefinition(id, escape_value(defn.value))
                    ont.add_text_definition(td)
                    
            for s in rg.objects(concept, SKOS.broader):
                ont.add_parent(id, self._uri2id(s))
                
            for s in rg.objects(concept, SKOS.related):
                ont.add_parent(id, self._uri2id(s), self._uri2id(SKOS.related))
                
            for m in rg.objects(concept, SKOS.exactMatch):
                ont.add_xref(id, self._uri2id(m))
                
            for m in rg.objects(concept, SKOS.altLabel):
                syn = Synonym(id, val=self._uri2id(m))
                ont.add_synonym(syn)
                
            for s in self._get_schemes(rg,concept):
                ont.add_to_subset(id, self._uri2id(s))
                
        return ont
Beispiel #5
0
    def index_synonym(self, syn, ont):
        """
        Index a synonym

        Typically not called from outside this object; called by `index_ontology`
        """
        if not syn.val:
            logging.debug("Incomplete syn: {}".format(syn))
            return
        if self.exclude_obsolete and ont.is_obsolete(syn.class_id):
            return
        syn.ontology = ont
        v = syn.val.lower()
        nv = self._normalize(v, self.wsmap)
        if self.nweight > 0:
            self._index_synonym_val(syn, v)
            if nv != v:
                nsyn = Synonym(syn.class_id,
                               val=syn.val,
                               pred=syn.pred,
                               lextype=syn.lextype,
                               ontology=ont)
                self._index_synonym_val(nsyn, nv)