Beispiel #1
0
def tag(t, head=None):
    """Try to determine the type of a term and record the outcome.

    The term is lemmatized first.  When a head is supplied it is resolved
    through ``language.real_head`` (with the module-level ``blacklist``) and
    lemmatized as well.

    Nothing is returned; the result is written to module-level mappings:

    * ``tagged[t]``    -- the types found for the term,
    * ``discarded[t]`` -- the head, when no type could be found,
    * ``reasons[t]``   -- a human-readable explanation of the decision,
    * ``closest[t]``   -- a set holding the term itself, when it is known.
    """
    t = language.lemmatize(t)
    if head:
        head = language.lemmatize(language.real_head(t, head, blacklist))

    # Case 1: the term itself is known.
    if t in terms:
        closest[t] = set((t,))
        known_types = set(terms[t].subsets())
        if known_types:
            reasons[t] = 'Term known and type associated: %s, %s, %s' % (
                t, head, '|'.join(known_types))
            tagged[t] = known_types
        else:
            discarded[t] = head
            reasons[t] = 'Term known but no type associated: %s, %s' % (t, head)
        return

    # Case 2: the term is unknown but a head was supplied.
    if head:
        # Optional special treatment of terms with 'of' (currently disabled;
        # uncomment the next two lines to enable it):
        #if head not in heads:
        #    head = language.get_new_head(t, head, blacklist)
        if head in heads:
            head_types = subsets_by_head(t, head)
            if head_types:
                reasons[t] = 'Term unknown, head known and type associated: %s, %s, %s' % (
                    t, head, '|'.join(str(i) for i in head_types))
                tagged[t] = head_types
            else:
                discarded[t] = head
                reasons[t] = 'Term unknown, head known but no type associated: %s, %s' % (t, head)
            return

    # Case 3: neither the term nor its head is known.
    discarded[t] = head
    reasons[t] = 'Term and head unknown: %s, %s' % (t, head)
Beispiel #2
0
 def __init__(self, name, **kwargs):
     """Build a term and register it in the module-level ``heads``/``terms``.

     ``name`` is lemmatized and stored as ``self.name``.  ``kwargs`` is
     passed through ``onto_utils.clean_dict`` and then copied onto the
     instance as attributes.  Three keys get extra handling:

     * ``parents``  -- every entry is lemmatized and the result stored as a set;
     * ``head``     -- lemmatized, and ``self.name`` is added to
       ``heads[self.head]``;
     * ``synonyms`` -- lemmatized into a set; one ``Term`` is created per
       synonym from the remaining keyword arguments plus a ``synonym_of``
       set holding ``name``.

     Finally, when ``self.name`` is already a key of ``terms`` the existing
     entry's ``fusion`` is called with this instance; otherwise the instance
     is stored under that key.
     """
     self.name = language.lemmatize(name)

     kwargs = onto_utils.clean_dict(kwargs)
     if 'parents' in kwargs:
         lemmatized_parents = set()
         for parent in kwargs['parents']:
             lemmatized_parents.add(language.lemmatize(parent))
         kwargs['parents'] = lemmatized_parents
     self.__dict__.update(kwargs)

     if 'head' in kwargs:
         # Alternative normalisation through real_head (disabled):
         #self.head = language.lemmatize(language.real_head(self.name, self.head, blacklist))
         self.head = language.lemmatize(self.head)
         same_head = heads.setdefault(self.head, set())
         same_head.add(self.name)

     if 'synonyms' in kwargs:
         self.synonyms = set(language.lemmatize(s) for s in self.synonyms)
         # Synonyms must not be forwarded to the terms created below.
         kwargs.pop('synonyms')
         for synonym in self.synonyms:
             # A fresh set per synonym, so the created terms do not share one.
             # NOTE(review): this stores the raw ``name``, not the lemmatized
             # ``self.name`` -- confirm that is intended.
             kwargs['synonym_of'] = set((name,))
             Term(name=synonym, **kwargs)

     if self.name not in terms:
         terms[self.name] = self
     else:
         terms[self.name].fusion(self)
def read_heads(fd, action):
    """Read ``term<TAB>head`` lines from *fd* and learn or tag each pair.

    Lines starting with ``#`` are comments.  Blank and whitespace-only lines
    are skipped (they used to abort the whole read with a ``ValueError``).
    Term and head are stripped of surrounding whitespace and duplicate pairs
    are collapsed.

    fd     -- an iterable of text lines (e.g. an open file).
    action -- ``'learn'``: pairs whose lemmatized term is in ``test.terms``
              are passed to ``test.Term``; the others are stored in
              ``test.orphans``.
              ``'tag'``: every pair is passed to ``test.tag``.
              Anything else prints a message and does nothing.

    Raises ValueError when a non-blank, non-comment line does not contain
    exactly one tab character.
    """
    pairs = set()
    for line in fd:
        # Skip comments, and empty lines that cannot be split into two fields.
        if line.startswith('#') or not line.strip():
            continue
        term, head = line.split('\t')
        pairs.add((term.strip(), head.strip()))

    # We want to add the terms to the knowledge base
    if action == 'learn':
        for term, head in pairs:
            if language.lemmatize(term) in test.terms:
                test.Term(name=term, head=head)
            # It's a new term and needs to be inserted somewhere (to be improved)
            else:
                test.orphans[term] = head
                #test.Term(name=term, head=head, parents=set((candidate,)))
    # We just want to tag
    elif action == 'tag':
        for term, head in pairs:
            test.tag(term, head)
    else:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("read_heads: Action unknown!")
def read_blacklist(fd):
    """Add the lemmatized lines of *fd* to ``test.blacklist``.

    Lines starting with ``#`` are ignored; every other line is stripped of
    surrounding whitespace and lemmatized before being added.
    """
    target = test.blacklist
    # Collect everything first so the blacklist is updated in one step.
    entries = set()
    for line in fd:
        if line.startswith('#'):
            continue
        entries.add(language.lemmatize(line.strip()))
    target.update(entries)