def tag(t, head=None):
    """Try to determine the type of a term and record the outcome.

    t    -- the term to tag; it is lemmatized before any lookup.
    head -- optional head of the term; when given it is resolved through
            language.real_head (with the module blacklist) and lemmatized.

    Nothing is returned. The result is written to module-level mappings:
    tagged[t] receives the set of types on success, discarded[t] receives
    the head on failure, and reasons[t] always receives a human-readable
    explanation. closest[t] is set to {t} whenever the term itself is known.
    """
    t = language.lemmatize(t)
    if head:
        head = language.lemmatize(language.real_head(t, head, blacklist))

    types = None
    if t in terms:
        # The term itself is known, so it is its own closest match.
        closest[t] = set([t])
        types = set(terms[t].subsets())
        if not types:
            discarded[t] = head
            reasons[t] = 'Term known but no type associated: %s, %s' % (t, head)
            return
        reasons[t] = 'Term known and type associated: %s, %s, %s' % (
            t, head, '|'.join(types))
    elif head:
        # Special treatment of terms with 'of' is currently disabled;
        # uncomment the next 2 lines to enable it.
        #if head not in heads:
        #    head = language.get_new_head(t, head, blacklist)
        if head in heads:
            # Unknown term: fall back on what is known about its head.
            types = subsets_by_head(t, head)
            if not types:
                discarded[t] = head
                reasons[t] = 'Term unknown, head known but no type associated: %s, %s' % (t, head)
                return
            reasons[t] = 'Term unknown, head known and type associated: %s, %s, %s' % (
                t, head, '|'.join(str(i) for i in types))

    if not types:
        # Reached only when neither the term nor its head matched anything.
        discarded[t] = head
        reasons[t] = 'Term and head unknown: %s, %s' % (t, head)
        return
    tagged[t] = types
def __init__(self, name, **kwargs):
    """Build a term, index it by head, create its synonyms and register it.

    name   -- surface form of the term; stored lemmatized as self.name.
    kwargs -- extra attributes. They are passed through
              onto_utils.clean_dict and then copied onto the instance.
              Three keys get additional handling:
              'parents'  -- every parent is lemmatized and stored as a set;
              'head'     -- lemmatized, and self.name is added to heads[head];
              'synonyms' -- lemmatized, and one Term is created per synonym
                            with the same remaining attributes plus a
                            'synonym_of' set naming this term.

    The finished instance is stored in terms under self.name, or merged
    into the existing entry through its fusion() method if one is present.
    """
    self.name = language.lemmatize(name)
    kwargs = onto_utils.clean_dict(kwargs)

    if 'parents' in kwargs:
        kwargs['parents'] = set(
            language.lemmatize(parent) for parent in kwargs['parents'])

    # Every (cleaned) keyword becomes an instance attribute.
    self.__dict__.update(kwargs)

    if 'head' in kwargs:
        # Alternative kept for reference: resolve the real head first.
        #self.head = language.lemmatize(language.real_head(self.name, self.head, blacklist))
        self.head = language.lemmatize(self.head)
        heads.setdefault(self.head, set()).add(self.name)

    if 'synonyms' in kwargs:
        self.synonyms = set(language.lemmatize(syn) for syn in self.synonyms)
        # The synonym terms must not receive 'synonyms' themselves.
        kwargs.pop('synonyms')
        for syn in self.synonyms:
            # A fresh set per synonym so the instances do not share it.
            # NOTE(review): this stores the raw `name`, not the lemmatized
            # self.name -- confirm that is intended.
            kwargs['synonym_of'] = set([name])
            Term(name=syn, **kwargs)

    if self.name not in terms:
        terms[self.name] = self
    else:
        terms[self.name].fusion(self)
def read_heads(fd, action):
    """Read "term<TAB>head" pairs from fd and learn or tag them.

    fd     -- iterable of text lines (e.g. an open file). Lines starting
              with '#' and blank/whitespace-only lines are ignored.
    action -- 'learn': a term whose lemma is already in test.terms is
              re-created with test.Term(name=term, head=head); any other
              term is recorded in test.orphans[term] = head.
              'tag': every pair is passed to test.tag(term, head).
              Anything else prints a warning and does nothing.

    Raises ValueError if a non-blank, non-comment line does not consist of
    exactly two tab-separated fields.
    """
    pairs = set()
    for line in fd:
        # Blank lines (typically a trailing empty line) used to crash the
        # tuple unpacking; skip them like comments.
        if line.startswith('#') or not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError(
                'read_heads: expected "term<TAB>head", got %r' % (line,))
        term, head = fields
        pairs.add((term.strip(), head.strip()))

    if action == 'learn':
        # We want to add the terms to the knowledge base.
        for term, head in pairs:
            if language.lemmatize(term) in test.terms:
                test.Term(name=term, head=head)
            else:
                # It's a new term and needs to be inserted somewhere
                # (to be improved).
                test.orphans[term] = head
                #test.Term(name=term, head=head, parents=set((candidate,)))
    elif action == 'tag':
        # We just want to tag.
        for term, head in pairs:
            test.tag(term, head)
    else:
        # Single-argument call form: same output on Python 2 and Python 3.
        print("read_heads: Action unknown!")
def read_blacklist(fd):
    """Add the lemmatized content of every non-comment line to test.blacklist.

    fd -- iterable of text lines (e.g. an open file). Lines starting with
          '#' are skipped; every other line is stripped and lemmatized.
    """
    entries = set()
    for line in fd:
        if line.startswith('#'):
            continue
        # NOTE(review): blank lines are not filtered, so the lemma of ''
        # ends up in the blacklist -- confirm this is harmless.
        entries.add(language.lemmatize(line.strip()))
    test.blacklist.update(entries)