Esempio n. 1
0
def get_knowledge_tokens(text):
    """Build a space-separated query string from the components of *text*.

    The text is split by ``_split_text`` and the "doc", "nc", "pos" and
    "tag" components are pulled out of the result.  When an entry of
    ``tag`` carries the label "VBZ", the query is made of every entry of
    ``pos`` after the first such position whose label does not contain
    "ADP".  Otherwise the query falls back to ``nc`` without its first
    item, then to ``doc`` without its first two items, then to the whole
    ``doc``.

    NOTE(review): ``_split_text`` and ``_ig`` are defined outside this
    block; ``_ig`` looks like ``operator.itemgetter`` and the components
    look like spaCy objects -- confirm against the rest of the file.

    :param text: input handed straight to ``_split_text``.
    :return: the selected ``.text`` values joined by single spaces.
    """
    components = _split_text(text)
    doc, nc, pos, tag = _ig("doc", "nc", "pos", "tag")(components)
    try:
        # Entries of ``tag`` are indexed like pairs; position 1 is the label.
        # next() stops at the first match instead of building a full list.
        first_vbz = next(i for i, entry in enumerate(tag) if entry[1] == "VBZ")
        terms = pos[first_vbz + 1:]
    except Exception:
        # Best-effort fallback when no "VBZ" entry can be located.  Unlike a
        # bare ``except:``, KeyboardInterrupt and SystemExit still propagate.
        if len(nc) > 1:
            fallback = nc[1:]
        elif len(doc) > 2:
            fallback = doc[2:]
        else:
            fallback = doc
        return " ".join(item.text for item in fallback)
    # Entries of ``pos`` are indexed like (item, label) pairs.
    return " ".join(term[0].text for term in terms if "ADP" not in term[1])
Esempio n. 2
0
 def get_knowledge_tokens(self, text):
     """Build a space-separated query string from the components of *text*.

     The text is split by ``self._split_text`` and the "doc", "nc", "pos"
     and "tag" components are pulled out of the result.  When an entry of
     ``tag`` carries the label "VBZ", the query is made of every entry of
     ``pos`` after the first such position whose label does not contain
     "ADP".  Otherwise the query falls back to ``nc`` without its first
     item, then to ``doc`` without its first two items, then to the whole
     ``doc``.

     NOTE(review): ``_ig`` is defined outside this block and looks like
     ``operator.itemgetter``; the components look like spaCy objects --
     confirm against the rest of the file.

     :param text: input handed straight to ``self._split_text``.
     :return: the selected ``.text`` values joined by single spaces.
     """
     components = self._split_text(text)
     doc, nc, pos, tag = _ig("doc", "nc", "pos", "tag")(components)
     try:
         # Entries of ``tag`` are indexed like pairs; position 1 is the label.
         # next() stops at the first match instead of building a full list.
         first_vbz = next(i for i, entry in enumerate(tag) if entry[1] == "VBZ")
         terms = pos[first_vbz + 1:]
     except Exception:
         # Best-effort fallback when no "VBZ" entry can be located.  Unlike a
         # bare ``except:``, KeyboardInterrupt and SystemExit still propagate.
         if len(nc) > 1:
             fallback = nc[1:]
         elif len(doc) > 2:
             fallback = doc[2:]
         else:
             fallback = doc
         return " ".join(item.text for item in fallback)
     # Entries of ``pos`` are indexed like (item, label) pairs.
     return " ".join(term[0].text for term in terms if "ADP" not in term[1])
Esempio n. 3
0
 def get_news_tokens(self, text):
     """Extract a ``(source, query)`` pair from *text*.

     The text is split by ``self._split_text`` and the "doc", "nc" and
     "pos" components are pulled out of the result.  The item of ``doc``
     right after the first "ADP" label anchors the query: the query starts
     at the first ``nc`` item whose lemma contains that item's lemma.

     When more than one "ADP" label is present, the lowercased text of the
     last ``nc`` item is used as the source and that item is left out of
     the query; otherwise the source is "nyt" and the query runs to the end
     of ``nc``.

     NOTE(review): ``_ig`` and ``_c`` are defined outside this block; they
     look like ``operator.itemgetter`` and ``collections.Counter`` --
     confirm against the rest of the file.

     :param text: input handed straight to ``self._split_text``.
     :return: tuple ``(source, query)`` where ``query`` is the selected
         lemmas joined by single spaces.
     :raises ValueError: if no label equals "ADP".
     :raises IndexError: if no ``nc`` item matches the anchor lemma.
     """
     components = self._split_text(text)
     doc, nc, pos = _ig("doc", "nc", "pos")(components)
     # Entries of ``pos`` are indexed like pairs; position 1 is the label.
     # zip() is materialised so the subscript also works where zip() returns
     # an iterator (Python 3); on Python 2 the result is unchanged.
     pos_labels = list(zip(*pos))[1]
     after_adp = pos_labels.index("ADP") + 1
     multi_adp = (_c(pos_labels).get("ADP") or 0) > 1
     source = nc[-1].text.lower() if multi_adp else "nyt"
     anchor_lemma = doc[after_adp].lemma_
     matches = [chunk for chunk in nc if anchor_lemma in chunk.lemma_]
     start = nc.index(matches[0])
     # With several "ADP" labels the last ``nc`` item is the source, not
     # part of the query.
     selected = nc[start:-1] if multi_adp else nc[start:]
     query = " ".join(chunk.lemma_ for chunk in selected)
     return source, query
Esempio n. 4
0
 def get_news_tokens(self, text):
     """Extract a ``(source, query)`` pair from *text*.

     The text is split by ``self._split_text`` and the "doc", "nc" and
     "pos" components are pulled out of the result.  The item of ``doc``
     right after the first "ADP" label anchors the query: the query starts
     at the first ``nc`` item whose lemma contains that item's lemma.

     When more than one "ADP" label is present, the lowercased text of the
     last ``nc`` item is used as the source and that item is left out of
     the query; otherwise the source is "nyt" and the query runs to the end
     of ``nc``.

     NOTE(review): ``_ig`` and ``_c`` are defined outside this block; they
     look like ``operator.itemgetter`` and ``collections.Counter`` --
     confirm against the rest of the file.

     :param text: input handed straight to ``self._split_text``.
     :return: tuple ``(source, query)`` where ``query`` is the selected
         lemmas joined by single spaces.
     :raises ValueError: if no label equals "ADP".
     :raises IndexError: if no ``nc`` item matches the anchor lemma.
     """
     components = self._split_text(text)
     doc, nc, pos = _ig("doc", "nc", "pos")(components)
     # Entries of ``pos`` are indexed like pairs; position 1 is the label.
     # zip() is materialised so the subscript also works where zip() returns
     # an iterator (Python 3); on Python 2 the result is unchanged.
     pos_labels = list(zip(*pos))[1]
     after_adp = pos_labels.index("ADP") + 1
     multi_adp = (_c(pos_labels).get("ADP") or 0) > 1
     source = nc[-1].text.lower() if multi_adp else "nyt"
     anchor_lemma = doc[after_adp].lemma_
     matches = [chunk for chunk in nc if anchor_lemma in chunk.lemma_]
     start = nc.index(matches[0])
     # With several "ADP" labels the last ``nc`` item is the source, not
     # part of the query.
     selected = nc[start:-1] if multi_adp else nc[start:]
     query = " ".join(chunk.lemma_ for chunk in selected)
     return source, query