Example #1
0
 def get_news_tokens(self, text):
     """Derive a ``(source, query)`` pair for a news lookup from ``text``.

     ``self._split_text(text)`` supplies three entries that are read here:
     ``doc`` (indexed by token position), ``nc`` (a sequence of chunks
     exposing ``.text`` and ``.lemma_``) and ``pos`` (a sequence whose items
     carry a part-of-speech tag at position 1).

     The token right after the first ``"ADP"`` tag marks where the query
     starts. When more than one ``"ADP"`` tag is present, the last chunk is
     used as the source and left out of the query; otherwise the source
     falls back to ``"nyt"`` and the query runs to the final chunk.

     Args:
         text: Input handed unchanged to ``self._split_text``.

     Returns:
         A ``(source, query)`` tuple. ``source`` is the lower-cased text of
         the last chunk or ``"nyt"``; ``query`` is the selected chunks'
         lemmas joined by single spaces.

     Raises:
         ValueError: If no tag equals ``"ADP"``.
         IndexError: If ``pos`` is empty, or if no chunk's lemma contains
             the lemma of the token following the first ``"ADP"``.
     """
     components = self._split_text(text)
     # NOTE(review): _ig / _c are defined outside this block; they are used
     # here exactly as before (presumably itemgetter / Counter - confirm).
     doc, nc, pos = _ig("doc", "nc", "pos")(components)

     # Build the tag column once. Subscripting ``zip(*pos)`` only works where
     # ``zip`` returns a list (Python 2); on Python 3 it raises TypeError.
     tags = [entry[1] for entry in pos]
     if not tags:
         # The subscripted zip raised IndexError on empty input; keep that
         # exception type so existing callers are unaffected.
         raise IndexError("no part-of-speech tags available")

     after_adp = tags.index("ADP") + 1
     multi_adp = (_c(tags).get("ADP") or 0) > 1
     source = nc[-1].text.lower() if multi_adp else "nyt"

     # Position of the first chunk whose lemma contains the lemma of the
     # token that follows the first adposition.
     anchor_lemma = doc[after_adp].lemma_
     start = next(
         (i for i, chunk in enumerate(nc) if anchor_lemma in chunk.lemma_),
         None,
     )
     if start is None:
         raise IndexError(
             "no chunk contains the lemma %r" % (anchor_lemma,)
         )

     # With a trailing source chunk, stop one chunk early so the source does
     # not leak into the query.
     selected = nc[start:-1] if multi_adp else nc[start:]
     query = " ".join(chunk.lemma_ for chunk in selected)
     return source, query
Example #2
0
 def get_news_tokens(self, text):
     """Derive a ``(source, query)`` pair for a news lookup from ``text``.

     ``self._split_text(text)`` supplies three entries that are read here:
     ``doc`` (indexed by token position), ``nc`` (a sequence of chunks
     exposing ``.text`` and ``.lemma_``) and ``pos`` (a sequence whose items
     carry a part-of-speech tag at position 1).

     The token right after the first ``"ADP"`` tag marks where the query
     starts. When more than one ``"ADP"`` tag is present, the last chunk is
     used as the source and left out of the query; otherwise the source
     falls back to ``"nyt"`` and the query runs to the final chunk.

     Args:
         text: Input handed unchanged to ``self._split_text``.

     Returns:
         A ``(source, query)`` tuple. ``source`` is the lower-cased text of
         the last chunk or ``"nyt"``; ``query`` is the selected chunks'
         lemmas joined by single spaces.

     Raises:
         ValueError: If no tag equals ``"ADP"``.
         IndexError: If ``pos`` is empty, or if no chunk's lemma contains
             the lemma of the token following the first ``"ADP"``.
     """
     components = self._split_text(text)
     # NOTE(review): _ig / _c are defined outside this block; they are used
     # here exactly as before (presumably itemgetter / Counter - confirm).
     doc, nc, pos = _ig("doc", "nc", "pos")(components)

     # Build the tag column once. Subscripting ``zip(*pos)`` only works where
     # ``zip`` returns a list (Python 2); on Python 3 it raises TypeError.
     tags = [entry[1] for entry in pos]
     if not tags:
         # The subscripted zip raised IndexError on empty input; keep that
         # exception type so existing callers are unaffected.
         raise IndexError("no part-of-speech tags available")

     after_adp = tags.index("ADP") + 1
     multi_adp = (_c(tags).get("ADP") or 0) > 1
     source = nc[-1].text.lower() if multi_adp else "nyt"

     # Position of the first chunk whose lemma contains the lemma of the
     # token that follows the first adposition.
     anchor_lemma = doc[after_adp].lemma_
     start = next(
         (i for i, chunk in enumerate(nc) if anchor_lemma in chunk.lemma_),
         None,
     )
     if start is None:
         raise IndexError(
             "no chunk contains the lemma %r" % (anchor_lemma,)
         )

     # With a trailing source chunk, stop one chunk early so the source does
     # not leak into the query.
     selected = nc[start:-1] if multi_adp else nc[start:]
     query = " ".join(chunk.lemma_ for chunk in selected)
     return source, query