def guess(self, s, is_ask=None):
    """Return the key of the best-matching indexed line for sentence s, or '' if nothing matches."""
    assert self.is_train
    keys = list(lang.keyword(s))
    if len(keys) == 0:
        return ''
    # MUST contain the keys
    keys = u' '.join(keys)
    splits = u' '.join(list(lang.tokenizezh(s)))
    # Alternative: one combined query string, e.g. self.parser.parse(splits + ' OR ' + keys)
    q1 = self.parser.parse(keys)    # keyword-only query
    q2 = self.parser.parse(splits)  # full tokenized-sentence query
    q = q1 | q2
    # Restrict hits to the same question/statement type as the input;
    # check against None so an explicit falsy is_ask value is still honored.
    if is_ask is None:
        ask = query.Term(u"ask", lang.is_question(s))
    else:
        ask = query.Term(u"ask", is_ask)
    results = self.searcher.search(q, filter=ask)
    for hit in results:
        return hit['key']  # best-scoring hit
    return ''
def test_token(s):
    """Print the tokens RegexTokenizer (whoosh.analysis) produces from the zh-segmented text."""
    an = RegexTokenizer()
    for token in an(' '.join(list(lang.tokenizezh(s)))):
        print token.text,
    print ''
def train(self, key, line):
    """Index one training line under the given key."""
    splits = u' '.join(list(lang.tokenizezh(line)))
    ask = lang.is_question(key)
    self.writer.add_document(key=key, content=splits, ask=ask)
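# A minimal sketch of the Whoosh index setup that train() and guess() appear to
# assume. The field names (key, content, ask) are taken from the calls above;
# the directory name, the open_index helper, the stored/analyzed choices, and
# the type of the "ask" field are assumptions, since lang.is_question's return
# type is not shown here.
import os
from whoosh import index
from whoosh.fields import Schema, ID, TEXT
from whoosh.qparser import QueryParser

def open_index(dirname='indexdir'):
    # "key" is returned by guess(), so it must be stored; "content" holds the
    # space-joined tokens from lang.tokenizezh; "ask" mirrors lang.is_question.
    schema = Schema(key=ID(stored=True), content=TEXT, ask=ID(stored=True))
    if not os.path.exists(dirname):
        os.mkdir(dirname)
        return index.create_in(dirname, schema)
    return index.open_dir(dirname)

# Typical wiring (hypothetical): train first, commit, then search.
# ix = open_index()
# self.writer = ix.writer()
# ... call self.train(key, line) for each training pair ...
# self.writer.commit()
# self.searcher = ix.searcher()
# self.parser = QueryParser('content', schema=ix.schema)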