Example #1
    def guess(self, s, is_ask=None):
        assert self.is_train

        # Extract keywords from the sentence; give up if there are none.
        keys = list(lang.keyword(s))
        if len(keys) == 0:
            return ''

        # Build one sub-query over the keywords and one over the full
        # token stream, then OR them together.
        keys = u' '.join(keys)
        splits = u' '.join(list(lang.tokenizezh(s)))
        q1 = self.parser.parse(keys)
        q2 = self.parser.parse(splits)
        q = q1 | q2

        # Restrict hits to documents with a matching question/statement
        # flag. Testing `is_ask is None` (rather than `not is_ask`) keeps
        # an explicitly passed False from being overridden.
        if is_ask is None:
            ask = query.Term(u"ask", lang.is_question(s))
        else:
            ask = query.Term(u"ask", is_ask)
        results = self.searcher.search(q, filter=ask)

        # Return the key of the best-scoring hit, or '' when nothing matches.
        for hit in results:
            return hit['key']
        return ''
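
The methods in these examples refer to self.parser, self.searcher, self.writer, and an is_train flag that the listing does not show. Below is a minimal sketch of the Whoosh setup they appear to assume; the schema fields mirror the key, content, and ask names used above, while index_dir and the module-level wiring are hypothetical:

import os

from whoosh import index
from whoosh.fields import Schema, TEXT, BOOLEAN
from whoosh.qparser import OrGroup, QueryParser

# Schema inferred from the fields the snippets read and write:
# a stored key, the tokenized content, and a question flag.
schema = Schema(key=TEXT(stored=True), content=TEXT, ask=BOOLEAN)

index_dir = "indexdir"  # hypothetical location
if not os.path.exists(index_dir):
    os.mkdir(index_dir)
ix = index.create_in(index_dir, schema)

writer = ix.writer()      # what train() writes through
parser = QueryParser("content", schema=ix.schema, group=OrGroup)
# ... index documents with train(), then commit before searching:
writer.commit()
searcher = ix.searcher()  # what guess() queries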
Example #2
from whoosh.analysis import RegexTokenizer

def test_token(s):
    # Run Whoosh's RegexTokenizer over the space-joined output of the
    # project's Chinese tokenizer and print each token on one line.
    an = RegexTokenizer()
    for token in an(' '.join(list(lang.tokenizezh(s)))):
        print(token.text, end=' ')
    print()
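
For illustration, a call like the following (the input sentence is hypothetical) prints the recovered tokens separated by spaces:

test_token(u'今天天气怎么样')  # hypothetical input sentence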
Example #3
    def train(self, key, line):
        # Index one (key, line) pair: store the tokenized line as the
        # document body and record whether the key reads as a question.
        splits = u' '.join(list(lang.tokenizezh(line)))
        ask = lang.is_question(key)
        self.writer.add_document(key=key, content=splits, ask=ask)
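
Note that documents added through a Whoosh writer only become searchable after the writer commits, so a training run would end with something like the following (bot and training_pairs are hypothetical names):

for key, line in training_pairs:  # hypothetical corpus of (key, line) pairs
    bot.train(key, line)
bot.writer.commit()  # flush buffered documents so guess() can see them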