Beispiel #1
0
def serv(model):
    """Run an interactive console loop that scores queries against domains.

    Each round reads a query and an optional whitespace-separated list of
    domains from stdin, reduces the query with ``extract``, prints the
    decoder's prediction, and then prints every requested domain with its
    score (highest score first) followed by a per-term breakdown.
    Entering ``q`` as the query ends the loop.

    Args:
        model: Model handed to ``NaiveDecoder``; it is read back through
            ``decoder.model`` for ``domains``, ``get_category`` and
            ``term_count``.

    Returns:
        None. All results are written to stdout.
    """
    decoder = NaiveDecoder(model)
    while True:
        query = raw_input('Input your query(must be segmented by SPACE), q to quit:\n').decode('utf-8')
        if query == u'q':
            return
        domains = raw_input('Input the domains you want to compare:\n').decode('utf-8')
        # split() without a separator drops empty tokens produced by doubled
        # or trailing spaces; blank or whitespace-only input falls back to
        # every domain the model knows about.
        domains = domains.split() or decoder.model.domains

        query = extract(query)
        # The reduced query is tokenised once and reused for scoring and
        # for the per-term report below.
        terms = query.split(' ')
        ret = decoder.predict(query)
        print("\n%s\n%s\n" % (ret, '=' * 50))

        scored = []
        for domain in domains:
            score, detail = decoder.get_score(terms, domain)
            scored.append((score, domain, detail))
        # Highest score first; reverse=True keeps ties in input order.
        scored.sort(key=lambda entry: entry[0], reverse=True)

        # Unpack in the same order the tuples were built. The earlier code
        # swapped ``score`` and ``domain`` here, so the two columns were
        # printed in the opposite order to what the print statement said.
        for score, domain, detail in scored:
            print("%s %s" % (score, domain))
            for term in terms:
                cate = decoder.model.get_category(term)
                # NOTE(review): term_count and detail are indexed by the
                # term's category, not the term itself; detail[cate][1] is
                # presumably the category's contribution to this domain's
                # score -- confirm against NaiveDecoder.get_score.
                sys.stdout.write('%s(%s, freq:%d, gini:%.3f): %.4f\t' %
                                 (term, cate, decoder.model.term_count[cate],
                                  get_gini(cate), detail[cate][1]))
            print('\n%s\n' % ('-' * 20))
Beispiel #2
0
def extract(sent):
    """Return up to five distinct terms of ``sent`` joined by single spaces.

    ``sent`` is split on single spaces and de-duplicated; the terms are
    then ordered by descending ``get_gini(term_category(term))`` and only
    the first five are kept.
    """
    def descending_gini(term):
        # Negated so that an ascending sort puts the largest gini first.
        return -get_gini(term_category(term))

    unique_terms = set(sent.split(' '))
    ranked = sorted(unique_terms, key=descending_gini)
    return ' '.join(ranked[:5])