def serv(model):
    """Run an interactive console loop that scores queries against domains.

    Each round reads a query and a space-separated list of domains from
    standard input, reduces the query with ``extract``, prints the value
    returned by ``decoder.predict`` and then, for every requested domain
    (highest score first), the domain, its score and a per-term breakdown.
    Entering ``q`` as the query ends the loop.

    This is Python 2 code: it relies on ``raw_input`` and on decoding the
    byte string it returns.

    Args:
        model: Object passed to ``NaiveDecoder``.  The decoder's ``model``
            attribute is expected to expose ``domains``, ``term_count`` and
            ``get_category``.

    Returns:
        None, once the user enters ``q``.
    """
    decoder = NaiveDecoder(model)
    while True:
        # Decode once at the input boundary and work with unicode afterwards.
        query = raw_input(
            'Input your query(must be segmented by SPACE), q to quit:\n'
        ).decode('utf-8')
        if query == u'q':
            return

        requested = raw_input(
            'Input the domains you want to compare:\n').decode('utf-8')
        # Repeated spaces would otherwise yield empty domain names; drop them
        # and fall back to every known domain when nothing usable was typed.
        domains = [name for name in requested.split(' ') if name]
        if not domains:
            domains = decoder.model.domains

        query = extract(query)
        ret = decoder.predict(query)
        # A parenthesised single argument behaves exactly like the Python 2
        # print statement and also parses as a Python 3 function call.
        print("\n%s\n%s\n" % (ret, '=' * 50))

        # The term list does not change per domain, so split only once.
        terms = query.split(' ')
        ranked = []
        for domain in domains:
            score, detail = decoder.get_score(terms, domain)
            ranked.append((score, domain, detail))
        # Highest score first; reverse=True keeps the sort stable for ties.
        ranked.sort(key=lambda entry: entry[0], reverse=True)

        # Unpack in the same order the tuples were built above.
        for score, domain, detail in ranked:
            print('%s %s' % (domain, score))
            for term in terms:
                cate = decoder.model.get_category(term)
                sys.stdout.write(
                    '%s(%s, freq:%d, gini:%.3f): %.4f\t'
                    % (term, cate, decoder.model.term_count[cate],
                       get_gini(cate), detail[cate][1]))
            print('\n%s\n' % ('-' * 20))
def extract(sent, top_n=5):
    """Keep the highest-ranked distinct terms of a space-separated sentence.

    Terms are ranked by ``get_gini(term_category(term))`` in descending
    order.  Terms with equal rank keep the order of their first occurrence
    in ``sent``, so the result does not depend on set iteration order.

    Args:
        sent: String whose terms are separated by single spaces.  Empty
            tokens caused by repeated spaces are ignored.
        top_n: Maximum number of terms to keep.  Defaults to 5.

    Returns:
        The selected terms joined by single spaces; an empty string when
        ``sent`` contains no terms.
    """
    # De-duplicate while preserving first-seen order so that the stable sort
    # below gives a deterministic result for ties.
    seen = set()
    unique_terms = []
    for term in sent.split(' '):
        if term and term not in seen:
            seen.add(term)
            unique_terms.append(term)

    ranked = sorted(unique_terms,
                    key=lambda term: get_gini(term_category(term)),
                    reverse=True)
    return ' '.join(ranked[:top_n])