예제 #1
0
파일: wam.py 프로젝트: duanzhenchun/mylab
def load_account_doc():
    """Fetch account documents from the database and hand them to write_doctitle.

    Selects the ``doc`` column from the ACCOUNT_LAB table, limited to
    ``nload + 1`` rows, keeps the first field of every returned row, and
    passes the resulting list to ``write_doctitle`` together with DOC_NAME
    and TITLE_NAME.
    """
    nload = 2000
    query = 'select doc from %s limit %d' % (ACCOUNT_LAB, nload + 1)
    docs = []
    for row in statist.get_DB(query):
        # Each row carries only the selected ``doc`` column.
        docs.append(row[0])
    write_doctitle(docs, DOC_NAME, TITLE_NAME)
예제 #2
0
파일: wam.py 프로젝트: freephys/mylab
def t_guess(fdoc, ftest):
    docs = codecs.open(fdoc, encoding='utf-8').readlines()
    idf = get_idf(docs)
    max_idf = max(idf.values())
    if ftest:
        data = codecs.open(ftest, encoding='utf-8').read()
        words = list(gen_nounword(data))
        print len(data), len(words)
        test = u' '.join(words)
    else:
        (screen_name, test) = statist.get_DB(
         'select screen_name, doc from %s where uid=%s limit 1' % (ACCOUNT_LAB, 1711064324))[0]
        print 'screen_name:', screen_name
    print 'test doc:', test
    trans = {}
    res = statist.get_DB('select * from %s where p>%s' % (WORD_DICT, P_THRESHOLD))
    for dw, tw, p in res:
        trans.setdefault(dw, {})
        trans[dw][tw] = p
    title = guess(trans, test, idf, max_idf, 200)
    for i in title:
        print i[0], i[1]           
예제 #3
0
파일: wam.py 프로젝트: duanzhenchun/mylab
def t_guess(fdoc, ftest):
    docs = codecs.open(fdoc, encoding='utf-8').readlines()
    idf = get_idf(docs)
    max_idf = max(idf.values())
    if ftest:
        data = codecs.open(ftest, encoding='utf-8').read()
        words = list(gen_nounword(data))
        print len(data), len(words)
        test = u' '.join(words)
    else:
        (screen_name, test) = statist.get_DB(
            'select screen_name, doc from %s where uid=%s limit 1' %
            (ACCOUNT_LAB, 1711064324))[0]
        print 'screen_name:', screen_name
    print 'test doc:', test
    trans = {}
    res = statist.get_DB('select * from %s where p>%s' %
                         (WORD_DICT, P_THRESHOLD))
    for dw, tw, p in res:
        trans.setdefault(dw, {})
        trans[dw][tw] = p
    title = guess(trans, test, idf, max_idf, 200)
    for i in title:
        print i[0], i[1]
예제 #4
0
파일: wam.py 프로젝트: freephys/mylab
def load_account_doc():
    """Read ``doc`` values from ACCOUNT_LAB and pass them to write_doctitle.

    The query is limited to ``nload + 1`` rows. Only the first field of each
    row is kept, and the list is forwarded with DOC_NAME and TITLE_NAME.
    """
    nload = 2000
    row_limit = nload + 1
    rows = statist.get_DB(
        'select doc from %s limit %d' % (ACCOUNT_LAB, row_limit))
    docs = [row[0] for row in rows]
    write_doctitle(docs, DOC_NAME, TITLE_NAME)