def main():
    """Build one noise test database per noise level and extract its keywords.

    First writes the header returned by ``out_result_header()`` to
    ``../result/noise_test.log``.  Then, for each noise level in
    0, 60, 120, 180 and 240:

    * recreates ``../data/noise<level>_test.db`` from scratch,
    * loads the topic list from ``../data/topicgj`` and the files returned
      by ``sample_docs(level)`` under the topic name ``noise_data``,
    * runs content, title and topic keyword extraction on that database.

    The value returned by ``preproc_qqtopic.load_topic`` is printed as the
    number of noise documents added.
    """
    import preproc_qqtopic
    import extract_keyword2

    lognoisefile = '../result/noise_test.log'

    # Only the header is written to the log in this function, so the handle
    # is scoped to that single write and is guaranteed to be closed.
    with open(lognoisefile, 'w') as noisefile:
        noisefile.write(out_result_header())

    # range() yields the same five values as xrange() did on Python 2.
    for num in range(0, 250, 60):
        dbfile = '../data/noise%d_test.db' % (num,)
        files = sample_docs(num)
        # Start from an empty database for every noise level.
        if os.path.exists(dbfile):
            os.remove(dbfile)

        dbcon = preproc_qqtopic.init_db(dbfile)
        try:
            preproc_qqtopic.load_topiclist(dbcon, '../data/topicgj')
            cnt = preproc_qqtopic.load_topic(dbcon, 'noise_data', files)
        finally:
            # Release the connection even if loading fails part-way.
            dbcon.close()
        # Single-argument parenthesised form prints identically on Python 2.
        print('add number of noise document: %d' % cnt)

        eva = WordWeightEvaluation(30, '../data/worddf')
        ke = extract_keyword2.DBKeywordExtractor(dbfile, eva)
        ke.init_db()
        try:
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
        finally:
            ke.close_db()
Exemple #2
0
def topics_to_db(topics, dbfile):
    """Load topic directories into a fresh database and extract keywords.

    Args:
        topics: iterable of directory paths.  The last path component of
            each directory (trailing slashes ignored) is used as the topic
            name, and every entry listed in the directory is passed to
            ``preproc_qqtopic.load_topic``.
        dbfile: path of the database file.  An existing file at this path
            is deleted first.

    Returns:
        ``dbfile``, unchanged.
    """
    # Always rebuild the database from scratch.
    if os.path.exists(dbfile):
        os.remove(dbfile)

    dbcon = preproc_qqtopic.init_db(dbfile)
    try:
        for t in topics:
            # basename of the slash-stripped path gives the same name as the
            # former rindex('/') arithmetic for 'a/b' and 'a/b/', and also
            # handles paths without any slash or with repeated trailing
            # slashes, which previously raised ValueError or yielded '/'.
            tname = os.path.basename(t.rstrip('/' + os.sep))
            filelist = [os.path.join(t, f) for f in os.listdir(t)]
            preproc_qqtopic.load_topic(dbcon, tname, filelist)
    finally:
        # Release the connection even if a topic fails to load.
        dbcon.close()

    evaluator = WordWeightEvaluation(30)
    ke = extract_keyword2.DBKeywordExtractor(dbfile, evaluator)
    ke.init_db()
    try:
        ke.content_keyword()
        ke.title_keyword()
        ke.topic_keyword()
    finally:
        ke.close_db()

    return dbfile
Exemple #3
0
def topics_to_db(topics, dbfile):
    """Rebuild ``dbfile`` from the given topic directories, then run keyword extraction.

    Args:
        topics: iterable of directory paths, one per topic.  Each topic is
            named after the final component of its path (any trailing
            slashes are ignored) and all entries listed in the directory
            are handed to ``preproc_qqtopic.load_topic``.
        dbfile: path of the database file to create.  If a file already
            exists there it is removed before loading.

    Returns:
        The ``dbfile`` path that was passed in.
    """
    # Remove any previous database so the load starts from an empty file.
    if os.path.exists(dbfile):
        os.remove(dbfile)

    dbcon = preproc_qqtopic.init_db(dbfile)
    try:
        for topic_dir in topics:
            # Strip trailing separators before taking the basename so that
            # 'a/b' and 'a/b/' both give 'b'.  Unlike the earlier manual
            # rindex('/') lookup, this does not raise ValueError for paths
            # such as 'b' or 'b/', nor return '/' for 'a/b//'.
            tname = os.path.basename(topic_dir.rstrip('/' + os.sep))
            entries = os.listdir(topic_dir)
            filelist = [os.path.join(topic_dir, entry) for entry in entries]
            preproc_qqtopic.load_topic(dbcon, tname, filelist)
    finally:
        # Close the connection whether or not loading succeeded.
        dbcon.close()

    evaluator = WordWeightEvaluation(30)
    ke = extract_keyword2.DBKeywordExtractor(dbfile, evaluator)
    ke.init_db()
    try:
        ke.content_keyword()
        ke.title_keyword()
        ke.topic_keyword()
    finally:
        # Ensure the extractor's database handle is released on failure too.
        ke.close_db()

    return dbfile