def main(): import preproc_qqtopic import extract_keyword2 #import worddf dbfile = '../data/noise_test.db' lognoisefile = '../result/noise_test.log' noisefile = open(lognoisefile,'w') noisefile.write(out_result_header()) rang = xrange(0,250, 60) for num in rang: dbfile = '../data/noise%d_test.db' % (num,) files = sample_docs(num) if os.path.exists(dbfile): os.remove(dbfile) dbcon = preproc_qqtopic.init_db(dbfile) preproc_qqtopic.load_topiclist(dbcon,'../data/topicgj') cnt = preproc_qqtopic.load_topic(dbcon,'noise_data',files) dbcon.close() print 'add number of noise document: %d' % cnt eva = WordWeightEvaluation(30, '../data/worddf') ke = extract_keyword2.DBKeywordExtractor(dbfile, eva) ke.init_db() ke.content_keyword() ke.title_keyword() ke.topic_keyword() ke.close_db()
def topics_to_db(topics, dbfile):
    """Rebuild *dbfile* from the topic directories in *topics* and run the
    keyword-extraction pipeline over it.

    Each entry of *topics* is a directory path; the directory's base name is
    used as the topic name and every file inside it is loaded as a document.

    Returns the path of the populated database file.
    """
    # start from a fresh database file every time
    if os.path.exists(dbfile):
        os.remove(dbfile)
    dbcon = preproc_qqtopic.init_db(dbfile)
    for t in topics:
        # Topic name is the last path component.  os.path.basename after
        # stripping trailing slashes replaces the hand-rolled rindex logic,
        # which kept the trailing '/' in the name for paths like 'a/b/'.
        tname = os.path.basename(t.rstrip('/'))
        filelist = [os.path.join(t, f) for f in os.listdir(t)]
        preproc_qqtopic.load_topic(dbcon, tname, filelist)
    dbcon.close()
    # NOTE(review): 30 presumably selects the top-N words — semantics live
    # in WordWeightEvaluation; confirm there.
    evaluator = WordWeightEvaluation(30)
    ke = extract_keyword2.DBKeywordExtractor(dbfile, evaluator)
    ke.init_db()
    ke.content_keyword()
    ke.title_keyword()
    ke.topic_keyword()
    ke.close_db()
    return dbfile
def topics_to_db(topics, dbfile):
    """Rebuild *dbfile* from the topic directories in *topics* and run the
    keyword-extraction pipeline over it.  Returns the database path.

    NOTE(review): this is an exact duplicate of the ``topics_to_db`` defined
    immediately above; being defined later, this copy is the one in effect
    at import time.  One of the two should be deleted.
    """
    import sqlite3  # NOTE(review): imported but never used in this function
    # start from a fresh database file every time
    if os.path.exists(dbfile):
        os.remove(dbfile)
    dbcon = preproc_qqtopic.init_db(dbfile)
    for t in topics:
        # Topic name = last path component of the directory.
        idx = t.rindex('/')
        if idx == len(t)-1:
            # Path ends with '/': step back to the previous separator.
            # NOTE(review): tname then still keeps the trailing '/'
            # (e.g. 'a/b/' -> 'b/') — confirm this is intended.
            idx = t.rindex('/', 0, idx)
        tname = t[idx+1:]
        files = os.listdir(t)
        filelist = [os.path.join(t,f) for f in files]
        preproc_qqtopic.load_topic(dbcon, tname, filelist)
    dbcon.close()
    # NOTE(review): 30 presumably selects the top-N words — semantics live
    # in WordWeightEvaluation; confirm there.
    evaluator = WordWeightEvaluation(30)
    ke = extract_keyword2.DBKeywordExtractor(dbfile,evaluator)
    ke.init_db()
    ke.content_keyword()
    ke.title_keyword()
    ke.topic_keyword()
    ke.close_db()
    return dbfile