def main():
    """Build the noise-test databases and run keyword extraction on each.

    Writes the result header to ``../result/noise_test.log`` and then, for
    every value ``num`` in ``range(0, 250, 60)``:

    * recreates ``../data/noise<num>_test.db`` from scratch,
    * loads the topic list from ``../data/topicgj`` and the documents
      returned by ``sample_docs(num)`` into it,
    * prints the count returned by ``preproc_qqtopic.load_topic``,
    * runs the content, title and topic keyword passes of
      ``extract_keyword2.DBKeywordExtractor`` on the database.

    Returns:
        None.
    """
    import preproc_qqtopic
    import extract_keyword2

    lognoisefile = '../result/noise_test.log'
    # This function writes nothing but the header to the log, so the handle
    # is closed (and the header flushed) right away instead of being leaked.
    with open(lognoisefile, 'w') as noisefile:
        noisefile.write(out_result_header())

    # NOTE(review): the original indentation was lost; every statement below
    # is taken to belong to the loop body because each one depends on the
    # per-iteration ``dbfile`` -- confirm against the upstream file.
    for num in range(0, 250, 60):
        dbfile = '../data/noise%d_test.db' % (num,)
        files = sample_docs(num)

        # Remove any database left by a previous run before rebuilding it.
        if os.path.exists(dbfile):
            os.remove(dbfile)

        dbcon = preproc_qqtopic.init_db(dbfile)
        preproc_qqtopic.load_topiclist(dbcon, '../data/topicgj')
        cnt = preproc_qqtopic.load_topic(dbcon, 'noise_data', files)
        dbcon.close()
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement.
        print('add number of noise document: %d' % cnt)

        eva = WordWeightEvaluation(30, '../data/worddf')
        ke = extract_keyword2.DBKeywordExtractor(dbfile, eva)
        ke.init_db()
        ke.content_keyword()
        ke.title_keyword()
        ke.topic_keyword()
        ke.close_db()
def main():
    """Run the steady test and log the mean and spread of its metrics.

    Steps, in order:

    1. Open ``../result/steady_test.log`` for writing and emit the header
       produced by ``out_result_header()``.
    2. If ``../data/steady_test.db`` does not exist, load the topic list and
       run the content, title and topic keyword passes to create it.
    3. Call ``CommunityBuilder.build()`` 50 times, comparing each result with
       the labels from ``cmpcluster.load_doc_labels`` via ``cmp_cluster``.
    4. Write the two rows returned by ``mean_std`` to the log, close it, and
       open the log in emacs.

    Returns:
        None.
    """
    import preproc_qqtopic
    import extract_keyword2
    import worddf

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'
    # Both summary rows share one layout: five numeric columns.
    row_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
    c = 50

    # The context manager closes the log even when a later step raises, so
    # the handle is not leaked and the buffered header is flushed.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        # NOTE(review): the original indentation was lost; the whole
        # database preparation is assumed to be guarded by this check --
        # confirm against the upstream file.
        if not os.path.exists(dbfile):
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')
            ke = extract_keyword2.DBKeywordExtractor(dbfile)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        metrics = []
        real = cmpcluster.load_doc_labels(dbfile)

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement.
        print('steady_test')
        for i in range(c):
            print('Time %d' % (i + 1))
            predicted = cb.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        steadyfile.write(row_format % tuple(mean))
        steadyfile.write(row_format % tuple(std))

    # Launched only after the log is closed so the editor sees the full file.
    os.system('emacs ' + logsteadyfile)
def main():
    """Repeat community building on the steady-test data and log statistics.

    The function writes ``out_result_header()`` to
    ``../result/steady_test.log``, prepares ``../data/steady_test.db`` when
    it is missing (topic list plus content/title/topic keyword passes),
    then runs ``CommunityBuilder.build()`` 50 times. Each prediction is
    scored against ``cmpcluster.load_doc_labels(dbfile)`` with
    ``cmp_cluster``; the two rows returned by ``mean_std`` are appended to
    the log, which is finally opened in emacs.

    Returns:
        None.
    """
    import preproc_qqtopic
    import extract_keyword2
    import worddf

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'
    runs = 50
    # One layout serves both the mean row and the std row.
    summary_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'

    # Using ``with`` guarantees the log is closed if any step below raises;
    # previously an exception skipped the explicit close().
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        # NOTE(review): indentation was lost in the original; the keyword
        # extraction steps are assumed to run only when the database has to
        # be created -- confirm against the upstream file.
        if not os.path.exists(dbfile):
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')
            ke = extract_keyword2.DBKeywordExtractor(dbfile)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)
        real = cmpcluster.load_doc_labels(dbfile)
        metrics = []

        # A parenthesised single argument prints identically under the
        # Python 2 print statement.
        print('steady_test')
        for i in range(runs):
            print('Time %d' % (i + 1))
            predicted = cb.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        steadyfile.write(summary_format % tuple(mean))
        steadyfile.write(summary_format % tuple(std))

    # The log is closed at this point, so the editor reads complete output.
    os.system('emacs ' + logsteadyfile)