예제 #1
0
def main():
    """Prepare one noise-test database per sample size and run keyword passes.

    The result log ``../result/noise_test.log`` is opened for writing and
    receives the header produced by ``out_result_header()``.  Then, for each
    ``num`` in 0, 60, 120, 180 and 240:

    1. ``../data/noise<num>_test.db`` is deleted if it exists and re-created
       through ``preproc_qqtopic.init_db``.
    2. The topic list ``../data/topicgj`` and the files returned by
       ``sample_docs(num)`` are loaded into that database; the count returned
       by ``load_topic`` is printed.
    3. The content, title and topic keyword passes of a
       ``DBKeywordExtractor`` are run, using
       ``WordWeightEvaluation(30, '../data/worddf')`` as evaluator.

    Relies on ``os``, ``out_result_header``, ``sample_docs`` and
    ``WordWeightEvaluation`` being available at module level.
    """
    import preproc_qqtopic
    import extract_keyword2

    lognoisefile = '../result/noise_test.log'

    # The context manager guarantees the log handle is flushed and closed when
    # the function exits, including when one of the steps below raises.
    with open(lognoisefile, 'w') as noisefile:
        noisefile.write(out_result_header())

        for num in range(0, 250, 60):
            dbfile = '../data/noise%d_test.db' % (num,)
            files = sample_docs(num)

            # Always start from a fresh database for this sample size.
            if os.path.exists(dbfile):
                os.remove(dbfile)

            dbcon = preproc_qqtopic.init_db(dbfile)
            preproc_qqtopic.load_topiclist(dbcon, '../data/topicgj')
            cnt = preproc_qqtopic.load_topic(dbcon, 'noise_data', files)
            dbcon.close()
            # Single-argument print(...) produces the same output on
            # Python 2 and Python 3.
            print('add number of noise document: %d' % cnt)

            eva = WordWeightEvaluation(30, '../data/worddf')
            ke = extract_keyword2.DBKeywordExtractor(dbfile, eva)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()
예제 #2
0
def main():
    """Run the steady test and write its summary to the result log.

    Steps, in order:

    1. Open ``../result/steady_test.log`` for writing and emit the header
       returned by ``out_result_header()``.
    2. If ``../data/steady_test.db`` does not exist, load the topic list
       ``../data/topicgj`` and run the content, title and topic keyword
       passes of a ``DBKeywordExtractor`` on it.
    3. Call ``CommunityBuilder(dbfile).build()`` 50 times, comparing each
       result with the labels from ``cmpcluster.load_doc_labels`` through
       ``cmp_cluster``.
    4. Write the ``mean`` and ``std`` rows returned by ``mean_std`` to the
       log (five numbers each), close the log and open it in emacs.

    Relies on ``os``, ``out_result_header``, ``CommunityBuilder``,
    ``cmpcluster``, ``cmp_cluster`` and ``mean_std`` being available at
    module level.
    """
    import preproc_qqtopic
    import extract_keyword2
    import worddf  # not referenced below; kept in case importing it matters

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'

    # The context manager closes the log even when the database build or one
    # of the runs raises, so the handle is never leaked.
    with open(logsteadyfile, 'w') as steadyfile:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # NOTE(review): load_topiclist is given the database path here,
            # not a connection object -- confirm against preproc_qqtopic.
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')

            ke = extract_keyword2.DBKeywordExtractor(dbfile)
            ke.init_db()
            ke.content_keyword()
            ke.title_keyword()
            ke.topic_keyword()
            ke.close_db()

        cb = CommunityBuilder(dbfile)

        metrics = []
        runs = 50
        real = cmpcluster.load_doc_labels(dbfile)
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('steady_test')
        for i in range(runs):
            print('Time %d' % (i + 1))
            predicted = cb.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        row_format = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n'
        steadyfile.write(row_format % tuple(mean))
        steadyfile.write(row_format % tuple(std))

    # Launched only after the log has been closed, so emacs sees the
    # complete file.
    os.system('emacs ' + logsteadyfile)
예제 #3
0
def main():
    """Execute the steady test: repeated community builds plus a summary log.

    The log ``../result/steady_test.log`` is created and started with the
    header from ``out_result_header()``.  When ``../data/steady_test.db`` is
    missing, the topic list ``../data/topicgj`` is loaded and the content,
    title and topic keyword passes of a ``DBKeywordExtractor`` are run.
    ``CommunityBuilder.build()`` is then invoked 50 times; every result is
    compared with the labels from ``cmpcluster.load_doc_labels`` via
    ``cmp_cluster``.  The ``mean`` and ``std`` rows returned by ``mean_std``
    are written to the log as five numbers each, and the closed log is
    finally opened in emacs.

    Relies on ``os``, ``out_result_header``, ``CommunityBuilder``,
    ``cmpcluster``, ``cmp_cluster`` and ``mean_std`` being available at
    module level.
    """
    import preproc_qqtopic
    import extract_keyword2
    import worddf  # not referenced below; kept in case importing it matters

    dbfile = '../data/steady_test.db'
    logsteadyfile = '../result/steady_test.log'

    steadyfile = open(logsteadyfile, 'w')
    try:
        steadyfile.write(out_result_header())

        if not os.path.exists(dbfile):
            # NOTE(review): load_topiclist is given the database path here,
            # not a connection object -- confirm against preproc_qqtopic.
            preproc_qqtopic.load_topiclist(dbfile, '../data/topicgj')

            extractor = extract_keyword2.DBKeywordExtractor(dbfile)
            extractor.init_db()
            extractor.content_keyword()
            extractor.title_keyword()
            extractor.topic_keyword()
            extractor.close_db()

        builder = CommunityBuilder(dbfile)

        metrics = []
        repeat_count = 50
        real = cmpcluster.load_doc_labels(dbfile)
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('steady_test')
        for run_number in range(1, repeat_count + 1):
            print('Time %d' % run_number)
            predicted = builder.build()
            metrics.append(cmp_cluster(predicted, real))

        mean, std = mean_std(metrics)
        meanstr = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n' % tuple(mean)
        stdstr = '%.1f \t%.3f \t%.3f \t%.3f \t%.3f\n' % tuple(std)
        steadyfile.write(meanstr)
        steadyfile.write(stdstr)
    finally:
        # Close the log on every exit path so the handle is not leaked when
        # a step above raises.
        steadyfile.close()

    # Reached only on success, after the log has been closed.
    os.system('emacs ' + logsteadyfile)