예제 #1
0
def run(pids):
    global predictor, num_normals, num_spams
    infos = libtieba.get_posts_info(pids)
    for info in infos:
        scores = predictor.predict(info.title, info.content)
        #print scores
        if scores[FLAGS.score_index] > FLAGS.thre:
            num_spams += 1
            print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content
            print 'score: ', scores[0], ' dnn_score: ', scores[
                1], ' adjusted_dnn_score: ', scores[2], ' spam ratio: ', float(
                    num_spams) / (num_spams + num_normals), FLAGS.thre
        else:
            num_normals += 1
        l = [
            str(scores[0]),
            str(scores[2]),
            str(info.postId), info.forumName, info.userName, info.title,
            info.content
        ]
        line = '\t'.join(l)
        if scores[0] > 0.9:
            out.write('%s\n' % line)
        if scores[2] > 0.9:
            out_dnn.write('%s\n' % line)
예제 #2
0
def run(pids):
    #print 'run pids begin'
    global predictor, num_normals, num_spams
    infos = libtieba.get_posts_info(pids)
    #print 'finish get posts info'
    for info in infos:
        score = predictor.predict(info.title, info.content)
        #print 'finish predict'
        if score > FLAGS.thre:
            num_spams += 1
            print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content
            print 'score: ', score, ' spam ratio: ', float(num_spams) / (num_spams + num_normals), FLAGS.thre
            vals_ = []
            if FLAGS.write_db:
                vals_ = db_writer.add(info, score)
                #print 'finish add one to db'
            if FLAGS.realtime_del:
                if len(vals_) == FLAGS.buffer_size:
                    print 'real time delete'
                    exe = '/home/users/chenghuige/forum/orp002/php/bin/php'
                    out = open('delete.temp', 'w')
                    for val in vals_:
                        thread_id = val[1]
                        post_id = val[2]
                        user_id = val[-2]
                        forum_id = val[0]
                        out.write('{}\t{}\t{}\t{}\n'.format(thread_id, post_id, user_id, forum_id))
                    out.close()
                    os.system(exe + ' multi-delete.php delete.temp')
        else:
            num_normals += 1
        l = [str(score), str(info.postId), info.forumName, info.userName, info.title, info.content]
        line = '\t'.join(l)
예제 #3
0
파일: run.py 프로젝트: chenghuige/gezi
def run(pids):
    global predictor, num_normals, num_spams
    infos = libtieba.get_posts_info(pids)
    for info in infos:
        score = predictor.predict(info.title, info.content)
        if score > FLAGS.thre:
            num_spams += 1
            print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content
            print 'score: ', score, ' spam ratio: ', float(num_spams) / (num_spams + num_normals), FLAGS.thre
        else:
            num_normals += 1
        l = [str(score), str(info.postId), info.forumName, info.userName, info.title, info.content]
        line = '\t'.join(l)
예제 #4
0
def run(pids):
    print 'run pids begin'
    global predictor, num_normals, num_spams
    infos = libtieba.get_posts_info(pids)
    print 'finish get posts info'
    for info in infos:
        score = predictor.predict(info.title, info.content)
        print 'finish predict'
        if score > FLAGS.thre:
            num_spams += 1
            print info.postId, info.threadId, info.forumName, info.userName, info.title, info.content
            print 'score: ', score, ' spam ratio: ', float(num_spams) / (
                num_spams + num_normals), FLAGS.thre
            db_writer.add(info, score)
            print 'finish add one to db'
        else:
            num_normals += 1
        l = [
            str(score),
            str(info.postId), info.forumName, info.userName, info.title,
            info.content
        ]
        line = '\t'.join(l)
    print 'run pids end'
예제 #5
0
warnings.simplefilter("ignore", RuntimeWarning)
import libtieba

out = open(sys.argv[2], 'w')
lines = open(sys.argv[1]).readlines()
linelist = [line.strip().split() for line in lines]
pids = [l[0] for l in linelist]

m = {}
for l in linelist:
    m[long(l[0])] = l[1]

pidvec = libtieba.vector_less__long_long_unsigned_int__greater_()
for pid in pids:
    pidvec.append(long(pid))

print len(pidvec)
results = libtieba.get_posts_info(pidvec)

for item in results:
    if (item.postId == 0):
        continue
    item.title = item.title.replace('\n', ' ')
    item.content = item.content.replace('\n', ' ')
    out.write("%s\n" % ("\t".join([
        str(item.threadId), m[item.postId],
        str(item.postId), item.title, item.content
    ])))

print len(results)