def load_subject_weibo_data(topicname=u'APEC-微博', path='apec.jl'):
    """Load weibo items from a JSON-lines file and save the objective ones as news.

    Each non-blank line of ``path`` is parsed as JSON, its ``'text'`` value is
    encoded to UTF-8, and the item is passed through ``subob_classifier``.
    Items whose ``'subject'`` value is falsy are converted with
    ``subject_weibo2news`` and saved on the event looked up by ``topicname``.

    NOTE(review): despite the function name, only items that are NOT flagged
    ``'subject'`` are stored; the original docstring described this as loading
    objective weibo data.

    Args:
        topicname: event name resolved through
            ``EventManager.getEventIDByName``. Defaults to the topic that was
            previously hard-coded.
        path: JSON-lines file to read. Defaults to the file that was
            previously hard-coded.

    Topic/file pairs that earlier revisions switched between by editing the
    source (pairing inferred from the order of the old comments -- confirm):
        u'外滩踩踏-微博'   / 'caitai.jl'
        u'呼格案-微博'     / 'huge.jl'
        u'复旦投毒案-微博' / 'fudan.jl'
        u'APEC-微博'       / 'apec.jl'
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    event = Event(topicid)

    # ``with`` closes the file even when parsing, classification or saving
    # raises; the previous explicit close() was skipped on any exception.
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line is not a JSON document; skip it instead of
                # letting json.loads abort the whole load.
                continue

            item = json.loads(line)
            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            if not item['subject']:
                news = subject_weibo2news(item)
                event.saveItem(news)
def one_topic_clear(topicname_start):
    """Initialize the event for one ``(topic name, start datetime)`` pair.

    Args:
        topicname_start: two-element sequence ``(topicname, start_datetime)``.
            The name is resolved to an event id through ``EventManager``; the
            start datetime is converted with ``datetime2ts`` and handed to
            ``Event.initialize``.
    """
    name, started_at = topicname_start
    event_id = EventManager().getEventIDByName(name)
    # Convert before constructing the Event so the call order is unchanged.
    begin_ts = datetime2ts(started_at)
    Event(event_id).initialize(begin_ts)
def initializeNewsTopic():
    """Initialize the news topic.

    Looks up the hard-coded news topic by name, converts its hard-coded start
    datetime with ``datetime2ts`` and calls ``Event.initialize`` with the
    resulting timestamp.
    """
    manager = EventManager()
    news_topic_id = manager.getEventIDByName(u'外滩踩踏')
    # Convert before constructing the Event so the call order is unchanged.
    begin_ts = datetime2ts("2015-01-02 00:00:00")
    Event(news_topic_id).initialize(begin_ts)
def initializeWeiboTopic(topicname=u'APEC-微博',
                         start_datetime="2014-12-15 00:00:00"):
    """Initialize a weibo topic.

    Resolves ``topicname`` to an event id through ``EventManager``, converts
    ``start_datetime`` with ``datetime2ts`` and calls ``Event.initialize``
    with the resulting timestamp.

    Args:
        topicname: event name to initialize. Defaults to the topic that was
            previously hard-coded, so calling with no arguments behaves as
            before.
        start_datetime: start time string accepted by ``datetime2ts``.
            Defaults to the value that was previously hard-coded.

    Topic/start pairs that earlier revisions switched between by editing the
    source:
        u'外滩踩踏-微博'   / "2015-01-02 00:00:00"
        u'呼格案-微博'     / "2014-12-14 00:00:00"
        u'复旦投毒案-微博' / "2014-12-15 00:00:00"
        u'APEC-微博'       / "2014-12-15 00:00:00"
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    start_ts = datetime2ts(start_datetime)
    event = Event(topicid)
    event.initialize(start_ts)
def load_object_weibo_data(topicname=u'张灵甫遗骨被埋羊圈-微博',
                           path='items_zhang.jl'):
    """Load weibo items from a JSON-lines file and save the subjective ones as comments.

    Each non-blank line of ``path`` is parsed as JSON. Items without a
    ``'mid'`` key are ignored. For the rest, ``'text'`` is encoded to UTF-8
    and the item is passed through ``subob_classifier``; items whose
    ``'subject'`` value is truthy are converted with
    ``api_object_weibo2comment`` and saved through ``EventComments``.

    NOTE(review): despite the function name, only items flagged ``'subject'``
    are stored; the original docstring described this as loading subjective
    weibo data.

    The resolved topic id is printed, as before.

    Args:
        topicname: event name resolved through
            ``EventManager.getEventIDByName``. Defaults to the topic that was
            previously hard-coded.
        path: JSON-lines file to read. Defaults to the file that was
            previously hard-coded.

    Topic/file pairs that earlier revisions switched between by editing the
    source (pairing inferred from the order of the old comments -- confirm):
        u'外滩踩踏-微博'           / 'caitai.jl'
        u'呼格案-微博'             / 'huge.jl'
        u'复旦投毒案-微博'         / 'fudan.jl'
        u'APEC-微博'               / 'apec.jl'
        u'高校宣传思想工作-微博'   / 'items_qiushi.jl'
        u'张灵甫遗骨被埋羊圈-微博' / 'items_zhang.jl'
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    # Parenthesized single-argument form prints the same under Python 2 and
    # also parses under Python 3.
    print(topicid)
    eventcomment = EventComments(topicid)

    # ``with`` closes the file even when parsing, classification or saving
    # raises; the previous explicit close() was skipped on any exception.
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line is not a JSON document; skip it instead of
                # letting json.loads abort the whole load.
                continue

            item = json.loads(line)
            if 'mid' not in item:
                continue

            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            if item['subject']:
                # An older, disabled variant converted with
                # object_weibo2comment and had no 'mid' check; it was kept as
                # a dead string literal inside the loop and has been dropped.
                comment = api_object_weibo2comment(item)
                eventcomment.saveItem(comment)
from case.time_utils import ts2datetime, ts2date from xapian_case.xapian_backend import XapianSearch from xapian_case.utils import cut, load_scws from case.dynamic_xapian_weibo import getXapianWeiboByTopic from case.global_config import XAPIAN_USER_DATA_PATH from case.Database import Event, EventManager from case.topic_manage import topics_name_start_end from flask import Blueprint, url_for, render_template, request, abort, flash, session, redirect, make_response scws = load_scws() mod = Blueprint('case', __name__, url_prefix='/index') xapian_search_weibo = getXapianWeiboByTopic() em = EventManager() def acquire_user_by_id(uid): user_search = XapianSearch(path=XAPIAN_USER_DATA_PATH, name='master_timeline_user', schema_version=1) result = user_search.search_by_id(int(uid), fields=[ 'name', 'location', 'followers_count', 'friends_count', 'profile_image_url' ]) user = {} if result:
top_tfidf_para=10, top_percent=0.3) for res_id, mer_id in mids: # 将mer_id下的文本扔入res_id下的簇,remove mer_id的簇,同时重新计算各簇的特征词, 并计算文本权重, 并去重 temp_infos = event.get_subevent_infos(mer_id) for r in temp_infos: news = News(r["_id"], event.id) news.update_news_subeventid(res_id) event.remove_subevents([mer_id]) if __name__ == '__main__': em = EventManager() event_ids_list = [] # 获取做初始聚类的话题 initial_event_ids = em.getInitializingEventIDs() event_ids_list.extend([(id, True) for id in initial_event_ids]) # 获取已做完初始聚类的活跃话题 active_event_ids = em.checkActive() event_ids_list.extend([(id, False) for id in active_event_ids]) # map并行计算 pool = Pool() pool.map(one_topic_merge, event_ids_list) pool.close() pool.join()
event.setModifysuccess(True) # 更新事件的modify_success为True except Exception, e: # 如果做计算时出错,更新last_modify, 并将modify_success设置为False print '[Error]: ', e event.setLastmodify(timestamp) event.setModifysuccess(False) if __name__ == '__main__': from bson.objectid import ObjectId # running_ids = [ObjectId("54c4df61d8b487851c2434f6"), ObjectId("54c34b3d2253270fd4dd5598"), \ # running_ids = [ObjectId("54c5105fd8b487851c2434f7"), ObjectId("54c59c19d8b487851c2434f8")] running_ids = [ObjectId("54cb0b472253277627a8ac43")] # running_ids = [ObjectId("54cb259e2253277bca996516")] em = EventManager() event_ids_list = [] # 获取做初始聚类的话题 initial_event_ids = em.getInitializingEventIDs() event_ids_list.extend([(id, True) for id in initial_event_ids if id in running_ids]) # 获取已做完初始聚类的活跃话题 active_event_ids = em.checkActive() event_ids_list.extend([(id, False) for id in active_event_ids if id in running_ids]) # map并行计算 pool = Pool() pool.map(one_topic_calculation, event_ids_list) pool.close() pool.join()
#-*-coding=utf-8-*-
# User: linhaobuaa
# Date: 2015-01-02 12:00:00
# Version: 0.1.0

"""Handle topics that went wrong during the evolution process."""

from Database import EventManager, Event
from run import one_topic_calculation


def handle_error(eventid):
    """Rewind one event's last-modify value and mark its modification as successful.

    Reads the event's current last-modify value, writes it back reduced by
    3600 (presumably seconds, i.e. one hour -- confirm the unit against
    ``Event``), and sets the modify-success flag to ``True``.

    Args:
        eventid: identifier passed to the ``Event`` constructor.
    """
    event = Event(eventid)
    last_modify = event.getLastmodify()
    event.setLastmodify(last_modify - 3600)
    event.setModifysuccess(True)
    # Immediate recalculation is currently disabled:
    # one_topic_calculation((eventid, False))


if __name__ == '__main__':
    em = EventManager()
    # Currently every event is reset, not only the failed ones:
    # event_ids_list = em.getFalseEventIDs()
    event_ids_list = em.getAllEventIDs()
    # Explicit loop instead of map(): map() is lazy under Python 3, so using
    # it only for side effects would process nothing there.
    for eventid in event_ids_list:
        handle_error(eventid)