def save_ws_results_es(topic, ts, during, n_limit, province, city, weibos):
    # Upsert the per-slice province/city weibo list into the event analysis results index.
    #mappings_event_geo_province_weibos()
    #index_name = index_event_geo_province_weibos
    #index_type = type_event_geo_province_weibos
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    item['en_name'] = topic
    item['end_ts'] = ts
    item['range'] = during
    item['limit'] = n_limit
    item['province'] = province
    item['city'] = city
    item['weibo'] = json.dumps(weibos)
    id = topic + '_' + str(ts)
    try:
        # update the document if it already exists, otherwise fall through and create it
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
    except Exception as e:
        weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
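# Illustrative usage sketch (not part of the original pipeline): how a geo-analysis
# caller might invoke save_ws_results_es for one time slice. All argument values below
# are placeholders; only the parameter names come from the signature above.
def _demo_save_ws_results_es():
    example_weibos = [{'mid': '4059281234567890', 'text': 'example weibo text', 'geo': 'Beijing'}]
    save_ws_results_es(topic='mao_ze_dong_dan_chen_ji_nian_ri',  # event en_name (placeholder)
                       ts='1483113600',   # end timestamp of the slice, used in the doc id
                       during=3600,       # slice length in seconds
                       n_limit=20,        # number of weibos kept for this slice
                       province='Beijing',
                       city='Beijing',
                       weibos=example_weibos)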
def save_rt_results_es(topic, repost_list):
    #mappings_event_geo_city_repost()
    #index_name = index_event_geo_city_repost
    #index_type = type_event_geo_city_repost
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    for location in repost_list:
        item = {}
        item['en_name'] = topic
        item['original'] = location['original']
        item['mid'] = location['mid']
        item['timestamp'] = location['ts']
        item['origin_location'] = location['origin_location']
        item['repost_location'] = location['repost_location']
        id = location['mid']
        try:
            # update the repost document if it already exists, otherwise create it
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
        except Exception as e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
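# Illustrative sketch of the repost_list structure the function above expects: each
# element must carry the keys read inside the loop (original, mid, ts, origin_location,
# repost_location). The values below are invented for demonstration.
def _demo_save_repost_results_es():
    example_repost_list = [{
        'original': '4059280000000001',   # mid of the original weibo
        'mid': '4059281234567890',        # mid of the repost, used as the ES doc id
        'ts': 1482768502,                 # repost timestamp
        'origin_location': 'Beijing Haidian',
        'repost_location': 'Shanghai Pudong',
    }]
    save_rt_results_es('mao_ze_dong_dan_chen_ji_nian_ri', example_repost_list)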
def save_rt_results_es(calc, topic, results, during, klimit=TOP_KEYWORDS_LIMIT, wlimit=TOP_WEIBOS_LIMIT):
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    if calc == 'count':
        # results: {time_slice: {sentiment_1: count_1, sentiment_2: count_2, ...}}
        #mappings_event_sentiment_count()
        #index_name = index_event_sentiment_count
        #index_type = type_event_sentiment_count
        item = {}
        for time, sen_dict in results.iteritems():
            id = topic + '_' + time
            for sentiment, count in sen_dict.iteritems():
                # note: the doc id only encodes the time slice, so each sentiment
                # overwrites the previous one saved for that slice
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['sentiment'] = sentiment
                item['count'] = count
                try:
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception as e:
                    # raise e
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
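# Illustrative sketch (values invented) of the results structure consumed by the
# calc == 'count' branch above: an outer dict keyed by the slice end timestamp and an
# inner dict keyed by sentiment label. The sentiment labels here are placeholders, not
# the codes the real sentiment module emits.
def _demo_save_sentiment_counts():
    example_results = {
        '1482768000': {'happy': 12, 'angry': 3, 'sad': 1},
        '1482771600': {'happy': 7, 'angry': 9, 'sad': 4},
    }
    save_rt_results_es('count', 'mao_ze_dong_dan_chen_ji_nian_ri',
                       example_results, during=3600)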
def save_results_es(topic, language_results):
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    id = topic
    try:
        # update the per-topic document if it exists, otherwise create it
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id,
                        body={'doc': {'language_results': language_results}})
    except Exception as e:
        weibo_es.index(index=index_name, doc_type=index_type, id=id,
                       body={'language_results': language_results})
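# Illustrative read-back sketch: save_results_es upserts the language results into a
# single document whose id is the topic en_name, so they can be fetched again with a
# plain get against the same index. The default topic name is a placeholder.
def _demo_load_language_results(topic='mao_ze_dong_dan_chen_ji_nian_ri'):
    doc = weibo_es.get(index=index_event_analysis_results,
                       doc_type=type_event_analysis_results,
                       id=topic)['_source']
    return doc.get('language_results')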
def save_first_nodes_es(topic, date, windowsize, uid, timestamp, user_info, weibo_info, user_domain='other'):
    #mappings_event_network_first_user()
    #index_name = index_event_network_first_user
    #index_type = type_event_network_first_user
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    item['en_name'] = topic
    item['date'] = date
    item['windowsize'] = windowsize
    item['uid'] = uid
    item['timestamp'] = timestamp
    item['user_info'] = json.dumps(user_info)
    item['weibo_info'] = json.dumps(weibo_info)
    item['user_domain'] = user_domain
    id = uid
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
    except Exception:
        weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_rt_results_es(topic, results, during, first_item):
    #mappings_event_geo_city_topic_count()
    #index_name = index_event_geo_city_topic_count
    #index_type = type_event_geo_city_topic_count
    mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    # results: {message_type: [end_ts, {'province': [(province, count), ...], 'city': [(city, count), ...]}]}
    for mtype, time_geo in results.iteritems():
        item = {}
        item['en_name'] = topic
        item['end_ts'] = time_geo[0]
        item['range'] = during
        item['mtype'] = mtype
        item['ccount'] = time_geo[1]
        item['first_item'] = first_item
        id = topic + '_' + str(time_geo[0])  # doc id keyed by topic and slice end time
        try:
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
        except Exception as e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def compute_topic_task():
    print time.time()
    while True:
        #print r.rpop(topic_queue_name)
        task = r.rpop(topic_queue_name)
        if not task:
            break
        else:
            task = json.loads(task)
            print task
            topic = task['name']
            en_name = task['en_name']
            start_ts = int(task['start_ts'])  #timestamp
            end_ts = int(task['end_ts'])      #timestamp
            submit_user = task['submit_user']
            comput_status = task['comput_status']
            task_id = str(start_ts) + '_' + str(end_ts) + '_' + en_name + '_' + submit_user
            exist_flag = exist(task_id)
            #get_topic_weibo(topic,en_name,start_ts,end_ts)
            if exist_flag:
                #start compute
                #try:
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -1}})
                print 'finish change status'
                #geo
                repost_search(en_name, start_ts, end_ts)
                print 'finish geo_1 analyze'
                cityTopic(en_name, start_ts, end_ts)
                print 'finish geo analyze'
                #language
                count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                          news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
                print 'finish language analyze'
                #time
                propagateCronTopic(en_name, start_ts, end_ts)
                print 'finish time analyze'
                #network
                compute_network(en_name, start_ts, end_ts)
                print 'finish network analyze'
                #sentiment
                sentimentTopic(en_name, start_ts=start_ts, over_ts=end_ts)
                print 'finish sentiment analyze'
                #finish compute
                print weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                      body={'doc': {'comput_status': 1, 'finish_ts': int(time.time())}})
                print 'finish change status done'
                # except:
                #     raise
                #     break
            else:
                pass
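# Illustrative producer-side sketch (not from the original source): compute_topic_task
# above pops JSON task dicts from the Redis list topic_queue_name, so a submitter could
# enqueue work roughly like this. The field names mirror exactly what the consumer
# reads; the en_name value and the reuse of the module-level client r are assumptions.
def _demo_enqueue_topic_task():
    task = {
        'name': u'雾霾',        # Chinese topic name ("smog"), placeholder
        'en_name': 'wu_mai',    # pinyin identifier used in ES doc ids, placeholder
        'start_ts': 1480003100,
        'end_ts': 1480176000,
        'submit_user': 'demo_user',
        'comput_status': 0,
    }
    # lpush so the consumer's rpop sees tasks in FIFO order
    r.lpush(topic_queue_name, json.dumps(task))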
        # (fragment) keyword-count branch of the calc dispatch: per-slice top keywords
        # for each message type
        for time, mtype_dict in results.iteritems():
            id = topic + '_' + time
            for mtype, keyword_dict in mtype_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['mtype'] = mtype
                item['limit'] = klimit
                item['kcount'] = json.dumps(keyword_dict)
                try:
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception as e:
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
    elif calc == 'weibo':
        #mappings_event_time_weibo()
        #index_name = index_event_time_weibo
        #index_type = type_event_time_weibo
        item = {}
        for time, mtype_dict in results.iteritems():
            id = topic + '_' + time
def compute_topic_task():
    print time.time()
    while True:
        #print r.rpop(topic_queue_name)
        task = r.rpop('event_portrait_task')
        #if not task:
        #    break
        if task:
            continue
        else:
            # task = json.loads(task)
            # hard-coded test task: [topic name ("smog"), type, start_ts, end_ts, submit_ts]
            task = ['雾霾', 'type', '1480003100', '1480176000', '1483500427743']
            topic = task[0]  #['name']
            #en_name = task['en_name']
            start_ts = int(task[2])  #timestamp
            end_ts = int(task[3])    #timestamp
            submit_ts = int(task[4])
            try:
                keywords = task['keywords']
            except:
                keywords = ''
            #comput_status = task['status']
            task_id = 'event-' + str(start_ts) + '-' + str(end_ts) + '-' + str(submit_ts)
            en_name = task_id
            t1 = time.time()
            exist_flag = exist(task_id)
            #keywords=keywords.split('&')
            get_topic_weibo(topic, task_id, start_ts, end_ts, keywords)
            print exist_flag
            if exist_flag:
                #start compute
                #try:
                weibo_counts, uid_counts = counts(start_ts, end_ts, topic, en_name, keywords)
                count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                          news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
                weibo_es.index(index='topics', doc_type='text', id=task_id,
                               body={'name': topic, 'start_ts': start_ts, 'end_ts': end_ts,
                                     'submit_ts': submit_ts, 'comput_status': 0, 'en_name': task_id})
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -1,
                                              'weibo_counts': weibo_counts,
                                              'uid_counts': uid_counts}})
                print 'finish change status'
                #geo
                repost_search(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -2}})
                print 'finish geo_1 analyze'
                cityTopic(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -3}})
                print 'finish geo analyze'
                #language
                count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                          news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -4}})
                print 'finish language analyze'
                #time
                propagateCronTopic(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -5}})
                print 'finish time analyze'
                #network
                compute_network(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -6}})
                print 'finish network analyze'
                #sentiment
                sentimentTopic(en_name, start_ts=start_ts, over_ts=end_ts)
                print 'finish sentiment analyze'
                #finish compute
                print weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                      body={'doc': {'comput_status': 1, 'finish_ts': int(time.time())}})
                save_to_es(task_id, start_ts, end_ts, submit_ts, weibo_counts, uid_counts)
                print 'finish change status done'
                break
            t2 = time.time() - t1
            print task_id, t2
def compute_topic_task():
    create_task()
    index_name = index_manage_event_analysis
    index_type = type_manage_event_analysis
    index_name_results = index_event_analysis_results
    index_type_results = type_event_analysis_results
    '''
    while True:
        #print r.rpop(topic_queue_name)
        task_detail = r_event_analysis.rpop(task_event_analysis)
        #if not task_detail:
        #    break
        if task_detail:
            break
        else:
            task_detail = json.loads(task_detail)
            topic = task_detail[0]
            en_name = task_detail[1]
            start_ts = task_detail[2]
            end_ts = task_detail[3]
            #keywords = task_detail[4]
            #event_value_finish = task['event_value_finish']
            #mappings_event_analysis_results(en_name)
            print 'start scan!!'
            while 1:
                es_result = weibo_es.get(index=index_name, doc_type=index_type, id=en_name)["_source"]
                if int(es_result["scan_text_finish"]) == 2:
                    break  # break out of this loop and continue below
                else:
                    time.sleep(60)  # wait until the scan has finished (int(es_result["scan_text_finish"]) == 2)
            t1 = time.time()
    '''
    t1 = time.time()
    '''
    topic = '天津老太摆射击摊被判刑'  #'毛泽东诞辰纪念日'
    en_name = 'tian_jin_lao_tai_she_ji_qiang_bei_pan_xing'  #"mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482768502  #1482681600
    end_ts = 1483455435  #1483113600
    must_keywords = ["射击","判刑"]  #['毛泽东']
    should_keywords = ["天津","老太"]  #['诞辰','纪念日']
    #submit_time = time.time()
    submit_user = '******'
    '''
    topic = '毛泽东诞辰纪念日'
    en_name = "mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482681600
    end_ts = 1483113600
    must_keywords = ['毛泽东']
    should_keywords = ['诞辰', '纪念日']
    #submit_time = time.time()
    submit_user = '******'
    #start computes
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': 1}})
    #try:
    #weibo_counts,uid_counts=counts(start_ts,end_ts,topic,en_name,keywords)
    weibo_counts, uid_counts = counts_aggs(en_name, start_ts, end_ts)
    #weibo_es.index(index='topics',doc_type='text',id=en_name,body={'name':topic,'start_ts':start_ts,'end_ts':end_ts,'submit_ts':submit_ts,'comput_status':0,'en_name':en_name})
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-1,'weibo_counts':weibo_counts,'uid_counts':uid_counts}})
    print 'finish change status'
    item = {}
    item['topic'] = topic
    item['en_name'] = en_name
    item['start_time'] = start_ts
    item['stop_time'] = end_ts
    item['weibo_counts'] = weibo_counts
    item['uid_counts'] = uid_counts
    item['must_keywords'] = must_keywords
    item['should_keywords'] = should_keywords
    item['submit_user'] = submit_user
    #item['submit_time'] = submit_time
    weibo_es.index(index=index_name_results, doc_type=index_type_results, id=en_name, body=item)

    #time
    time_results = propagateCronTopic(en_name, start_ts, end_ts)
    #{'during': ,'count':{},'kcount':{},'weibo':{}}
    time_results = json.dumps(time_results)
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-2}})
    print 'finish time analyze'

    #geo
    sort_ts_attr, repost_list = repost_search(en_name, start_ts, end_ts)
    # for each weibo: original flag, mid, topic, timestamp, original location and repost location
    # each item of repost_list: {original:xx, mid:xx, topic:xx, ts:xx, origin_location:xx, repost_location:xx}
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-}})
    print 'finish geo_1 analyze'
    geo_cityTopic_results = cityTopic(en_name, start_ts, end_ts)
    # {'geo_weibos':{},'geo_cityCount':{}}
    geo_results = {
        'sort_ts_attr': sort_ts_attr,
        'repost_list': repost_list,
        'geo_cityTopic_results': geo_cityTopic_results
    }
    geo_results = json.dumps(geo_results)
    id = en_name
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name_results, doc_type=index_type_results, id=id,
                        body={'doc': {'geo_results': geo_results}})
    except Exception as e:
        weibo_es.index(index=index_name_results, doc_type=index_type_results, id=id,
                       body={'geo_results': geo_results})
    #sentiment
    sentiment_results = sentimentTopic(en_name, start_ts=start_ts,
                                       over_ts=end_ts)
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-5}})
    print 'finish sentiment analyze'

    #language
    language_results = count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                                 news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-6}})
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': 2, 'finish_ts': int(time.time())}})
    print 'finish language analyze'

    #finish compute
    #save_to_es(task_id,start_ts,end_ts,submit_ts,weibo_counts,uid_counts)
    print 'finish change status done'

    t2 = time.time() - t1
    print en_name, t2


def counts_aggs(en_name, start_ts, end_ts):
    id = en_name
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type,
                                  id=id)['_source']
        weibo_es.update(index=index_name_results, doc_type=index_type_results, id=id,
                        body={'doc': {'geo_results': geo_results}})
    except Exception as e:
        weibo_es.index(index=index_name_results, doc_type=index_type_results, id=id,
                       body={'geo_results': geo_results})
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': -3}})
    print 'finish geo analyze'
    '''
    #network
    network_results = compute_network(en_name, start_ts, end_ts)
    # 'new_attribute_dict': the text, reposts_count, comment_count and attitude_count of
    # the nodes that the star-shaped source repost network needs to add
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': -4}})
    print 'finish network analyze'

    #sentiment
    sentiment_results = sentimentTopic(en_name, start_ts=start_ts, over_ts=end_ts)
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': -5}})
def compute_network(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc()  # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='identify' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0]  # compute only one topic per run, as a buffer: one every n interval
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        # re-fetch the id: TopicStatus ids are auto-incremented, so after the update the
        # row no longer has its original id
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        date = ts2datetime(end_ts)
    '''
    # changed: read the topic's pinyin name (which is also the index name) from the ES table
    network_results = {}
    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        topic_pinyin_name = topic
        # print 'start topic_name_transfer'
        # convert the Chinese topic name to pinyin, e.g. 奥运会 -> aoyunhui
        # topic_pinyin_name = weibo_TopicNameTransfer(topicname, start_ts, end_ts)
        # print topic_pinyin_name

        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts)  # used to compute the first user
        first_node_results = get_first_node(topic_pinyin_name, start_ts, end_ts, windowsize, date)
        print 'end compute first_nodes'
        network_results['first_node_results'] = first_node_results

        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date, windowsize, max_size, attribute_add)
        #print g,gg,new_attribute_dict
        network_results['new_attribute_dict'] = new_attribute_dict

        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        #fh = open(str(GRAPH_PATH) + str(key) + '_g_graph.gexf', 'w+')
        #fh.close()
        #fh = open(str(GRAPH_PATH) + str(key) + '_gg_graph.gexf', 'w+')
        #fh.close()
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        #nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        #nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        # to be changed here: SSDB is no longer used
        #save_attribute_dict(new_attribute_dict, 'g')
        #save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, data_dict, sorted_uids = pagerank_rank(TOPK, date, windowsize, topic_pinyin_name)
        network_results['pagerank'] = {}
        network_results['pagerank']['all_uid_pr'] = all_uid_pr
        network_results['pagerank']['sorted_uids'] = sorted_uids
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be removed
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data_dict, sorted_uids,
                                      new_attribute_dict)
        #gexf = json.dumps(gexf)
        print 'save gexf'
        #print '*************************'*10
        #print gexf
        #print '*************************'*10
        long_gexf = save_gexf_results(topic_pinyin_name, date, windowsize, gexf, gexf_type)
        network_results['long_gexf'] = long_gexf

        print 'start fu_tr'
        maker_results, pusher_results = get_interval_count(topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        #db_date = date
        #_update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        network_results['maker_results'] = maker_results
        network_results['pusher_results'] = pusher_results

        index_name = index_event_analysis_results
        index_type = type_event_analysis_results
        network_results = json.dumps(network_results)
        id = topic
        try:
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id,
                            body={'doc': {'network_results': network_results}})
        except Exception as e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id,
                           body={'network_results': network_results})
        print 'network_results save done!!'
    print 'all done!'
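# Illustrative follow-up sketch: compute_network writes the two repost networks to
# GRAPH_PATH as GEXF files, so they can be reloaded for inspection with networkx. The
# key format mirrors the one built above; the topic/date/windowsize defaults are placeholders.
def _demo_load_network_graph(topic_pinyin_name='mao_ze_dong_dan_chen_ji_nian_ri',
                             date='2016-12-31', windowsize=5):
    key = str(topic_pinyin_name) + '_' + str(date) + '_' + str(windowsize)
    g = nx.read_gexf(str(GRAPH_PATH) + key + '_g_graph.gexf')
    gg = nx.read_gexf(str(GRAPH_PATH) + key + '_gg_graph.gexf')
    print 'g: %s nodes, %s edges' % (g.number_of_nodes(), g.number_of_edges())
    return g, gg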