def save_rt_results_es(topic, repost_list):
    # Save one document per repost (geo info of original and repost locations) into the analysis-results index.
    #mappings_event_geo_city_repost()
    #index_name = index_event_geo_city_repost
    #index_type = type_event_geo_city_repost
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    for location in repost_list:
        item['en_name'] = topic
        item['original'] = location['original']
        item['mid'] = location['mid']
        item['timestamp'] = location['ts']
        item['origin_location'] = location['origin_location']
        item['repost_location'] = location['repost_location']
        id = location['mid']
        try:
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
        except Exception, e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
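# A minimal sketch (not part of the original module) of the upsert idiom that the
# save_*_es functions in this file repeat: fetch the document, update it if it is
# already there, otherwise index it. The helper name _upsert_es_doc is introduced
# only for illustration, assuming weibo_es is the module-level Elasticsearch client.
def _upsert_es_doc(index_name, index_type, doc_id, item):
    try:
        weibo_es.get(index=index_name, doc_type=index_type, id=doc_id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=doc_id, body={'doc': item})
    except Exception:
        weibo_es.index(index=index_name, doc_type=index_type, id=doc_id, body=item)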
def save_ws_results_es(topic, ts, during, n_limit, province, city, weibos):
    # Save the top weibos of one province/city for a single time window.
    #mappings_event_geo_province_weibos()
    #index_name = index_event_geo_province_weibos
    #index_type = type_event_geo_province_weibos
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    item['en_name'] = topic
    item['end_ts'] = ts
    item['range'] = during
    item['limit'] = n_limit
    item['province'] = province
    item['city'] = city
    item['weibo'] = json.dumps(weibos)
    id = topic + '_' + str(ts)
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
    except Exception, e:
        weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def es2gexf(indexname):
    try:
        #result = es.search(index=indexname, doc_type='text', body={})
        res = es.get(index=indexname, doc_type='text', id=1)
    except:
        return []
    #print res
    return res['_source']
def exist(task_id):
    try:
        task_exist = weibo_es.get(index=topic_index_name, doc_type=topic_index_type, id=task_id)['_source']
    except:
        task_exist = {}
    if not task_exist:
        return False
    else:
        return True
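# Hedged usage sketch (not part of the original module): exist() can guard against
# re-running analysis for a task whose document is already stored in topic_index_name.
# The id below just reuses the en_name format seen elsewhere in this file.
#
#     if not exist('mao_ze_dong_dan_chen_ji_nian_ri'):
#         compute_topic_task()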
def save_rt_results_es(calc, topic, results, during, klimit=TOP_KEYWORDS_LIMIT, wlimit=TOP_WEIBOS_LIMIT):
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    if calc == 'count':
        # results: {time_slice: {sentiment1: count1, sentiment2: count2, ...}, ...}
        #mappings_event_sentiment_count()
        #index_name = index_event_sentiment_count
        #index_type = type_event_sentiment_count
        item = {}
        for time, sen_dict in results.iteritems():
            id = topic + '_' + time
            for sentiment, count in sen_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['sentiment'] = sentiment
                item['count'] = count
                try:
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception, e:
                    # raise e
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_results_es(topic, language_results):
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    id = topic
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id,
                        body={'doc': {'language_results': language_results}})
    except Exception, e:
        weibo_es.index(index=index_name, doc_type=index_type, id=id,
                       body={'language_results': language_results})
def save_first_nodes_es(topic, date, windowsize, uid, timestamp, user_info, weibo_info, user_domain='other'):
    # Save one "first node" (earliest participating user) of the event network, keyed by uid.
    #mappings_event_network_first_user()
    #index_name = index_event_network_first_user
    #index_type = type_event_network_first_user
    #mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    item['en_name'] = topic
    item['date'] = date
    item['windowsize'] = windowsize
    item['uid'] = uid
    item['timestamp'] = timestamp
    item['user_info'] = json.dumps(user_info)
    item['weibo_info'] = json.dumps(weibo_info)
    item['user_domain'] = user_domain
    id = uid
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
    except:
        weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_rt_results_es(topic, results, during, first_item):
    # Save per-message-type geo counts of an event.
    #mappings_event_geo_city_topic_count()
    #index_name = index_event_geo_city_topic_count
    #index_type = type_event_geo_city_topic_count
    mappings_event_analysis_results(topic)
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    item = {}
    for mtype, time_geo in results.iteritems():
        # results: {message_type: [timestamp, {'province': {province: count, ...}, 'city': {city: count, ...}}]}
        item['en_name'] = topic
        item['end_ts'] = time_geo[0]
        item['range'] = during
        item['mtype'] = mtype
        item['ccount'] = time_geo[1]
        item['first_item'] = first_item
        id = topic + '_' + str(time_geo[0])  # document id built from the window timestamp
        try:
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
        except Exception, e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
    elif calc == 'kcount':
        # keyword counts per message type for each time window
        #index_type = type_event_time_kcount
        item = {}
        for time, mtype_dict in results.iteritems():
            id = topic + '_' + time
            for mtype, keyword_dict in mtype_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['mtype'] = mtype
                item['limit'] = klimit
                item['kcount'] = json.dumps(keyword_dict)
                try:
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception, e:
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
    elif calc == 'weibo':
        #mappings_event_time_weibo()
        #index_name = index_event_time_weibo
        #index_type = type_event_time_weibo
def compute_topic_task():
    create_task()
    index_name = index_manage_event_analysis
    index_type = type_manage_event_analysis
    index_name_results = index_event_analysis_results
    index_type_results = type_event_analysis_results
    '''
    while True:
        #print r.rpop(topic_queue_name)
        task_detail = r_event_analysis.rpop(task_event_analysis)
        #if not task_detail:
        #    break
        if task_detail:
            break
        else:
            task_detail = json.loads(task_detail)
            topic = task_detail[0]
            en_name = task_detail[1]
            start_ts = task_detail[2]
            end_ts = task_detail[3]
            #keywords = task_detail[4]
            #event_value_finish = task['event_value_finish']

    #mappings_event_analysis_results(en_name)
    print 'start scan!!'
    while 1:
        es_result = weibo_es.get(index=index_name, doc_type=index_type, id=en_name)["_source"]
        if int(es_result["scan_text_finish"]) == 2:
            break  # break out of this loop and continue with the rest of the task
        else:
            time.sleep(60)  # wait until the scan finishes (int(es_result["scan_text_finish"]) == 2)
    t1 = time.time()
    '''
    t1 = time.time()
    '''
    topic = '天津老太摆射击摊被判刑'  #'毛泽东诞辰纪念日'
    en_name = 'tian_jin_lao_tai_she_ji_qiang_bei_pan_xing'  #"mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482768502  #1482681600
    end_ts = 1483455435  #1483113600
    must_keywords = ["射击", "判刑"]  #['毛泽东']
    should_keywords = ["天津", "老太"]  #['诞辰','纪念日']
    #submit_time = time.time()
    submit_user = '******'
    '''
    topic = '毛泽东诞辰纪念日'
    en_name = "mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482681600
    end_ts = 1483113600
    must_keywords = ['毛泽东']
    should_keywords = ['诞辰', '纪念日']
    #submit_time = time.time()
    submit_user = '******'

    # start computes
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': 1}})
    #try:
    #weibo_counts,uid_counts=counts(start_ts,end_ts,topic,en_name,keywords)
    weibo_counts, uid_counts = counts_aggs(en_name, start_ts, end_ts)
    #weibo_es.index(index='topics',doc_type='text',id=en_name,body={'name':topic,'start_ts':start_ts,'end_ts':end_ts,'submit_ts':submit_ts,'comput_status':0,'en_name':en_name})
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-1,'weibo_counts':weibo_counts,'uid_counts':uid_counts}})
    print 'finish change status'

    item = {}
    item['topic'] = topic
    item['en_name'] = en_name
    item['start_time'] = start_ts
    item['stop_time'] = end_ts
    item['weibo_counts'] = weibo_counts
    item['uid_counts'] = uid_counts
    item['must_keywords'] = must_keywords
    item['should_keywords'] = should_keywords
    item['submit_user'] = submit_user
    #item['submit_time'] = submit_time
    weibo_es.index(index=index_name_results, doc_type=index_type_results, id=en_name, body=item)

    # time analysis
    time_results = propagateCronTopic(en_name, start_ts, end_ts)  # {'during': ,'count':{},'kcount':{},'weibo':{}}
    time_results = json.dumps(time_results)
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-2}})
    print 'finish time analyze'

    # geo analysis
    sort_ts_attr, repost_list = repost_search(en_name, start_ts, end_ts)
    # for each weibo, collect the repost text, mid, topic, timestamp, original location and repost location
    # each entry of repost_list: {original:xx, mid:xx, topic:xx, ts:xx, origin_location:xx, repost_location:xx}
    #weibo_es.update(index=index_name,doc_type=index_type,id=en_name,body={'doc':{'event_value_finish':-}})
    print 'finish geo_1 analyze'
    geo_cityTopic_results = cityTopic(en_name, start_ts, end_ts)  # {'geo_weibos':{},'geo_cityCount':{}}
    geo_results = {
        'sort_ts_attr': sort_ts_attr,
        'repost_list': repost_list,
        'geo_cityTopic_results': geo_cityTopic_results
    }
    geo_results = json.dumps(geo_results)
    id = en_name
    try:
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name_results, doc_type=index_type_results, id=id,
                        body={'doc': {'geo_results': geo_results}})
    except Exception, e:
        weibo_es.index(index=index_name_results, doc_type=index_type_results, id=id,
                       body={'geo_results': geo_results})
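# Hedged sketch (not part of the original module): the analysis functions above store
# json.dumps()-encoded strings (e.g. geo_results, network_results) in the results index,
# so code reading the document back has to json.loads() those fields.
def load_event_results(en_name):
    doc = weibo_es.get(index=index_event_analysis_results,
                       doc_type=type_event_analysis_results, id=en_name)['_source']
    if 'geo_results' in doc:
        doc['geo_results'] = json.loads(doc['geo_results'])
    if 'network_results' in doc:
        doc['network_results'] = json.loads(doc['network_results'])
    return doc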
def compute_network(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc()  # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                              TopicStatus.start==start_ts ,\
                                                              TopicStatus.end==end_ts ,\
                                                              TopicStatus.module=='identify' ,\
                                                              TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0]  # only compute one topic at a time---as a buffer, one every n interval
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)  # re-fetch the id: TopicStatus ids are auto-incremented, so after the update it is no longer the original one
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        date = ts2datetime(end_ts)
    '''
    # changed: read the topic's pinyin name (which is also the index name) from the ES table
    network_results = {}
    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        topic_pinyin_name = topic
        # print 'start topic_name_transfer'
        # convert the Chinese topic name to pinyin, e.g. 奥运会 -> aoyunhui
        # topic_pinyin_name = weibo_TopicNameTransfer(topicname, start_ts, end_ts)
        # print topic_pinyin_name

        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts)  # used to compute the first user
        first_node_results = get_first_node(topic_pinyin_name, start_ts, end_ts, windowsize, date)
        print 'end compute first_nodes'
        network_results['first_node_results'] = first_node_results

        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date, windowsize, max_size, attribute_add)
        #print g,gg,new_attribute_dict
        network_results['new_attribute_dict'] = new_attribute_dict

        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        #fh = open(str(GRAPH_PATH) + str(key) + '_g_graph.gexf', 'w+')
        #fh.close()
        #fh = open(str(GRAPH_PATH) + str(key) + '_gg_graph.gexf', 'w+')
        #fh.close()
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        #nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        #nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        # change this: SSDB is no longer used
        #save_attribute_dict(new_attribute_dict, 'g')
        #save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, data_dict, sorted_uids = pagerank_rank(TOPK, date, windowsize, topic_pinyin_name)
        network_results['pagerank'] = {}
        network_results['pagerank']['all_uid_pr'] = all_uid_pr
        network_results['pagerank']['sorted_uids'] = sorted_uids
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be deleted
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data_dict, sorted_uids,\
                                      new_attribute_dict)
        #gexf = json.dumps(gexf)
        print 'save gexf'
        #print '*************************'*10
        #print gexf
        #print '*************************'*10
        long_gexf = save_gexf_results(topic_pinyin_name, date, windowsize, gexf, gexf_type)
        network_results['long_gexf'] = long_gexf

        print 'start fu_tr'
        maker_results, pusher_results = get_interval_count(topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        #db_date = date
        #_update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        network_results['maker_results'] = maker_results
        network_results['pusher_results'] = pusher_results

        index_name = index_event_analysis_results
        index_type = type_event_analysis_results
        network_results = json.dumps(network_results)
        id = topic
        try:
            item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id,
                            body={'doc': {'network_results': network_results}})
        except Exception, e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id,
                           body={'network_results': network_results})
        print 'network_results save done!!'
    print 'all done!'
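# Hedged sketch (not part of the original module): the gexf files written by
# compute_network() can be read back with networkx for inspection; the key format
# mirrors the one built in compute_network(), and GRAPH_PATH is the same module-level
# constant used above.
def load_topic_graph(topic_pinyin_name, date, windowsize):
    key = str(topic_pinyin_name) + '_' + str(date) + '_' + str(windowsize)
    return nx.read_gexf(str(GRAPH_PATH) + str(key) + '_g_graph.gexf')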