def save_ws_results_es(topic, ts, during, n_limit, province, city, weibos): #mappings_event_geo_province_weibos() #index_name = index_event_geo_province_weibos #index_type = type_event_geo_province_weibos #mappings_event_analysis_results(topic) index_name = index_event_analysis_results index_type = type_event_analysis_results item = {} item['en_name'] = topic item['end_ts'] = ts item['range'] = during item['limit'] = n_limit item['province'] = province item['city'] = city item['weibo'] = json.dumps(weibos) id = topic + '_' + ts try: item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source'] weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item}) except Exception, e: weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_long_gexf(topic, identifyDate, identifyWindow, identifyGexf): index_name = topic + '_gexffile' get_graph_mappings(index_name) bulk_action = [] #action = {"index":{"_id":999}} source = json.dumps(identifyGexf) action = { #"index":{"_id":999}, #"_source":{ "name": str(identifyDate) + str(identifyWindow), "gexf": source, "date": str(identifyDate), "window": identifyWindow, #} } bulk_action.extend([ action, ]) print bulk_action auto_id = [ str(i) for i in str(identifyDate) + str(identifyWindow) if i.isdigit() ] auto_id = ''.join(auto_id) #es.bulk(bulk_action, index=index_name, doc_type='text', timeout=600) es.index(index=index_name, doc_type='text', id=auto_id, body=action)
def save_rt_results_es(topic, repost_list): #mappings_event_geo_city_repost() #index_name = index_event_geo_city_repost #index_type = type_event_geo_city_repost #mappings_event_analysis_results(topic) index_name = index_event_analysis_results index_type = type_event_analysis_results item = {} for location in repost_list: item['en_name'] = topic item['original'] = location['original'] item['mid'] = location['mid'] item['timestamp'] = location['ts'] item['origin_location'] = location['origin_location'] item['repost_location'] = location['repost_location'] id = location['mid'] try: item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source'] weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item}) except Exception, e: weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_rt_results_es(calc, topic, results, during, klimit=TOP_KEYWORDS_LIMIT, wlimit=TOP_WEIBOS_LIMIT):
    """Upsert sentiment-count results for an event into the analysis-results index.

    calc    -- which result family to store; only the 'count' branch is
               visible in this chunk.
    results -- for 'count': {time_slice: {sentiment: count, ...}, ...}
    during  -- the aggregation range recorded on each document.

    NOTE(review): this chunk appears truncated — an orphaned fragment later in
    the file carries what look like the 'kcount'/'weibo' branches of this same
    function. Also, this file defines several functions named
    save_rt_results_es; the one defined last wins at import time.
    """
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    if calc == 'count':
        # results: {time_slice: {sentiment1: value1, sentiment2: value2}}
        item = {}
        for time, sen_dict in results.iteritems():
            id = topic + '_' + time
            for sentiment, count in sen_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['sentiment'] = sentiment
                item['count'] = count
                try:
                    # NOTE(review): every sentiment of the same time slice
                    # shares one doc id, so each upsert overwrites the
                    # previous sentiment's fields — confirm this is intended.
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception, e:
                    # raise e
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
def save_results_es(topic, language_results): #mappings_event_analysis_results(topic) index_name = index_event_analysis_results index_type = type_event_analysis_results id = topic try: item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source'] weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': { 'language_results': language_results }}) except Exception, e: weibo_es.index(index=index_name, doc_type=index_type, id=id, body={'language_results': language_results})
def save_first_nodes_es(topic, date, windowsize, uid, timestamp, user_info, weibo_info, user_domain='other'):
    """Upsert the earliest-participant record of an event network, keyed by uid."""
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    record = {
        'en_name': topic,
        'date': date,
        'windowsize': windowsize,
        'uid': uid,
        'timestamp': timestamp,
        'user_info': json.dumps(user_info),
        'weibo_info': json.dumps(weibo_info),
        'user_domain': user_domain,
    }
    try:
        # Update when the uid document already exists, otherwise create it.
        weibo_es.get(index=index_name, doc_type=index_type, id=uid)['_source']
        weibo_es.update(index=index_name, doc_type=index_type, id=uid,
                        body={'doc': record})
    except:
        weibo_es.index(index=index_name, doc_type=index_type, id=uid,
                       body=record)
def save_long_gexf(topic, identifyDate, identifyWindow, identifyGexf): index_name = topic+'_gexffile' get_graph_mappings(index_name) bulk_action = [] #action = {"index":{"_id":999}} source = json.dumps(identifyGexf) action = { #"index":{"_id":999}, #"_source":{ "name":str(identifyDate)+str(identifyWindow), "gexf":source, "date":str(identifyDate), "window":identifyWindow, #} } bulk_action.extend([action,]) print bulk_action auto_id = [str(i)for i in str(identifyDate)+str(identifyWindow) if i.isdigit()] auto_id = ''.join(auto_id) #es.bulk(bulk_action, index=index_name, doc_type='text', timeout=600) es.index(index=index_name, doc_type='text', id=auto_id, body=action)
def save_rt_results_es(topic, results, during, first_item): #mappings_event_geo_city_topic_count() #index_name = index_event_geo_city_topic_count #index_type = type_event_geo_city_topic_count mappings_event_analysis_results(topic) index_name = index_event_analysis_results index_type = type_event_analysis_results item = {} for mtype, time_geo in results.iteritems( ): ##{'message_type':[timestamp,{['province':('provice':cishu),()],'city':[(city:cishu)}]} item['en_name'] = topic item['end_ts'] = time_geo[0] item['range'] = during item['mtype'] = mtype item['ccount'] = time_geo[1] item['first_item'] = first_item id = topic + '_' + ts try: item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source'] weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item}) except Exception, e: weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
# NOTE(review): orphaned fragment — this chunk begins mid-function (the
# enclosing `def` and the branch header, presumably an `elif calc == 'kcount':`
# with its time/mtype loops, are not in view), so it is not valid Python on
# its own. Tokens kept as found; indentation reconstructed to match the
# sibling `calc` branches.
                item['range'] = during
                item['mtype'] = mtype
                item['limit'] = klimit
                # kcount: JSON-serialized {keyword: count} for this (time, mtype)
                item['kcount'] = json.dumps(keyword_dict)
                try:
                    item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
                    weibo_es.update(index=index_name, doc_type=index_type, id=id, body={'doc': item})
                except Exception, e:
                    weibo_es.index(index=index_name, doc_type=index_type, id=id, body=item)
    elif calc == 'weibo':
        # Representative weibos per (time slice, message type).
        #mappings_event_time_weibo()
        #index_name = index_event_time_weibo
        #index_type = type_event_time_weibo
        item = {}
        for time, mtype_dict in results.iteritems():
            id = topic + '_' + time
            for mtype, weibo in mtype_dict.iteritems():
                item['en_name'] = topic
                item['end_ts'] = time
                item['range'] = during
                item['mtype'] = mtype
def weibo_comments_list(taskid, start_ts, over_ts, weibo_list, cluster_num=-1,
                        cluster_eva_min_size=default_cluster_eva_min_size,
                        vsm=default_vsm, calculation_label=1):
    """Cluster weibo texts into sub-opinions and index one ES doc per cluster.

    weibo_list: the weibo items to cluster (each a dict; id/text/clusterid/...
    keys are read below).  Returns a JSON string with the cluster keyword
    features, the per-cluster share of texts, and the de-duplicated texts per
    cluster.  When calculation_label == 0 and a cached result file exists for
    taskid, the cached pie-chart data is returned and no clustering runs.
    """
    params = {"taskid": taskid, "cluster_num": cluster_num,
              "cluster_eva_min_size": cluster_eva_min_size,
              "vsm": vsm, "calculation_label": calculation_label}
    task_result_file = os.path.join(RESULT_WEIBO_FOLDER, taskid)
    if os.path.exists(task_result_file) and calculation_label == 0:
        # Load the result set from the existing data file (cache hit).
        with open(task_result_file) as dump_file:
            dump_dict = json.loads(dump_file.read())
            ratio_results = dump_dict["ratio"]
            sentiratio_results = dump_dict["sentiratio"]
            before_filter_count = dump_dict["before_filter_count"]
            after_filter_count = dump_dict["after_filter_count"]
            return json.dumps({"ratio": ratio_results,
                               "sentiratio": sentiratio_results,
                               "before_filter_count": before_filter_count,
                               "after_filter_count": after_filter_count})
    comments = weibo_list
    print 'weibo_list:', len(comments)
    logfile = os.path.join(LOG_WEIBO_FOLDER, taskid + '.log')
    # Run the clustering: features maps clusterid -> keyword list,
    # item_infos carries per-text labels (clusterid, ad_label, ...).
    cal_results = weibo_calculation(comments, logfile=logfile, cluster_num=cluster_num,
                                    cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    rub_results = []
    # Text count before filtering.
    before_filter_count = len(item_infos)
    # Text count after filtering (never incremented here: the sentiment
    # accounting that updated it is disabled below).
    after_filter_count = 0
    download_items = []
    for comment in item_infos:
        download_item = {}
        download_item["id"] = comment["id"]
        download_item["text"] = comment["text"]
        download_item["clusterid"] = comment["clusterid"]
        download_item["ad_label"] = comment["ad_label"]
        download_item["comment"] = comment["comment"]
        download_item["datetime"] = comment["datetime"]
        download_item["retweeted"] = comment["retweeted"]
        download_item["uid"] = comment["uid"]
        # download_item["same_from"] = comment["same_from"]
        download_items.append(download_item)
        # Count and collect texts per real (non-"nonsense") cluster.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        # (sentiment accounting intentionally disabled — jln0825: sentiment
        # output is no longer wanted for weibos)
        if comment['clusterid'][:8] == 'nonsense':
            rub_results.append(comment)
    # Share of each cluster among kept texts, keyed by its top-3 keywords.
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    for clusterid, ratio in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(
                    feature[:3])] = float(ratio) / float(ratio_total_count)
    # De-duplicate texts within each sub-opinion cluster, grouped by their
    # "same_from" duplicate-source id.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                cluster_dump_dict[clusterid] = dump_dict
    # One ES document per cluster id.
    # NOTE(review): features may contain ids that never made it into
    # cluster_dump_dict (empty feature list, no kept texts), which would raise
    # KeyError below — confirm weibo_calculation never emits such ids here.
    for key in features.keys():
        print features[key], type(features[key])
        keys = ('_').join(features[key])
        index_body = {
            'name': taskid,
            'start_ts': start_ts,
            'end_ts': over_ts,
            'ratio': json.dumps(ratio_results),
            'cluster': json.dumps(key),
            'features': json.dumps(features),
            'keys': keys,
            'cluster_dump_dict': json.dumps(cluster_dump_dict[key])
        }
        weibo_es.index(index=subopinion_index_name,
                       doc_type=subopinion_index_type,
                       id=key,
                       body=index_body)
    # features: mapping from cluster id to its keywords.
    return json.dumps({
        "features": features,
        "ratio": ratio_results,
        "cluster_dump_dict": cluster_dump_dict
    })
def news_comments_list(taskid, start_ts, over_ts, weibo_list, cluster_num=-1,
                       cluster_eva_min_size=default_cluster_eva_min_size,
                       vsm=default_vsm, calculation_label=1):
    """Cluster news texts into sub-opinions plus sentiment and index the result.

    Computes the pie-chart data (per-cluster and per-sentiment ratios) and the
    de-duplicated representative texts, stores them as one document under
    ``taskid`` in the topics-river index, and returns the cluster features and
    de-duplicated clusters as a JSON string.

    NOTE(review): these parameters originally came from HTTP request args (the
    disabled snippet this docstring replaces); calculation_label is currently
    unused in this variant.
    """
    params = {"taskid": taskid, "cluster_num": cluster_num,
              "cluster_eva_min_size": cluster_eva_min_size,
              "vsm": vsm, "calculation_label": calculation_label}
    comments = weibo_list
    logfile = os.path.join(LOG_FOLDER, taskid + '.log')
    # Run the clustering: features maps clusterid -> keyword list,
    # item_infos carries per-text labels (clusterid, sentiment, ...).
    cal_results = comments_calculation_v2(comments, logfile=logfile, cluster_num=cluster_num,
                                          cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']
    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    rub_results = []
    # Text count before filtering.
    before_filter_count = len(item_infos)
    # Text count after filtering.
    after_filter_count = 0
    download_items = []
    for comment in item_infos:
        download_item = {}
        download_item["id"] = comment["id"]
        download_item["title"] = comment["title"]
        download_item["text"] = comment["text"]
        # download_item["timestamp"] = comment["timestamp"]
        download_item["datetime"] = comment["datetime"]
        download_item["clusterid"] = comment["clusterid"]
        download_item["sentiment"] = comment["sentiment"]
        download_item["ad_label"] = comment["ad_label"]
        # duplicate/same_from only exist on texts kept in a real cluster.
        if (comment["clusterid"][:8] != 'nonsense') and (comment["clusterid"] != 'other'):
            download_item["duplicate"] = comment["duplicate"]
            download_item["same_from"] = comment["same_from"]
        download_items.append(download_item)
        # Count and collect texts per real (non-"nonsense") cluster.
        if ('clusterid' in comment) and (comment['clusterid'][:8] != 'nonsense'):
            clusterid = comment['clusterid']
            try:
                cluster_ratio[clusterid] += 1
            except KeyError:
                cluster_ratio[clusterid] = 1
            try:
                cluster_results[clusterid].append(comment)
            except KeyError:
                cluster_results[clusterid] = [comment]
        # Count and collect texts per known sentiment label.
        if ('sentiment' in comment) and (comment['sentiment'] in emotions_vk_v1) and ('clusterid' in comment) \
                and (comment['clusterid'][:8] != 'nonsense'):
            sentiment = comment['sentiment']
            try:
                senti_ratio[sentiment] += 1
            except KeyError:
                senti_ratio[sentiment] = 1
            try:
                sentiment_results[sentiment].append(comment)
            except KeyError:
                sentiment_results[sentiment] = [comment]
            after_filter_count += 1
        if comment['clusterid'][:8] == 'nonsense':
            rub_results.append(comment)
    # Per-cluster share of kept texts, keyed by the cluster's top-3 keywords.
    ratio_results = dict()
    ratio_total_count = sum(cluster_ratio.values())
    for clusterid, ratio in cluster_ratio.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                ratio_results[','.join(
                    feature[:3])] = float(ratio) / float(ratio_total_count)
    # Per-sentiment share, keyed by the human-readable sentiment label.
    sentiratio_results = dict()
    sentiratio_total_count = sum(senti_ratio.values())
    for sentiment, ratio in senti_ratio.iteritems():
        if sentiment in emotions_vk_v1:
            label = emotions_vk_v1[sentiment]
            if label and len(label):
                sentiratio_results[label] = float(ratio) / float(
                    sentiratio_total_count)
    # De-duplicate within each sentiment class, grouped by same_from_sentiment.
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.iteritems():
        dump_dict = dict()
        for comment in contents:
            same_from_sentiment = comment["same_from_sentiment"]
            try:
                dump_dict[same_from_sentiment].append(comment)
            except KeyError:
                dump_dict[same_from_sentiment] = [comment]
        sentiment_dump_dict[sentiment] = dump_dict
    # De-duplicate within each sub-opinion cluster, grouped by same_from.
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.iteritems():
        if clusterid in features:
            feature = features[clusterid]
            if feature and len(feature):
                dump_dict = dict()
                for comment in contents:
                    same_from_cluster = comment["same_from"]
                    try:
                        dump_dict[same_from_cluster].append(comment)
                    except KeyError:
                        dump_dict[same_from_cluster] = [comment]
                for k, v in dump_dict.iteritems():
                    sort_dump_dict = sorted(v, key=lambda x: x['weight'], reverse=True)
                    # NOTE(review): each same_from group overwrites the
                    # previous one, so only the LAST group's sorted texts
                    # survive per cluster — confirm this is intended.
                    cluster_dump_dict[clusterid] = sort_dump_dict
    # (file dump of the result set and per-item download output intentionally
    # disabled)
    index_body = {
        'name': taskid,
        'start_ts': start_ts,
        'end_ts': over_ts,
        'features': json.dumps(features),
        'cluster_dump_dict': json.dumps(cluster_dump_dict)
    }
    weibo_es.index(index=topics_river_index_name,
                   doc_type=topics_river_index_type,
                   id=taskid,
                   body=index_body)
    return json.dumps({
        "features": features,
        "cluster_dump_dict": cluster_dump_dict
    })
def compute_topic_task():
    """Pop an event task from redis and run the full topic-analysis pipeline.

    NOTE(review): the queue handling looks inverted — when a task IS popped
    from 'event_portrait_task' it is discarded (`continue`), and the
    hard-coded test task below runs only when the queue is EMPTY. This reads
    like leftover debugging; confirm before relying on it.
    """
    print time.time()
    while True:
        #print r.rpop(topic_queue_name)
        task = r.rpop('event_portrait_task')
        #if not task:
        #    break
        if task:
            continue
        else:
            # task = json.loads(task)
            # Hard-coded test task: [name, type, start_ts, end_ts, submit_ts]
            task = ['雾霾', 'type', '1480003100', '1480176000', '1483500427743']
            topic = task[0]  #['name']
            #en_name = task['en_name']
            start_ts = int(task[2])  #timestamp
            end_ts = int(task[3])  #timestamp
            submit_ts = int(task[4])
            try:
                # NOTE(review): task is a list here, so task['keywords']
                # raises TypeError and keywords always falls back to ''.
                keywords = task['keywords']
            except:
                keywords = ''
            #comput_status = task['status']
            task_id = 'event-' + str(start_ts) + '-' + str(end_ts) + '-' + str(
                submit_ts)
            en_name = task_id
            t1 = time.time()
            exist_flag = exist(task_id)
            #keywords=keywords.split('&')
            get_topic_weibo(topic, task_id, start_ts, end_ts, keywords)
            print exist_flag
            if exist_flag:
                # start compute: counts + language frequency, then register
                # the topic document with comput_status 0.
                weibo_counts, uid_counts = counts(start_ts, end_ts, topic,
                                                  en_name, keywords)
                count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                          news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
                weibo_es.index(index='topics', doc_type='text', id=task_id,
                               body={
                                   'name': topic,
                                   'start_ts': start_ts,
                                   'end_ts': end_ts,
                                   'submit_ts': submit_ts,
                                   'comput_status': 0,
                                   'en_name': task_id
                               })
                # comput_status counts down -1..-6 as each stage completes,
                # then becomes 1 when everything is done.
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {
                                    'comput_status': -1,
                                    'weibo_counts': weibo_counts,
                                    'uid_counts': uid_counts
                                }})
                print 'finish change status'
                # geo
                repost_search(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -2}})
                print 'finish geo_1 analyze'
                cityTopic(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -3}})
                print 'finish geo analyze'
                # language
                # NOTE(review): count_fre already ran above with the same
                # arguments — this second call looks redundant; confirm.
                count_fre(en_name, start_ts=start_ts, over_ts=end_ts,
                          news_limit=NEWS_LIMIT, weibo_limit=MAX_LANGUAGE_WEIBO)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -4}})
                print 'finish language analyze'
                # time
                propagateCronTopic(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -5}})
                print 'finish time analyze'
                # network
                compute_network(en_name, start_ts, end_ts)
                weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                body={'doc': {'comput_status': -6}})
                print 'finish network analyze'
                # sentiment
                sentimentTopic(en_name, start_ts=start_ts, over_ts=end_ts)
                print 'finish sentiment analyze'
                # finish compute
                print weibo_es.update(index=topic_index_name, doc_type=topic_index_type, id=task_id,
                                      body={'doc': {
                                          'comput_status': 1,
                                          'finish_ts': int(time.time())
                                      }})
                save_to_es(task_id, start_ts, end_ts, submit_ts, weibo_counts,
                           uid_counts)
                print 'finish change status done'
                break
            t2 = time.time() - t1
            print task_id, t2
def compute_topic_task():
    """Driver for one hard-coded event-analysis run (count/time/geo stages).

    NOTE(review): this re-defines ``compute_topic_task`` (an earlier
    definition exists in this file); the redis queue-reading and scan-wait
    logic is disabled in the string literal below, and topic/en_name/time
    range are hard-coded test values. The pipeline here stops after the geo
    step — later stages may live in another chunk of the file.
    """
    create_task()
    index_name = index_manage_event_analysis
    index_type = type_manage_event_analysis
    index_name_results = index_event_analysis_results
    index_type_results = type_event_analysis_results
    # Disabled: pop a task from redis, then poll ES until text scanning is
    # finished (scan_text_finish == 2).
    '''
    while True:
        #print r.rpop(topic_queue_name)
        task_detail = r_event_analysis.rpop(task_event_analysis)
        #if not task_detail:
        #    break
        if task_detail:
            break
        else:
            task_detail = json.loads(task_detail)
        topic = task_detail[0]
        en_name = task_detail[1]
        start_ts = task_detail[2]
        end_ts = task_detail[3]
        #keywords = task_detail[4]
        #event_value_finish = task['event_value_finish']
        #mappings_event_analysis_results(en_name)
        print 'start scan!!'
        while 1:
            es_result = weibo_es.get(index=index_name, doc_type=index_type, id=en_name)["_source"]
            if int(es_result["scan_text_finish"]) == 2:
                break
            else:
                time.sleep(60)
        t1=time.time()
    '''
    t1 = time.time()
    # Disabled alternative hard-coded topic.
    '''
    topic = '天津老太摆射击摊被判刑' #'毛泽东诞辰纪念日'
    en_name = 'tian_jin_lao_tai_she_ji_qiang_bei_pan_xing' #"mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482768502 #1482681600
    end_ts = 1483455435 #1483113600
    must_keywords = ["射击","判刑"] #['毛泽东']
    should_keywords = ["天津","老太"] #['诞辰','纪念日']
    #submit_time = time.time()
    submit_user = '******'
    '''
    # Hard-coded test event.
    topic = '毛泽东诞辰纪念日'
    en_name = "mao_ze_dong_dan_chen_ji_nian_ri"
    start_ts = 1482681600
    end_ts = 1483113600
    must_keywords = ['毛泽东']
    should_keywords = ['诞辰', '纪念日']
    #submit_time = time.time()
    submit_user = '******'
    # Mark the task as started in the manage index.
    weibo_es.update(index=index_name, doc_type=index_type, id=en_name,
                    body={'doc': {'event_value_finish': 1}})
    #try:
    #weibo_counts,uid_counts=counts(start_ts,end_ts,topic,en_name,keywords)
    weibo_counts, uid_counts = counts_aggs(en_name, start_ts, end_ts)
    print 'finish change status'
    # Seed the per-event results document.
    item = {}
    item['topic'] = topic
    item['en_name'] = en_name
    item['start_time'] = start_ts
    item['stop_time'] = end_ts
    item['weibo_counts'] = weibo_counts
    item['uid_counts'] = uid_counts
    item['must_keywords'] = must_keywords
    item['should_keywords'] = should_keywords
    item['submit_user'] = submit_user
    #item['submit_time'] = submit_time
    weibo_es.index(index=index_name_results, doc_type=index_type_results,
                   id=en_name, body=item)
    # time analysis: {'during': ,'count':{},'kcount':{},'weibo':{}}
    time_results = propagateCronTopic(en_name, start_ts, end_ts)
    time_results = json.dumps(time_results)
    # NOTE(review): time_results is serialized but never stored in this chunk.
    print 'finish time analyze'
    # geo: per repost — original weibo, mid, topic, time, origin location and
    # repost location. Each repost_list entry:
    # {original:xx, mid:xx, topic:xx, ts:xx, origin_location:xx, repost_location:xx}
    sort_ts_attr, repost_list = repost_search(en_name, start_ts, end_ts)
    print 'finish geo_1 analyze'
    geo_cityTopic_results = cityTopic(en_name, start_ts, end_ts)  # {'geo_weibos':{},'geo_cityCount':{}}
    geo_results = {
        'sort_ts_attr': sort_ts_attr,
        'repost_list': repost_list,
        'geo_cityTopic_results': geo_cityTopic_results
    }
    geo_results = json.dumps(geo_results)
    id = en_name
    try:
        # NOTE(review): existence is probed against the MANAGE index while the
        # update targets the RESULTS index — confirm this is intentional.
        item_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
        weibo_es.update(index=index_name_results, doc_type=index_type_results, id=id,
                        body={'doc': {'geo_results': geo_results}})
    except Exception, e:
        weibo_es.index(index=index_name_results, doc_type=index_type_results, id=id,
                       body={'geo_results': geo_results})
def compute_network(topic, start_ts, end_ts):
    """Run the network-analysis pipeline for one topic and upsert the results.

    Builds the retweet network for [start_ts, end_ts]: first nodes, the g/gg
    graphs (written as gexf files), PageRank, the rendered network graph, and
    maker/pusher interval counts. The JSON-serialized bundle is stored under
    the ``network_results`` field of the topic's analysis-results document.
    """
    '''
    topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
        TopicStatus.start==start_ts ,\
        TopicStatus.end==end_ts ,\
        TopicStatus.module=='identify' ,\
        TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts) # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了
        windowsize = (end_ts - start_ts) / Day # 确定时间跨度的大小
        date = ts2datetime(end_ts)
    '''
    # Changed: the topic pinyin name (also the ES table name) now comes
    # straight from the ``topic`` argument instead of a DB/ES lookup.
    network_results = {}
    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # window span in days (py2 integer division)
        topic_pinyin_name = topic
        # (former name->pinyin transfer, e.g. 奥运会 -> aoyunhui, is disabled)
        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts) # used to compute the first user
        first_node_results = get_first_node(topic_pinyin_name, start_ts,
                                            end_ts, windowsize, date)
        print 'end compute first_nodes'
        network_results['first_node_results'] = first_node_results
        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date,
                                                 windowsize, max_size,
                                                 attribute_add)
        network_results['new_attribute_dict'] = new_attribute_dict
        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        # (SSDB attribute persistence removed)
        print 'end make network'
        print 'start PageRank'
        all_uid_pr, data_dict, sorted_uids = pagerank_rank(
            TOPK, date, windowsize, topic_pinyin_name)
        network_results['pagerank'] = {}
        network_results['pagerank']['all_uid_pr'] = all_uid_pr
        network_results['pagerank']['sorted_uids'] = sorted_uids
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'
        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be removed (cannot be falsy after the check above)
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data_dict, sorted_uids,
                                      new_attribute_dict)
        #gexf = json.dumps(gexf)
        print 'save gexf'
        long_gexf = save_gexf_results(topic_pinyin_name, date, windowsize,
                                      gexf, gexf_type)
        network_results['long_gexf'] = long_gexf
        print 'start fu_tr'
        maker_results, pusher_results = get_interval_count(
            topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        #db_date = date
        #_update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        network_results['maker_results'] = maker_results
        network_results['pusher_results'] = pusher_results
        index_name = index_event_analysis_results
        index_type = type_event_analysis_results
        network_results = json.dumps(network_results)
        id = topic
        try:
            # Update when the analysis document exists, otherwise create it.
            tem_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id,
                            body={'doc': {'network_results': network_results}})
        except Exception, e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id,
                           body={'network_results': network_results})
        print 'network_results save done!!'
        print 'all done!'