def main():
    # pick one pending topic and run the full ranking / graph pipeline for it
    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day
        date = ts2datetime(end_ts)
        # long windows use degree centrality, short ones use PageRank
        if windowsize > 7:
            degree_rank(TOPK, date, topic_id, windowsize)
        else:
            pagerank_rank(TOPK, date, topic_id, windowsize)
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_id, topicname, windowsize)
        save_gexf_results(topicname, date, windowsize, gexf)
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
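# --- illustrative sketch (not part of the original pipeline) ---
# Hypothetical helper showing the ranking dispatch used in main() above:
# windows longer than 7 days fall back to degree centrality, otherwise
# PageRank is used. TOPK, Day, degree_rank and pagerank_rank are assumed
# to be the same objects imported by this module; rank_topic itself is a
# name introduced only for this example.
def rank_topic(topic_id, date, start_ts, end_ts):
    windowsize = (end_ts - start_ts) / Day  # window length in whole days
    if windowsize > 7:
        return degree_rank(TOPK, date, topic_id, windowsize)
    return pagerank_rank(TOPK, date, topic_id, windowsize)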
def compute_network(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc()  # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic,
                                                             TopicStatus.start==start_ts,
                                                             TopicStatus.end==end_ts,
                                                             TopicStatus.module=='identify',
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0]  # only compute one topic at a time --- acts as a buffer, one topic per interval
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        # re-acquire the id: the id in TopicStatus is auto-incremented, so after the update it is no longer the original one
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        date = ts2datetime(end_ts)
    '''
    # changed: read the topic's pinyin name (i.e. the ES table name) directly
    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        topic_pinyin_name = topic
        # print 'start topic_name_transfer'
        # convert the Chinese topic name to pinyin, e.g. 奥运会 -> aoyunhui
        # topic_pinyin_name = weibo_TopicNameTransfer(topicname, start_ts, end_ts)
        # print topic_pinyin_name

        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts)  # used to compute the first user
        get_first_node(topic_pinyin_name, start_ts, end_ts, windowsize, date)
        print 'end compute first_nodes'

        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date, windowsize, max_size, attribute_add)
        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        #nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        #nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        # to be changed here: no longer using SSDB
        save_attribute_dict(new_attribute_dict, 'g')
        #save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, data = pagerank_rank(TOPK, date, windowsize, topic_pinyin_name)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be removed
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data)
        #gexf = json.dumps(gexf)
        print 'save gexf'
        save_gexf_results(topic_pinyin_name, date, windowsize, gexf, gexf_type)

        print 'start fu_tr'
        get_interval_count(topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        db_date = date
        _update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        print 'all done!'
def main(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc()  # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic,
                                                             TopicStatus.start==start_ts,
                                                             TopicStatus.end==end_ts,
                                                             TopicStatus.module=='identify',
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0]  # only compute one topic at a time --- acts as a buffer, one topic per interval
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        # re-acquire the id: the id in TopicStatus is auto-incremented, so after the update it is no longer the original one
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        date = ts2datetime(end_ts)

        print 'start topic2xapianid'
        topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
        print 'topic_xapian_id:', topic_xapian_id

        print 'start compute first_nodes'
        start_date = ts2datetime(start_ts)  # used to compute the first user
        get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
        print 'end compute first_nodes'

        # print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(topicname, date, windowsize, topic_xapian_id, max_size, attribute_add)
        print 'write gexf file'
        real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        save_attribute_dict(new_attribute_dict, 'g')
        save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(TOPK, date, topic_id, windowsize, topicname, real_topic_id)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:  # to be removed
            gexf = ''
        else:
            gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize, all_uid_pr, data, ds_all_uid_pr, ds_data, real_topic_id)
        print 'save gexf'
        save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
        save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)

        print 'start fu_tr'
        get_interval_count(topicname, date, windowsize, topic_xapian_id)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
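# --- illustrative sketch (not part of the original pipeline) ---
# Hypothetical helper showing how one of the graphs written by main() above
# could be reloaded, assuming GRAPH_PATH and nx (networkx) are the same
# objects imported by this module and that the key naming scheme matches;
# load_topic_graph is a name introduced only for this example.
def load_topic_graph(real_topic_id, date, windowsize, suffix='_g_graph.gexf'):
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    return nx.read_gexf(str(GRAPH_PATH) + key + suffix)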
def compute_network(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc()  # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic,
                                                             TopicStatus.start==start_ts,
                                                             TopicStatus.end==end_ts,
                                                             TopicStatus.module=='identify',
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0]  # only compute one topic at a time --- acts as a buffer, one topic per interval
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        # re-acquire the id: the id in TopicStatus is auto-incremented, so after the update it is no longer the original one
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        date = ts2datetime(end_ts)
    '''
    # changed: read the topic's pinyin name (i.e. the ES table name) directly
    network_results = {}
    if True:
        print end_ts, type(end_ts)
        #topicname = topic
        date = ts2datetime(end_ts)
        windowsize = (end_ts - start_ts) / Day  # size of the time window
        topic_pinyin_name = topic
        # print 'start topic_name_transfer'
        # convert the Chinese topic name to pinyin, e.g. 奥运会 -> aoyunhui
        # topic_pinyin_name = weibo_TopicNameTransfer(topicname, start_ts, end_ts)
        # print topic_pinyin_name

        print 'start compute first_nodes'
        #start_date = ts2datetime(start_ts)  # used to compute the first user
        first_node_results = get_first_node(topic_pinyin_name, start_ts, end_ts, windowsize, date)
        print 'end compute first_nodes'
        network_results['first_node_results'] = first_node_results

        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict = make_network(topic_pinyin_name, date, windowsize, max_size, attribute_add)
        network_results['new_attribute_dict'] = new_attribute_dict
        print 'write gexf file'
        #real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        real_topic_id = topic_pinyin_name
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        #nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        #nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        # to be changed here: no longer using SSDB
        #save_attribute_dict(new_attribute_dict, 'g')
        #save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, data_dict, sorted_uids = pagerank_rank(TOPK, date, windowsize, topic_pinyin_name)
        network_results['pagerank'] = {}
        network_results['pagerank']['all_uid_pr'] = all_uid_pr
        network_results['pagerank']['sorted_uids'] = sorted_uids
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        #topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_pinyin_name:  # to be removed
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data_dict,
                                      sorted_uids, new_attribute_dict)
        #gexf = json.dumps(gexf)
        print 'save gexf'
        long_gexf = save_gexf_results(topic_pinyin_name, date, windowsize, gexf, gexf_type)
        network_results['long_gexf'] = long_gexf

        print 'start fu_tr'
        maker_results, pusher_results = get_interval_count(topic_pinyin_name, date, windowsize)
        print 'update_topic_end'
        #db_date = date
        #_update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
        network_results['maker_results'] = maker_results
        network_results['pusher_results'] = pusher_results

        # save the aggregated results to ES, one document per topic:
        # update the existing document if it exists, otherwise index a new one
        index_name = index_event_analysis_results
        index_type = type_event_analysis_results
        network_results = json.dumps(network_results)
        id = topic
        try:
            tem_exist = weibo_es.get(index=index_name, doc_type=index_type, id=id)['_source']
            weibo_es.update(index=index_name, doc_type=index_type, id=id,
                            body={'doc': {'network_results': network_results}})
        except Exception, e:
            weibo_es.index(index=index_name, doc_type=index_type, id=id,
                           body={'network_results': network_results})
        print 'network_results save done!!'
        print 'all done!'
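# --- illustrative sketch (not part of the original pipeline) ---
# Hypothetical helper showing how the 'network_results' document written by
# compute_network() above could be read back from ES. weibo_es, json,
# index_event_analysis_results and type_event_analysis_results are assumed
# to be the same objects imported by this module; the stored field is a JSON
# string, so it is decoded before returning.
def load_network_results(topic):
    try:
        source = weibo_es.get(index=index_event_analysis_results,
                              doc_type=type_event_analysis_results,
                              id=topic)['_source']
    except Exception:
        return None
    return json.loads(source.get('network_results', '{}'))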
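# --- illustrative usage sketch (not part of the original pipeline) ---
# Example invocation of compute_network() above, assuming start_ts/end_ts
# are UNIX timestamps and the topic is passed by its pinyin table name;
# the topic name and timestamps here are placeholders for a 7-day window.
if __name__ == '__main__':
    compute_network('aoyunhui', 1377950400, 1378555200)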