def main():
    topics = _topic_not_calc()
    if topics:
        # one pending topic is processed per run
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day
        date = ts2datetime(end_ts)
        # long windows use degree ranking, short ones PageRank
        if windowsize > 7:
            degree_rank(TOPK, date, topic_id, windowsize)
        else:
            pagerank_rank(TOPK, date, topic_id, windowsize)
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_id, topicname, windowsize)
        save_gexf_results(topicname, date, windowsize, gexf)
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
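# --- Hedged sketch: the time helpers the drivers in this file assume. Day and
# ts2datetime come from the repo's utils in the real code; these minimal
# definitions only illustrate the contract they must satisfy.
import time

Day = 24 * 3600  # seconds per day

def ts2datetime(ts):
    # unix timestamp -> 'YYYY-MM-DD' date key used in result/graph file names
    return time.strftime('%Y-%m-%d', time.localtime(ts))

# Example: an 8-day window gives windowsize = (end_ts - start_ts) / Day == 8,
# which takes the degree_rank branch above since windowsize > 7.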
def main(topic, start_ts, end_ts):
    # fetch this topic's pending (status == -1) record from topic_status
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic == topic,
                                                             TopicStatus.start == start_ts,
                                                             TopicStatus.end == end_ts,
                                                             TopicStatus.module == 'i_news',
                                                             TopicStatus.status == -1).first()
    if topic_status_info:
        topic_id = topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        topicname = topic_status_info.topic
        db_date = topic_status_info.db_date
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date, 'i_news')
        print 'update_status'
        # MongoDB collections for this topic
        print 'get_dynamic_mongo'
        news_collection, comment_collection = get_dynamic_mongo(topicname, start_ts, end_ts)
        # early participants
        print 'start compute early_join'
        early_join(topicname, start_ts, end_ts, news_collection)
        # trend initiators
        print 'start compute trend_user'
        trend_user(topicname, start_ts, end_ts, news_collection, comment_collection)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date, 'i_news')
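# --- Hedged sketch: the TopicStatus transitions the jobs in this file rely on.
# Status codes assumed from the queries and comments here: -1 = pending,
# 0 = computing, 1 = completed. The real helpers live in the model layer;
# this signature and query are illustrative.
def _set_topic_status(topicname, start_ts, end_ts, db_date, module, status):
    item = db.session.query(TopicStatus).filter(TopicStatus.topic == topicname,
                                                TopicStatus.start == start_ts,
                                                TopicStatus.end == end_ts,
                                                TopicStatus.module == module).first()
    if item:
        item.status = status
        item.db_date = db_date  # assumed: the helpers also refresh db_date
        db.session.commit()

# _update_topic_status2Computing(...) would map to _set_topic_status(..., 0)
# _update_topic_status2Completed(...) would map to _set_topic_status(..., 1)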
def main():
    topics = _topic_not_calc()
    if topics:
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), topicname.encode('utf-8'), 'start'
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        result = calculate(topicname, start_ts, end_ts)
        print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), topicname.encode('utf-8'), result
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
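# --- Hedged sketch: _topic_not_calc, polled by the main() drivers above. Per
# the docstrings elsewhere in this file it returns pending TopicStatus rows
# shaped like {id, module, status, topic, start, end, db_date}; the query
# below is a guess at the real implementation.
def _topic_not_calc(status=-1, module='identify'):
    return db.session.query(TopicStatus).filter(TopicStatus.status == status,
                                                TopicStatus.module == module).all()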
def _check_run_notcustomize_topic(during=Fifteenminutes):
    '''Periodically compute count, keywords and key weibos for topics with
    status 0 in the non-customized topic table.
    '''
    topics = _topic_not_calc()
    if topics:
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        # update status to 0 (computing)
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print topicname.encode('utf-8'), ' run realtime job from %s to %s ' % (start_ts, end_ts)
        sentimentRealTimeTopic(topicname, start_ts, end_ts + 24 * 3600)
        # update status to 1 (completed)
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
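# --- Hedged sketch: a driver loop for the fifteen-minute polling job above.
# The real scheduler (cron or a long-running daemon) is not part of this file;
# Fifteenminutes is assumed to be a seconds constant.
import time

Fifteenminutes = 15 * 60

if __name__ == '__main__':
    while True:
        _check_run_notcustomize_topic(during=Fifteenminutes)
        time.sleep(Fifteenminutes)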
def compute_network(topic, start_ts, end_ts):
    # Changed: the topic's pinyin name (also its ES table name) is now passed
    # in directly, e.g. 奥运会 -> aoyunhui, so the old TopicStatus lookup and
    # weibo_TopicNameTransfer call are no longer needed.
    print end_ts, type(end_ts)
    date = ts2datetime(end_ts)
    windowsize = (end_ts - start_ts) / Day  # time span in days
    topic_pinyin_name = topic

    print 'start compute first_nodes'
    get_first_node(topic_pinyin_name, start_ts, end_ts, windowsize, date)
    print 'end compute first_nodes'

    print 'start make network'
    max_size = MAX_SIZE
    attribute_add = True
    g, gg, new_attribute_dict = make_network(topic_pinyin_name, date, windowsize,
                                             max_size, attribute_add)

    print 'write gexf file'
    real_topic_id = topic_pinyin_name
    if not real_topic_id:
        print 'the topic not exist'
        return None
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
    nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
    nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
    # attributes are saved directly instead of going through SSDB
    save_attribute_dict(new_attribute_dict, 'g')
    print 'end make network'

    print 'start PageRank'
    all_uid_pr, data = pagerank_rank(TOPK, date, windowsize, topic_pinyin_name)
    print 'len(all_uid_pr):', len(all_uid_pr)
    print 'end PageRank'

    print 'start make network graph'
    windowsize = int(windowsize)
    gexf = make_network_graph(date, topic_pinyin_name, windowsize, all_uid_pr, data)
    print 'save gexf'
    save_gexf_results(topic_pinyin_name, date, windowsize, gexf, gexf_type)

    print 'start fu_tr'
    get_interval_count(topic_pinyin_name, date, windowsize)

    print 'update_topic_end'
    db_date = date
    _update_topic_status2Completed(topic_pinyin_name, start_ts, end_ts, db_date)
    print 'all done!'
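# --- Hedged sketch: what the PageRank step consumes and produces. The real
# pagerank_rank reads the repo's own network store; this illustrative version
# simply re-reads the gexf file written above and ranks users with networkx.
import networkx as nx

def pagerank_rank_sketch(topk, gexf_path):
    g = nx.read_gexf(gexf_path)      # user interaction graph saved earlier
    pr = nx.pagerank(g, alpha=0.85)  # {uid: score}
    ranked = sorted(pr.iteritems(), key=lambda x: x[1], reverse=True)
    return ranked[:topk]             # top-k most influential users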
def main(topic, start_ts, end_ts):
    # _topic_not_calc() returns rows shaped like
    # {id, module, status, topic, start, end, db_date}
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic == topic,
                                                             TopicStatus.start == start_ts,
                                                             TopicStatus.end == end_ts,
                                                             TopicStatus.module == 'identify',
                                                             TopicStatus.status == -1).first()
    if topic_status_info:
        # only one topic is computed per run, as a buffer between jobs
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        # re-acquire the id: TopicStatus ids auto-increment, so after the
        # status update the row no longer has its original id
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day  # time span in days
        date = ts2datetime(end_ts)
        print 'start topic2xapianid'
        topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
        print 'topic_xapian_id:', topic_xapian_id
        print 'start compute first_nodes'
        start_date = ts2datetime(start_ts)  # used to compute the first user
        get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
        print 'end compute first_nodes'
        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(
            topicname, date, windowsize, topic_xapian_id, max_size, attribute_add)
        print 'write gexf file'
        real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        save_attribute_dict(new_attribute_dict, 'g')
        save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'
        print 'start PageRank'
        all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(
            TOPK, date, topic_id, windowsize, topicname, real_topic_id)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'
        print 'start make network graph'
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:  # to be removed
            gexf = ds_gexf = ''
        else:
            gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize,
                                               all_uid_pr, data, ds_all_uid_pr,
                                               ds_data, real_topic_id)
        print 'save gexf'
        save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
        save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)
        print 'start fu_tr'
        get_interval_count(topicname, date, windowsize, topic_xapian_id)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
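# --- Illustrative note: the on-disk naming scheme used above. With assumed
# values GRAPH_PATH = '/data/graph/' and key = '<real_topic_id>_<date>_<windowsize>',
# e.g. key = '123_2013-09-11_10', the four graphs land at:
#   /data/graph/123_2013-09-11_10_g_graph.gexf
#   /data/graph/123_2013-09-11_10_gg_graph.gexf
#   /data/graph/123_2013-09-11_10_ds_dg_graph.gexf
#   /data/graph/123_2013-09-11_10_ds_udg_graph.gexf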