def trendsetter_rank(TOPK, date, topic_id, windowsize, topic, real_topic_id): data = None key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) ds_g = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf') tmp_file, ds_N = prepare_data_for_ts(topic_id, topic, date, windowsize, ds_g) top_N = ds_N # 选择保存TS前top_N的数据 print 'trendsetter rank start' if not tmp_file: return data input_tmp_path = tmp_file.name print 'input_tmp_path:', input_tmp_path job_id = generate_job_id( datetime2ts(date), windowsize, topic_id, ts_netwrok_type ) # pagerank中g与ds_g分别对应的network_type分别为1,2.Trendsetter_rank为3 print 'job_id:', job_id iter_count = Trendsetter_iter_max # ???迭代的最大次数---PageRank的迭代次数设置为1 print 'trendsetter rank direct_superior_network' ds_sorted_uids, ds_all_uid_tr = ts_rank(job_id, iter_count, input_tmp_path, top_N) print 'save trendsetter rank result' print 'len(ds_sorted_uids):', len(ds_sorted_uids) print 'len(ds_all_uid_tr):', len(ds_all_uid_tr) data = save_tr_results(topic, date, windowsize, ds_sorted_uids, ds_all_uid_tr) return ds_all_uid_tr, data # 标识保存是否成功
def test_hadoop_job_id(self):
    """generate_job_id builds 'YYYY_MM_DD_<window>_<topic>' from its inputs."""
    window_size = 1
    topic_id = 1
    timestamp = datetime2ts('2013-03-01')
    built_id = generate_job_id(timestamp, window_size, topic_id)
    self.assertEqual(built_id, '2013_03_01_1_1', 'wrong job id')
def pagerank_rank(top_n, date, topic_id, window_size):
    """Run PageRank for a topic over a time window and persist the ranking.

    Returns the saved result data, or [] when input preparation fails or the
    topic name cannot be resolved.
    """
    data = []
    tmp_file = prepare_data_for_pr(topic_id, date, window_size)
    if not tmp_file:
        return data
    input_tmp_path = tmp_file.name
    job_id = generate_job_id(datetime2ts(date), window_size, topic_id)
    iter_count = PAGERANK_ITER_MAX
    sorted_uids = pagerank(job_id, iter_count, input_tmp_path, top_n)
    # BUG FIX: the ranking was only printed and the function fell off the end
    # returning None — inconsistent with the [] early return above and with
    # the complete sibling definition of this function, which saves and
    # returns the results. Persist and return them the same way.
    topicname = acquire_topic_name(topic_id)
    if not topicname:
        return data
    data = save_rank_results(sorted_uids, 'topic', 'pagerank', date, window_size, topicname)
    return data
def pagerank_rank(top_n, date, topic_id, window_size):
    """Compute a topic's PageRank over a time window and save the ranking.

    Returns the saved result data, or [] when the input file cannot be
    prepared or the topic name cannot be resolved.
    """
    no_result = []
    source_file = prepare_data_for_pr(topic_id, date, window_size)
    if not source_file:
        return no_result
    hadoop_job = generate_job_id(datetime2ts(date), window_size, topic_id)
    ranked_uids = pagerank(hadoop_job, PAGERANK_ITER_MAX, source_file.name, top_n)
    topic_name = acquire_topic_name(topic_id)
    if not topic_name:
        return no_result
    return save_rank_results(ranked_uids, 'topic', 'pagerank', date, window_size, topic_name)
def trendsetter_rank(TOPK, date, topic_id, windowsize, topic, real_topic_id): data = None key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) ds_g = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf') tmp_file, ds_N = prepare_data_for_ts(topic_id, topic, date, windowsize, ds_g) top_N = ds_N # 选择保存TS前top_N的数据 print 'trendsetter rank start' if not tmp_file: return data input_tmp_path = tmp_file.name print 'input_tmp_path:', input_tmp_path job_id = generate_job_id(datetime2ts(date), windowsize, topic_id, ts_netwrok_type) # pagerank中g与ds_g分别对应的network_type分别为1,2.Trendsetter_rank为3 print 'job_id:', job_id iter_count = Trendsetter_iter_max # ???迭代的最大次数---PageRank的迭代次数设置为1 print 'trendsetter rank direct_superior_network' ds_sorted_uids, ds_all_uid_tr = ts_rank(job_id, iter_count, input_tmp_path, top_N) print 'save trendsetter rank result' print 'len(ds_sorted_uids):', len(ds_sorted_uids) print 'len(ds_all_uid_tr):', len(ds_all_uid_tr) data = save_tr_results(topic, date, windowsize, ds_sorted_uids, ds_all_uid_tr) return ds_all_uid_tr, data # 标识保存是否成功