Example #1
0
def trendsetter_rank(TOPK, date, topic_id, windowsize, topic, real_topic_id):
    data = None
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    ds_g = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
    tmp_file, ds_N = prepare_data_for_ts(topic_id, topic, date, windowsize,
                                         ds_g)
    top_N = ds_N  # 选择保存TS前top_N的数据
    print 'trendsetter rank start'
    if not tmp_file:
        return data

    input_tmp_path = tmp_file.name
    print 'input_tmp_path:', input_tmp_path
    job_id = generate_job_id(
        datetime2ts(date), windowsize, topic_id, ts_netwrok_type
    )  # pagerank中g与ds_g分别对应的network_type分别为1,2.Trendsetter_rank为3
    print 'job_id:', job_id

    iter_count = Trendsetter_iter_max  # ???迭代的最大次数---PageRank的迭代次数设置为1
    print 'trendsetter rank direct_superior_network'
    ds_sorted_uids, ds_all_uid_tr = ts_rank(job_id, iter_count, input_tmp_path,
                                            top_N)
    print 'save trendsetter rank result'
    print 'len(ds_sorted_uids):', len(ds_sorted_uids)
    print 'len(ds_all_uid_tr):', len(ds_all_uid_tr)
    data = save_tr_results(topic, date, windowsize, ds_sorted_uids,
                           ds_all_uid_tr)

    return ds_all_uid_tr, data  # 标识保存是否成功
Example #2
0
 def test_hadoop_job_id(self):
     """Job id for 2013-03-01, window 1, topic 1 must be '2013_03_01_1_1'."""
     timestamp = datetime2ts('2013-03-01')
     generated = generate_job_id(timestamp, 1, 1)
     self.assertEqual(generated, '2013_03_01_1_1', 'wrong job id')
Example #3
0
def pagerank_rank(top_n, date, topic_id, window_size):
    data = []

    tmp_file = prepare_data_for_pr(topic_id, date, window_size)

    if not tmp_file:
        return data

    input_tmp_path = tmp_file.name
    
    
    job_id = generate_job_id(datetime2ts(date), window_size, topic_id)
    iter_count = PAGERANK_ITER_MAX

    sorted_uids = pagerank(job_id, iter_count, input_tmp_path, top_n)

    print sorted_uids
Example #4
0
def pagerank_rank(top_n, date, topic_id, window_size):
    """Compute PageRank for a topic window and persist the ranking.

    Prepares the input file, runs the ``pagerank`` job, resolves the topic
    name and saves the ranked uids. Returns the save-result payload, or
    an empty list when either the input data or the topic name cannot be
    obtained.
    """
    failure = []

    tmp_file = prepare_data_for_pr(topic_id, date, window_size)
    if not tmp_file:
        return failure

    job_id = generate_job_id(datetime2ts(date), window_size, topic_id)
    sorted_uids = pagerank(job_id, PAGERANK_ITER_MAX, tmp_file.name, top_n)

    topicname = acquire_topic_name(topic_id)
    if not topicname:
        return failure

    return save_rank_results(sorted_uids, 'topic', 'pagerank', date,
                             window_size, topicname)
Example #5
0
def trendsetter_rank(TOPK, date, topic_id, windowsize, topic, real_topic_id):
    data = None
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    ds_g = nx.read_gexf(str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
    tmp_file, ds_N = prepare_data_for_ts(topic_id, topic, date, windowsize, ds_g)
    top_N = ds_N # 选择保存TS前top_N的数据
    print 'trendsetter rank start'
    if not tmp_file:
        return data

    input_tmp_path = tmp_file.name
    print 'input_tmp_path:', input_tmp_path
    job_id = generate_job_id(datetime2ts(date), windowsize, topic_id, ts_netwrok_type) # pagerank中g与ds_g分别对应的network_type分别为1,2.Trendsetter_rank为3
    print 'job_id:', job_id

    iter_count = Trendsetter_iter_max # ???迭代的最大次数---PageRank的迭代次数设置为1
    print 'trendsetter rank direct_superior_network'
    ds_sorted_uids, ds_all_uid_tr = ts_rank(job_id, iter_count, input_tmp_path, top_N)
    print 'save trendsetter rank result'
    print 'len(ds_sorted_uids):', len(ds_sorted_uids)
    print 'len(ds_all_uid_tr):', len(ds_all_uid_tr)
    data = save_tr_results(topic, date, windowsize, ds_sorted_uids, ds_all_uid_tr)

    return ds_all_uid_tr, data # 标识保存是否成功