Beispiel #1
0
 def func(*args, **kw):
     """Call the wrapped ``method`` and hand its result to ``save_rank_results``.

     The rank range and the rank field are the first and second
     underscore-separated pieces of ``method.__name__``.  The second and
     third positional arguments are forwarded to ``save_rank_results``
     untouched, so at least three positional arguments must be supplied.

     Returns whatever ``method`` returned.
     """
     result = method(*args, **kw)
     # Split once; index 0 is the rank range, index 1 the rank field.
     name_parts = method.__name__.split('_')
     save_rank_results(result, name_parts[0], name_parts[1], args[1], args[2])
     return result
Beispiel #2
0
    def func(*args, **kw):
        """Call the wrapped ``method`` and save its result under the 'whole' range.

        The rank field is the part of ``method.__name__`` before the first
        underscore.  The second and third positional arguments are forwarded
        to ``save_rank_results`` untouched, so at least three positional
        arguments must be supplied.

        Returns whatever ``method`` returned.
        """
        result = method(*args, **kw)
        field = method.__name__.split('_')[0]
        # The save is not guarded: any exception it raises reaches the caller.
        save_rank_results(result, 'whole', field, args[1], args[2])
        return result
Beispiel #3
0
def pagerank_rank(top_n, date, window_size, topicname):
    data = []

    #tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id)
    #print '888888888888888888888888888888'
    print date, window_size, topicname
    tmp_file, N= prepare_data_for_pr( date, window_size, topicname)
    top_n = N
    #ds_top_n = ds_N
    print 'page_rank start'
    if not tmp_file:
        return data

    input_tmp_path = tmp_file.name
    #ds_input_tmp_path = ds_tmp_file.name
    #print input_tmp_path

    iter_count = PAGERANK_ITER_MAX
    print 'pagerank_source_network'
    sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path, top_n) # 排序的uid的序列
    #print 'pagerank_direct_superior_network'
    #ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path, ds_top_n)
    print 'top_n:', top_n
    #print 'len(sorted_uid):', len(ds_sorted_uids)
    #print 'len(ds_all_uid_pr):', len(ds_all_uid_pr)
    #print 'ds_top_n:', ds_top_n
    #topicname = acquire_topic_name(topic_id)
    print 'topicname:', topicname
    if not topicname:
        return data
    print 'save_rank_results'
    data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, all_uid_pr)
    #ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, ds_all_uid_pr)

    return all_uid_pr, data
Beispiel #4
0
def pagerank_rank(top_n, date, topic_id, window_size, topicname, real_topic_id):
    data = []

    tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id)
    top_n = N
    ds_top_n = ds_N
    print 'page_rank start'
    if not tmp_file or not ds_tmp_file:
        return data

    input_tmp_path = tmp_file.name
    ds_input_tmp_path = ds_tmp_file.name
    print input_tmp_path, ds_input_tmp_path

    iter_count = PAGERANK_ITER_MAX
    print 'pagerank_source_network'
    sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path, top_n) # 排序的uid的序列
    print 'pagerank_direct_superior_network'
    ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path, ds_top_n)
    print 'top_n:', top_n
    print 'len(sorted_uid):', len(ds_sorted_uids)
    print 'len(ds_all_uid_pr):', len(ds_all_uid_pr)
    print 'ds_top_n:', ds_top_n
    topicname = acquire_topic_name(topic_id)
    print 'topicname:', topicname
    if not topicname:
        return data
    print 'save_rank_results'
    data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, all_uid_pr)
    ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, ds_all_uid_pr)

    return all_uid_pr, ds_all_uid_pr, data, ds_data
Beispiel #5
0
def pagerank_rank(top_n, date, topic_id, window_size):
    """Run a PageRank job for a topic and save the resulting 'area' ranking.

    Args:
        top_n: Forwarded to ``pagerank`` as its last argument.
        date: Converted with ``datetime2ts`` for the job id and forwarded to
            ``save_rank_results``.
        topic_id: Forwarded to ``prepare_data_for_pr``, ``generate_job_id``
            and ``save_rank_results``.
        window_size: Forwarded to ``prepare_data_for_pr``,
            ``generate_job_id`` and ``save_rank_results``.

    Returns:
        An empty list when ``prepare_data_for_pr`` yields a falsy value,
        otherwise whatever ``save_rank_results`` returns.
    """
    network_file = prepare_data_for_pr(topic_id, date, window_size)
    if not network_file:
        return []

    network_path = network_file.name
    job_id = generate_job_id(datetime2ts(date), window_size, topic_id)
    max_iterations = PAGERANK_ITER_MAX
    ranked_uids = pagerank(job_id, max_iterations, network_path, top_n)

    return save_rank_results(ranked_uids, 'area', 'pagerank', date, window_size,
                             topic_id=topic_id)
Beispiel #6
0
def degree_rank(top_n, date, topic_id, window_size):
    """Save the ``top_n`` keys with the largest values as an 'area' degree ranking.

    Args:
        top_n: Maximum number of entries to keep.
        date: Forwarded to ``prepare_data_for_degree`` and
            ``save_rank_results``.
        topic_id: Forwarded to ``prepare_data_for_degree`` and
            ``save_rank_results``.
        window_size: Forwarded to ``prepare_data_for_degree`` and
            ``save_rank_results``.

    Returns:
        An empty list when ``prepare_data_for_degree`` yields a falsy value,
        otherwise whatever ``save_rank_results`` returns.
    """
    degree_by_uid = prepare_data_for_degree(topic_id, date, window_size)
    if not degree_by_uid:
        return []

    # Order the (uid, value) pairs by value, largest first.
    by_value_desc = sorted(degree_by_uid.iteritems(),
                           key=operator.itemgetter(1), reverse=True)

    top_uids = []
    for position, (uid, _) in enumerate(by_value_desc):
        if position >= top_n:
            break
        top_uids.append(uid)

    return save_rank_results(top_uids, 'area', 'degree', date, window_size,
                             topic_id=topic_id)