def func(*args, **kw):
    """Call the wrapped ``method`` and save its result before returning it.

    ``method`` is taken from the enclosing scope (presumably the function
    being decorated -- confirm against the surrounding decorator).  Its
    ``__name__`` is split on ``'_'``: the first piece is used as the rank
    range and the second as the rank field.

    Args:
        *args: Forwarded unchanged to ``method``.  ``args[1]`` and
            ``args[2]`` are additionally handed to ``save_rank_results``
            (presumably date and window size -- confirm with callers).
        **kw: Forwarded unchanged to ``method``.

    Returns:
        Whatever ``method`` returned.

    Raises:
        IndexError: If the method name has no underscore, or fewer than
            three positional arguments were supplied.
    """
    result = method(*args, **kw)
    name_parts = method.__name__.split('_')
    range_label = name_parts[0]
    field_label = name_parts[1]
    save_rank_results(result, range_label, field_label, args[1], args[2])
    return result
def func(*args, **kw):
    """Call the wrapped ``method`` and save its result under the 'whole' range.

    ``method`` is taken from the enclosing scope (presumably the function
    being decorated -- confirm against the surrounding decorator).  The text
    before the first ``'_'`` in its ``__name__`` is used as the rank field.

    Args:
        *args: Forwarded unchanged to ``method``.  ``args[1]`` and
            ``args[2]`` are additionally handed to ``save_rank_results``
            (presumably date and window size -- confirm with callers).
        **kw: Forwarded unchanged to ``method``.

    Returns:
        Whatever ``method`` returned.
    """
    result = method(*args, **kw)
    field_label = method.__name__.split('_')[0]
    # The save is not wrapped in any error handling, so a failure inside
    # save_rank_results propagates to the caller.
    save_rank_results(result, 'whole', field_label, args[1], args[2])
    return result
def pagerank_rank(top_n, date, window_size, topicname): data = [] #tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id) #print '888888888888888888888888888888' print date, window_size, topicname tmp_file, N= prepare_data_for_pr( date, window_size, topicname) top_n = N #ds_top_n = ds_N print 'page_rank start' if not tmp_file: return data input_tmp_path = tmp_file.name #ds_input_tmp_path = ds_tmp_file.name #print input_tmp_path iter_count = PAGERANK_ITER_MAX print 'pagerank_source_network' sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path, top_n) # 排序的uid的序列 #print 'pagerank_direct_superior_network' #ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path, ds_top_n) print 'top_n:', top_n #print 'len(sorted_uid):', len(ds_sorted_uids) #print 'len(ds_all_uid_pr):', len(ds_all_uid_pr) #print 'ds_top_n:', ds_top_n #topicname = acquire_topic_name(topic_id) print 'topicname:', topicname if not topicname: return data print 'save_rank_results' data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, all_uid_pr) #ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, ds_all_uid_pr) return all_uid_pr, data
def pagerank_rank(top_n, date, topic_id, window_size, topicname, real_topic_id): data = [] tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id) top_n = N ds_top_n = ds_N print 'page_rank start' if not tmp_file or not ds_tmp_file: return data input_tmp_path = tmp_file.name ds_input_tmp_path = ds_tmp_file.name print input_tmp_path, ds_input_tmp_path iter_count = PAGERANK_ITER_MAX print 'pagerank_source_network' sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path, top_n) # 排序的uid的序列 print 'pagerank_direct_superior_network' ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path, ds_top_n) print 'top_n:', top_n print 'len(sorted_uid):', len(ds_sorted_uids) print 'len(ds_all_uid_pr):', len(ds_all_uid_pr) print 'ds_top_n:', ds_top_n topicname = acquire_topic_name(topic_id) print 'topicname:', topicname if not topicname: return data print 'save_rank_results' data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, all_uid_pr) ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, ds_all_uid_pr) return all_uid_pr, ds_all_uid_pr, data, ds_data
def pagerank_rank(top_n, date, topic_id, window_size):
    """Run a PageRank job for one topic and save the resulting ranking.

    Args:
        top_n: Forwarded to ``pagerank`` as its last argument.
        date: Converted with ``datetime2ts`` for the job id and forwarded to
            ``prepare_data_for_pr`` and ``save_rank_results``.
        topic_id: Topic identifier used for data preparation, the job id and
            the save call.
        window_size: Forwarded alongside ``date``.

    Returns:
        The return value of ``save_rank_results``, or an empty list when
        ``prepare_data_for_pr`` yields nothing to rank.
    """
    network_file = prepare_data_for_pr(topic_id, date, window_size)
    if not network_file:
        return []

    network_path = network_file.name
    job = generate_job_id(datetime2ts(date), window_size, topic_id)
    max_iterations = PAGERANK_ITER_MAX
    ranked_uids = pagerank(job, max_iterations, network_path, top_n)
    return save_rank_results(ranked_uids, 'area', 'pagerank', date, window_size,
                             topic_id=topic_id)
def degree_rank(top_n, date, topic_id, window_size):
    """Save the ``top_n`` uids with the highest degree for one topic.

    Args:
        top_n: Maximum number of uids to keep.
        date: Forwarded to ``prepare_data_for_degree`` and
            ``save_rank_results``.
        topic_id: Topic identifier used for data preparation and the save
            call.
        window_size: Forwarded alongside ``date``.

    Returns:
        The return value of ``save_rank_results``, or an empty list when
        ``prepare_data_for_degree`` yields nothing to rank.
    """
    degree = prepare_data_for_degree(topic_id, date, window_size)
    if not degree:
        return []

    # Order the (uid, degree) pairs from highest to lowest degree.
    by_degree = sorted(degree.iteritems(), key=operator.itemgetter(1), reverse=True)

    leading_uids = []
    for position, (uid, _degree) in enumerate(by_degree):
        if position >= top_n:
            break
        leading_uids.append(uid)

    return save_rank_results(leading_uids, 'area', 'degree', date, window_size,
                             topic_id=topic_id)