def exp():
    """Experiment driver: run mass diffusion on the Netflix 5k subset and
    report how well the aggregated item scores predict test-set item
    popularity (Pearson correlation of the degree-binned trend).

    NOTE(review): a second `exp(mylambda)` defined later in this file
    shadows this zero-argument version at import time.
    """
    data_path = r'./data/netflix5k_result.txt'
    train_df, test_df = readData(data_path, split=',', train_ratio=0.7)
    train_df = train_df.rename(columns={0: 'uid', 1: 'iid'})
    test_df = test_df.rename(columns={0: 'uid', 1: 'iid'})
    train, _, udegree, idegree = process_data(train_df, test_df)

    # Accumulate per-user diffusion scores over all items.
    # User ids start at 1 here (for 0-based ids the loop would start at 0).
    total_item_score = np.zeros(train.shape[1], dtype=np.float64)
    for uid in tqdm(range(1, train.shape[0]), ascii=True):
        total_item_score += massDiffisionForOne(train, uid, udegree, idegree, K=1000)

    # Degree -> item-set distribution from the training side.
    Ndegree_items = getNdegree_items(idegree, N=20)
    # Item-degree (popularity) distribution observed in the test set.
    test_item_degree = test_df.iid.value_counts()

    corr_score = trend_predict(total_item_score, Ndegree_items,
                               test_item_degree, method='pearson')
    print(corr_score)
def exp(mylambda):
    """Experiment driver: degree-weighted mass diffusion on the MovieLens
    subset, with per-user diffusion scores cached to disk.

    Each user's diffusion score vector is weighted by
    ``udegree[user] ** mylambda`` before being accumulated, then the
    aggregate is correlated (Pearson) against the test-set item-degree
    distribution via ``trend_predict``.

    Args:
        mylambda: exponent applied to each user's degree when weighting
            that user's contribution (also forwarded to
            ``massDiffisionForOne`` — presumably the same weighting knob;
            verify against that helper).

    Returns:
        The correlation score produced by ``trend_predict``.

    NOTE(review): this redefines the zero-argument ``exp()`` declared
    earlier in the file; only this version survives at import time.
    """
    filepath = r'./data/movielen5000_7533_link864581_day0_1096.txt'
    score_filepath = 'temp/md_ml_noknn.pkl'

    train_data, test_data = readData(filepath, split=',', train_ratio=0.7)
    train_data = train_data.rename(columns={0: 'uid', 1: 'iid'})
    test_data = test_data.rename(columns={0: 'uid', 1: 'iid'})
    train, _, udegree, idegree = process_data(train_data, test_data)

    # Degree -> item-set distribution info (training side).
    degreedistrev = degree_item_map(idegree)
    Ndegree_items = getNdegree_items(degreedistrev, N=10)
    # Item-degree (popularity) distribution observed in the test set.
    test_item_degree = test_data.iid.value_counts()

    # User ids start at 1 here (for 0-based ids the loop would start at 0).
    total_item_score = np.zeros(train.shape[1], dtype=np.float64)
    if os.path.exists(score_filepath):
        # Reuse cached per-user score vectors (dict keyed by user id).
        # Fix: close the file deterministically with a context manager.
        with open(score_filepath, 'rb') as f:
            item_scores = pickle.load(f)
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            deg = udegree.get(user, 0.0)
            if deg == 0.0:
                continue  # user absent from the training graph
            total_item_score += item_scores[user] * pow(deg, mylambda)
    else:
        item_scores = {}
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            deg = udegree.get(user, 0.0)  # fix: single lookup, safe default
            if deg == 0.0:
                continue  # user absent from the training graph
            one_item_score = massDiffisionForOne(train, user, udegree,
                                                 idegree, mylambda)
            total_item_score += one_item_score * pow(deg, mylambda)
            item_scores[user] = one_item_score
        # Persist the per-user scores for future runs; close deterministically.
        with open(score_filepath, 'wb') as f:
            pickle.dump(item_scores, f)

    corr_score = trend_predict(total_item_score, Ndegree_items,
                               test_item_degree, method='pearson')
    print(corr_score)
    return corr_score