def main(k=100, user_sim=None, data_file=r'./data/movielens_data.pkl'):
    """Run the UserCF trend-prediction pipeline and print/return its score.

    Args:
        k: Forwarded to ``UserCF`` as its ``k`` keyword (presumably the
            neighbourhood size -- confirm against ``UserCF``).
        user_sim: Optional precomputed user similarity. When ``None`` it is
            computed with ``cf.similarity()``; otherwise it is assigned to
            ``cf.user_sim`` and the computation is skipped.
        data_file: Path of the pickle holding a ``(train, test)`` pair. The
            same path is also handed to ``utils.deal_train``.

    Returns:
        The tuple ``(corr_score, recommend_score, user_sim)``.
    """
    # Read-only mode inside a context manager: the handle is always closed
    # and no write permission on the data file is required.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at trusted files.
    with open(data_file, 'rb') as f:
        train, raw_test = pickle.load(f)

    # The resulting item sets are not consumed in this function; the calls are
    # kept in case stat_train_test_item has side effects -- TODO confirm and
    # drop them if it is pure.
    stat_train_test_item(train)
    stat_train_test_item(raw_test)

    trainset, test, item_len = utils.deal_train(data_file)
    cf = UserCF(trainset, test, item_len, k=k)
    degreedistrev = degree_item_map(cf)
    Ndegree_items, Nitemset = getNdegree_items(degreedistrev, N=10)

    start = time.time()
    print('calculate similarity.')
    if user_sim is None:
        user_sim = cf.similarity()
    else:
        # Reuse the caller's similarity instead of recomputing it.
        cf.user_sim = user_sim
    print('calculate similarity finished. cost {:.2f}s'.format(time.time() - start))

    start = time.time()
    recommend_score = cf.get_score(Nitemset)
    print('calculate recommender score. cost {:.2f}s'.format(time.time() - start))

    user_degree = cf.cal_user_degree()
    item_score = get_item_score(user_degree, recommend_score, item_len)
    test_item_degree = get_test_degree(test)

    print('start trend predict.')
    corr_score = trend_predict(item_score, Ndegree_items, test_item_degree,
                               method='pearson')
    print(corr_score)
    return corr_score, recommend_score, user_sim
def main(our_lambda=1, data_file=r'./data/movielens_data.pkl',
         recommend_score_file=r'./temp/cf_score.pkl', recommend_score=None):
    """Run the CF trend-prediction pipeline, caching the CF scores on disk.

    Args:
        our_lambda: Forwarded to ``get_item_score`` as ``our_lambda``.
        data_file: Path of the pickle holding a ``(train, test)`` pair. The
            same path is also handed to ``utils.deal_train``.
        recommend_score_file: Pickle path used as a cache for the result of
            ``cf.cf_train()``.
        recommend_score: Optional precomputed recommendation scores. When
            given, they are used directly and neither the cache file nor
            ``cf.cf_train()`` is touched. When ``None``, the scores are read
            from ``recommend_score_file`` if it exists, otherwise trained and
            written there.

    Returns:
        The tuple ``(corr_score, recommend_score)``.
    """
    # Read-only mode inside a context manager: the handle is always closed
    # and no write permission on the data file is required.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at trusted files (this applies to the cache file too).
    with open(data_file, 'rb') as f:
        train, raw_test = pickle.load(f)
    train_itemset = stat_train_test_item(train)
    test_itemset = stat_train_test_item(raw_test)

    trainset, test, item_len = utils.deal_train(data_file)
    cf = UserCF(trainset, test, item_len)
    degreedistrev = degree_item_map(cf)

    print('start cf train.')
    # Previously the recommend_score argument was overwritten unconditionally;
    # a caller-supplied value now takes precedence over the cache/training.
    if recommend_score is None:
        if os.path.exists(recommend_score_file):
            # CF has already been trained: reuse the cached scores.
            with open(recommend_score_file, 'rb') as f:
                recommend_score = pickle.load(f)
        else:
            recommend_score = cf.cf_train()
            with open(recommend_score_file, 'wb') as f:
                pickle.dump(recommend_score, f)

    user_degree = cf.cal_user_degree()
    item_score = get_item_score(user_degree, recommend_score, item_len,
                                our_lambda=our_lambda)
    Ndegree_items = getNdegree_items(degreedistrev, N=10)
    test_item_degree = get_test_degree(test)

    print('start trend predict.')
    corr_score = trend_predict(item_score, Ndegree_items, test_item_degree,
                               train_itemset, test_itemset, method='pearson')
    print(corr_score)
    return corr_score, recommend_score
def degree_item_map(cf):
    """Build a degree -> item-id inverted index.

    Used to gather degree statistics for the items of the training set.

    Args:
        cf: Object whose ``cal_item_degree()`` returns a mapping of
            item id -> degree.

    Returns:
        A dict mapping each degree to the list of item ids having that
        degree, in the order ``cal_item_degree()`` yields them.
    """
    items_by_degree = {}
    for item_id, deg in cf.cal_item_degree().items():
        # setdefault creates the bucket the first time a degree is seen.
        items_by_degree.setdefault(deg, []).append(item_id)
    return items_by_degree


if __name__ == "__main__":
    train_set, test, item_len = utils.deal_train(r'./data/movielens_data.pkl')
    cf = UserCF(train_set, item_len)
    cf.cal_user_degree()
    cf.similarity()

    # Floating-point stepping generator: yields x, x + jump, ... while < y.
    def frange(x, y, jump):
        current = x
        while current < y:
            yield current
            current += jump

    rec_score = cf.get_item_score()
    degreedistrev = degree_item_map(cf)