def test_fm_sgr_ranking():
    """Ranking smoke test: a BPR-trained FM must recover the true ordering."""
    import itertools
    w0, w, V, y, X = get_test_problem()
    X_train = X.copy()
    X_test = X.copy()
    # Build every (winner, loser) comparison pair from the target scores.
    pairs = list(itertools.combinations(range(len(y)), 2))
    compares = np.zeros((len(pairs), 2), dtype=np.float64)
    for row, (a, b) in enumerate(pairs):
        # The higher-scored sample goes in the first ("preferred") column.
        compares[row] = (a, b) if y[a] > y[b] else (b, a)
    print(compares)
    fm = bpr.FMRecommender(n_iter=2000, init_stdev=0.01, l2_reg_w=.5,
                           l2_reg_V=.5, rank=2, step_size=.002,
                           random_state=11)
    fm.fit(X_train, compares)
    y_pred = np.argsort(fm.predict(X_test))
    print(y)
    print(y_pred)
    print(np.argsort(y))
    # Predicted ranking must correlate perfectly with the ground truth.
    assert utils.kendall_tau(np.argsort(y), y_pred) == 1
def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, l2_reg_w=0.1, l2_reg_V=0.1, l2_reg=0, step_size=0.1):
    """Build the underlying BPR FM model and reset all dataset bookkeeping.

    Hyper-parameters are forwarded verbatim to ``bpr.FMRecommender``; the
    remaining attributes are placeholders filled in later by the data
    loading / training helpers.
    """
    self.fastFM = bpr.FMRecommender(n_iter=n_iter, init_stdev=init_stdev,
                                    rank=rank, random_state=random_state,
                                    l2_reg_w=l2_reg_w, l2_reg_V=l2_reg_V,
                                    l2_reg=l2_reg, step_size=step_size)
    # Data holders: nothing is loaded at construction time.
    for attr in ('URM', 'ICM', 'train_x', 'train_y',
                 'test_playlists', 'test_tracks', 'test_x'):
        setattr(self, attr, None)
    # Fixed dataset dimensions -- presumably hard-coded for one specific
    # dataset (TODO confirm against the data files).
    self.train_length = 222689
    self.non_zeros = 0
    self.n_playlists = 45649
    self.n_tracks = 100000
    self.n_attributes = 77040
def train_model(x_train, y_train, n_iter, init_stdev=0.1, rank=2, l2_reg_w=0.1, l2_reg_V=0.5):
    """Fit a BPR factorization machine on the given training data.

    Returns the fitted ``bpr.FMRecommender`` instance.
    """
    recommender = bpr.FMRecommender(n_iter=n_iter, init_stdev=init_stdev,
                                    rank=rank, l2_reg_w=l2_reg_w,
                                    l2_reg_V=l2_reg_V)
    recommender.fit(x_train, y_train)
    return recommender
def fastFMJob_bpr(data_path, params, N, vectorizer, train_sets, pairs, items):
    """Cross-validated mean nDCG for one BPR-FM hyper-parameter setting.

    Trains one model per fold (1..4) on the pre-built comparison pairs and
    averages the per-user nDCG over the fold's validation users.
    """
    fold_scores = []
    logging.info("Evaluando con params: {0}".format(params))
    for fold in range(1, 5):
        model = bpr.FMRecommender(n_iter=params['mi'],
                                  init_stdev=params['init_stdev'],
                                  rank=params['f'], random_state=123,
                                  l2_reg_w=params['l2_reg_w'],
                                  l2_reg_V=params['l2_reg_V'],
                                  l2_reg=params['l2_reg'],
                                  step_size=params['step_size'])
        model.fit(vectorizer.transform(train_sets[fold]), pairs[fold])
        # Shared "<N>.<fold>" suffix of the split file names.
        suffix = str(N) + '.' + str(fold)
        val_c = consumption(
            ratings_path='TwitterRatings/funkSVD/data/val/val_N' + suffix,
            rel_thresh=0, with_ratings=True)
        train_c = consumption(
            ratings_path='TwitterRatings/funkSVD/data/train/train_N' + suffix,
            rel_thresh=0, with_ratings=True)
        users_ndcgs = []
        for userId in val_c:
            val_data, y_va, _ = loadData_bpr('val/val_N' + suffix,
                                             data_path=data_path, test=True,
                                             userId_va=userId)
            X_va = vectorizer.transform(val_data)
            # Rank validation rows by descending predicted score.
            ranked = np.argsort(-model.predict(X_va))
            users_ndcgs.append(ndcg_bpr(preds=ranked, vectorizer=vectorizer,
                                        matrix=X_va,
                                        user_data=train_c[userId],
                                        user_val=val_c[userId], N=N))
        fold_ndcg = mean(users_ndcgs)
        logging.info("FM fold {0} nDCG: {1}. Solver: BPR".format(fold, fold_ndcg))
        fold_scores.append(fold_ndcg)
    return mean(fold_scores)
def fastFM_protocol_evaluation_bpr(data_path, params):
    """Train a BPR FM on the eval-train split and report ranking metrics.

    Fits one model with the supplied hyper-parameters, scores every item for
    each test user, cleans the recommendation lists, and appends
    nDCG/MAP/MRR/R-precision at N in {5, 10, 15, 20} to the protocol file.

    BUGFIX: the per-N truncation used ``recs.keys()[:N]``, which raises
    TypeError on Python 3 because ``dict.keys()`` returns a non-subscriptable
    view; the keys are now materialized with ``list(...)`` first. Also renames
    ``file`` (shadowed the builtin) and the unreadable local ``l``.
    """
    solr = "http://localhost:8983/solr/grrecsys"
    # Fit the vectorizer on ALL data so train/test rows share one feature space.
    all_data, y_all, items = loadData_bpr("eval_all_N20.data", data_path=data_path)
    v = DictVectorizer()
    X_all = v.fit_transform(all_data)
    test_c = consumption(ratings_path='TwitterRatings/funkSVD/data/test/test_N20.data',
                         rel_thresh=0, with_ratings=True)
    train_c = consumption(ratings_path='TwitterRatings/funkSVD/data/eval_train_N20.data',
                          rel_thresh=0, with_ratings=False)
    # NOTE(review): loaded but never read in this function -- confirm it is needed.
    all_c = consumption(ratings_path='TwitterRatings/funkSVD/data/eval_all_N20.data',
                        rel_thresh=0, with_ratings=True)
    Ns = [5, 10, 15, 20]
    MRRs = dict((N, []) for N in Ns)
    nDCGs = dict((N, []) for N in Ns)
    APs = dict((N, []) for N in Ns)
    Rprecs = dict((N, []) for N in Ns)
    train_data, y_tr, _ = loadData_bpr('eval_train_N20.data', data_path=data_path)
    X_tr = v.transform(train_data)
    fm = bpr.FMRecommender(n_iter=params['mi'], init_stdev=params['init_stdev'],
                           rank=params['f'], random_state=123,
                           l2_reg_w=params['l2_reg_w'], l2_reg_V=params['l2_reg_V'],
                           l2_reg=params['l2_reg'], step_size=params['step_size'])
    pairs_tr = make_pairs(X_tr, y_tr)
    fm.fit(X_tr, pairs_tr)
    for p, userId in enumerate(test_c):
        logging.info("#u: {0}/{1}".format(p, len(test_c)))
        # Score every catalog item for this user.
        user_rows = [{'user_id': str(userId), 'item_id': str(itemId)}
                     for itemId in items]
        X_te = v.transform(user_rows)
        preds = fm.predict(X_te)
        preds = np.argsort(-preds)  # row indices, best score first
        book_recs = []
        for i, pred_row in enumerate(preds):
            print("i={}".format(i))
            # Recover the item id from the vectorizer's "item_id=<id>" feature name.
            feat_names = v.inverse_transform(X_te[pred_row, :])[0].keys()
            pred_itemId = [s for s in feat_names if "item" in s][0].split('=')[-1]
            book_recs.append(pred_itemId)
            if i == 100:
                break  # no necesitamos más de 100
        book_recs = remove_consumed(user_consumption=train_c[userId],
                                    rec_list=book_recs)
        book_recs = recs_cleaner(solr=solr, consumpt=train_c[userId],
                                 recs=book_recs[:100])
        recs = user_ranked_recs(user_recs=book_recs, user_consumpt=test_c[userId])
        for N in Ns:
            # list(...) is required: dict_keys cannot be sliced on Python 3.
            mini_recs = dict((k, recs[k]) for k in list(recs.keys())[:N])
            MRRs[N].append(MRR(recs=mini_recs, rel_thresh=1))
            nDCGs[N].append(nDCG(recs=mini_recs, alt_form=False, rel_thresh=False))
            APs[N].append(AP_at_N(n=N, recs=recs, rel_thresh=1))
            Rprecs[N].append(R_precision(n_relevants=N, recs=mini_recs))
    for N in Ns:
        with open('TwitterRatings/fastFM/bpr/clean/protocol.txt', 'a') as fout:
            fout.write("N=%s, nDCG=%s, MAP=%s, MRR=%s, R-precision=%s\n" %
                       (N, mean(nDCGs[N]), mean(APs[N]), mean(MRRs[N]), mean(Rprecs[N])))
np.random.randint(0, nItem, sap)] = 1 beginY = sap * i Y_train[beginY:beginY + sap, 0] = begin Y_train[beginY:beginY + sap, 1] = np.arange(begin + 1, begin + sap + 1) if i % 1000 == 0: print("Constructing %d/%d" % (i, nTrain)) X_train = hstack([X_ItemTrain, X_TargetTrain]).tocsc() # Build Model print('Start training') embed_len = 20 fm = bpr.FMRecommender(n_iter=5000000, init_stdev=0.1, rank=embed_len, l2_reg_w=0, l2_reg_V=0, l2_reg=0, step_size=0.1) fm.fit(X_train, Y_train) print('Start evaluation') X_Target = eye(nItem + 1, dtype=np.float32) atK = np.arange(5, 51, 5) ranklist = [] nTest = len(testLast) recs = [] for i, row in enumerate(testLast): # X_User = lil_matrix((nItem + 1, nUser + 1), dtype=np.float32)