예제 #1
0
def test_fm_sgr_ranking():
    """Check that the BPR recommender recovers the ordering of the targets.

    Every unordered pair of sample indices is turned into a comparison row
    whose first column is the index with the strictly larger target value
    and whose second column is the other index.  The model is fitted on
    those comparisons and the argsort of its predictions must agree
    perfectly (Kendall tau of 1) with the argsort of the true targets.
    """
    import itertools

    w0, w, V, y, X = get_test_problem()
    X_train = X.copy()
    X_test = X.copy()

    index_pairs = list(itertools.combinations(range(len(y)), 2))
    compares = np.zeros((len(index_pairs), 2), dtype=np.float64)
    for row, (first, second) in enumerate(index_pairs):
        # Ties fall into the else branch, i.e. the later index goes first.
        if y[first] > y[second]:
            compares[row] = (first, second)
        else:
            compares[row] = (second, first)
    print(compares)

    fm = bpr.FMRecommender(n_iter=2000,
                           init_stdev=0.01, l2_reg_w=.5, l2_reg_V=.5, rank=2,
                           step_size=.002, random_state=11)
    fm.fit(X_train, compares)

    predicted_order = np.argsort(fm.predict(X_test))
    true_order = np.argsort(y)
    print(y)
    print(predicted_order)
    print(true_order)
    assert utils.kendall_tau(true_order, predicted_order) == 1
예제 #2
0
    def __init__(self, n_iter=100,
                 init_stdev=0.1,
                 rank=8,
                 random_state=123,
                 l2_reg_w=0.1,
                 l2_reg_V=0.1,
                 l2_reg=0,
                 step_size=0.1):
        """Build the wrapped ``bpr.FMRecommender`` and reset the data holders.

        All keyword arguments are forwarded unchanged, under the same
        names, to ``bpr.FMRecommender``.
        """
        fm_options = {
            'n_iter': n_iter,
            'init_stdev': init_stdev,
            'rank': rank,
            'random_state': random_state,
            'l2_reg_w': l2_reg_w,
            'l2_reg_V': l2_reg_V,
            'l2_reg': l2_reg,
            'step_size': step_size,
        }
        self.fastFM = bpr.FMRecommender(**fm_options)

        # Data holders start empty; nothing in this method populates them.
        self.URM = self.ICM = None
        self.train_x = self.train_y = None
        self.test_playlists = self.test_tracks = self.test_x = None

        # Hard-coded sizes -- presumably the dimensions of one specific
        # dataset; TODO confirm against the code that loads the data.
        self.train_length = 222689
        self.non_zeros = 0
        self.n_playlists = 45649
        self.n_tracks = 100000
        self.n_attributes = 77040
예제 #3
0
def train_model(x_train,
                y_train,
                n_iter,
                init_stdev=0.1,
                rank=2,
                l2_reg_w=0.1,
                l2_reg_V=0.5):
    """Fit a ``bpr.FMRecommender`` on the given data and return it.

    ``n_iter``, ``init_stdev``, ``rank``, ``l2_reg_w`` and ``l2_reg_V`` are
    forwarded to the recommender's constructor; ``x_train`` and ``y_train``
    are passed straight to its ``fit`` method.
    """
    hyper_params = dict(n_iter=n_iter,
                        init_stdev=init_stdev,
                        rank=rank,
                        l2_reg_w=l2_reg_w,
                        l2_reg_V=l2_reg_V)
    model = bpr.FMRecommender(**hyper_params)
    model.fit(x_train, y_train)
    return model
예제 #4
0
def fastFMJob_bpr(data_path, params, N, vectorizer, train_sets, pairs, items):
    """Return the mean nDCG of a BPR factorization machine over folds 1-4.

    For each fold a fresh ``bpr.FMRecommender`` is built from ``params``
    (keys ``mi``, ``init_stdev``, ``f``, ``l2_reg_w``, ``l2_reg_V``,
    ``l2_reg``, ``step_size``), fitted on ``train_sets[fold]`` /
    ``pairs[fold]``, and scored per validation user with ``ndcg_bpr``.
    The per-fold score is the mean over users; the return value is the
    mean over folds.  ``items`` is accepted but not used here.
    """
    logging.info("Evaluando con params: {0}".format(params))
    ndcgs = []
    for fold in range(1, 5):
        fold_tag = str(N) + '.' + str(fold)

        fm = bpr.FMRecommender(n_iter=params['mi'],
                               init_stdev=params['init_stdev'],
                               rank=params['f'],
                               random_state=123,
                               l2_reg_w=params['l2_reg_w'],
                               l2_reg_V=params['l2_reg_V'],
                               l2_reg=params['l2_reg'],
                               step_size=params['step_size'])
        X_tr = vectorizer.transform(train_sets[fold])
        fm.fit(X_tr, pairs[fold])

        val_path = 'TwitterRatings/funkSVD/data/val/val_N' + fold_tag
        val_c = consumption(ratings_path=val_path,
                            rel_thresh=0,
                            with_ratings=True)
        train_path = 'TwitterRatings/funkSVD/data/train/train_N' + fold_tag
        train_c = consumption(ratings_path=train_path,
                              rel_thresh=0,
                              with_ratings=True)

        users_ndcgs = []
        for userId in val_c:
            # Only the feature rows are needed; the other two returns are
            # discarded.
            val_data, _, _ = loadData_bpr('val/val_N' + fold_tag,
                                          data_path=data_path,
                                          test=True,
                                          userId_va=userId)
            X_va = vectorizer.transform(val_data)
            # Row indices ordered from highest to lowest predicted score.
            ranking = np.argsort(-fm.predict(X_va))
            user_score = ndcg_bpr(preds=ranking,
                                  vectorizer=vectorizer,
                                  matrix=X_va,
                                  user_data=train_c[userId],
                                  user_val=val_c[userId],
                                  N=N)
            users_ndcgs.append(user_score)

        fold_ndcg = mean(users_ndcgs)
        logging.info("FM fold {0} nDCG: {1}. Solver: BPR".format(fold, fold_ndcg))
        ndcgs.append(fold_ndcg)
    return mean(ndcgs)
예제 #5
0
def fastFM_protocol_evaluation_bpr(data_path, params):
    """Train a BPR factorization machine and append test metrics to a report.

    The vectorizer is fitted on ``eval_all_N20.data``, the model is trained
    on ``eval_train_N20.data`` with pairs produced by ``make_pairs``, and
    for every user in the test consumption all items are scored.  The top
    predictions are filtered with ``remove_consumed`` and ``recs_cleaner``,
    ranked with ``user_ranked_recs`` and evaluated at N = 5, 10, 15, 20.
    One line per N (mean nDCG, MAP, MRR, R-precision over users) is
    appended to ``TwitterRatings/fastFM/bpr/clean/protocol.txt``.

    Args:
        data_path: forwarded to ``loadData_bpr`` as ``data_path``.
        params: mapping with keys ``mi``, ``init_stdev``, ``f``,
            ``l2_reg_w``, ``l2_reg_V``, ``l2_reg`` and ``step_size`` used
            to configure ``bpr.FMRecommender``.

    Returns:
        None.  The results are written to the report file.
    """
    # userId = '33120270'
    solr = "http://localhost:8983/solr/grrecsys"
    cutoffs = [5, 10, 15, 20]

    all_data, y_all, items = loadData_bpr("eval_all_N20.data",
                                          data_path=data_path)
    v = DictVectorizer()
    # Called for its side effect of fitting ``v``; the matrix is not reused.
    X_all = v.fit_transform(all_data)

    test_c = consumption(
        ratings_path='TwitterRatings/funkSVD/data/test/test_N20.data',
        rel_thresh=0,
        with_ratings=True)
    train_c = consumption(
        ratings_path='TwitterRatings/funkSVD/data/eval_train_N20.data',
        rel_thresh=0,
        with_ratings=False)
    # NOTE(review): all_c is never read below; kept in case the call has
    # side effects -- confirm and drop if not.
    all_c = consumption(
        ratings_path='TwitterRatings/funkSVD/data/eval_all_N20.data',
        rel_thresh=0,
        with_ratings=True)
    MRRs = {N: [] for N in cutoffs}
    nDCGs = {N: [] for N in cutoffs}
    APs = {N: [] for N in cutoffs}
    Rprecs = {N: [] for N in cutoffs}

    train_data, y_tr, _ = loadData_bpr('eval_train_N20.data',
                                       data_path=data_path)
    X_tr = v.transform(train_data)
    fm = bpr.FMRecommender(n_iter=params['mi'],
                           init_stdev=params['init_stdev'],
                           rank=params['f'],
                           random_state=123,
                           l2_reg_w=params['l2_reg_w'],
                           l2_reg_V=params['l2_reg_V'],
                           l2_reg=params['l2_reg'],
                           step_size=params['step_size'])
    pairs_tr = make_pairs(X_tr, y_tr)
    fm.fit(X_tr, pairs_tr)

    n_users = len(test_c)
    for p, userId in enumerate(test_c):
        logging.info("#u: {0}/{1}".format(p, n_users))
        user_rows = [{
            'user_id': str(userId),
            'item_id': str(itemId)
        } for itemId in items]
        X_te = v.transform(user_rows)
        preds = fm.predict(X_te)
        preds = np.argsort(-preds)  # sort predictions, best score first

        book_recs = []
        # The original loop broke after index 100, i.e. it kept at most
        # 101 candidates; the slice preserves that.
        for i, pred_row in enumerate(preds[:101]):
            print("i={}".format(i))
            feature_names = v.inverse_transform(X_te[pred_row, :])[0].keys()
            # Feature names look like "<field>=<value>"; take the value of
            # the first one mentioning "item".
            pred_itemId = [s for s in feature_names
                           if "item" in s][0].split('=')[-1]
            book_recs.append(pred_itemId)

        book_recs = remove_consumed(user_consumption=train_c[userId],
                                    rec_list=book_recs)
        book_recs = recs_cleaner(solr=solr,
                                 consumpt=train_c[userId],
                                 recs=book_recs[:100])
        recs = user_ranked_recs(user_recs=book_recs,
                                user_consumpt=test_c[userId])

        # dict views cannot be sliced on Python 3, so materialise the keys
        # once and slice the list for each cutoff.
        rec_keys = list(recs.keys())
        for N in cutoffs:
            mini_recs = {k: recs[k] for k in rec_keys[:N]}
            MRRs[N].append(MRR(recs=mini_recs, rel_thresh=1))
            nDCGs[N].append(
                nDCG(recs=mini_recs, alt_form=False, rel_thresh=False))
            # AP receives the full list together with the cutoff n.
            APs[N].append(AP_at_N(n=N, recs=recs, rel_thresh=1))
            Rprecs[N].append(R_precision(n_relevants=N, recs=mini_recs))

    # Open the report once and append one line per cutoff.
    with open('TwitterRatings/fastFM/bpr/clean/protocol.txt', 'a') as report:
        for N in cutoffs:
            report.write(
                "N=%s, nDCG=%s, MAP=%s, MRR=%s, R-precision=%s\n" %
                (N, mean(nDCGs[N]), mean(APs[N]), mean(MRRs[N]),
                 mean(Rprecs[N])))
예제 #6
0
                  np.random.randint(0, nItem, sap)] = 1
    beginY = sap * i
    Y_train[beginY:beginY + sap, 0] = begin
    Y_train[beginY:beginY + sap, 1] = np.arange(begin + 1, begin + sap + 1)
    if i % 1000 == 0:
        print("Constructing %d/%d" % (i, nTrain))

# Concatenate the item and target feature blocks column-wise and convert the
# result to CSC format (presumably scipy.sparse.hstack, given the .tocsc()
# call -- TODO confirm against the imports).
X_train = hstack([X_ItemTrain, X_TargetTrain]).tocsc()

# Build Model
print('Start training')
embed_len = 20  # passed as `rank` to the recommender below
# All three L2 regularisation terms are set to 0 for this run.
fm = bpr.FMRecommender(n_iter=5000000,
                       init_stdev=0.1,
                       rank=embed_len,
                       l2_reg_w=0,
                       l2_reg_V=0,
                       l2_reg=0,
                       step_size=0.1)
# Y_train is filled by the loop above with two columns per row; presumably
# each row is a pair of X_train row indices to compare -- TODO confirm.
fm.fit(X_train, Y_train)

print('Start evaluation')

# Identity matrix with nItem + 1 rows and columns, stored as float32.
X_Target = eye(nItem + 1, dtype=np.float32)

atK = np.arange(5, 51, 5)  # the values 5, 10, ..., 50
ranklist = []
nTest = len(testLast)
recs = []
for i, row in enumerate(testLast):
    # X_User = lil_matrix((nItem + 1, nUser + 1), dtype=np.float32)