Example #1
0
def test_movielens_accuracy_sample_weights_grad_accumulation():
    """Zero-weighted interactions must not accumulate gradient updates.

    Rows belonging to odd-numbered users receive a sample weight of
    zero (only even user rows keep weight 1), so after one epoch the
    odd users' adagrad accumulators must still be at their initial
    value of 1.0, while the even users' must have moved.
    """

    sample_weights = train.copy()
    sample_weights.data = np.ones(train.getnnz(), dtype=np.float32)
    # Keep weight 1 only for even user rows; odd rows are zeroed out.
    sample_weights.data *= sample_weights.row % 2 == 0

    user_ids = np.arange(train.shape[0])
    even_rows = user_ids % 2 == 0
    odd_rows = user_ids % 2 != 0

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=sample_weights, epochs=1)

        # Odd users were weighted zero: accumulators untouched.
        assert np.allclose(model.user_embedding_gradients[odd_rows], 1.0)
        assert np.allclose(model.user_bias_gradients[odd_rows], 1.0)

        # Even users received updates: accumulators moved away from 1.
        assert not np.allclose(model.user_embedding_gradients[even_rows], 1.0)
        assert not np.allclose(model.user_bias_gradients[even_rows], 1.0)
Example #2
0
def test_auc_score():
    """The AUC metric should agree with a reference implementation."""

    users, items = 10, 100
    train, test = _generate_data(users, items)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    actual = evaluation.auc_score(model, test, num_threads=2)
    expected = np.array(_auc(model, test))

    assert actual.shape == expected.shape
    assert np.abs(actual.mean() - expected.mean()) < 0.01
    # Users with no test interactions are dropped unless preserve_rows.
    assert len(actual) == (test.getnnz(axis=1) > 0).sum()
    preserved = evaluation.auc_score(model, train, preserve_rows=True)
    assert len(preserved) == test.shape[0]

    # Excluding training interactions from the ranking.
    actual = evaluation.auc_score(model, test,
                                  train_interactions=train,
                                  num_threads=2)
    expected = np.array(_auc(model, test, train))
    assert np.abs(actual.mean() - expected.mean()) < 0.01
Example #3
0
def test_input_dtypes():
    """Fitting and predicting should accept all common numeric dtypes."""

    no_users, no_items = (10, 100)
    no_features = 20

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        interactions = sp.coo_matrix((no_users, no_items), dtype=dtype)
        user_features = sp.coo_matrix((no_users, no_features), dtype=dtype)
        item_features = sp.coo_matrix((no_items, no_features), dtype=dtype)

        model = LightFM()
        model.fit_partial(interactions,
                          user_features=user_features,
                          item_features=item_features)

        uids = np.random.randint(0, no_users, 10).astype(np.int32)
        pids = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(uids, pids,
                      user_features=user_features,
                      item_features=item_features)
Example #4
0
def test_matrix_types():
    """Every sparse matrix format / dtype combination should work."""

    no_users, no_items = (10, 100)
    no_features = 20

    for mattype in (sp.coo_matrix, sp.lil_matrix,
                    sp.csr_matrix, sp.csc_matrix):
        for dtype in (np.int32, np.int64, np.float32, np.float64):
            interactions = mattype((no_users, no_items), dtype=dtype)
            user_features = mattype((no_users, no_features), dtype=dtype)
            item_features = mattype((no_items, no_features), dtype=dtype)

            model = LightFM()
            model.fit_partial(interactions,
                              user_features=user_features,
                              item_features=item_features)

            uids = np.random.randint(0, no_users, 10).astype(np.int32)
            pids = np.random.randint(0, no_items, 10).astype(np.int32)
            model.predict(uids, pids,
                          user_features=user_features,
                          item_features=item_features)
Example #5
0
def test_warp_kos_precision():
    """warp-kos should reach the expected precision/AUC thresholds."""

    # The k-OS loss only handles positive interactions, so strip the
    # negative examples before training.
    positives_only = train.copy()
    positives_only.data[positives_only.data < 1] = 0
    positives_only = positives_only.tocsr()
    positives_only.eliminate_zeros()

    model = LightFM(learning_rate=0.05, k=5,
                    loss='warp-kos', random_state=SEED)
    model.fit_partial(positives_only, epochs=10)

    (train_precision, test_precision,
     full_train_auc, full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.44
    assert test_precision > 0.06
    assert full_train_auc > 0.9
    assert full_test_auc > 0.87
Example #6
0
def test_feature_inference_fails():
    """Predicting with feature ids beyond those seen at fit time raises.

    Feature inference at predict time must reject ids larger than the
    number of features supplied to ``fit``.
    """

    no_users, no_items = (10, 100)
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    user_features = sp.csr_matrix((no_users, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items, no_features), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions,
                      user_features=user_features,
                      item_features=item_features)

    out_of_range = np.array([no_features], dtype=np.int32)
    with pytest.raises(AssertionError):
        model.predict(out_of_range, out_of_range)
Example #7
0
def test_warp_kos_precision():
    """warp-kos loss should train successfully on positive-only data."""

    # The k-OS loss is undefined for negative examples; drop them.
    positives = train.copy()
    positives.data[positives.data < 1] = 0
    positives = positives.tocsr()
    positives.eliminate_zeros()

    model = LightFM(learning_rate=0.05, k=5, loss='warp-kos')
    model.fit_partial(positives, epochs=10)

    train_precision = precision_at_k(model, positives, 10)
    test_precision = precision_at_k(model, test, 10)
    full_train_auc = full_auc(model, positives)
    full_test_auc = full_auc(model, test)

    assert train_precision > 0.44
    assert test_precision > 0.06
    assert full_train_auc > 0.9
    assert full_test_auc > 0.87
Example #8
0
def test_movielens_accuracy_sample_weights():
    """Scaling weights down and the learning rate up by the same factor
    should result in roughly the same accuracy.
    """

    scale = 1e-01
    weights = train.copy()
    weights.data = np.ones(train.getnnz(),
                           dtype=np.float32) * scale

    for (loss, exp_score) in (('logistic', 0.74),
                              ('bpr', 0.84),
                              ('warp', 0.89)):
        model = LightFM(loss=loss, random_state=SEED)
        # BUG FIX: the original evaluated `model.learning_rate * 1.0 / scale`
        # and discarded the result, so the compensating rescale never
        # happened. The scaled rate must be assigned back to the model.
        model.learning_rate *= 1.0 / scale

        model.fit_partial(train,
                          sample_weight=weights,
                          epochs=10)

        (train_precision,
         test_precision,
         full_train_auc,
         full_test_auc) = _get_metrics(model,
                                       train,
                                       test)

        assert full_train_auc > exp_score
Example #9
0
def test_warp_precision_adadelta_multithreaded():
    """WARP with the adadelta schedule should train well on 4 threads."""

    model = LightFM(learning_schedule='adadelta',
                    rho=0.95, epsilon=0.000001, loss='warp')
    model.fit_partial(train, epochs=10, num_threads=4)

    precision_train = precision_at_k(model, train, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    assert precision_train > 0.45
    assert precision_test > 0.07
    assert auc_train > 0.94
    assert auc_test > 0.9
Example #10
0
def test_training_schedules():
    """Verify initial and post-training optimizer state per schedule.

    Adagrad starts its gradient accumulators at 1 with zero momentum;
    adadelta starts both at 0 and accumulates momentum once training
    actually runs.
    """

    model = LightFM(no_components=10,
                    learning_schedule='adagrad',
                    random_state=SEED)
    model.fit_partial(train, epochs=0)

    # Before any training: adagrad accumulators at 1, momentum at 0.
    for side in ('item', 'user'):
        assert (getattr(model, side + '_embedding_gradients') == 1).all()
        assert (getattr(model, side + '_embedding_momentum') == 0).all()
        assert (getattr(model, side + '_bias_gradients') == 1).all()
        assert (getattr(model, side + '_bias_momentum') == 0).all()

    model.fit_partial(train, epochs=1)

    # After one epoch: gradients have grown, momentum stays unused.
    for side in ('item', 'user'):
        assert (getattr(model, side + '_embedding_gradients') > 1).any()
        assert (getattr(model, side + '_embedding_momentum') == 0).all()
        assert (getattr(model, side + '_bias_gradients') > 1).any()
        assert (getattr(model, side + '_bias_momentum') == 0).all()

    model = LightFM(no_components=10,
                    learning_schedule='adadelta',
                    random_state=SEED)
    model.fit_partial(train, epochs=0)

    # Before any training: adadelta starts everything at zero.
    for side in ('item', 'user'):
        assert (getattr(model, side + '_embedding_gradients') == 0).all()
        assert (getattr(model, side + '_embedding_momentum') == 0).all()
        assert (getattr(model, side + '_bias_gradients') == 0).all()
        assert (getattr(model, side + '_bias_momentum') == 0).all()

    model.fit_partial(train, epochs=1)

    # After one epoch: both accumulators have picked up mass.
    for side in ('item', 'user'):
        assert (getattr(model, side + '_embedding_gradients') > 0).any()
        assert (getattr(model, side + '_embedding_momentum') > 0).any()
        assert (getattr(model, side + '_bias_gradients') > 0).any()
        assert (getattr(model, side + '_bias_momentum') > 0).any()
Example #11
0
def test_empty_matrix():
    """Fitting on an all-zero interaction matrix should not raise."""

    no_users, no_items = (10, 100)
    empty = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    LightFM().fit_partial(empty)
Example #12
0
def test_return_self():
    """fit and fit_partial should return the model itself for chaining."""

    no_users, no_items = (10, 100)
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    assert model.fit_partial(interactions) is model
    assert model.fit(interactions) is model
Example #13
0
def fit_lightfm_model():
    """Fit the LightFM model and rank test coupons for every user.

    Returns
    -------
    d_user_pred : dict
        key = user, value = predicted ranking of coupons in list_coupon
    list_user : array-like
        list of user IDs
    list_coupon : array-like
        list of test coupons
    """

    # Load data
    Mui_train = spi.mmread("../Data/Data_translated/biclass_user_item_train_mtrx.mtx")
    uf = spi.mmread("../Data/Data_translated/user_feat_mtrx.mtx")
    itrf = spi.mmread("../Data/Data_translated/train_item_feat_mtrx.mtx")
    itef = spi.mmread("../Data/Data_translated/test_item_feat_mtrx.mtx")

    # Print shapes as a check.
    # BUG FIX: Python 2 print statement replaced with the print() function.
    print("user_features shape: %s,\nitem train features shape: %s,\n"
          "item test features shape: %s" % (uf.shape, itrf.shape, itef.shape))

    # Load test coupon and user lists
    cplte = pd.read_csv("../Data/Data_translated/coupon_list_test_translated.csv")
    ulist = pd.read_csv("../Data/Data_translated/user_list_translated.csv")
    list_coupon = cplte["COUPON_ID_hash"].values
    list_user = ulist["USER_ID_hash"].values

    # Build model
    no_comp, lr, ep = 10, 0.01, 5
    model = LightFM(no_components=no_comp, learning_rate=lr, loss='warp')
    model.fit_partial(Mui_train, user_features=uf, item_features=itrf,
                      epochs=ep, num_threads=4, verbose=True)

    test = sps.csr_matrix((len(list_user), len(list_coupon)), dtype=np.int32)
    no_users, no_items = test.shape
    pid_array = np.arange(no_items, dtype=np.int32)

    # Create and initialise dict to store predictions
    d_user_pred = {}
    for user in list_user:
        d_user_pred[user] = []

    # Loop over users and compute predictions
    for user_id, row in enumerate(test):
        sys.stdout.write("\rProcessing user " + str(user_id) + "/ " + str(len(list_user)))
        sys.stdout.flush()
        uid_array = np.empty(no_items, dtype=np.int32)
        uid_array.fill(user_id)
        predictions = model.predict(uid_array, pid_array, user_features=uf,
                                    item_features=itef, num_threads=4)
        user = str(list_user[user_id])
        # Apply MinMaxScaler for blending later on
        MMS = MinMaxScaler()
        pred = MMS.fit_transform(np.ravel(predictions))
        d_user_pred[user] = pred

    # Pickle the predictions for future use
    d_pred = {"list_coupon": list_coupon.tolist(), "d_user_pred": d_user_pred}
    # BUG FIX: pickle.HIGHEST_PROTOCOL writes binary data; the file must be
    # opened in binary mode ("wb"), not text mode ("w").
    with open("../Data/Data_translated/d_pred_lightfm.pickle", "wb") as f:
        pickle.dump(d_pred, f, protocol=pickle.HIGHEST_PROTOCOL)

    return d_user_pred, list_user, list_coupon
Example #14
0
def test_state_reset():
    """Calling fit (unlike fit_partial) must reset accumulated state."""

    model = LightFM()

    model.fit(train, epochs=1)
    # One epoch moves the adagrad accumulators above their initial 1.0.
    assert np.mean(model.user_embedding_gradients) > 1.0

    # Refitting from scratch restores the initial accumulator state.
    model.fit(train, epochs=0)
    assert np.all(model.user_embedding_gradients == 1.0)
Example #15
0
def test_not_enough_features_fails():
    """Feature matrices with too few rows should be rejected at fit."""

    no_users, no_items = (10, 100)
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    # One row short on each side.
    user_features = sp.csr_matrix((no_users - 1, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items - 1, no_features), dtype=np.int32)

    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(interactions,
                          user_features=user_features,
                          item_features=item_features)
Example #16
0
def test_warp_stability():
    """WARP training should not diverge at higher learning rates."""

    for lrate in (0.05, 0.1, 0.5):
        model = LightFM(learning_rate=lrate, loss='warp')
        model.fit_partial(train, epochs=10)

        # Divergence would manifest as NaNs in the embeddings.
        assert not np.isnan(model.user_embeddings).any()
        assert not np.isnan(model.item_embeddings).any()
Example #17
0
def test_predict():
    """Scalar and array user ids must produce identical predictions."""

    no_users, no_items = (10, 100)
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions)

    for uid in range(no_users):
        from_array = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        from_scalar = model.predict(uid, np.arange(no_items))
        assert np.allclose(from_array, from_scalar)
Example #18
0
def test_movielens_accuracy_fit():
    """fit() should reach the expected train/test AUC on MovieLens."""

    model = LightFM(random_state=SEED)
    model.fit(train, epochs=10)

    predictions_train = model.predict(train.row, train.col)
    predictions_test = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, predictions_train) > 0.84
    assert roc_auc_score(test.data, predictions_test) > 0.76
Example #19
0
def test_predict(num_threads=2):
    """All predict code paths should produce identical scores.

    Checks scalar vs array user ids, serial vs parallel prediction,
    and prediction with representation precomputation disabled.
    """

    no_users, no_items = (10, 100)
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions)

    for uid in range(no_users):
        uid_repeated = np.repeat(uid, no_items)
        baseline = model.predict(uid_repeated, np.arange(no_items))

        # A scalar user id must match the repeated-array form.
        from_scalar = model.predict(uid, np.arange(no_items))
        assert np.allclose(baseline, from_scalar)

        # Parallel prediction must match the serial result.
        parallel = model.predict(uid_repeated, np.arange(no_items),
                                 num_threads=num_threads)
        assert np.allclose(parallel, baseline)

        # Skipping precomputation must not change the scores.
        no_precompute = model.predict(uid_repeated, np.arange(no_items),
                                      num_threads=num_threads,
                                      precompute_representations=False)
        assert np.allclose(parallel, no_precompute)

        serial_no_precompute = model.predict(uid_repeated,
                                             np.arange(no_items),
                                             num_threads=1,
                                             precompute_representations=False)
        assert np.allclose(parallel, serial_no_precompute)
Example #20
0
def test_movielens_accuracy_resume():
    """Resumed single-epoch fit_partial calls should train to target AUC."""

    model = LightFM(random_state=SEED)
    for _ in range(10):
        model.fit_partial(train, epochs=1)

    predictions_train = model.predict(train.row, train.col)
    predictions_test = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, predictions_train) > 0.84
    assert roc_auc_score(test.data, predictions_test) > 0.76
Example #21
0
def test_movielens_accuracy_pickle():
    """A pickle round-trip must preserve model accuracy."""

    model = LightFM()
    model.fit(train, epochs=10)

    restored = pickle.loads(pickle.dumps(model))

    predictions_train = restored.predict(train.row, train.col)
    predictions_test = restored.predict(test.row, test.col)

    assert roc_auc_score(train.data, predictions_train) > 0.84
    assert roc_auc_score(test.data, predictions_test) > 0.76
Example #22
0
def test_regularization():
    """Moderate L2 regularization should still fit reasonably well."""

    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001)
    model.fit_partial(train, epochs=30)

    predictions_train = model.predict(train.row, train.col)
    predictions_test = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, predictions_train) > 0.80
    assert roc_auc_score(test.data, predictions_test) > 0.75
Example #23
0
def test_overfitting():
    """A large unregularized model should overfit the training set."""

    model = LightFM(no_components=50)
    model.fit_partial(train, epochs=30)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    # Near-perfect on train, poor generalization on test.
    assert auc_train > 0.99
    assert auc_test < 0.75
Example #24
0
def test_movielens_excessive_regularization():
    """Very strong regularization should cripple the model."""

    model = LightFM(no_components=10, item_alpha=1.0, user_alpha=1.0)
    model.fit_partial(train, epochs=10)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train < 0.6
    assert auc_test < 0.6
Example #25
0
def test_zeros_negative_accuracy():
    """Encoding negatives as zeros should not change accuracy."""

    # Recode -1 interactions as 0 in place.
    train.data[train.data == -1] = 0

    model = LightFM()
    model.fit_partial(train, epochs=10)

    predictions_train = model.predict(train.row, train.col)
    predictions_test = model.predict(test.row, test.col)

    assert roc_auc_score(train.data, predictions_train) > 0.84
    assert roc_auc_score(test.data, predictions_test) > 0.76
Example #26
0
def test_sklearn_api():
    """The model should honour the scikit-learn get/set_params contract."""
    model = LightFM()
    params = model.get_params()

    # Parameters must round-trip through the constructor.
    clone = LightFM(**params)
    assert params == clone.get_params()

    model.set_params(**params)

    # Unknown parameters must be rejected.
    params['invalid_param'] = 666
    with pytest.raises(ValueError):
        model.set_params(**params)
Example #27
0
def test_logistic_precision():
    """Logistic loss should clear baseline precision and AUC thresholds."""

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    (train_precision, test_precision,
     full_train_auc, full_test_auc) = _get_metrics(model, train, test)

    assert train_precision > 0.3
    assert test_precision > 0.03
    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
Example #28
0
def test_random_state_advanced():
    """Seeding rand_r in Cython must advance the generator state."""

    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)
    model.fit_partial(train, epochs=1)

    state_before = model.random_state.get_state()[1].copy()

    model.fit_partial(train, epochs=1)
    state_after = model.random_state.get_state()[1]

    assert not np.all(state_before == state_after)
Example #29
0
def test_movielens_accuracy_resume():
    """Ten resumed single-epoch fits should reach the target AUC."""

    model = LightFM(random_state=SEED)

    for _ in range(10):
        model.fit_partial(train, epochs=1)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train > 0.84
    assert auc_test > 0.76
Example #30
0
def test_movielens_accuracy_pickle():
    """Accuracy must survive a pickle round-trip of the fitted model."""

    model = LightFM(random_state=SEED)
    model.fit(train, epochs=10)

    restored = pickle.loads(pickle.dumps(model))

    auc_train = roc_auc_score(train.data,
                              restored.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             restored.predict(test.row, test.col))

    assert auc_train > 0.84
    assert auc_test > 0.76
Example #31
0
def test_coo_with_duplicate_entries():
    """Duplicate COO entries must not crash training.

    Calling .tocsr() on a COO matrix with duplicate entries mutates its
    data arrays in place, which previously led to out-of-bounds array
    accesses in the WARP code.
    Reported in https://github.com/lyst/lightfm/issues/117.
    """

    rows, cols = 1000, 100
    mat = sp.random(rows, cols)
    mat.data[:] = 1

    # Append the first thousand entries again to create duplicates.
    mat.data = np.concatenate((mat.data, mat.data[:1000]))
    mat.row = np.concatenate((mat.row, mat.row[:1000]))
    mat.col = np.concatenate((mat.col, mat.col[:1000]))

    for loss in ('warp', 'bpr', 'warp-kos'):
        LightFM(loss=loss).fit(mat)
Example #32
0
def test_random_state_fixing():
    """Identical seeds must yield bit-identical models."""

    first = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)
    first.fit_partial(train, epochs=2)

    second = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)
    second.fit_partial(train, epochs=2)

    assert np.all(first.user_embeddings == second.user_embeddings)
    assert np.all(first.item_embeddings == second.item_embeddings)
Example #33
0
def test_movielens_accuracy():
    """Default model should reach the expected AUC on MovieLens."""

    model = LightFM()
    model.fit_partial(train, epochs=10)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train > 0.84
    assert auc_test > 0.76
Example #34
0
def test_overfitting():
    """A seeded, large, unregularized model should massively overfit."""

    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    predictions_train = model.predict(train.row, train.col)
    predictions_test = model.predict(test.row, test.col)
    auc_train = roc_auc_score(train.data, predictions_train)
    auc_test = roc_auc_score(test.data, predictions_test)

    # Near-perfect on train, poor generalization on test.
    assert auc_train > 0.99
    assert auc_test < 0.75
Example #35
0
def test_zeros_negative_accuracy():
    """Zeros as negative interactions should give the same accuracy."""

    # Recode -1 interactions as 0 in place.
    train.data[train.data == -1] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train > 0.84
    assert auc_test > 0.76
Example #36
0
def test_matrix_types():
    """Every sparse format / dtype combination should work end to end,
    including ranking via predict_rank."""

    mattypes = (sp.coo_matrix, sp.lil_matrix, sp.csr_matrix, sp.csc_matrix)
    dtypes = (np.int32, np.int64, np.float32, np.float64)

    no_users, no_items = (10, 100)
    no_features = 20

    for mattype in mattypes:
        for dtype in dtypes:
            interactions = mattype((no_users, no_items), dtype=dtype)
            user_features = mattype((no_users, no_features), dtype=dtype)
            item_features = mattype((no_items, no_features), dtype=dtype)

            model = LightFM()
            model.fit_partial(interactions,
                              user_features=user_features,
                              item_features=item_features)

            uids = np.random.randint(0, no_users, 10).astype(np.int32)
            pids = np.random.randint(0, no_items, 10).astype(np.int32)
            model.predict(uids, pids,
                          user_features=user_features,
                          item_features=item_features)

            # Ranking should accept every format as well.
            model.predict_rank(interactions,
                               user_features=user_features,
                               item_features=item_features)
Example #37
0
def test_precision_at_k_with_ties():
    """With all-tied zero predictions, pessimistic precision is zero."""

    no_users, no_items = (10, 100)
    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # Zero out every learned parameter so all scores tie at zero.
    for attr in ('user_embeddings', 'item_embeddings',
                 'user_biases', 'item_biases'):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    precision = evaluation.precision_at_k(model, test, k=10)

    # Ties are resolved pessimistically, so no hits are credited.
    assert precision.mean() == 0.0
Example #38
0
def test_full_batch_predict_wo_features():
    """Batch prediction without features returns top_k items per user."""

    top_k = 5
    ds = RandomDataset(density=1.0)

    model = LightFM(no_components=2)
    model.fit_partial(ds.train)

    query_users = [0, 1, 2]

    # Single-process batch prediction over one item chunk.
    model.batch_setup({0: ds.item_ids})
    recoms = model.batch_predict(
        user_ids=query_users,
        chunk_id=0,
        top_k=top_k,
    )

    for uid in query_users:
        assert uid in recoms
        assert len(recoms[uid][0]) == top_k
def main():
    # Incrementally builds the LightFM Dataset id mappings from staged
    # click logs. NOTE(review): `train_path`, `test_path`, `log`, and
    # `Dataset` are defined elsewhere in the file — confirm before reuse.
    current_stage = 6
    model = LightFM(no_components=30)
    dataset = Dataset()

    # One train/test click file per stage, 0..current_stage inclusive.
    for c in range(0, current_stage + 1):
        # Files have no header row; columns are user, item, timestamp.
        click_train = pd.read_csv(
            train_path + "/underexpose_train_click-{}.csv".format(c),
            header=None,
            names=["user_id", "item_id", "time"],
        )
        # NOTE(review): click_test is loaded but unused in this visible
        # portion — presumably consumed further down; verify.
        click_test = pd.read_csv(
            test_path + "/underexpose_test_click-{}.csv".format(c),
            header=None,
            names=["user_id", "item_id", "time"],
        )
        # Extend the user/item id mappings with this stage's ids.
        dataset.fit_partial(click_train["user_id"], click_train["item_id"])
        num_users, num_items = dataset.interactions_shape()
        log('Num users: {}, num_items {}.'.format(num_users, num_items))
Example #40
0
def test_predict():
    """predict must accept both scalar and array user ids."""

    no_users, no_items = (10, 100)
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions)

    for user in range(no_users):
        array_scores = model.predict(np.repeat(user, no_items),
                                     np.arange(no_items))
        scalar_scores = model.predict(user, np.arange(no_items))
        assert np.allclose(array_scores, scalar_scores)
Example #41
0
def test_random_state_advanced():
    """Using the RNG to seed rand_r in Cython must advance its state."""

    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)
    model.fit_partial(train, epochs=1)

    snapshot = model.rng.get_state()[1].copy()

    model.fit_partial(train, epochs=1)

    assert not np.all(snapshot == model.rng.get_state()[1])
Example #42
0
def test_regularization():
    """A seeded, moderately regularized model should still fit well."""

    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001,
                    random_state=SEED)
    model.fit_partial(train, epochs=30)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train > 0.80
    assert auc_test > 0.75
Example #43
0
def test_movielens_excessive_regularization():
    """Very strong regularization should cripple the seeded model."""

    model = LightFM(no_components=10,
                    item_alpha=1.0,
                    user_alpha=1.0,
                    random_state=SEED)
    model.fit_partial(train, epochs=10)

    auc_train = roc_auc_score(train.data,
                              model.predict(train.row, train.col))
    auc_test = roc_auc_score(test.data,
                             model.predict(test.row, test.col))

    assert auc_train < 0.6
    assert auc_test < 0.6
Example #44
0
def test_full_batch_predict():
    """Multi-process batch prediction must match single-process output."""

    top_k = 5
    ds = RandomDataset()

    model = LightFM(no_components=2)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)

    query_users = [0, 1, 2]
    chunks = {0: ds.item_ids}

    # Single-process run first.
    model.batch_setup(item_chunks=chunks,
                      user_features=ds.user_features,
                      item_features=ds.item_features,
                      n_process=1)
    single = model.batch_predict(
        user_ids=query_users,
        chunk_id=0,
        top_k=top_k,
    )
    for uid in query_users:
        assert uid in single
        assert len(single[uid][0]) == top_k

    model.batch_cleanup()

    # Multi-process run should reproduce the same recommendations.
    model.batch_setup(item_chunks=chunks,
                      user_features=ds.user_features,
                      item_features=ds.item_features,
                      n_process=2)
    multi = model.batch_predict(
        user_ids=query_users,
        chunk_id=0,
        top_k=top_k,
    )
    for uid in query_users:
        assert uid in multi
        assert_array_almost_equal(multi[uid], single[uid])
Example #45
0
def test_feature_inference_fails():
    """Out-of-range feature ids at predict time should raise ValueError.

    Feature inference must reject ids higher than the number of
    features supplied to fit.
    """

    no_users, no_items = 10, 100
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    user_features = sp.csr_matrix((no_users, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items, no_features), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions,
                      user_features=user_features,
                      item_features=item_features)

    bad_ids = np.array([no_features], dtype=np.int32)
    with pytest.raises(ValueError):
        model.predict(bad_ids, bad_ids)
Example #46
0
def test_input_dtypes():
    """Fitting and predicting must work for every dtype in ``dtypes``."""
    no_users, no_items = 10, 100
    no_features = 20

    for dtype in dtypes:
        interactions = sp.coo_matrix((no_users, no_items), dtype=dtype)
        user_feats = sp.coo_matrix((no_users, no_features), dtype=dtype)
        item_feats = sp.coo_matrix((no_items, no_features), dtype=dtype)

        model = LightFM()
        model.fit_partial(interactions,
                          user_features=user_feats,
                          item_features=item_feats)

        # Draw user ids before item ids to keep the RNG sequence stable.
        user_ids = np.random.randint(0, no_users, 10).astype(np.int32)
        item_ids = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(user_ids,
                      item_ids,
                      user_features=user_feats,
                      item_features=item_feats)
    def obtener_modelo_gui(self, lista_param):
        """Build the LightFM model selected through the web GUI.

        This method is only used by the web interface.

        Parameters
        ----------
        lista_param: list
            User-chosen model parameters, positionally ordered as:
            no_components, k, n, learning_schedule, loss, learning_rate,
            rho, epsilon, item_alpha, user_alpha, max_sampled.
        """

        global modelo

        # Unpack the positional parameter list into named values
        # for readability.
        (no_components, k, n, learning_schedule, loss, learning_rate,
         rho, epsilon, item_alpha, user_alpha, max_sampled) = lista_param[:11]

        # Instantiate the model from the parameters above.
        modelo = LightFM(no_components=no_components,
                         k=k,
                         n=n,
                         learning_schedule=learning_schedule,
                         loss=loss,
                         learning_rate=learning_rate,
                         rho=rho,
                         epsilon=epsilon,
                         item_alpha=item_alpha,
                         user_alpha=user_alpha,
                         max_sampled=max_sampled)
Example #48
0
def test_predict_ranks():
    """predict_rank must yield dense, exclusion-aware, pessimistic ranks."""
    no_users, no_items = 10, 100

    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items.
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Every row must be a permutation of 0..no_items-1.
    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row.
    ranks = model.predict_rank(rank_input, train_interactions=train).todense()
    assert np.all(
        np.squeeze(np.array(ranks.max(axis=1))) == no_items - 1 -
        np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Fix: assert against no_items - 1 rather than the hard-coded 99 so
    # the test stays correct if no_items is ever changed.
    assert np.all(ranks.min(axis=1) == no_items - 1)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    # Wrong input dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Example #49
0
    def evaluate_ground_truth(self, model: LightFM, test: np.ndarray) -> pd.DataFrame:
        """Evaluate a recommender by the ground truth preference labels.

        Parameters
        ----------
        model: LightFM
            Fitted model used to score (user, item) pairs.
        test: np.ndarray
            Test log; column 0 holds user ids, column 1 item ids. The
            remaining columns carry cv/cvr/ct signals whose layout
            depends on the dataset name in ``self.data``.

        Returns
        -------
        pd.DataFrame
            One row per "metric@k" with its mean value in the "gt" column.
        """
        results = {}

        users = test[:, 0].astype(int)
        items = test[:, 1].astype(int)
        # Column layout differs between MovieLens ("ml") datasets and
        # the others, hence the conditional zero-filling below.
        cv = np.zeros(test.shape[0]) if "ml" in self.data else test[:, 2]
        cvr = test[:, -1] if "ml" in self.data else np.zeros(test.shape[0])
        ct = np.zeros(test.shape[0]) if "ml" in self.data else test[:, 3]

        for _k in self.k:
            for metric in self.metrics:
                results[f"{metric}@{_k}"] = []

        for user in set(users):
            indices = users == user
            items_for_current_user = items[indices]
            cvr_for_current_user = cvr[indices]
            ct_for_current_user = ct[indices]
            cv_for_current_user = cv[indices]

            # predict ranking score for each user
            # Fix: np.int was removed in NumPy >= 1.24; use builtin int.
            scores = model.predict(user_ids=int(user),
                                   item_ids=items_for_current_user)
            # calculate ranking metrics at every cutoff k
            for _k in self.k:
                for metric, metric_func in self.metrics.items():
                    results[f"{metric}@{_k}"].append(
                        metric_func(
                            cv=cv_for_current_user,
                            ct=ct_for_current_user,
                            cv_hat=cvr_for_current_user,
                            score=scores,
                            k=_k,
                        ))
        # aggregate per-user results into their means
        gt_results = pd.DataFrame(index=results.keys())
        gt_results["gt"] = list(map(np.mean, list(results.values())))
        return gt_results
Example #50
0
def test_batch_predict_user_recs_per_user():
    """Per-user prediction returns top_k scores sorted descending."""
    ds = RandomDataset()

    model = LightFM(no_components=2)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)
    model.batch_setup(item_chunks={0: ds.item_ids},
                      user_features=ds.user_features,
                      item_features=ds.item_features)

    top_k = 5
    for user_id in range(ds.no_users):
        _, scores = model.predict_for_user(user_id=user_id,
                                           top_k=top_k,
                                           item_ids=ds.item_ids)
        assert len(scores) == top_k
        # Scores must already be in descending order.
        assert_array_almost_equal(scores, -1 * np.sort(-1 * scores))
Example #51
0
def test_hogwild_accuracy():
    """Hogwild-style training with 2 threads should keep accuracy intact."""

    model = LightFM()
    model.fit_partial(train, epochs=10, num_threads=2)

    predictions_train = model.predict(train.row, train.col, num_threads=2)
    predictions_test = model.predict(test.row, test.col, num_threads=2)

    assert roc_auc_score(train.data, predictions_train) > 0.84
    assert roc_auc_score(test.data, predictions_test) > 0.76
Example #52
0
def test_zero_weights_accuracy():
    """All-zero sample weights must stop learning: AUC stays at chance level."""

    zero_weights = train.copy()
    zero_weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=zero_weights, epochs=10)

        auc_train = roc_auc_score(train.data,
                                  model.predict(train.row, train.col))
        auc_test = roc_auc_score(test.data,
                                 model.predict(test.row, test.col))

        # With zero weights no updates happen, so performance is random.
        assert 0.45 < auc_train < 0.55
        assert 0.45 < auc_test < 0.55
Example #53
0
def test_regression_full_batch_predict():
    """batch_predict with top_k=0 must match predict() for every user."""
    np.random.seed(42)
    ds = RandomDataset(no_items=5, density=1)

    model = LightFM(no_components=2)
    model.fit(ds.train,
              user_features=ds.user_features,
              item_features=ds.item_features)

    # Shift the biases away from zero so they influence the scores.
    model.item_biases += 0.2
    model.user_biases += 0.5

    user_ids = [0, 1, 2]
    model.batch_setup(item_chunks={0: ds.item_ids},
                      item_features=ds.item_features,
                      user_features=ds.user_features)
    recoms = model.batch_predict(user_ids=user_ids,
                                 chunk_id=0,
                                 top_k=0)  # top_k=0 scores all items

    nonzero_score_users = 0
    for user_id in user_ids:
        single_scores = model.predict(user_ids=user_id,
                                      item_ids=ds.item_ids,
                                      item_features=ds.item_features,
                                      user_features=ds.user_features,
                                      num_threads=1)
        if sum(single_scores) != 0:
            nonzero_score_users += 1
        # Batch scores must agree with the single-user prediction path.
        assert_array_almost_equal(recoms[user_id][1], single_scores)
    # At least one user must have produced non-zero scores.
    assert nonzero_score_users != 0
Example #54
0
def test_user_supplied_features_accuracy():
    """Training with caller-supplied user/item features should fit well."""

    model = LightFM()
    model.fit_partial(train,
                      user_features=train_user_features,
                      item_features=train_item_features,
                      epochs=10)

    auc_train = roc_auc_score(
        train.data,
        model.predict(train.row,
                      train.col,
                      user_features=train_user_features,
                      item_features=train_item_features))
    auc_test = roc_auc_score(
        test.data,
        model.predict(test.row,
                      test.col,
                      user_features=test_user_features,
                      item_features=test_item_features))

    assert auc_train > 0.84
    assert auc_test > 0.76
Example #55
0
def test_movielens_genre_accuracy():
    """Genre-only item metadata should still reach reasonable accuracy."""

    item_features = movielens_data.get_movielens_item_metadata(
        use_item_ids=False)

    # Genre features compress the item space: fewer columns than rows.
    assert item_features.shape[1] < item_features.shape[0]

    model = LightFM()
    model.fit_partial(train, item_features=item_features, epochs=10)

    auc_train = roc_auc_score(
        train.data,
        model.predict(train.row, train.col, item_features=item_features))
    auc_test = roc_auc_score(
        test.data,
        model.predict(test.row, test.col, item_features=item_features))

    assert auc_train > 0.75
    assert auc_test > 0.69
Example #56
0
def test_movielens_excessive_regularization():
    """Huge alphas should cripple accuracy without diverging to infinity."""

    for loss in ('logistic', 'warp', 'bpr', 'warp-kos'):
        # High regularization must hurt performance, but the penalty
        # should not accumulate until parameters reach infinity.
        model = LightFM(no_components=10,
                        item_alpha=1.0,
                        user_alpha=1.0,
                        loss=loss,
                        random_state=SEED)
        model.fit_partial(train, epochs=10, num_threads=4)

        auc_train = roc_auc_score(train.data,
                                  model.predict(train.row, train.col))
        auc_test = roc_auc_score(test.data,
                                 model.predict(test.row, test.col))

        assert auc_train < 0.65
        assert auc_test < 0.65
Example #57
0
def test_movielens_genre_accuracy():
    """Genre features fetched via fetch_movielens should fit reasonably."""

    item_features = fetch_movielens(indicator_features=False,
                                    genre_features=True)['item_features']

    # Genre features compress the item space: fewer columns than rows.
    assert item_features.shape[1] < item_features.shape[0]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=10)

    auc_train = roc_auc_score(
        train.data,
        model.predict(train.row, train.col, item_features=item_features))
    auc_test = roc_auc_score(
        test.data,
        model.predict(test.row, test.col, item_features=item_features))

    assert auc_train > 0.75
    assert auc_test > 0.69
Example #58
0
def validate(ctx, data_home):
    """Validate locally-built MovieLens matrices against fetch_movielens.

    Checks that the train/test matrices built from the raw dataframe
    match the ones produced by ``fetch_movielens``, that models trained
    on either version reach comparable precision/AUC, and that the
    sklearn-style wrapper plus a random search run end to end.
    """
    # NOTE(review): ``ctx`` is unused here — presumably a CLI context
    # object required by the command framework; confirm before removing.

    # matrix creation validation
    df = load_movielens(data_home)
    dic = fetch_movielens(data_home, download_if_missing=True)

    train_o = dic['train']
    test_o = dic['test']

    # Split the dataframe into train/test using the precomputed flag.
    train_df = df[df['is_train']]
    test_df = df[~df['is_train']]

    shape = (df.user_id.unique().shape[0], df.item_id.unique().shape[0])

    # Rebuild sparse interaction matrices from the raw dataframe.
    train_t = to_sparse_matrix(train_df.user_id.values, train_df.item_id.values, train_df.rating.values, shape)
    test_t = to_sparse_matrix(test_df.user_id.values, test_df.item_id.values, test_df.rating.values, shape)

    assert (train_o.shape == train_t.shape)
    assert (np.array_equal(test_o.diagonal(), test_t.diagonal()))

    model = LightFM(loss='warp')

    # Train on the fetched matrices and record baseline metrics.
    model.fit(train_o, epochs=10)
    train_precision, test_precision, train_auc, test_auc = evaluate_model(model, train_o, test_o)

    # Train on the locally-built matrices and compare.
    model.fit(train_t, epochs=10)
    train_precision_t, test_precision_t, train_auc_t, test_auc_t = evaluate_model(model, train_t, test_t)

    # NOTE(review): precision and AUC are bounded by 1, so a tolerance
    # of 2 can never fail — confirm whether a tighter bound was intended.
    assert (abs(train_precision - train_precision_t) < 2)
    assert (abs(test_precision - test_precision_t) < 2)
    assert (abs(train_auc - train_auc_t) < 2)
    assert (abs(test_auc - test_auc_t) < 2)

    # Exercise the sklearn-style wrapper on the same data.
    clf = LightWrapper(loss='warp', shape=shape)
    clf.fit(train_df[['user_id', 'item_id']].values, train_df.rating.values)
    train_precision_t, test_precision_t, train_auc_t, test_auc_t = clf.evaluate(test_df[['user_id', 'item_id']].values, test_df.rating.values)

    assert (abs(train_precision - train_precision_t) < 2)
    assert (abs(test_precision - test_precision_t) < 2)
    assert (abs(train_auc - train_auc_t) < 2)
    assert (abs(test_auc - test_auc_t) < 2)

    # Finally run a minimal random search over the wrapper.
    random_search(clf, df[['user_id', 'item_id', 'rating']].values, [[train_df.index.values, test_df.index.values]], param_dist={"epochs": [10], "learning_rate": [0.005]})