def test_movielens_accuracy_sample_weights_grad_accumulation():
    """Zero-weighted users must not accumulate gradient updates.

    The weight of every interaction is one for even-numbered users and
    zero for odd-numbered users.  After one epoch the gradient
    accumulators of the odd-numbered users must still be 1.0, while the
    accumulators of the even-numbered users must not.
    """
    weights = train.copy()
    weights.data = np.ones(train.getnnz(), dtype=np.float32)
    # Multiplying by the boolean mask zeroes the weights of odd users.
    weights.data *= (weights.row % 2 == 0)

    user_parity = np.arange(train.shape[0]) % 2
    even_idx = user_parity == 0
    odd_idx = user_parity != 0

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=weights, epochs=1)

        assert np.allclose(model.user_embedding_gradients[odd_idx], 1.0)
        assert np.allclose(model.user_bias_gradients[odd_idx], 1.0)

        assert not np.allclose(model.user_embedding_gradients[even_idx], 1.0)
        assert not np.allclose(model.user_bias_gradients[even_idx], 1.0)
def test_predict_ranks():
    """Check ``predict_rank`` with distinct scores, ties and a bad shape."""
    shape = (10, 100)
    no_users, no_items = shape

    model = LightFM()
    model.fit_partial(sp.coo_matrix(shape, dtype=np.float32))

    # Request a rank for every (user, item) pair.
    rank_input = sp.csr_matrix(np.ones(shape))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    all_ranks = np.arange(no_items)
    for user in range(no_users):
        assert np.all(np.sort(ranks[user]) == all_ranks)

    # Zero out every parameter so that all predictions tie.
    for name in ('user_embeddings', 'item_embeddings',
                 'user_biases', 'item_biases'):
        setattr(model, name, np.zeros_like(getattr(model, name)))

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # A matrix with the wrong dimensions is rejected.
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_warp_kos_precision():
    """WARP-kOS fitted on positive-only interactions clears the accuracy floors."""
    # Drop every interaction whose value is below 1.
    positives = train.copy()
    positives.data[positives.data < 1] = 0
    positives = positives.tocsr()
    positives.eliminate_zeros()

    model = LightFM(learning_rate=0.05, k=5, loss='warp-kos')
    model.fit_partial(positives, epochs=10)

    precision_train = precision_at_k(model, positives, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, positives)
    auc_test = full_auc(model, test)

    assert precision_train > 0.44
    assert precision_test > 0.06
    assert auc_train > 0.9
    assert auc_test > 0.87
def test_input_dtypes():
    """Fit and predict run for int32, int64, float32 and float64 inputs."""
    no_users, no_items = (10, 100)
    no_features = 20

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        interactions = sp.coo_matrix((no_users, no_items), dtype=dtype)
        user_features = sp.coo_matrix((no_users, no_features), dtype=dtype)
        item_features = sp.coo_matrix((no_items, no_features), dtype=dtype)

        model = LightFM()
        model.fit_partial(interactions,
                          user_features=user_features,
                          item_features=item_features)

        # Ten random (user, item) pairs; users are drawn first.
        user_ids = np.random.randint(0, no_users, 10).astype(np.int32)
        item_ids = np.random.randint(0, no_items, 10).astype(np.int32)
        model.predict(user_ids,
                      item_ids,
                      user_features=user_features,
                      item_features=item_features)
def test_warp_kos_precision():
    """Seeded WARP-kOS fitted on positive-only interactions clears the floors."""
    # Drop every interaction whose value is below 1.
    positives = train.copy()
    positives.data[positives.data < 1] = 0
    positives = positives.tocsr()
    positives.eliminate_zeros()

    model = LightFM(learning_rate=0.05,
                    k=5,
                    loss='warp-kos',
                    random_state=SEED)
    model.fit_partial(positives, epochs=10)

    # NOTE(review): metrics are computed on the unfiltered `train`, not on
    # the positive-only matrix the model was fitted on - confirm intended.
    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.44
    assert test_precision > 0.06
    assert full_train_auc > 0.9
    assert full_test_auc > 0.87
def test_movielens_accuracy_sample_weights():
    """Scaled-down weights with a scaled-up learning rate keep accuracy.

    Every interaction gets the weight ``scale`` and the model's learning
    rate is multiplied by ``1 / scale``.  For each loss the full train AUC
    must still exceed the expected score.
    """
    scale = 1e-01
    weights = train.copy()
    weights.data = np.ones(train.getnnz(), dtype=np.float32) * scale

    for (loss, exp_score) in (('logistic', 0.74),
                              ('bpr', 0.84),
                              ('warp', 0.89)):
        model = LightFM(loss=loss, random_state=SEED)
        # In-place update: a bare ``model.learning_rate * 1.0 / scale``
        # expression would compute the value and discard it.
        model.learning_rate *= 1.0 / scale

        model.fit_partial(train,
                          sample_weight=weights,
                          epochs=10)

        # Only the full train AUC is checked.
        (_, _, full_train_auc, _) = _get_metrics(model, train, test)

        assert full_train_auc > exp_score
def test_feature_inference_fails():
    """Predicting without features for ids beyond the fitted feature count fails.

    The model is fitted with explicit feature matrices of ``no_features``
    columns; predicting for id ``no_features`` without passing features
    must raise ``AssertionError``.
    """
    no_users, no_items = (10, 100)
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    user_features = sp.csr_matrix((no_users, no_features), dtype=np.int32)
    item_features = sp.csr_matrix((no_items, no_features), dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions,
                      user_features=user_features,
                      item_features=item_features)

    with pytest.raises(AssertionError):
        out_of_range_user = np.array([no_features], dtype=np.int32)
        out_of_range_item = np.array([no_features], dtype=np.int32)
        model.predict(out_of_range_user, out_of_range_item)
def test_auc_score():
    """``evaluation.auc_score`` agrees with the reference ``_auc`` helper."""
    no_users, no_items = (10, 100)
    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model, test, num_threads=2)
    expected_auc = np.array(_auc(model, test))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01

    # One score per user with at least one test interaction ...
    users_with_interactions = (test.getnnz(axis=1) > 0).sum()
    assert len(auc) == users_with_interactions
    # ... and one score per row when rows are preserved.
    preserved = evaluation.auc_score(model, train, preserve_rows=True)
    assert len(preserved) == test.shape[0]

    # Same comparison with the train interactions excluded.
    auc = evaluation.auc_score(model,
                               test,
                               train_interactions=train,
                               num_threads=2)
    expected_auc = np.array(_auc(model, test, train))
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
def test_matrix_types():
    """Fit and predict run for every sparse format / dtype combination."""
    no_users, no_items = (10, 100)
    no_features = 20

    sparse_formats = (sp.coo_matrix, sp.lil_matrix,
                      sp.csr_matrix, sp.csc_matrix)
    value_types = (np.int32, np.int64, np.float32, np.float64)

    for make_matrix in sparse_formats:
        for dtype in value_types:
            interactions = make_matrix((no_users, no_items), dtype=dtype)
            user_features = make_matrix((no_users, no_features), dtype=dtype)
            item_features = make_matrix((no_items, no_features), dtype=dtype)

            model = LightFM()
            model.fit_partial(interactions,
                              user_features=user_features,
                              item_features=item_features)

            # Ten random (user, item) pairs; users are drawn first.
            user_ids = np.random.randint(0, no_users, 10).astype(np.int32)
            item_ids = np.random.randint(0, no_items, 10).astype(np.int32)
            model.predict(user_ids,
                          item_ids,
                          user_features=user_features,
                          item_features=item_features)
def test_warp_precision_adadelta_multithreaded():
    """WARP with the adadelta schedule, fitted on four threads, clears the floors."""
    model = LightFM(learning_schedule='adadelta',
                    rho=0.95,
                    epsilon=0.000001,
                    loss='warp')
    model.fit_partial(train, epochs=10, num_threads=4)

    precision_train = precision_at_k(model, train, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    assert precision_train > 0.45
    assert precision_test > 0.07
    assert auc_train > 0.94
    assert auc_test > 0.9
def test_predict(num_threads=2):
    """All ``predict`` call variants return the same scores for a user.

    Compared per user: array vs. scalar user id, multi-threaded vs.
    default, and with ``precompute_representations=False`` on both
    ``num_threads`` threads and a single thread.
    """
    no_users, no_items = (10, 100)

    model = LightFM()
    model.fit_partial(sp.coo_matrix((no_users, no_items), dtype=np.int32))

    for uid in range(no_users):
        user_ids = np.repeat(uid, no_items)

        scores_arr = model.predict(user_ids, np.arange(no_items))
        scores_int = model.predict(uid, np.arange(no_items))
        assert np.allclose(scores_arr, scores_int)

        scores_parallel = model.predict(user_ids,
                                        np.arange(no_items),
                                        num_threads=num_threads)
        assert np.allclose(scores_parallel, scores_arr)

        scores_no_prec = model.predict(user_ids,
                                       np.arange(no_items),
                                       num_threads=num_threads,
                                       precompute_representations=False)
        assert np.allclose(scores_parallel, scores_no_prec)

        scores_no_prec_serial = model.predict(user_ids,
                                              np.arange(no_items),
                                              num_threads=1,
                                              precompute_representations=False)
        assert np.allclose(scores_parallel, scores_no_prec_serial)
def test_empty_matrix():
    """Fitting on an interaction matrix with no stored entries must not raise."""
    shape = (10, 100)
    empty_interactions = sp.coo_matrix(shape, dtype=np.int32)

    LightFM().fit_partial(empty_interactions)
def fit_lightfm_model():
    """Fit a LightFM model and score every test coupon for every user.

    Reads the interaction and feature matrices and the coupon / user lists
    from ``../Data/Data_translated/``, fits a WARP model with user and
    train-item features, predicts a score for each (user, test coupon)
    pair using the test-item features, min-max scales each user's scores
    and pickles the result to ``d_pred_lightfm.pickle`` in the same folder.

    Returns
    -------
    d_user_pred : dict
        key = user ID, value = scaled prediction for each coupon in
        ``list_coupon`` (same order).
    list_user : array
        The user IDs read from the user list.
    list_coupon : array
        The test coupon IDs read from the test coupon list.
    """
    # Load data
    Mui_train = spi.mmread("../Data/Data_translated/biclass_user_item_train_mtrx.mtx")
    uf = spi.mmread("../Data/Data_translated/user_feat_mtrx.mtx")
    itrf = spi.mmread("../Data/Data_translated/train_item_feat_mtrx.mtx")
    itef = spi.mmread("../Data/Data_translated/test_item_feat_mtrx.mtx")

    # Print shapes as a check.  The parenthesised single-argument form is
    # valid under both Python 2 and Python 3.
    print("user_features shape: %s,\nitem train features shape: %s,\nitem test features shape: %s" % (uf.shape, itrf.shape, itef.shape))

    # Load test coupon and user lists
    cplte = pd.read_csv("../Data/Data_translated/coupon_list_test_translated.csv")
    ulist = pd.read_csv("../Data/Data_translated/user_list_translated.csv")
    list_coupon = cplte["COUPON_ID_hash"].values
    list_user = ulist["USER_ID_hash"].values

    # Build model
    no_comp, lr, ep = 10, 0.01, 5
    model = LightFM(no_components=no_comp, learning_rate=lr, loss='warp')
    model.fit_partial(Mui_train,
                      user_features=uf,
                      item_features=itrf,
                      epochs=ep,
                      num_threads=4,
                      verbose=True)

    no_users, no_items = len(list_user), len(list_coupon)
    pid_array = np.arange(no_items, dtype=np.int32)

    # Create and initialise dict to store predictions
    d_user_pred = {user: [] for user in list_user}

    # Loop over users and compute predictions
    for user_id in range(no_users):
        sys.stdout.write("\rProcessing user " + str(user_id) + "/ " + str(len(list_user)))
        sys.stdout.flush()

        uid_array = np.full(no_items, user_id, dtype=np.int32)
        predictions = model.predict(uid_array,
                                    pid_array,
                                    user_features=uf,
                                    item_features=itef,
                                    num_threads=4)
        user = str(list_user[user_id])

        # Apply MinMaxScaler for blending later on.  The scaler works on
        # 2-D (n_samples, n_features) input, so scale the scores as one
        # column and flatten the result again.
        MMS = MinMaxScaler()
        pred = MMS.fit_transform(np.ravel(predictions).reshape(-1, 1)).ravel()
        d_user_pred[user] = pred

    # Pickle the predictions for future use.  Pickle data is binary, so the
    # file must be opened in binary mode.
    d_pred = {"list_coupon": list_coupon.tolist(), "d_user_pred": d_user_pred}
    with open("../Data/Data_translated/d_pred_lightfm.pickle", "wb") as f:
        pickle.dump(d_pred, f, protocol=pickle.HIGHEST_PROTOCOL)

    return d_user_pred, list_user, list_coupon
def test_random_state_fixing():
    """Two identically seeded models end up with identical embeddings."""
    fitted = []
    for _ in range(2):
        candidate = LightFM(learning_rate=0.05,
                            loss='warp',
                            random_state=SEED)
        candidate.fit_partial(train, epochs=2)
        fitted.append(candidate)

    first, second = fitted

    assert np.all(first.user_embeddings == second.user_embeddings)
    assert np.all(first.item_embeddings == second.item_embeddings)
def test_movielens_accuracy_resume():
    """Ten single-epoch ``fit_partial`` calls clear the ROC AUC floors."""
    model = LightFM(random_state=SEED)

    epochs_done = 0
    while epochs_done < 10:
        model.fit_partial(train, epochs=1)
        epochs_done += 1

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
def test_not_enough_features_fails():
    """Feature matrices with one row too few make ``fit_partial`` raise."""
    no_users, no_items = (10, 100)
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    # Both feature matrices are one row short of the interaction matrix.
    short_user_features = sp.csr_matrix((no_users - 1, no_features),
                                        dtype=np.int32)
    short_item_features = sp.csr_matrix((no_items - 1, no_features),
                                        dtype=np.int32)

    model = LightFM()

    with pytest.raises(Exception):
        model.fit_partial(interactions,
                          user_features=short_user_features,
                          item_features=short_item_features)
def test_predict():
    """A scalar user id gives the same scores as a repeated-id array."""
    no_users, no_items = (10, 100)

    model = LightFM()
    model.fit_partial(sp.coo_matrix((no_users, no_items), dtype=np.int32))

    for uid in range(no_users):
        from_array = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        from_scalar = model.predict(uid, np.arange(no_items))

        assert np.allclose(from_array, from_scalar)
def test_warp_stability():
    """WARP embeddings contain no NaNs at learning rates 0.05, 0.1 and 0.5."""
    for lrate in (0.05, 0.1, 0.5):
        model = LightFM(learning_rate=lrate, loss='warp')
        model.fit_partial(train, epochs=10)

        user_has_nan = np.isnan(model.user_embeddings).any()
        assert not user_has_nan
        item_has_nan = np.isnan(model.item_embeddings).any()
        assert not item_has_nan
def test_movielens_accuracy():
    """A default model fitted for ten epochs clears the ROC AUC floors."""
    model = LightFM()
    model.fit_partial(train, epochs=10)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
def test_overfitting():
    """50 components over 30 epochs overfit: train AUC > 0.99, test AUC < 0.75."""
    model = LightFM(no_components=50, random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    auc_on_train = roc_auc_score(train.data, train_scores)
    auc_on_test = roc_auc_score(test.data, test_scores)

    assert auc_on_train > 0.99
    assert auc_on_test < 0.75
def test_logistic_precision():
    """The default-loss model clears the precision and AUC floors."""
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.3
    assert test_precision > 0.03
    assert full_train_auc > 0.79
    assert full_test_auc > 0.73
def test_zeros_negative_accuracy():
    """Accuracy is unchanged when zeros denote the negative interactions.

    Every ``-1`` in the training data is replaced by ``0`` before fitting;
    the ROC AUC floors are the same as for the ``-1`` encoding.
    """
    # Work on a copy so the module-level ``train`` matrix is not modified
    # for the other tests that share it.
    zeroed_train = train.copy()
    zeroed_train.data[zeroed_train.data == -1] = 0

    model = LightFM(random_state=SEED)
    model.fit_partial(zeroed_train, epochs=10)

    train_predictions = model.predict(zeroed_train.row, zeroed_train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(zeroed_train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
def test_predict_ranks():
    """Check ``predict_rank`` output, train-set exclusion and tie handling."""
    no_users, no_items = (10, 100)

    # Seeded random interactions (no throw-away placeholder matrix first).
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    max_ranks = np.squeeze(np.array(ranks.max(axis=1)))
    train_counts = np.squeeze(np.array(train.getnnz(axis=1)))
    assert np.all(max_ranks == no_items - 1 - train_counts)

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 99)
    assert np.all(ranks.max(axis=1) == 99)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_movielens_excessive_regularization():
    """With both alphas at 1.0 the ROC AUC stays below 0.6 on train and test."""
    heavy_penalty = dict(item_alpha=1.0, user_alpha=1.0)
    model = LightFM(no_components=10, random_state=SEED, **heavy_penalty)
    model.fit_partial(train, epochs=10)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc < 0.6
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc < 0.6
def test_regularization():
    """Light regularization (alphas 0.0001) keeps train AUC > 0.80 and test AUC > 0.75."""
    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001,
                    random_state=SEED)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.80
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
def test_warp_precision_multithreaded():
    """Seeded WARP fitted on four threads clears the precision and AUC floors."""
    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=4)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.45
    assert test_precision > 0.07
    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
def test_bpr_precision():
    """Seeded BPR clears the precision and AUC floors."""
    model = LightFM(learning_rate=0.05, loss='bpr', random_state=SEED)
    model.fit_partial(train, epochs=10)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.31
    assert test_precision > 0.04
    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
def test_random_state_advanced():
    """A further epoch of fitting changes the state of ``model.rng``.

    Guards the expectation that seeding rand_r in Cython from the random
    state advances the generator.
    """
    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model.fit_partial(train, epochs=1)
    # Copy the state array so the comparison is against a snapshot.
    state_before = model.rng.get_state()[1].copy()

    model.fit_partial(train, epochs=1)
    state_after = model.rng.get_state()[1]

    assert not np.all(state_before == state_after)
def test_predict_ranks():
    """Check ``predict_rank`` output, train-set exclusion and tie handling."""
    no_users, no_items = (10, 100)

    # Seeded so that the test data is reproducible between runs (no
    # throw-away placeholder matrix first).
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    max_ranks = np.squeeze(np.array(ranks.max(axis=1)))
    train_counts = np.squeeze(np.array(train.getnnz(axis=1)))
    assert np.all(max_ranks == no_items - 1 - train_counts)

    # Make sure invariants hold when there are ties
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_training_schedules():
    """Check the gradient / momentum accumulators of both learning schedules.

    adagrad: gradients start at 1 and grow after an epoch, momentum
    stays 0.  adadelta: gradients and momentum start at 0 and both become
    positive somewhere after an epoch.
    """
    # Attribute-name prefixes of the four accumulator pairs, checked in
    # this order.
    groups = ('item_embedding', 'item_bias', 'user_embedding', 'user_bias')

    model = LightFM(no_components=10,
                    learning_schedule='adagrad',
                    random_state=SEED)

    model.fit_partial(train, epochs=0)
    for group in groups:
        assert (getattr(model, group + '_gradients') == 1).all()
        assert (getattr(model, group + '_momentum') == 0).all()

    model.fit_partial(train, epochs=1)
    for group in groups:
        assert (getattr(model, group + '_gradients') > 1).any()
        assert (getattr(model, group + '_momentum') == 0).all()

    model = LightFM(no_components=10,
                    learning_schedule='adadelta',
                    random_state=SEED)

    model.fit_partial(train, epochs=0)
    for group in groups:
        assert (getattr(model, group + '_gradients') == 0).all()
        assert (getattr(model, group + '_momentum') == 0).all()

    model.fit_partial(train, epochs=1)
    for group in groups:
        assert (getattr(model, group + '_gradients') > 0).any()
        assert (getattr(model, group + '_momentum') > 0).any()
def test_warp_precision_max_sampled():
    """With ``max_sampled`` forced to 0 the AUC is no better than random."""
    model = LightFM(learning_rate=0.05, max_sampled=1, loss='warp')

    # Overriding the attribute after construction makes the pass over
    # the training data equivalent to a no-op.
    model.max_sampled = 0
    model.fit_partial(train, epochs=1)

    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    # Both AUCs must stay below 0.55.
    assert auc_train < 0.55
    assert auc_test < 0.55
def test_regularization():
    """Light regularization (alphas 0.0001) keeps train AUC > 0.80 and test AUC > 0.75."""
    model = LightFM(no_components=50,
                    item_alpha=0.0001,
                    user_alpha=0.0001)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.80
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.75
def test_overfitting():
    """50 components over 30 epochs overfit: train AUC > 0.99, test AUC < 0.75."""
    model = LightFM(no_components=50)
    model.fit_partial(train, epochs=30)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    auc_on_train = roc_auc_score(train.data, train_scores)
    auc_on_test = roc_auc_score(test.data, test_scores)

    assert auc_on_train > 0.99
    assert auc_on_test < 0.75
def test_auc_score():
    """``evaluation.auc_score`` agrees with the reference ``_auc`` helper."""
    no_users, no_items = (10, 100)

    # Random interaction pattern with every stored value set to one.
    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    per_user_auc = evaluation.auc_score(model, train, num_threads=2)
    # Keep only the users that have at least one interaction.
    auc = per_user_auc[train.getnnz(axis=1) > 0]
    expected_auc = np.array(_auc(model, train))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
def test_zeros_negative_accuracy():
    """Accuracy is unchanged when zeros denote the negative interactions.

    Every ``-1`` in the training data is replaced by ``0`` before fitting;
    the ROC AUC floors are the same as for the ``-1`` encoding.
    """
    # Work on a copy so the module-level ``train`` matrix is not modified
    # for the other tests that share it.
    zeroed_train = train.copy()
    zeroed_train.data[zeroed_train.data == -1] = 0

    model = LightFM()
    model.fit_partial(zeroed_train, epochs=10)

    train_predictions = model.predict(zeroed_train.row, zeroed_train.col)
    test_predictions = model.predict(test.row, test.col)

    assert roc_auc_score(zeroed_train.data, train_predictions) > 0.84
    assert roc_auc_score(test.data, test_predictions) > 0.76
def test_logistic_precision():
    """The default-loss model clears the precision and AUC floors."""
    model = LightFM()
    model.fit_partial(train, epochs=10)

    precision_train = precision_at_k(model, train, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    assert precision_train > 0.3
    assert precision_test > 0.03
    assert auc_train > 0.79
    assert auc_test > 0.74
def test_movielens_excessive_regularization():
    """With both alphas at 1.0 the ROC AUC stays below 0.6 on train and test."""
    heavy_penalty = dict(item_alpha=1.0, user_alpha=1.0)
    model = LightFM(no_components=10, **heavy_penalty)
    model.fit_partial(train, epochs=10)

    train_scores = model.predict(train.row, train.col)
    test_scores = model.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc < 0.6
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc < 0.6
def test_warp_precision():
    """WARP clears the precision and AUC floors."""
    model = LightFM(learning_rate=0.05, loss='warp')
    model.fit_partial(train, epochs=10)

    precision_train = precision_at_k(model, train, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    assert precision_train > 0.45
    assert precision_test > 0.07
    assert auc_train > 0.94
    assert auc_test > 0.9
def test_bpr_precision_high_interaction_values():
    """BPR clears the same floors when interaction values are multiplied by 5."""
    model = LightFM(learning_rate=0.05, loss='bpr', random_state=SEED)

    # Scale a copy so the shared ``train`` matrix is left untouched.
    scaled_train = train.copy()
    scaled_train.data = scaled_train.data * 5

    model.fit_partial(scaled_train, epochs=10)

    metrics = _get_metrics(model, scaled_train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.31
    assert test_precision > 0.04
    assert full_train_auc > 0.86
    assert full_test_auc > 0.84
def test_precision_at_k():
    """``evaluation.precision_at_k`` agrees with the ``_precision_at_k`` helper."""
    no_users, no_items = (10, 100)
    k = 10

    # Random interaction pattern with every stored value set to one.
    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    per_user_precision = evaluation.precision_at_k(model, train, k=k)
    # Average over the users that have at least one interaction.
    has_interactions = train.getnnz(axis=1) > 0
    mean_precision = per_user_precision[has_interactions].mean()

    expected_mean_precision = _precision_at_k(model, train, k)

    assert np.allclose(mean_precision, expected_mean_precision)
def test_zero_weights_accuracy():
    """With all sample weights zero, accuracy is no better than random.

    For each loss the train and test ROC AUC must both lie strictly
    between 0.45 and 0.55.
    """
    weights = train.copy()
    weights.data = np.zeros(train.getnnz(), dtype=np.float32)

    for loss in ('logistic', 'bpr', 'warp'):
        model = LightFM(loss=loss, random_state=SEED)
        model.fit_partial(train, sample_weight=weights, epochs=10)

        train_scores = model.predict(train.row, train.col)
        test_scores = model.predict(test.row, test.col)

        train_auc = roc_auc_score(train.data, train_scores)
        assert 0.45 < train_auc < 0.55
        test_auc = roc_auc_score(test.data, test_scores)
        assert 0.45 < test_auc < 0.55
def test_hogwild_accuracy():
    """Fitting and predicting on two threads clears the usual ROC AUC floors."""
    threads = 2

    model = LightFM()
    model.fit_partial(train, epochs=10, num_threads=threads)

    train_scores = model.predict(train.row, train.col, num_threads=threads)
    test_scores = model.predict(test.row, test.col, num_threads=threads)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
def test_bpr_precision_multithreaded():
    """BPR fitted on four threads clears the precision and AUC floors."""
    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit_partial(train, epochs=10, num_threads=4)

    precision_train = precision_at_k(model, train, 10)
    precision_test = precision_at_k(model, test, 10)
    auc_train = full_auc(model, train)
    auc_test = full_auc(model, test)

    assert precision_train > 0.31
    assert precision_test > 0.04
    assert auc_train > 0.86
    assert auc_test > 0.84
def test_return_self():
    """``fit_partial`` and ``fit`` both return the model they were called on."""
    shape = (10, 100)
    interactions = sp.coo_matrix(shape, dtype=np.int32)

    model = LightFM()

    partial_result = model.fit_partial(interactions)
    assert partial_result is model

    fit_result = model.fit(interactions)
    assert fit_result is model
def test_warp_precision_adadelta():
    """Seeded single-threaded WARP with adadelta clears the floors."""
    model = LightFM(learning_schedule='adadelta',
                    rho=0.95,
                    epsilon=0.000001,
                    loss='warp',
                    random_state=SEED)
    model.fit_partial(train, epochs=10, num_threads=1)

    metrics = _get_metrics(model, train, test)
    train_precision, test_precision, full_train_auc, full_test_auc = metrics

    assert train_precision > 0.45
    assert test_precision > 0.07
    assert full_train_auc > 0.94
    assert full_test_auc > 0.9
def test_random_state_advanced():
    """A further epoch of fitting changes the state of ``model.random_state``.

    Guards the expectation that seeding rand_r in Cython from the random
    state advances the generator.
    """
    model = LightFM(learning_rate=0.05, loss='warp', random_state=SEED)

    model.fit_partial(train, epochs=1)
    # Copy the state array so the comparison is against a snapshot.
    state_before = model.random_state.get_state()[1].copy()

    model.fit_partial(train, epochs=1)
    state_after = model.random_state.get_state()[1]

    assert not np.all(state_before == state_after)
def update(self, mat_interaction, mat_user_feature, mat_item_feature, epoch=3, threads=8):
    """Fit a new LightFM model on the given matrices and return it.

    The model is built from ``self.loss``, ``self.learning_rate``,
    ``self.item_alpha`` and ``self.user_alpha`` and is fitted for
    ``epoch`` epochs on ``threads`` threads, with verbose output off.
    """
    hyperparams = dict(loss=self.loss,
                       learning_rate=self.learning_rate,
                       item_alpha=self.item_alpha,
                       user_alpha=self.user_alpha)
    fitted = LightFM(**hyperparams)
    fitted.fit_partial(mat_interaction,
                       user_features=mat_user_feature,
                       item_features=mat_item_feature,
                       epochs=epoch,
                       num_threads=threads,
                       verbose=False)
    return fitted
class LightFMRecommender(object):
    """Top-N recommender backed by a LightFM model.

    Parameters
    ----------
    n_comp : number of latent components (``no_components``).
    loss : LightFM loss name.
    learning : LightFM learning schedule name.
    alpha : penalty used for both ``user_alpha`` and ``item_alpha``.
    """

    def __init__(self, n_comp=30, loss='warp-kos', learning='adagrad', alpha=1e-3):
        # Pass the constructor arguments through to the model; the
        # defaults reproduce the previously hard-coded configuration.
        self.model = LightFM(no_components=n_comp,
                             loss=loss,
                             learning_schedule=learning,
                             user_alpha=alpha,
                             item_alpha=alpha)

    def fit(self, urm, epochs=100):
        """Fit the model on ``urm``, one epoch per ``fit_partial`` call.

        ``urm`` must provide ``shape`` and ``getCSR()``; it is kept on the
        instance because the recommend methods use it to filter seen items.
        """
        self.urm = urm
        self.n_tracks = urm.shape[1]
        for _ in range(epochs):
            self.model.fit_partial(urm.getCSR(), epochs=1)

    def get_pred_row(self, user_id):
        """Return the predicted score of every track for ``user_id``."""
        return self.model.predict(user_id, np.arange(self.n_tracks))

    def s_recommend(self, user_id, nRec=10):
        """Return up to ``nRec`` unseen track indices, best score first."""
        scores = self.get_pred_row(user_id)
        top_items = np.argsort(-scores)
        recommended_items = self._filter_seen(user_id, top_items)
        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the tracks returned by the URM for ``user_id``."""
        seen = self.urm.extractTracksFromPlaylist(user_id)
        # np.isin replaces the deprecated np.in1d; ``ranking`` is 1-D so
        # the resulting mask is the same.
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def m_recommend(self, target_ids, nRec=10):
        """Return one ``s_recommend`` result per id in ``target_ids``."""
        return [self.s_recommend(tid, nRec) for tid in target_ids]
def test_user_supplied_features_accuracy():
    """Explicitly supplied feature matrices clear the usual ROC AUC floors."""
    train_features = dict(user_features=train_user_features,
                          item_features=train_item_features)
    test_features = dict(user_features=test_user_features,
                         item_features=test_item_features)

    model = LightFM()
    model.fit_partial(train, epochs=10, **train_features)

    train_scores = model.predict(train.row, train.col, **train_features)
    test_scores = model.predict(test.row, test.col, **test_features)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84
    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76