def test_sample_weight():
    """Check how ``LightFM.fit`` validates the ``sample_weight`` argument.

    Malformed weights must be rejected with ``ValueError``, weights built
    from the same row/column arrays as the interactions must be accepted,
    and the "warp-kos" loss must raise ``NotImplementedError`` when any
    weights are supplied.
    """
    model = LightFM()

    train = sp.coo_matrix(np.array([[0, 1], [0, 1]]))

    # Wrong number of weights: same shape as ``train`` but no stored entries.
    sample_weight = sp.coo_matrix(np.zeros((2, 2)))
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # Wrong shape: one row instead of two.
    sample_weight = sp.coo_matrix(np.zeros(2))
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # Wrong order of entries: same coordinates as ``train``, reversed.
    # The shape is pinned so that only the entry order differs.
    sample_weight = sp.coo_matrix(
        (train.data, (train.row[::-1], train.col[::-1])),
        shape=train.shape,
    )
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # A dense array rather than a sparse matrix.
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=np.zeros(3))

    # Weights aligned entry-for-entry with the interactions are accepted.
    sample_weight = sp.coo_matrix((train.data, (train.row, train.col)))
    model.fit(train, sample_weight=sample_weight)

    model = LightFM(loss="warp-kos")

    with pytest.raises(NotImplementedError):
        model.fit(train, sample_weight=np.ones(1))
def test_sample_weight():
    """Check how ``LightFM.fit`` validates the ``sample_weight`` argument.

    Malformed weights must be rejected with ``ValueError``, weights built
    from the same row/column arrays as the interactions must be accepted,
    and the "warp-kos" loss must raise ``NotImplementedError`` when any
    weights are supplied.
    """
    model = LightFM()

    train = sp.coo_matrix(np.array([[0, 1], [0, 1]]))

    # Wrong number of weights: same shape as ``train`` but no stored entries.
    sample_weight = sp.coo_matrix(np.zeros((2, 2)))
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # Wrong shape: one row instead of two.
    sample_weight = sp.coo_matrix(np.zeros(2))
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # Wrong order of entries: same coordinates as ``train``, reversed.
    # The shape is pinned so that only the entry order differs.
    sample_weight = sp.coo_matrix(
        (train.data, (train.row[::-1], train.col[::-1])),
        shape=train.shape,
    )
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=sample_weight)

    # A dense array rather than a sparse matrix.
    with pytest.raises(ValueError):
        model.fit(train, sample_weight=np.zeros(3))

    # Weights aligned entry-for-entry with the interactions are accepted.
    sample_weight = sp.coo_matrix((train.data, (train.row, train.col)))
    model.fit(train, sample_weight=sample_weight)

    model = LightFM(loss="warp-kos")

    with pytest.raises(NotImplementedError):
        model.fit(train, sample_weight=np.ones(1))
def test_warp_few_items():
    """Fit a WARP model when ``max_sampled`` (10) exceeds the item count (2).

    The test passes as long as fitting completes without raising.
    """
    shape = (1000, 2)  # (users, items)
    interactions = sp.rand(*shape, format="csr", random_state=42)

    LightFM(loss="warp", max_sampled=10).fit(interactions)
def test_warp_few_items():
    """Fit a WARP model when ``max_sampled`` (10) exceeds the item count (2).

    The test passes as long as fitting completes without raising.
    """
    shape = (1000, 2)  # (users, items)
    interactions = sp.rand(*shape, format="csr", random_state=42)

    LightFM(loss="warp", max_sampled=10).fit(interactions)
def test_exception_on_divergence():
    """Fitting with an extreme learning rate (1e7) must raise ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    diverging_model = LightFM(learning_rate=1e7, loss="warp")

    with pytest.raises(ValueError):
        diverging_model.fit(interactions, epochs=10)
def test_state_reset():
    """Check that calling ``fit`` again resets the user gradient state.

    After one epoch the mean of ``user_embedding_gradients`` must exceed
    1.0; after a subsequent zero-epoch ``fit`` every entry must equal 1.0.
    """
    estimator = LightFM(random_state=SEED)

    estimator.fit(train, epochs=1)
    mean_after_training = np.mean(estimator.user_embedding_gradients)
    assert mean_after_training > 1.0

    # Zero epochs: no training happens, so only the reset is observable.
    estimator.fit(train, epochs=0)
    gradients_after_refit = estimator.user_embedding_gradients
    assert np.all(gradients_after_refit == 1.0)
def test_state_reset():
    """Check that calling ``fit`` again resets the user gradient state.

    After one epoch the mean of ``user_embedding_gradients`` must exceed
    1.0; after a subsequent zero-epoch ``fit`` every entry must equal 1.0.
    """
    estimator = LightFM(random_state=SEED)

    estimator.fit(train, epochs=1)
    mean_after_training = np.mean(estimator.user_embedding_gradients)
    assert mean_after_training > 1.0

    # Zero epochs: no training happens, so only the reset is observable.
    estimator.fit(train, epochs=0)
    gradients_after_refit = estimator.user_embedding_gradients
    assert np.all(gradients_after_refit == 1.0)
def test_movielens_accuracy_fit():
    """Fit for 10 epochs and check ROC AUC on the train and test splits.

    Uses the module-level ``train`` and ``test`` matrices; the thresholds
    are 0.84 for train and 0.76 for test.
    """
    fitted = LightFM(random_state=SEED)
    fitted.fit(train, epochs=10)

    train_scores = fitted.predict(train.row, train.col)
    test_scores = fitted.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
def test_exception_on_divergence():
    """Fitting with an extreme learning rate (1e7) must raise ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    diverging_model = LightFM(learning_rate=1e7, loss="warp")

    with pytest.raises(ValueError):
        diverging_model.fit(interactions, epochs=10)
def test_movielens_accuracy_fit():
    """Fit for 10 epochs and check ROC AUC on the train and test splits.

    Uses the module-level ``train`` and ``test`` matrices; the thresholds
    are 0.84 for train and 0.76 for test.
    """
    fitted = LightFM(random_state=SEED)
    fitted.fit(train, epochs=10)

    train_scores = fitted.predict(train.row, train.col)
    test_scores = fitted.predict(test.row, test.col)

    train_auc = roc_auc_score(train.data, train_scores)
    assert train_auc > 0.84

    test_auc = roc_auc_score(test.data, test_scores)
    assert test_auc > 0.76
def test_nan_interactions():
    """Fitting on interactions whose stored values are all NaN must raise
    ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)
    # Overwrite every stored value with NaN in place.
    interactions.data.fill(np.nan)

    warp_model = LightFM(loss="warp")

    with pytest.raises(ValueError):
        warp_model.fit(interactions)
def test_nan_interactions():
    """Fitting on interactions whose stored values are all NaN must raise
    ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)
    # Overwrite every stored value with NaN in place.
    interactions.data.fill(np.nan)

    warp_model = LightFM(loss="warp")

    with pytest.raises(ValueError):
        warp_model.fit(interactions)
def test_nan_features():
    """Fitting with NaN-valued user and item features must raise
    ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    # Identity feature matrix with every stored value replaced by NaN. The
    # same matrix serves both sides because the user and item counts match.
    nan_features = sp.identity(n_items)
    nan_features.data.fill(np.nan)

    warp_model = LightFM(loss="warp")

    with pytest.raises(ValueError):
        warp_model.fit(
            interactions,
            epochs=10,
            user_features=nan_features,
            item_features=nan_features,
        )
def test_nan_features():
    """Fitting with NaN-valued user and item features must raise
    ``ValueError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    # Identity feature matrix with every stored value replaced by NaN. The
    # same matrix serves both sides because the user and item counts match.
    nan_features = sp.identity(n_items)
    nan_features.data.fill(np.nan)

    warp_model = LightFM(loss="warp")

    with pytest.raises(ValueError):
        warp_model.fit(
            interactions,
            epochs=10,
            user_features=nan_features,
            item_features=nan_features,
        )
def test_overflow_predict():
    """Predicting for a user id far larger than the number of users must
    raise ``ValueError`` or ``OverflowError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    warp_model = LightFM(loss="warp")
    warp_model.fit(interactions)

    huge_user_id = 1231241241231241414
    item_ids = np.arange(n_items)
    user_features = sp.identity(n_users)

    with pytest.raises((ValueError, OverflowError)):
        scores = warp_model.predict(
            huge_user_id,
            item_ids,
            user_features=user_features,
        )
        print(scores)
def test_coo_with_duplicate_entries():
    """Fit each ranking loss on a COO matrix containing duplicate entries.

    The test passes as long as fitting completes for every loss.
    """
    # Calling .tocsr on a COO matrix with duplicate entries
    # changes its data arrays in-place, leading to out-of-bounds
    # array accesses in the WARP code.
    # Reported in https://github.com/lyst/lightfm/issues/117.
    n_rows, n_cols = 1000, 100
    interactions = sp.random(n_rows, n_cols)
    interactions.data[:] = 1

    # Duplicate entries in the COO matrix: append the first 1000 entries of
    # each coordinate array to itself.
    for attribute in ("data", "row", "col"):
        values = getattr(interactions, attribute)
        setattr(interactions, attribute, np.concatenate((values, values[:1000])))

    for loss in ("warp", "bpr", "warp-kos"):
        LightFM(loss=loss).fit(interactions)
def test_return_self():
    """Both ``fit_partial`` and ``fit`` must return the model instance."""
    shape = (10, 100)  # (users, items)
    empty_interactions = sp.coo_matrix(shape, dtype=np.int32)

    estimator = LightFM()

    # Same order as before: fit_partial first, then fit.
    for fit_method in (estimator.fit_partial, estimator.fit):
        assert fit_method(empty_interactions) is estimator
def test_return_self():
    """Both ``fit_partial`` and ``fit`` must return the model instance."""
    shape = (10, 100)  # (users, items)
    empty_interactions = sp.coo_matrix(shape, dtype=np.int32)

    estimator = LightFM()

    # Same order as before: fit_partial first, then fit.
    for fit_method in (estimator.fit_partial, estimator.fit):
        assert fit_method(empty_interactions) is estimator
def test_overflow_predict():
    """Predicting for a user id far larger than the number of users must
    raise ``ValueError`` or ``OverflowError``."""
    n_users = n_items = 1000
    interactions = sp.rand(n_users, n_items, format="csr", random_state=42)

    warp_model = LightFM(loss="warp")
    warp_model.fit(interactions)

    huge_user_id = 1231241241231241414
    item_ids = np.arange(n_items)
    user_features = sp.identity(n_users)

    with pytest.raises((ValueError, OverflowError)):
        scores = warp_model.predict(
            huge_user_id,
            item_ids,
            user_features=user_features,
        )
        print(scores)
def test_coo_with_duplicate_entries():
    """Fit each ranking loss on a COO matrix containing duplicate entries.

    The test passes as long as fitting completes for every loss in the loop.
    """
    # Calling .tocsr on a COO matrix with duplicate entries
    # changes its data arrays in-place, leading to out-of-bounds
    # array accesses in the WARP code.
    # Reported in https://github.com/lyst/lightfm/issues/117.
    rows, cols = (1000, 100)
    mat = sp.random(rows, cols)
    mat.data[:] = 1

    # Duplicate entries in the COO matrix
    mat.data = np.concatenate((mat.data, mat.data[:1000]))
    mat.row = np.concatenate((mat.row, mat.row[:1000]))
    mat.col = np.concatenate((mat.col, mat.col[:1000]))

    for loss in ("warp", "bpr", "warp-kos"):
        # Use the loop variable so that every listed loss is exercised,
        # not just "warp".
        model = LightFM(loss=loss)
        model.fit(mat)
def model(self):
    """Train a model, save it to ``TEST_FILE_PATH`` and return it.

    NOTE(review): presumably a pytest fixture on a test class; the
    decorator and enclosing class are not visible here, so confirm.
    """
    # Train and persist a model
    trained = LightFM(random_state=10)
    trained.fit(movielens["train"], epochs=5, num_threads=4)
    trained.save(TEST_FILE_PATH)

    return trained