def test_simple_fit(self):
    """Check seeded initialization is reproducible, then fit one model.

    Two estimators are built with identical parameters (including the
    same ``random_state``). The factor matrices produced by
    ``_make_estimator`` must match between them. Afterwards only the
    first estimator is fit, and its reported user/item counts are
    compared against the shape of ``train``.
    """
    logger = logging.getLogger("ALS_test_simple_fit")
    als_kwargs = dict(random_state=1, use_gpu=False,
                      use_cg=True, iterations=5)

    logger.debug("\nPre-instantiate clf1")
    first = AlternatingLeastSquares(**als_kwargs)

    logger.debug("Pre-instantiate clf2")
    second = AlternatingLeastSquares(**als_kwargs)

    # PRE-FIT: the same seed should give the same initial matrices
    logger.debug("Making estimator with clf1")
    first_est = first._make_estimator(train)

    logger.debug("Making estimator with clf2")
    second_est = second._make_estimator(train)

    for factor_name in ('item_factors', 'user_factors'):
        assert_array_almost_equal(getattr(first_est, factor_name),
                                  getattr(second_est, factor_name))

    # Only the first model is fit here. Per the original author's note,
    # post-fit equality would only be expected when use_cg is False.
    logger.debug("Fitting first estimator")
    first.fit(train)

    # The fitted model must report the dimensions of the training data
    assert first.n_items() == train.shape[1]
    assert first.n_users() == train.shape[0]
def test_simple_deployment(self):
    """Compare estimator recommendations with the deployment wrapper's.

    Recommendations for user 0 obtained directly from the fitted
    estimator (given the ``test`` matrix) must equal those returned by
    a ``RecommenderDeployment`` given that user's dense ratings row.
    """
    model = AlternatingLeastSquares(factors=10, use_cg=False,
                                    iterations=3)
    model.fit(train)
    expected = model.recommend_for_user(0, test)

    deployed = RecommenderDeployment(estimator=model)
    # The wrapper is handed a single dense vector: row 0 of ``test``
    first_user_row = test[0, :].toarray()[0]
    actual = deployed.recommend_for_user(0, first_user_row)

    assert_array_equal(expected, actual)
def test_recommend_single(self):
    """Check single-user recommendations, including an oversized ``n``.

    After the shared single-recommend assertions, more recommendations
    than there are items are requested (without filtering previously
    rated items); the result is expected to hold exactly one entry per
    item.
    """
    estimator = AlternatingLeastSquares(
        random_state=1, use_gpu=False, use_cg=True, iterations=5)
    estimator.fit(train)

    # Assertions shared with the other recommender tests
    self._single_recommend_assertions(estimator, train, test)

    # ALS-specific: asking for n_items + 5 should cap at n_items
    item_count = test.shape[1]
    oversized = estimator.recommend_for_user(
        0, test, n=item_count + 5, filter_previously_rated=False)
    assert len(oversized) == item_count, len(oversized)
def test_random_cv_fit_recommend(self):
    """Run a randomized CV search and check the recommend-param warning.

    A ``RandomizedRecommenderSearchCV`` is configured with
    ``recommend_params={"filter_previously_rated": True}``. Fitting it
    (through ``self._search_fit_assert``) is expected to emit at least
    one warning whose message mentions ``"filter_previously_rated"``.
    """
    # Create the estimator
    clf = AlternatingLeastSquares(random_state=42, use_cg=True,
                                  iterations=5, factors=15)

    # These are the hyper parameters we'll use
    hyper = {
        'factors': randint(5, 6),
        'regularization': uniform(0.01, 0.05)
    }

    # Make our cv
    cv = KFold(n_splits=2, random_state=1, shuffle=True)
    search = RandomizedRecommenderSearchCV(
        estimator=clf, cv=cv, random_state=42,
        param_distributions=hyper, n_jobs=1,
        n_iter=2, recommend_params={"filter_previously_rated": True},
        verbose=1, scoring='ndcg')

    # While we're fitting, assert we get a warning about the
    # "filter_previously_rated" key in the fit params...
    with warnings.catch_warnings(record=True) as w:
        # Without this, a warning already raised from the same location
        # earlier in the session can be suppressed by the warning
        # registry, which would make the assertions below depend on
        # test execution order.
        warnings.simplefilter("always")
        self._search_fit_assert(search)  # should warn in fit

    # Verify...
    assert w, "expected at least one warning during the search fit"
    assert any("filter_previously_rated" in str(warn.message)
               for warn in w)
def test_recommend_all(self):
    """Fit an ALS model and run the shared all-users assertions."""
    estimator = AlternatingLeastSquares(
        random_state=1, use_gpu=False, use_cg=True, iterations=5)

    # Use whatever ``fit`` returns as the model under test
    fitted = estimator.fit(train)

    # Recommend for ALL users and check the results
    self._all_recommend_assertions(fitted, test)
def test_random_val_fit(self):
    """Run a randomized search that uses a validation set, not CV.

    The search is built with ``cv=None`` and the ``test`` matrix is
    passed as the validation set to ``self._search_fit_assert``.
    """
    base_estimator = AlternatingLeastSquares(
        random_state=42, use_cg=True, iterations=5, factors=10)

    # Distributions the search samples hyper-parameters from
    param_space = {
        'factors': randint(5, 6),
        'regularization': uniform(0.01, 0.05)
    }

    # No cross-validation splitter: a validation set is supplied below
    searcher = RandomizedRecommenderSearchCV(
        estimator=base_estimator,
        cv=None,
        random_state=42,
        param_distributions=param_space,
        n_jobs=1,
        n_iter=2,
        verbose=1)

    self._search_fit_assert(searcher, val=test)
def test_encoded_deployment(self):
    """Exercise ``RecommenderDeployment`` with label encoders.

    Covers, in order:

    * constructor validation (bad encoders, non-iterable
      ``filter_items``, unknown ``user_missing_strategy``);
    * equality of encoded recommendations with the raw estimator's
      output for array input, dict input and ``return_scores=True``;
    * a dump/load round trip through ``joblib``;
    * behavior after removing the user encoder, then the item encoder;
    * the ``"warn"`` missing-user strategy, which is expected to warn
      and return empty results.

    The pickle is written inside a private temporary directory rather
    than to ``model.pkl`` in the current working directory. Cleanup
    therefore cannot raise ``FileNotFoundError`` (which would mask a
    failure in ``joblib.dump``) and the test cannot clobber or leave
    behind a file in the working directory.
    """
    # Local imports: only the persistence check below needs these
    import shutil
    import tempfile

    users = ['adam', 'betty', 'betty', 'frank', 'frank']
    items = ["chili's", "chuy's", "chili's", "torchy's", "chuy's"]
    visits = [2, 4, 1, 8, 5]

    # Encode the labels
    user_le = LabelEncoder()
    item_le = LabelEncoder()
    users = user_le.fit_transform(users)
    items = item_le.fit_transform(items)

    # Make the matrix (don't bother splitting for this example)
    R = sparse.csr_matrix((visits, (users, items)), shape=(3, 3))

    def first_row():
        # A fresh dense copy of row 0 for every call, exactly as the
        # inline expression produced it before.
        return R[0, :].toarray()[0]

    als = AlternatingLeastSquares(factors=2, use_cg=False, iterations=5)
    als.fit(R)
    recs1 = als.recommend_for_user(0, R)

    # Test failing constructors first
    with pytest.raises(TypeError):
        RecommenderDeployment(estimator=als, item_encoder='bad_encoder',
                              user_encoder=user_le,
                              user_missing_strategy='error')

    with pytest.raises(TypeError):
        RecommenderDeployment(estimator=als, item_encoder=item_le,
                              user_encoder='bad_encoder',
                              user_missing_strategy='error')

    with pytest.raises(TypeError):
        RecommenderDeployment(estimator=als, item_encoder=item_le,
                              user_encoder=user_le,
                              filter_items='non-iterable',
                              user_missing_strategy='error')

    with pytest.raises(ValueError):
        RecommenderDeployment(estimator=als, item_encoder=item_le,
                              user_encoder=user_le,
                              user_missing_strategy='bad-strategy')

    # "deploy" with both encoders
    deployment = RecommenderDeployment(estimator=als,
                                       item_encoder=item_le,
                                       user_encoder=user_le,
                                       user_missing_strategy='error')

    recs2 = deployment.recommend_for_user('adam', first_row())

    # Show that the encoded recs are the same as before
    assert_array_equal(recs1, item_le.transform(recs2))

    # What if we pass a dict?
    recs3 = deployment.recommend_for_user('adam', {"chili's": 2})
    assert_array_equal(recs1, item_le.transform(recs3))

    # And if we want scores?
    recs4, scores = deployment.recommend_for_user(
        'adam', first_row(), return_scores=True)
    assert_array_equal(recs1, item_le.transform(recs4))
    assert scores.shape[0] == recs4.shape[0]

    # Test the persistence model
    tmp_dir = tempfile.mkdtemp()
    try:
        pkl_location = os.path.join(tmp_dir, "model.pkl")
        joblib.dump(deployment, pkl_location, compress=3)
        loaded = joblib.load(pkl_location)
        recs5 = loaded.recommend_for_user('adam', first_row())
        assert_array_equal(recs1, item_le.transform(recs5))
    finally:
        # Remove the whole directory; never raises if the dump failed
        # before any file was created.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # If we set the user_encoder to None, show we get the same
    # recommendations with a non-encoded user ID
    deployment.user_encoder = None
    recs_no_encode = deployment.recommend_for_user(0, first_row())
    assert_array_equal(recs1, item_le.transform(recs_no_encode))

    # Oh, and now we fail with a TypeError if we pass a string since it
    # never gets transformed
    with pytest.raises(TypeError):
        deployment.recommend_for_user('adam', first_row())

    # What if we give it a user that doesn't exist? Or a negative one?
    with pytest.raises(KeyError):
        deployment.recommend_for_user(9, first_row())
    with pytest.raises(KeyError):
        deployment.recommend_for_user(-1, first_row())

    # Show we fail with improper dims
    with pytest.raises(ValueError):
        deployment.recommend_for_user(0, [2.])

    # Now set the item encoder to none
    deployment.item_encoder = None
    recs_no_encode_anything = deployment.recommend_for_user(0, {0: 2})
    assert_array_equal(recs1, recs_no_encode_anything)

    # Set it to "warn" and try again
    deployment.user_missing_strategy = "warn"
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        # execute the fxn
        recs = deployment.recommend_for_user(9, first_row())
        assert len(w)  # assert there's something there...
        assert recs.shape[0] == 0

        # do the same with return_scores (kept inside the recording
        # context so the expected warning is captured as well)
        recs, scores = deployment.recommend_for_user(
            9, first_row(), return_scores=True)
        assert recs.shape[0] == scores.shape[0] == 0
# pre-encoded, but we will do it here manually for example. user_le = LabelEncoder() item_le = LabelEncoder() users_transformed = user_le.fit_transform(users) items_transformed = item_le.fit_transform(items) # Split the data X = to_sparse_csr(u=users_transformed, i=items_transformed, r=ratings, axis=0, dtype=np.float32) train, test = train_test_split(X, train_size=0.75, random_state=42) # ############################################################################# # Fit our model, make our deployment object als = AlternatingLeastSquares( random_state=42, use_gpu=False, use_cg=True, iterations=50, factors=100) als.fit(train) # This is what you'd persist: wrapper = RecommenderDeployment( estimator=als, user_missing_strategy="error", # These are optional, and can be None if you don't want transformed recs item_encoder=item_le, user_encoder=user_le) # ############################################################################# # Generate predictions for a fan of classic rock def top_listener(of): musician_id = [i for i, v in artists.items() if v == of][0]
def test_serialize(self):
    """Run the shared serialization assertions on an ALS estimator."""
    als_params = dict(random_state=1, use_gpu=False,
                      use_cg=True, iterations=5)
    estimator = AlternatingLeastSquares(**als_params)

    # The estimator is passed as constructed; this test does not call
    # ``fit`` itself.
    self._serialization_assertions(estimator, train, test)
def test_complex_fit(self):
    """Show we can fit a comparatively complex model.

    Previously the estimator was only constructed and immediately
    discarded, so nothing was ever fit despite the test's name and
    comment. The model is now actually fit on ``train``.
    """
    clf = AlternatingLeastSquares(random_state=42, use_cg=True,
                                  iterations=15, factors=150,
                                  regularization=0.01, num_threads=1)

    # The test passes as long as fitting completes without raising
    clf.fit(train)
from reclab.collab import AlternatingLeastSquares as ALS
import numpy as np

# #############################################################################
# Load the data, then split it into train and test sets
lastfm = load_lastfm(cache=True, as_sparse=True)
train, test = train_test_split(lastfm.ratings, random_state=42)

print("Train:")
print(repr(train))
print("\nTest:")
print(repr(test))

# #############################################################################
# Build the ALS model and fit it on the training split
als = ALS(
    random_state=1,
    use_gpu=False,
    use_cg=True,
    iterations=25,
    factors=100,
)
als.fit(train)

# #############################################################################
# Pick the user with the most "Mayhem" listens in the training data
artists = lastfm.artists
mayhem_id = np.where(artists == "Mayhem")[0][0]

# Dense, flattened copy of the Mayhem column of ``train``
mayhem_listens = train[:, mayhem_id].toarray().ravel()

# Indices sorted by descending listen count; the first one is our user
mayhem_listeners = np.argsort(-mayhem_listens)
mayhem_appreciator = mayhem_listeners[0]

# Report that user's Mayhem listen count and five most-listened artists
listen_count = int(train[mayhem_appreciator, mayhem_id])
listen_order = np.argsort(-train[mayhem_appreciator, :].toarray())
top_five = str(artists[listen_order][0, :5])

report = ("\nUser #%i listened to Mayhem %i times.\nThis user's top 5 "
          "most-listened-to artists are:\n%s")
print(report % (mayhem_appreciator, listen_count, top_five))