def test_tr_te_split():
    """Check the train/test partition of a small 4x4 ratings matrix.

    Six (user, item, rating) triples are assembled into a sparse CSR matrix
    and split with ``train_size=0.7`` and a fixed ``random_state``. The dense
    form of each half is compared against a hard-coded expectation in which
    the rating at (user 1, item 2) is absent from train and present in test.
    """
    user_idx = [0, 1, 0, 2, 1, 3]
    item_idx = [1, 2, 2, 0, 3, 2]
    ratings = [0.5, 1.0, 0.0, 1.0, 0.0, 1.0]
    interactions = csr_matrix(
        (ratings, (user_idx, item_idx)), shape=(4, 4))

    train_part, test_part = train_test_split(
        interactions, train_size=0.7, random_state=42)

    # One rating is expected to be masked out of the train array...
    expected_train = np.array([
        [0, 0.5, 0, 0],
        [0, 0, 0, 0],  # masked
        [1, 0, 0, 0],
        [0, 0, 1, 0],
    ])

    # ...and to show up, alone, in the test array.
    expected_test = np.array([
        [0, 0, 0, 0],
        [0, 0, 1, 0],  # held out
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])

    assert_array_almost_equal(train_part.toarray(), expected_train)
    assert_array_almost_equal(test_part.toarray(), expected_test)
from reclab.model_selection import train_test_split
from reclab.model_deployment import RecommenderDeployment
from reclab.collab import AlternatingLeastSquares
from scipy import sparse
from sklearn.preprocessing import LabelEncoder
# NOTE(review): ``sklearn.externals.joblib`` is no longer shipped by newer
# scikit-learn releases -- confirm the pinned version still provides it.
from sklearn.externals import joblib
from numpy.testing import assert_array_equal
import pytest
import warnings
import os

# Load data and split into train/test. This runs at import time, so both
# halves are shared by every test in the module.
lastfm = load_lastfm(cache=True, as_sparse=True)
train, test = train_test_split(lastfm.ratings, random_state=42)


class TestRecommenderDeployment(object):
    """Tests for ``RecommenderDeployment`` wrapped around a fitted ALS model."""

    def test_simple_deployment(self):
        """Deployment recommendations for user 0 must equal the estimator's."""
        # A deliberately small model (few factors/iterations) fit on the
        # module-level train split.
        als = AlternatingLeastSquares(factors=10, use_cg=False, iterations=3)
        als.fit(train)

        # Reference output: ask the estimator directly, passing the whole
        # test matrix.
        recs1 = als.recommend_for_user(0, test)

        # The deployment wrapper is handed only row 0 of the test matrix,
        # converted to a dense 1-D vector.
        deployment = RecommenderDeployment(estimator=als)
        recs2 = deployment.recommend_for_user(0, test[0, :].toarray()[0])
        assert_array_equal(recs1, recs2)

    def test_encoded_deployment(self):
        # Raw string identifiers for users and items; the two lists have the
        # same length (presumably aligned pairwise -- TODO confirm).
        users = ['adam', 'betty', 'betty', 'frank', 'frank']
        items = ["chili's", "chuy's", "chili's", "torchy's", "chuy's"]
# Pull the raw columns off the loaded dataset.
# NOTE(review): ``users`` (used below) is not assigned in this section --
# presumably it is bound earlier in the script; confirm.
items = lastfm.products
ratings = lastfm.ratings
artists = lastfm.artists

# We need to encode the users/items. If you use as_sparse=True, they come
# pre-encoded, but we will do it here manually for example.
user_le = LabelEncoder()
item_le = LabelEncoder()
users_transformed = user_le.fit_transform(users)
items_transformed = item_le.fit_transform(items)

# Build the sparse ratings matrix from the encoded indices, then split the
# data with a fixed seed so the partition is reproducible.
X = to_sparse_csr(u=users_transformed, i=items_transformed,
                  r=ratings, axis=0, dtype=np.float32)
train, test = train_test_split(X, train_size=0.75, random_state=42)

# #############################################################################
# Fit our model, make our deployment object
als = AlternatingLeastSquares(
    random_state=42, use_gpu=False, use_cg=True,
    iterations=50, factors=100)
als.fit(train)

# This is what you'd persist: the fitted estimator bundled with the two
# fitted label encoders.
# NOTE(review): user_missing_strategy="error" presumably makes the wrapper
# raise for users it has not seen -- confirm against RecommenderDeployment.
wrapper = RecommenderDeployment(
    estimator=als, user_missing_strategy="error",

    # These are optional, and can be None if you don't want transformed recs
    item_encoder=item_le, user_encoder=user_le)