def test_dynamic_updates(self):
    """
    TensorCoFi dynamic updates

    Train a TensorCoFi model and evaluate it. Then replace all of its user
    factors with factors recomputed on the fly by online_user_factors and
    check that the performance stays almost the same.
    """
    pyTF = PyTensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40)
    evaluator = Evaluator()
    tf = TensorCoFi(n_factors=2, n_iterations=100, c_lambda=0.05, c_alpha=40)
    df = pd.read_csv(resource_filename(testfm.__name__, "data/movielenshead.dat"),
                     sep="::", header=None, names=["user", "item", "rating", "date", "title"])
    training, testing = testfm.split.holdoutByRandom(df, 0.7)
    users = {user: list(entries) for user, entries in training.groupby("user")["item"]}
    tf.fit(training)
    map1 = evaluator.evaluate_model(tf, testing)  # MAP of the original model

    # Replace the original factors with factors computed on the fly:
    # iterate over each user in the training data and the items they have seen.
    for u, items in users.items():
        uid = tf.data_map[tf.get_user_column()][u]  # user id in the tf model
        iids = [tf.data_map[tf.get_item_column()][i] for i in items]  # item ids the user has seen
        new_factors = pyTF.online_user_factors(tf.factors[1], iids, p_param=40, lambda_param=0.05)
        tf.factors[0][uid, :] = new_factors  # replace the original factors with the new ones

    # Evaluate the model with the real-time updated factors.
    map2 = evaluator.evaluate_model(tf, testing)
    # The difference should be smaller than 20%.
    assert abs(map1[0] - map2[0]) < 0.2 * map1[0]
def test_user_model_update(self):
    pyTF = PyTensorCoFi()
    Y = np.array([[-1.0920831, -0.01566422],
                  [-0.8727925, 0.22307773],
                  [0.8753245, -0.80181429],
                  [-0.1338534, -0.51448172],
                  [-0.2144651, -0.96081265]])
    user_items = [1, 3, 4]
    res = pyTF.online_user_factors(Y, user_items, p_param=10, lambda_param=0.01)
    # Compare element-wise: the previous `a.all() == b.all()` check only
    # compared two booleans and would pass for almost any result.
    assert np.allclose(res, np.array([-1.18324547, -0.95040477])), "Results not equal"
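
# A minimal sketch of the math online_user_factors is expected to solve,
# assuming it implements the standard implicit-feedback ALS closed form
# (Hu, Koren & Volinsky, 2008):
#
#   x_u = (Y^T C_u Y + lambda * I)^-1  Y^T C_u p(u)
#
# where Y are the item factors, C_u weights the seen items by (1 + p_param)
# and p(u) is 1 for the items the user interacted with. This helper is
# illustrative only; it is not part of testfm and the actual implementation
# may differ.
def reference_online_user_factors(Y, user_items, p_param, lambda_param):
    n_items, n_factors = Y.shape
    c = np.ones(n_items)            # confidence: 1 for unseen items...
    c[user_items] = 1.0 + p_param   # ...1 + p_param for seen items
    p = np.zeros(n_items)
    p[user_items] = 1.0             # binary preference vector
    A = Y.T.dot(Y * c[:, None]) + lambda_param * np.eye(n_factors)
    return np.linalg.solve(A, Y.T.dot(c * p))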
def test_score_for_python_version(self):
    tf = PyTensorCoFi(n_factors=2)
    inp = [{"user": 10, "item": 100}, {"user": 10, "item": 110}, {"user": 12, "item": 120}]
    inp = pd.DataFrame(inp)
    tf.fit(inp)
    uid = tf.data_map[tf.get_user_column()][10]
    iid = tf.data_map[tf.get_item_column()][100]
    tf.factors[0][uid, 0] = 0
    tf.factors[0][uid, 1] = 1
    tf.factors[1][iid, 0] = 1
    tf.factors[1][iid, 1] = 5
    self.assertEqual(0 * 1 + 1 * 5, tf.get_score(10, 100))
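
# get_score is expected to be the dot product of the user and item factor
# rows, resolved through data_map -- the same expression the API test below
# builds as a lambda. A minimal sketch, assuming that layout:
def reference_get_score(model, user, item):
    uid = model.data_map[model.get_user_column()][user]
    iid = model.data_map[model.get_item_column()][item]
    return np.dot(model.factors[0][uid], model.factors[1][iid])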
def test_tensor_score_against_testfm(self):
    """
    [recommendation.models.TensorCoFi] Test TensorCoFi scores against the test.fm benchmark
    """
    evaluator = Evaluator()
    tc = TensorCoFi(n_users=len(self.df.user.unique()), n_items=len(self.df.item.unique()), n_factors=2)
    ptc = PyTensorCoFi()
    training, testing = testfm.split.holdoutByRandom(self.df, 0.9)
    items = training.item.unique()
    tc.fit(training)
    ptc.fit(training)
    tc_score = evaluator.evaluate_model(tc, testing, all_items=items)[0]
    ptc_score = evaluator.evaluate_model(ptc, testing, all_items=items)[0]
    assert abs(tc_score - ptc_score) < .15, \
        "TensorCoFi score is not close enough to the test.fm benchmark (%.3f != %.3f)" % (tc_score, ptc_score)
def test_ids_returns_for_python_version(self):
    tf = PyTensorCoFi(n_factors=2)
    inp = [{"user": 10, "item": 100}, {"user": 10, "item": 110}, {"user": 12, "item": 120}]
    inp = pd.DataFrame(inp)
    tf.fit(inp)
    # Test the ids in the map
    uid = tf.data_map[tf.get_user_column()][10]
    iid = tf.data_map[tf.get_item_column()][100]
    self.assertEqual(uid, 0)
    self.assertEqual(iid, 0)
    # Test the number of factors
    self.assertEqual(len(tf.factors[0][uid, :]), tf.number_of_factors)
    self.assertEqual(len(tf.factors[1][iid, :]), tf.number_of_factors)
def test_nogil_against_std_05(self):
    """
    [EVALUATOR] Test the measure differences between the Python and Cython
    implementations with 5% of the data used for training
    """
    df = pd.read_csv(resource_filename(testfm.__name__, "data/movielenshead.dat"),
                     sep="::", header=None, names=["user", "item", "rating", "date", "title"])
    model = PyTensorCoFi()
    ev = Evaluator(False)
    ev_nogil = Evaluator()
    results = {"implementation": [], "measure": []}
    for i in range(SAMPLE_SIZE_FOR_TEST):
        training, testing = testfm.split.holdoutByRandom(df, 0.05)
        model.fit(training)
        results["implementation"].append("Cython")
        results["measure"].append(ev_nogil.evaluate_model(model, testing)[0])
        results["implementation"].append("Python")
        results["measure"].append(ev.evaluate_model(model, testing)[0])
    #####################
    # ANOVA over result #
    #####################
    assert_equality_in_groups(results, alpha=ALPHA, groups="implementation", test_var="measure")
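
# assert_equality_in_groups presumably runs a one-way ANOVA over the grouped
# measures. A minimal sketch of the same check with scipy; illustrative only,
# not testfm's implementation:
from scipy.stats import f_oneway

def anova_groups_equal(results, alpha):
    cython = [m for impl, m in zip(results["implementation"], results["measure"]) if impl == "Cython"]
    python = [m for impl, m in zip(results["implementation"], results["measure"]) if impl == "Python"]
    _, p_value = f_oneway(cython, python)
    return p_value > alpha  # fail to reject the null: the groups look equal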
def test_recommendation_with_testfm(self):
    """
    [recommendation.api.GetRecommendation] Test recommendation with testfm
    """
    data = np.array(zip(*map(lambda x: (x["user_id"] - 1, x["item_id"] - 1, 1.),
                             Inventory.objects.all().values("user_id", "item_id"))), dtype=np.float32)
    users, items = zip(*Inventory.objects.all().values_list("user_id", "item_id"))
    df = pd.DataFrame({"user": pd.Series(users), "item": pd.Series(items)}, dtype=np.float32)
    evaluator = Evaluator(use_multi_threading=False)
    tensor = TensorCoFi.get_model_from_cache()
    tfm_tensor = PyTensorCoFi()
    tfm_tensor.data_map = tensor.data_map
    tfm_tensor.users_size = lambda: tensor.users_size()
    tfm_tensor.items_size = lambda: tensor.items_size()
    tfm_tensor.get_score = lambda user, item: np.dot(
        tfm_tensor.factors[0][tfm_tensor.data_map[tfm_tensor.get_user_column()][user]],
        tfm_tensor.factors[1][tfm_tensor.data_map[tfm_tensor.get_item_column()][item]].transpose())
    tfm_tensor.train(data.transpose())
    items = df.item.unique()
    t = evaluator.evaluate_model(tensor, df, all_items=items, non_relevant_count=100)
    tfm = evaluator.evaluate_model(tfm_tensor, df, all_items=items, non_relevant_count=100)
    assert abs(t[0] - tfm[0]) < 0.15, \
        "Difference between the testfm implementation and frappe is too high (%f, %f)" % (t[0], tfm[0])
def test_fit_for_python_version(self):
    tf = PyTensorCoFi(n_factors=2)
    tf.fit(self.df)
    # User and item factors are row vectors
    self.assertEqual(len(self.df.user.unique()), tf.factors[0].shape[0])
    self.assertEqual(len(self.df.item.unique()), tf.factors[1].shape[0])
from testfm.evaluation.evaluator import Evaluator
from pkg_resources import resource_filename
from testfm.evaluation.parameter_tuning import ParameterTuning

if __name__ == "__main__":
    # Build the evaluator before loading the data to save memory
    # (the fork of the worker processes takes place here).
    eval = Evaluator()

    # Prepare the data
    df = pd.read_csv(resource_filename(testfm.__name__, "data/movielenshead.dat"),
                     sep="::", header=None, names=["user", "item", "rating", "date", "title"])
    print df.head()
    training, testing = testfm.split.holdoutByRandom(df, 0.9)

    print "Tuning the parameters."
    tr, validation = testfm.split.holdoutByRandom(training, 0.7)
    pt = ParameterTuning()
    pt.set_max_iterations(100)
    pt.set_z_value(90)
    tf_params = pt.get_best_params(TensorCoFi, tr, validation)
    print tf_params

    tf = TensorCoFi()
    tf.set_params(**tf_params)
    tf.fit(training)
    print tf.get_name().ljust(50),
    print eval.evaluate_model(tf, testing, all_items=training.item.unique())
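
    # A follow-up sketch: run the pure-Python implementation with the same
    # tuned parameters so the two scores can be compared side by side, as the
    # tests above do. This assumes the tuned parameter names match
    # PyTensorCoFi's constructor arguments and that it exposes get_name()
    # like TensorCoFi; both are assumptions here.
    ptf = PyTensorCoFi(**tf_params)
    ptf.fit(training)
    print ptf.get_name().ljust(50),
    print eval.evaluate_model(ptf, testing, all_items=training.item.unique())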