예제 #1
0
    def test_dynamic_updates(self):
        """
        TensorCoFi dynamic update

        Fit a TensorCoFi model and evaluate it. Then overwrite the factor row of
        every training user with the vector computed by online_user_factors and
        evaluate again: the two scores must differ by less than 20% of the first.
        """
        online_model = PyTensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40)

        evaluator = Evaluator()
        tf = TensorCoFi(n_factors=2, n_iterations=100, c_lambda=0.05, c_alpha=40)
        data_path = resource_filename(testfm.__name__, "data/movielenshead.dat")
        df = pd.read_csv(data_path, sep="::", header=None,
                         names=["user", "item", "rating", "date", "title"])
        training, testing = testfm.split.holdoutByRandom(df, 0.7)

        # Items each user has in the training split, keyed by the raw user value.
        seen_items = {}
        for user, entries in training.groupby("user")["item"]:
            seen_items[user] = list(entries)

        tf.fit(training)
        # Evaluation result of the originally trained model.
        result_before = evaluator.evaluate_model(tf, testing)

        # Replace the trained user factors with factors computed on the fly.
        for user, items in seen_items.items():
            # Row of this user inside the model's factor matrix.
            user_row = tf.data_map[tf.get_user_column()][user]
            # Rows of the items this user has seen.
            item_rows = []
            for item in items:
                item_rows.append(tf.data_map[tf.get_item_column()][item])
            tf.factors[0][user_row, :] = online_model.online_user_factors(
                tf.factors[1], item_rows, p_param=40, lambda_param=0.05)

        # Evaluate the model again, now carrying the recomputed user factors.
        result_after = evaluator.evaluate_model(tf, testing)
        # The difference should be smaller than 20% of the original score.
        baseline = result_before[0]
        assert abs(baseline - result_after[0]) < 0.2 * baseline
예제 #2
0
 def test_user_model_update(self):
     """
     Check online_user_factors against a precomputed factor vector.

     A fixed 5x2 factor matrix and the item indices [1, 3, 4] are passed to
     PyTensorCoFi.online_user_factors; the returned values must match the
     expected two-element vector element by element.
     """
     pyTF = PyTensorCoFi()
     Y = np.array([[-1.0920831, -0.01566422], [-0.8727925, 0.22307773], [0.8753245, -0.80181429],
                   [-0.1338534, -0.51448172], [-0.2144651, -0.96081265]])
     user_items = [1, 3, 4]
     res = pyTF.online_user_factors(Y, user_items, p_param=10, lambda_param=0.01)
     expected = np.array([-1.18324547, -0.95040477])
     # Compare values element-wise. The previous `expected.all() == res.all()`
     # only compared two booleans and passed for any result without zeros.
     # ravel() lets a row/column-shaped result be compared with the flat vector.
     assert np.allclose(np.asarray(res).ravel(), expected), "Results not equal"
예제 #3
0
 def test_user_model_update(self):
     """
     Check online_user_factors against a precomputed factor vector.

     A fixed 5x2 factor matrix and the item indices [1, 3, 4] are passed to
     PyTensorCoFi.online_user_factors; the returned values must match the
     expected two-element vector element by element.
     """
     pyTF = PyTensorCoFi()
     Y = np.array([[-1.0920831, -0.01566422], [-0.8727925, 0.22307773],
                   [0.8753245, -0.80181429], [-0.1338534, -0.51448172],
                   [-0.2144651, -0.96081265]])
     user_items = [1, 3, 4]
     res = pyTF.online_user_factors(Y,
                                    user_items,
                                    p_param=10,
                                    lambda_param=0.01)
     expected = np.array([-1.18324547, -0.95040477])
     # Compare values element-wise. The previous `expected.all() == res.all()`
     # only compared two booleans and passed for any result without zeros.
     # ravel() lets a row/column-shaped result be compared with the flat vector.
     assert np.allclose(np.asarray(res).ravel(),
                        expected), "Results not equal"
예제 #4
0
    def test_dynamic_updates(self):
        """
        TensorCoFi dynamic update

        Train a TensorCoFi model and score it, then replace the factor row of
        each training user with the output of online_user_factors and score the
        model again. The second score has to stay within 20% of the first one.
        """
        online_model = PyTensorCoFi(n_factors=20,
                                    n_iterations=5,
                                    c_lambda=0.05,
                                    c_alpha=40)

        evaluator = Evaluator()
        tf = TensorCoFi(n_factors=2,
                        n_iterations=100,
                        c_lambda=0.05,
                        c_alpha=40)
        data_path = resource_filename(testfm.__name__,
                                      "data/movielenshead.dat")
        df = pd.read_csv(data_path,
                         sep="::",
                         header=None,
                         names=["user", "item", "rating", "date", "title"])
        training, testing = testfm.split.holdoutByRandom(df, 0.7)

        # Training items grouped per raw user value.
        seen_items = {}
        for user, entries in training.groupby("user")["item"]:
            seen_items[user] = list(entries)

        tf.fit(training)
        # Evaluation result of the originally trained model.
        result_before = evaluator.evaluate_model(tf, testing)

        # Swap every trained user factor row for one computed on the fly.
        for user, items in seen_items.items():
            # Row of this user inside the model's factor matrix.
            user_row = tf.data_map[tf.get_user_column()][user]
            # Rows of the items this user has seen.
            item_rows = []
            for item in items:
                item_rows.append(tf.data_map[tf.get_item_column()][item])
            tf.factors[0][user_row, :] = online_model.online_user_factors(
                tf.factors[1], item_rows, p_param=40, lambda_param=0.05)

        # Score the model again with the recomputed user factors in place.
        result_after = evaluator.evaluate_model(tf, testing)
        # The difference should be smaller than 20% of the original score.
        baseline = result_before[0]
        assert abs(baseline - result_after[0]) < 0.2 * baseline
예제 #5
0
    def test_score_for_python_version(self):
        """
        get_score must return the dot product of the user and item factor rows.

        After fitting on three interactions, the factor rows of user 10 and
        item 100 are overwritten with (0, 1) and (1, 5), so the score for that
        pair has to be 0*1 + 1*5.
        """
        interactions = [(10, 100), (10, 110), (12, 120)]
        inp = pd.DataFrame([{"user": user, "item": item} for user, item in interactions])
        tf = PyTensorCoFi(n_factors=2)
        tf.fit(inp)

        # Rows assigned by the model to raw user 10 and raw item 100.
        user_row = tf.data_map[tf.get_user_column()][10]
        item_row = tf.data_map[tf.get_item_column()][100]

        # Pin both factor rows to known values.
        tf.factors[0][user_row, 0], tf.factors[0][user_row, 1] = 0, 1
        tf.factors[1][item_row, 0], tf.factors[1][item_row, 1] = 1, 5

        expected = 0 * 1 + 1 * 5
        self.assertEqual(expected, tf.get_score(10, 100))
예제 #6
0
    def test_tensor_score_against_testfm(self):
        """
        [recommendation.models.TensorCoFi] Test tensorcofi scores with test.fm benchmark

        Both TensorCoFi and the testfm PyTensorCoFi are fitted on the same 90%
        split of self.df; their first evaluation measures must differ by less
        than 0.15.
        """
        evaluator = Evaluator()
        user_count = len(self.df.user.unique())
        item_count = len(self.df.item.unique())
        tc = TensorCoFi(n_users=user_count, n_items=item_count, n_factors=2)
        ptc = PyTensorCoFi()
        training, testing = testfm.split.holdoutByRandom(self.df, 0.9)

        # Candidate items passed to the evaluator: those present in the training split.
        items = training.item.unique()
        for model in (tc, ptc):
            model.fit(training)
        tc_score = evaluator.evaluate_model(tc, testing, all_items=items)[0]
        ptc_score = evaluator.evaluate_model(ptc, testing, all_items=items)[0]
        gap = abs(tc_score - ptc_score)
        assert gap < 0.15, \
            "TensorCoFi score is not close enough to testfm benchmark (%.3f != %.3f)" % (tc_score, ptc_score)
예제 #7
0
    def test_ids_returns_for_python_version(self):
        """
        Check the id mapping and factor width produced by PyTensorCoFi.fit.

        After fitting on three interactions, raw user 10 and raw item 100 must
        both be mapped to index 0, and each of their factor rows must hold
        tf.number_of_factors entries.
        """
        tf = PyTensorCoFi(n_factors=2)
        inp = [{"user": 10, "item": 100},
               {"user": 10, "item": 110},
               {"user": 12, "item": 120}]
        inp = pd.DataFrame(inp)
        tf.fit(inp)

        # Test the id in map
        uid = tf.data_map[tf.get_user_column()][10]
        iid = tf.data_map[tf.get_item_column()][100]
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(uid, 0)
        self.assertEqual(iid, 0)

        # Test number of factors
        self.assertEqual(len(tf.factors[0][uid, :]), tf.number_of_factors)
        self.assertEqual(len(tf.factors[1][iid, :]), tf.number_of_factors)
예제 #8
0
    def test_nogil_against_std_05(self):
        """
        [EVALUATOR] Test the groups measure differences between python and c implementations for 5% training

        For SAMPLE_SIZE_FOR_TEST random 5% splits, the same fitted model is
        scored with Evaluator() (recorded as "Cython") and Evaluator(False)
        (recorded as "Python"); the two groups of measures are then handed to
        assert_equality_in_groups.
        """
        data_file = resource_filename(testfm.__name__, 'data/movielenshead.dat')
        df = pd.read_csv(data_file, sep="::", header=None,
                         names=['user', 'item', 'rating', 'date', 'title'])
        model = PyTensorCoFi()
        ev = Evaluator(False)
        ev_nogil = Evaluator()

        implementations = []
        measures = []
        for _ in range(SAMPLE_SIZE_FOR_TEST):
            training, testing = testfm.split.holdoutByRandom(df, 0.05)
            model.fit(training)
            # Same model and split, scored once per evaluator.
            implementations.append("Cython")
            measures.append(ev_nogil.evaluate_model(model, testing)[0])
            implementations.append("Python")
            measures.append(ev.evaluate_model(model, testing)[0])
        results = {"implementation": implementations, "measure": measures}

        #####################
        # ANOVA over result #
        #####################
        assert_equality_in_groups(results, alpha=ALPHA, groups="implementation", test_var="measure")
예제 #9
0
 def test_recommendation_with_testfm(self):
     """
     [recommendation.api.GetRecommendation] Test recommendation with testfm

     Trains a testfm PyTensorCoFi on the Inventory rows, reusing the id map and
     the user/item sizes of the cached TensorCoFi model, then evaluates both
     models on the same data. Their first measures must differ by less than 0.15.
     """
     # One (user, item, 1.0) triple per inventory row; ids are decremented by one
     # (presumably to make them 0-based - confirm against the model's indexing).
     triples = [(row["user_id"] - 1, row["item_id"] - 1, 1.)
                for row in Inventory.objects.all().values("user_id", "item_id")]
     # list() is required because zip() is a lazy iterator on Python 3 and
     # np.array cannot build a float array from it; on Python 2 it is a no-op copy.
     data = np.array(list(zip(*triples)), dtype=np.float32)
     users, items = zip(*Inventory.objects.all().values_list("user_id", "item_id"))
     df = pd.DataFrame({"user": pd.Series(users), "item": pd.Series(items)}, dtype=np.float32)
     evaluator = Evaluator(use_multi_threading=False)
     tensor = TensorCoFi.get_model_from_cache()

     # Make the testfm model share the cached model's id map and dimensions.
     tfm_tensor = PyTensorCoFi()
     tfm_tensor.data_map = tensor.data_map
     tfm_tensor.users_size = lambda: tensor.users_size()
     tfm_tensor.items_size = lambda: tensor.items_size()

     def _score(user, item):
         # Dot product of the mapped user row and the mapped item row.
         user_row = tfm_tensor.factors[0][tfm_tensor.data_map[tfm_tensor.get_user_column()][user]]
         item_row = tfm_tensor.factors[1][tfm_tensor.data_map[tfm_tensor.get_item_column()][item]]
         return np.dot(user_row, item_row.transpose())

     tfm_tensor.get_score = _score
     # data holds one row per field (users, items, values); train gets it transposed.
     tfm_tensor.train(data.transpose())
     items = df.item.unique()
     t = evaluator.evaluate_model(tensor, df, all_items=items, non_relevant_count=100)
     tfm = evaluator.evaluate_model(tfm_tensor, df, all_items=items, non_relevant_count=100)
     assert abs(t[0] - tfm[0]) < 0.15, \
         "Difference between testfm implementation and frappe is to high (%f, %f)" % (t[0], tfm[0])
예제 #10
0
    def test_ids_returns_for_python_version(self):
        """
        Check the id mapping and factor width produced by PyTensorCoFi.fit.

        After fitting on three interactions, raw user 10 and raw item 100 must
        both be mapped to index 0, and each of their factor rows must hold
        tf.number_of_factors entries.
        """
        tf = PyTensorCoFi(n_factors=2)
        inp = [{
            "user": 10,
            "item": 100
        }, {
            "user": 10,
            "item": 110
        }, {
            "user": 12,
            "item": 120
        }]
        inp = pd.DataFrame(inp)
        tf.fit(inp)

        # Test the id in map
        uid = tf.data_map[tf.get_user_column()][10]
        iid = tf.data_map[tf.get_item_column()][100]
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(uid, 0)
        self.assertEqual(iid, 0)

        # Test number of factors
        self.assertEqual(len(tf.factors[0][uid, :]), tf.number_of_factors)
        self.assertEqual(len(tf.factors[1][iid, :]), tf.number_of_factors)
예제 #11
0
    def test_score_for_python_version(self):
        """
        get_score must return the dot product of the user and item factor rows.

        The model is fitted on three interactions; the factor rows of user 10
        and item 100 are then set to (0, 1) and (1, 5), which makes the expected
        score for that pair 0*1 + 1*5.
        """
        interactions = [(10, 100), (10, 110), (12, 120)]
        inp = pd.DataFrame([{
            "user": user,
            "item": item
        } for user, item in interactions])
        tf = PyTensorCoFi(n_factors=2)
        tf.fit(inp)

        # Rows assigned by the model to raw user 10 and raw item 100.
        user_row = tf.data_map[tf.get_user_column()][10]
        item_row = tf.data_map[tf.get_item_column()][100]

        # Pin both factor rows to known values.
        tf.factors[0][user_row, 0], tf.factors[0][user_row, 1] = 0, 1
        tf.factors[1][item_row, 0], tf.factors[1][item_row, 1] = 1, 5

        expected = 0 * 1 + 1 * 5
        self.assertEqual(expected, tf.get_score(10, 100))
예제 #12
0
 def test_fit_for_python_version(self):
     """
     Fitting on self.df must yield one factor row per distinct user and item.
     """
     model = PyTensorCoFi(n_factors=2)
     model.fit(self.df)

     # factors[0] is checked against the users, factors[1] against the items.
     distinct_users = len(self.df.user.unique())
     self.assertEqual(distinct_users, model.factors[0].shape[0])
     distinct_items = len(self.df.item.unique())
     self.assertEqual(distinct_items, model.factors[1].shape[0])
예제 #13
0
 def test_fit_for_python_version(self):
     """
     The fitted factor matrices must have as many rows as self.df has distinct
     users (factors[0]) and distinct items (factors[1]).
     """
     model = PyTensorCoFi(n_factors=2)
     model.fit(self.df)

     # Row counts are compared with the number of unique values per column.
     expected_user_rows = len(self.df.user.unique())
     self.assertEqual(expected_user_rows, model.factors[0].shape[0])
     expected_item_rows = len(self.df.item.unique())
     self.assertEqual(expected_item_rows, model.factors[1].shape[0])
예제 #14
0
from testfm.evaluation.evaluator import Evaluator
from pkg_resources import resource_filename

from testfm.evaluation.parameter_tuning import ParameterTuning

if __name__ == "__main__":
    # Script: tune TensorCoFi on a validation split of the MovieLens sample,
    # refit it with the best parameters and print its evaluation on the test split.

    # Call this before loading the data to save memory (fork of process takes place).
    # Named `evaluator` so the builtin `eval` is not shadowed.
    evaluator = Evaluator()

    # Prepare the data
    df = pd.read_csv(resource_filename(testfm.__name__,
                                       'data/movielenshead.dat'),
                     sep="::",
                     header=None,
                     names=['user', 'item', 'rating', 'date', 'title'])
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print(df.head())
    training, testing = testfm.split.holdoutByRandom(df, 0.9)

    print("Tuning the parameters.")
    # Parameters are tuned on a further 70/30 split of the training data only.
    tr, validation = testfm.split.holdoutByRandom(training, 0.7)
    pt = ParameterTuning()
    pt.set_max_iterations(100)
    pt.set_z_value(90)
    tf_params = pt.get_best_params(TensorCoFi, tr, validation)
    print(tf_params)

    tf = TensorCoFi()
    tf.set_params(**tf_params)
    tf.fit(training)
    # Model name padded to 50 columns, a space, then the evaluation result:
    # the same line the former `print x,` / `print y` pair produced.
    label = tf.get_name().ljust(50)
    result = evaluator.evaluate_model(tf, testing, all_items=training.item.unique())
    print("%s %s" % (label, result))