예제 #1
0
 def setUp(self):
     """Build the fixtures shared by the tests.

     Sets ``self.tf`` to a ``TensorCoFi`` created with ``dim=2`` and
     ``self.df`` to the first 100 rows of the ``movielenshead.dat`` file
     located inside the ``testfm`` package.
     """
     self.tf = TensorCoFi(dim=2)
     data_path = resource_filename(testfm.__name__,
                                   'data/movielenshead.dat')
     # "::" is a multi-character separator, which only the Python parsing
     # engine supports. Naming the engine explicitly avoids the
     # ParserWarning pandas emits when it falls back from the C engine.
     frame = pd.read_csv(data_path, sep="::", header=None, engine='python',
                         names=['user', 'item', 'rating', 'date', 'title'])
     self.df = frame.head(n=100)
예제 #2
0
    def test_score(self):
        """Check ``getScore`` against hand-set factors for one user/item pair.

        Fits a ``dim=2`` model on three interactions, verifies that user 10
        and item 100 are both mapped to internal id 1, overwrites row 0 of
        each factor matrix with known values and expects the score of
        (10, 100) to equal their dot product.
        """
        tf = TensorCoFi(dim=2)
        inp = pd.DataFrame([{'user': 10, 'item': 100},
                            {'user': 10, 'item': 110},
                            {'user': 12, 'item': 120}])
        tf.fit(inp)

        # assertEqual is used throughout: assertEquals is a deprecated alias.
        uid = tf._dmap['user'][10]
        iid = tf._dmap['item'][100]
        self.assertEqual(uid, 1)
        self.assertEqual(iid, 1)

        # Presumably row 0 of each matrix holds the factors of internal id 1;
        # the expected score below relies on that -- confirm against getScore.
        tf.factors['user'][0][0] = 0
        tf.factors['user'][0][1] = 1
        tf.factors['item'][0][0] = 1
        tf.factors['item'][0][1] = 5

        self.assertEqual(0*1+1*5, tf.getScore(10, 100))
예제 #3
0
from testfm.evaluation.parameterTuning import ParameterTuning

eval = Evaluator()  # call this before loading the data to save memory (fork of process takes place)

# prepare the data
df = pd.read_csv(
    resource_filename(testfm.__name__, "data/movielenshead.dat"),
    sep="::",
    header=None,
    names=["user", "item", "rating", "date", "title"],
)
print df.head()
training, testing = testfm.split.holdoutByRandom(df, 0.9)

print "Tuning the parameters."
tr, validation = testfm.split.holdoutByRandom(training, 0.7)
pt = ParameterTuning()
pt.setMaxIterations(10)
pt.setZvalue(80)
tf_params = pt.getBestParams(TensorCoFi, tr, validation)
print tf_params

tf = TensorCoFi()
tf.setParams(**tf_params)
tf.fit(training)
print tf.getName().ljust(50),
print eval.evaluate_model(tf, testing, all_items=training.item.unique())

eval.close()  # need this call to clean up the worker processes
예제 #4
0
class TestTensorCoFi(unittest.TestCase):
    """Unit tests for ``TensorCoFi`` and ``TensorCoFiByFile``."""

    # Files written into the current working directory by
    # test_result_by_file; tearDown deletes them after every test.
    _SCRATCH_FILES = ('user.csv', 'item.csv')

    def tearDown(self):
        """Delete the scratch CSV files if a test left them behind."""
        import os
        for name in self._SCRATCH_FILES:
            if os.path.exists(name):
                os.remove(name)

    def setUp(self):
        """Create a ``dim=2`` model and load the first 100 data rows.

        Sets ``self.tf`` and ``self.df``; the data comes from the
        ``movielenshead.dat`` file located inside the ``testfm`` package.
        """
        self.tf = TensorCoFi(dim=2)
        data_path = resource_filename(testfm.__name__,
                                      'data/movielenshead.dat')
        # "::" is a multi-character separator, which only the Python parsing
        # engine supports; naming it explicitly avoids the ParserWarning
        # pandas emits when it falls back from the C engine.
        frame = pd.read_csv(data_path, sep="::", header=None,
                            engine='python',
                            names=['user', 'item', 'rating', 'date', 'title'])
        self.df = frame.head(n=100)

    @staticmethod
    def _toy_interactions():
        """Return a three-row user/item frame: users 10 and 12, three items."""
        return pd.DataFrame([{'user': 10, 'item': 100},
                             {'user': 10, 'item': 110},
                             {'user': 12, 'item': 120}])

    def _check_known_score(self, model):
        """Fit ``model`` on the toy data and verify one hand-computed score.

        Asserts that user 10 and item 100 both map to internal id 1, then
        overwrites row 0 of each factor matrix and expects the score of
        (10, 100) to equal the dot product of those rows.
        """
        model.fit(self._toy_interactions())
        uid = model._dmap['user'][10]
        iid = model._dmap['item'][100]
        self.assertEqual(uid, 1)
        self.assertEqual(iid, 1)

        # Presumably row 0 of each matrix holds the factors of internal id 1;
        # the expected score below relies on that -- confirm against getScore.
        model.factors['user'][0][0] = 0
        model.factors['user'][0][1] = 1
        model.factors['item'][0][0] = 1
        model.factors['item'][0][1] = 5

        self.assertEqual(0*1+1*5, model.getScore(10, 100))

    def test_array(self):
        """Smoke test: the conversion runs and returns something that unpacks as a pair."""
        arr, tmap = self.tf._dataframe_to_float_matrix(self.df)

    def test_fit(self):
        """Fitting yields one factor row per distinct user and item."""
        self.tf.fit(self.df)
        # item and user are row vectors
        self.assertEqual(len(self.df.user.unique()),
                         self.tf.factors['user'].shape[0])
        self.assertEqual(len(self.df.item.unique()),
                         self.tf.factors['item'].shape[0])
        self.assertEqual(self.tf.user_features[1], [1])
        self.assertEqual(self.tf.item_features[122], [1])

    def test_ids_returns(self):
        """Column names, feature counts, id mapping and factor sizes after fit."""
        self.tf.fit(self._toy_interactions())
        self.assertEqual(self.tf.user_column_names, ['user'])
        self.assertEqual(self.tf.item_column_names, ['item'])
        self.assertEqual(len(self.tf.user_features), 2)
        self.assertEqual(len(self.tf.item_features), 3)

        self.assertEqual(len(self.tf.factors['user']), 2)
        self.assertEqual(len(self.tf.factors['item']), 3)

        uid = self.tf._dmap['user'][10]
        iid = self.tf._dmap['item'][100]
        self.assertEqual(uid, 1)
        self.assertEqual(iid, 1)

        # Every factor vector has one entry per latent dimension.
        self.assertEqual(len(self.tf.factors['user'][uid]), 2)
        self.assertEqual(len(self.tf.factors['user'][uid]), self.tf._dim)
        self.assertEqual(len(self.tf.factors['item'][iid]), self.tf._dim)

    def test_score(self):
        """``TensorCoFi.getScore`` matches the hand-computed dot product."""
        self._check_known_score(TensorCoFi(dim=2))

    def test_floatmatrix_to_numpy(self):
        """A jblas FloatMatrix converts to a numpy array with equal entries and shape."""
        from jnius import autoclass
        FloatMatrix = autoclass('org.jblas.FloatMatrix')
        rand = FloatMatrix.rand(4, 2)
        rand_np = self.tf._float_matrix2numpy(rand)

        self.assertEqual(rand.get(0, 0), rand_np[0, 0])
        self.assertEqual(rand.get(1, 0), rand_np[1, 0])
        self.assertEqual(rand.get(1, 1), rand_np[1, 1])
        self.assertEqual((rand.rows, rand.columns), rand_np.shape)

    def test_score_tcff(self):
        """``TensorCoFiByFile.getScore`` matches the hand-computed dot product."""
        self._check_known_score(TensorCoFiByFile(dim=2))

    def test_result_by_file(self):
        """Factors held by the model agree with the matrices dumped to CSV.

        Writes both matrices of the object returned by ``_fit`` to
        ``user.csv`` / ``item.csv``, reads them back and compares them with
        ``tf.factors`` entry by entry.
        """
        def float_matrix_to_csv(fm, path):
            # One text line per matrix row, entries separated by commas.
            text = '\n'.join(
                ','.join(str(fm.get(row, column))
                         for column in range(0, fm.columns))
                for row in range(0, fm.rows))
            with open(path, 'w') as f:
                f.write(text)

        tf = TensorCoFiByFile(dim=2)
        ten = tf._fit(self._toy_interactions())

        float_matrix_to_csv(ten.getModel().get(0), 'user.csv')
        float_matrix_to_csv(ten.getModel().get(1), 'item.csv')

        # genfromtxt is given the file name so it opens and closes the file
        # itself; passing open(...) left handles dangling on files that
        # tearDown is about to delete. column_stack over the rows of the
        # loaded 2-D array yields its transpose.
        fromFile = {
            'user': np.ma.column_stack(np.genfromtxt('user.csv',
                                                     delimiter=',')),
            'item': np.ma.column_stack(np.genfromtxt('item.csv',
                                                     delimiter=',')),
        }
        # Two users and three items in the toy data, two factors each.
        for kind, count in (('user', 2), ('item', 3)):
            for i in range(0, count):
                for j in (0, 1):
                    self.assertAlmostEqual(tf.factors[kind][i][j],
                                           fromFile[kind][i][j])