def setUp(self):
    """Build a fresh 2-dimensional TensorCoFi model and load fixture data.

    Sets ``self.tf`` to ``TensorCoFi(dim=2)`` and ``self.df`` to the first
    100 rows of the ``data/movielenshead.dat`` resource of the ``testfm``
    package, read as header-less, ``::``-separated data with the columns
    user, item, rating, date and title.
    """
    self.tf = TensorCoFi(dim=2)
    data_path = resource_filename(testfm.__name__, 'data/movielenshead.dat')
    # "::" is a multi-character separator, which only pandas' Python parser
    # handles. Asking for that engine explicitly avoids the ParserWarning
    # pandas emits when it has to fall back from the C engine.
    ratings = pd.read_csv(
        data_path,
        sep="::",
        header=None,
        names=['user', 'item', 'rating', 'date', 'title'],
        engine='python'
    )
    self.df = ratings.head(n=100)
def test_score(self):
    """Check getScore for a known (user, item) pair with hand-set factors.

    Fits a 2-dimensional TensorCoFi model on three interactions, verifies
    that user 10 and item 100 are both mapped to internal id 1, then
    overwrites the first user and item factor rows and expects
    ``getScore(10, 100)`` to equal ``0*1 + 1*5``.
    """
    tf = TensorCoFi(dim=2)
    inp = pd.DataFrame([
        {'user': 10, 'item': 100},
        {'user': 10, 'item': 110},
        {'user': 12, 'item': 120},
    ])
    tf.fit(inp)

    uid = tf._dmap['user'][10]
    iid = tf._dmap['item'][100]
    self.assertEqual(uid, 1)
    self.assertEqual(iid, 1)

    # The mapped ids are 1 while the factor row written below is row 0;
    # presumably _dmap ids are 1-based and factor rows 0-based -- confirm
    # against TensorCoFi.
    tf.factors['user'][0][0] = 0
    tf.factors['user'][0][1] = 1
    tf.factors['item'][0][0] = 1
    tf.factors['item'][0][1] = 5
    self.assertEqual(0*1+1*5, tf.getScore(10, 100))
from testfm.evaluation.parameterTuning import ParameterTuning eval = Evaluator() # call this before loading the data to save memory (fork of process takes place) # prepare the data df = pd.read_csv( resource_filename(testfm.__name__, "data/movielenshead.dat"), sep="::", header=None, names=["user", "item", "rating", "date", "title"], ) print df.head() training, testing = testfm.split.holdoutByRandom(df, 0.9) print "Tuning the parameters." tr, validation = testfm.split.holdoutByRandom(training, 0.7) pt = ParameterTuning() pt.setMaxIterations(10) pt.setZvalue(80) tf_params = pt.getBestParams(TensorCoFi, tr, validation) print tf_params tf = TensorCoFi() tf.setParams(**tf_params) tf.fit(training) print tf.getName().ljust(50), print eval.evaluate_model(tf, testing, all_items=training.item.unique()) eval.close() # need this call to clean up the worker processes
class TestTensorCoFi(unittest.TestCase):
    """Tests for the TensorCoFi and TensorCoFiByFile models."""

    @staticmethod
    def _toy_interactions():
        """Return the three-row user/item frame shared by several tests."""
        return pd.DataFrame([
            {'user': 10, 'item': 100},
            {'user': 10, 'item': 110},
            {'user': 12, 'item': 120},
        ])

    def _check_score(self, tf):
        """Fit *tf* on the toy frame and verify getScore on hand-set factors.

        Expects user 10 and item 100 to be mapped to internal id 1, then
        overwrites the first user and item factor rows and expects
        ``getScore(10, 100)`` to equal ``0*1 + 1*5``.
        """
        tf.fit(self._toy_interactions())
        uid = tf._dmap['user'][10]
        iid = tf._dmap['item'][100]
        self.assertEqual(uid, 1)
        self.assertEqual(iid, 1)
        # The mapped ids are 1 while the factor row written below is row 0;
        # presumably _dmap ids are 1-based and factor rows 0-based -- confirm
        # against TensorCoFi.
        tf.factors['user'][0][0] = 0
        tf.factors['user'][0][1] = 1
        tf.factors['item'][0][0] = 1
        tf.factors['item'][0][1] = 5
        self.assertEqual(0*1+1*5, tf.getScore(10, 100))

    def tearDown(self):
        """Delete user.csv and item.csv from the working directory if present.

        These are the files test_result_by_file writes.
        """
        import os
        for leftover in ('user.csv', 'item.csv'):
            if os.path.exists(leftover):
                os.remove(leftover)

    def setUp(self):
        """Create a 2-dimensional TensorCoFi model and load 100 fixture rows."""
        self.tf = TensorCoFi(dim=2)
        data_path = resource_filename(
            testfm.__name__, 'data/movielenshead.dat')
        # "::" is a multi-character separator, which only pandas' Python
        # parser handles; requesting it explicitly avoids the ParserWarning
        # emitted when pandas falls back from the C engine.
        ratings = pd.read_csv(
            data_path,
            sep="::",
            header=None,
            names=['user', 'item', 'rating', 'date', 'title'],
            engine='python'
        )
        self.df = ratings.head(n=100)

    def test_array(self):
        """Smoke test: the float-matrix conversion returns a 2-tuple."""
        # The unpacking itself is the check; the values are not inspected.
        arr, tmap = self.tf._dataframe_to_float_matrix(self.df)

    def test_fit(self):
        """After fit, factor matrices have one row per unique user/item."""
        self.tf.fit(self.df)
        # item and user are row vectors
        self.assertEqual(len(self.df.user.unique()),
                         self.tf.factors['user'].shape[0])
        self.assertEqual(len(self.df.item.unique()),
                         self.tf.factors['item'].shape[0])
        self.assertEqual(self.tf.user_features[1], [1])
        self.assertEqual(self.tf.item_features[122], [1])

    def test_ids_returns(self):
        """Check column names, feature counts and factor shapes after fit."""
        self.tf.fit(self._toy_interactions())
        self.assertEqual(self.tf.user_column_names, ['user'])
        self.assertEqual(self.tf.item_column_names, ['item'])
        self.assertEqual(len(self.tf.user_features), 2)
        self.assertEqual(len(self.tf.item_features), 3)
        self.assertEqual(len(self.tf.factors['user']), 2)
        self.assertEqual(len(self.tf.factors['item']), 3)

        uid = self.tf._dmap['user'][10]
        iid = self.tf._dmap['item'][100]
        self.assertEqual(uid, 1)
        self.assertEqual(iid, 1)
        self.assertEqual(len(self.tf.factors['user'][uid]), 2)
        self.assertEqual(len(self.tf.factors['user'][uid]), self.tf._dim)
        self.assertEqual(len(self.tf.factors['item'][iid]), self.tf._dim)

    def test_score(self):
        """getScore on a TensorCoFi model with hand-set factors."""
        self._check_score(TensorCoFi(dim=2))

    def test_floatmatrix_to_numpy(self):
        """_float_matrix2numpy keeps the entries and shape of a FloatMatrix."""
        from jnius import autoclass
        FloatMatrix = autoclass('org.jblas.FloatMatrix')
        rand = FloatMatrix.rand(4, 2)
        rand_np = self.tf._float_matrix2numpy(rand)
        self.assertEqual(rand.get(0, 0), rand_np[0, 0])
        self.assertEqual(rand.get(1, 0), rand_np[1, 0])
        self.assertEqual(rand.get(1, 1), rand_np[1, 1])
        self.assertEqual((rand.rows, rand.columns), rand_np.shape)

    def test_score_tcff(self):
        """getScore on a TensorCoFiByFile model with hand-set factors."""
        self._check_score(TensorCoFiByFile(dim=2))

    def test_result_by_file(self):
        """Factors on the model match the ones dumped to CSV and read back."""

        def floatMatrixToCSV(fm, n):
            """Write matrix *fm* to file *n*, one comma-separated row per line."""
            csv = '\n'.join(
                ','.join(str(fm.get(row, column))
                         for column in range(fm.columns))
                for row in range(fm.rows)
            )
            with open(n, 'w') as f:
                f.write(csv)

        def load_factors(path):
            """Read a CSV written above, closing the file once it is parsed."""
            with open(path, 'r') as handle:
                # column_stack over the rows of the loaded 2-D array yields
                # its transpose; presumably the model stores factors as
                # dim x n -- confirm against TensorCoFiByFile.
                return np.ma.column_stack(
                    np.genfromtxt(handle, delimiter=','))

        tf = TensorCoFiByFile(dim=2)
        ten = tf._fit(self._toy_interactions())
        floatMatrixToCSV(ten.getModel().get(0), 'user.csv')
        floatMatrixToCSV(ten.getModel().get(1), 'item.csv')
        fromFile = {
            'user': load_factors('user.csv'),
            'item': load_factors('item.csv'),
        }

        # Two users and three items in the toy frame, two factors each.
        for kind, count in (('user', 2), ('item', 3)):
            for i in range(count):
                for j in range(2):
                    self.assertAlmostEqual(tf.factors[kind][i][j],
                                           fromFile[kind][i][j])