Ejemplo n.º 1
0
def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file,
              sep=',',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100,
                min_values=2,
                pre_normalize=None,
                mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Ejemplo n.º 2
0
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::', format={'col':0, 'row':1, 'value':2,'ids':int})
       

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K=100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Ejemplo n.º 3
0
def evaluate(data, count=5, K=100):
    results = []

    for i in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print len(data.get()), len(train.get()), len(test.get())
        #test_in_train(test, train)
        #print train.get()
        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

        #Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                #print "keyerror: ===========================================================>"
                continue
        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print rsu
            results.append(rsu)
        except:
            print "one error....++++++++++++++++++++++++++++++++++++++++++++++++++++"
        

    return results
Ejemplo n.º 4
0
def test_SVD(svd,train,test,pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():      
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Ejemplo n.º 5
0
def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Ejemplo n.º 6
0
 def eval_rmse(self):
     """Print RMSE and MAE of self.svd's predictions over self.test.

     Test ratings whose item/user is unknown to the model (predict raises
     KeyError) are silently skipped rather than counted.
     """
     # Evaluation using prediction-based metrics
     rmse = RMSE()
     mae = MAE()
     for rating, item_id, user_id in self.test.get():
         try:
             pred_rating = self.svd.predict(item_id, user_id)
             rmse.add(rating, pred_rating)
             mae.add(rating, pred_rating)
         except KeyError:
             # unseen (item, user) pair -- no prediction possible
             continue
     print 'RMSE=%s' % rmse.compute()
     print 'MAE=%s' % mae.compute()
Ejemplo n.º 7
0
def eval_reco(model, test):
    """Compute RMSE and MAE of `model` over the `test` split.

    Returns the (RMSE, MAE) metric objects already loaded with the
    (actual, predicted) pairs; call .compute() on each for the score.
    """
    rmse = RMSE()
    mae = MAE()
    for actual, item_id, user_id in test.get():
        try:
            predicted = model.predict(item_id, user_id)
            rmse.add(actual, predicted)
            mae.add(actual, predicted)
        except KeyError:
            # model cannot score this (item, user) pair
            continue

    return rmse, mae
Ejemplo n.º 8
0
def eval_reco(model, test):
    """Fill and return (RMSE, MAE) metric objects for `model` on `test`."""
    rmse_metric = RMSE()
    mae_metric = MAE()

    # Feed every scorable (actual, predicted) pair into both metrics.
    for actual, item, user in test.get():
        try:
            predicted = model.predict(item, user)
            rmse_metric.add(actual, predicted)
            mae_metric.add(actual, predicted)
        except KeyError:
            # pair unknown to the model -- skip
            continue

    return rmse_metric, mae_metric
Ejemplo n.º 9
0
def evaluate(_svd, _testData, verbose=False):
    """Print RMSE and MAE of _svd over _testData, clamping predictions to [0, 10].

    NOTE(review): the loop state is deliberately published through ``global``
    -- presumably so a caller or interactive session can inspect the last
    values after the run; confirm before removing.
    """
    global rmse, mae, rating, item_id, user_id, pred_rating
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in _testData.get():
        try:
            # Predictions are clamped into the 0..10 rating range.
            pred_rating = _svd.predict(item_id, user_id, MIN_VALUE=0, MAX_VALUE=10)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except Exception as e:
            # Any failure (unseen pair, etc.) is reported and the rating skipped.
            print 'ERROR occurred:', e.message

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Ejemplo n.º 10
0
def ex1(dat_file=DATA_DIR + 'ml-1m-ratings.dat', pct_train=0.5):

    data = Data()
    data.load(dat_file,
              sep='::',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    # About format parameter:
    #   'row': 1 -> Rows in matrix come from column 1 in ratings.dat file
    #   'col': 0 -> Cols in matrix come from column 0 in ratings.dat file
    #   'value': 2 -> Values (Mij) in matrix come from column 2 in ratings.dat
    #   file
    #   'ids': int -> Ids (row and col ids) are integers (not strings)

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K,
                min_values=5,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    # mae is mean ABSOLUTE error
    # ... in this case it will return 1.09 which means there is an error of almost 1 point out of 5
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Ejemplo n.º 11
0
def evaulte(train_set, test_set):
    svd = SVD()
    svd.set_data(train_set)
    svd.compute(k=KKK, min_values=MIN_ITEM, pre_normalize=None, mean_center=True, post_normalize=True)

    mae = MAE()
    k_err = 0
    for rating, item_id, user_id in test_set.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            mae.add(rating, pred_rating)
        except KeyError:
            #print "keyerror: ===========================================================>"
            k_err += 1
            continue
    
    print "k_err", k_err, " -- ", "test-len: ", len(test_set.get()), "train-len: ", len(train_set.get())
    result = mae.compute()/2.0
    return result
Ejemplo n.º 12
0
def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file, sep=',', format={'col':0, 'row':1, 'value':2,'ids':int})
    train, test = data.split_train_test(percent=pct_train)               
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100, min_values=2, pre_normalize=None, mean_center=True,
    post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():      
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Ejemplo n.º 13
0
def evaluate(clf, _testData, verbose = False):

    rmse = RMSE()
    mae = MAE()
    numErrors = 0

    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = clf.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except KeyError as e:
            if verbose:
                print 'ERROR occurred:', e.message
            numErrors += 1

    print '\n%i/%i data points raised errors.' % (numErrors, len(_testData))
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Ejemplo n.º 14
0
def evaulte(train_set, test_set):
    svd = SVD()
    svd.set_data(train_set)
    svd.compute(k=KKK,
                min_values=MIN_ITEM,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)

    mae = MAE()
    k_err = 0
    for rating, item_id, user_id in test_set.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            mae.add(rating, pred_rating)
        except KeyError:
            #print "keyerror: ===========================================================>"
            k_err += 1
            continue

    print "k_err", k_err, " -- ", "test-len: ", len(
        test_set.get()), "train-len: ", len(train_set.get())
    result = mae.compute() / 2.0
    return result
Ejemplo n.º 15
0
def ex1(dat_file='ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::', format={'col':0, 'row':1, 'value':2,
    'ids':int})
        # About format parameter:
        #   'row': 1 -> Rows in matrix come from column 1 in ratings.dat file
        #   'col': 0 -> Cols in matrix come from column 0 in ratings.dat file
        #   'value': 2 -> Values (Mij) in matrix come from column 2 in ratings.dat
        #   file
        #   'ids': int -> Ids (row and col ids) are integers (not strings)

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(
        k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Ejemplo n.º 16
0
class TestPrediction(Test):
    """Tests for the prediction-based metrics MAE and RMSE.

    Fixtures (DATA_PRED, GT_DATA, TEST_DATA) come from the ``Test`` base
    class; ``nan``, ``array``, ``assert_equal`` and ``assert_raises`` are
    module-level names.
    """

    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3        # Real Rating (ground truth)
        self.R_PRED = 2.1 # Predicted Rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        # nan predictions must be ignored: both lists stay empty
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute) #Raise: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4) #1: GT rating, 4: Predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED)+1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        #Even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        # NOTE(review): rebinding self.TEST_DATA mutates shared fixture state
        # for any test that runs later -- confirm test isolation.
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1,4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED)+1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        # BUG FIX: the original ``assert(rmse.compute(), 0.891067)`` asserted
        # a two-element tuple, which is always truthy, so this test could
        # never fail. Assert the computed value instead.
        assert_equal(rmse.compute(), 0.891067)
Ejemplo n.º 17
0
#Dataset
# Usage: script.py <ratings_file> <train_percent>
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
# matrix rows come from file column 1, cols from column 0, values from column 2
data.load(sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids':int})
#Train & Test data
train, test = data.split_train_test(percent=PERCENT_TRAIN)

# Factorize with the external SVDLIBC tool (k=100), then import the result
# as an SVD model and persist it.
svdlibc = SVDLIBC('./ml-1m/ratings.dat')
svdlibc.to_sparse_matrix(sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
svdlibc.compute(k=100)
svd = svdlibc.export()
svd.save_model('/tmp/svd-model', options={'k': 100})
#svd.similar(ITEMID1) # results might be different than example 4. as there's no min_values=10 set here


#Evaluation using prediction-based metrics
print 'Evaluating...'
rmse = RMSE()
mae = MAE()
for rating, item_id, user_id in test.get():
    try:
        # predictions clamped to the 0.0..5.0 rating range
        pred_rating = svd.predict(item_id, user_id, 0.0, 5.0)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
    except KeyError:
        # unseen item/user -- skip this rating
        continue

print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
Ejemplo n.º 18
0
                     post_normalize=True)

    # Evaluate
    # NOTE(review): fragment -- the enclosing function definition is outside
    # this view; svd, svd_neig, test and rmse_svd_all are defined there.
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()  # metrics for the neighborhood-enhanced model
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print 'Total Test ratings: %s' % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id,
                                                    user_id)  #Koren & co.
            # nan signals "no prediction" -- keep it out of the metrics
            if pred_rating_svd_neig is not nan:
                rmse_svd_neig.add(rating, pred_rating_svd_neig)
                mae_svd_neig.add(rating, pred_rating_svd_neig)

            # trailing comma keeps the progress output on one line (Python 2)
            print "\rProcessed test rating %d" % i,
            sys.stdout.flush()

            i += 1
        except KeyError:
            continue

        # running RMSE after each processed rating
        rmse_svd_all.append(rmse_svd.compute())
Ejemplo n.º 19
0
 def test_PRED_MAE_nan(self):
     """Adding a nan prediction must be ignored: both lists stay empty."""
     mae = MAE()
     mae.add(2.0, nan)
     assert_equal(mae.get_test(), [])
     assert_equal(mae.get_ground_truth(), [])
Ejemplo n.º 20
0
class TestPrediction(Test):
    """Tests for the prediction-based metrics MAE and RMSE.

    Fixtures (DATA_PRED, GT_DATA, TEST_DATA) come from the ``Test`` base
    class; ``nan``, ``array``, ``assert_equal`` and ``assert_raises`` are
    module-level names.
    """

    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3  # Real Rating (ground truth)
        self.R_PRED = 2.1  # Predicted Rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        # nan predictions must be ignored: both lists stay empty
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute)  #Raise: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4)  #1: GT rating, 4: Predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        #Even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        # NOTE(review): rebinding self.TEST_DATA mutates shared fixture state
        # for any test that runs later -- confirm test isolation.
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1, 4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        # BUG FIX: the original ``assert (rmse.compute(), 0.891067)`` asserted
        # a two-element tuple, which is always truthy, so this test could
        # never fail. Assert the computed value instead.
        assert_equal(rmse.compute(), 0.891067)
Ejemplo n.º 21
0
 def test_PRED_MAE_nan(self):
     """Adding a nan prediction must be ignored: both lists stay empty."""
     mae = MAE()
     mae.add(2.0, nan)
     assert_equal(mae.get_test(), [])
     assert_equal(mae.get_ground_truth(), [])
Ejemplo n.º 22
0
#Load SVD from /tmp
svd2 = SVD(filename='/tmp/movielens') # Loading already computed SVD model

#Predict User rating for given user and movie:
USERID = 2
ITEMID= 1 # Toy Story
# prediction clamped to the 0.0..5.0 rating range
rating1=svd2.predict(ITEMID, USERID, 0.0, 5.0)
print 'Predicted rating=%f'% rating1

flag=0
#Retrieve actual rating for given user and movie
# NOTE(review): `data` is defined elsewhere -- presumably the loaded
# ratings Data() object; verify against the surrounding script.
for rating, item_id, user_id in data.get():
	if user_id == USERID and item_id == ITEMID:
		rat = rating
		#print 'Actual rating=%f' % rating
		flag=1
		break

if flag == 1:
	print 'Actual rating=%f'% rat
else :
	sys.exit("No actual rating available")

#Evaluating prediction
# RMSE/MAE over a single (predicted, actual) pair is just their absolute
# difference.
rmse = RMSE()
mae = MAE()
rmse.add(rating1, rat)
mae.add(rating1, rat)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
Ejemplo n.º 23
0
    # NOTE(review): fragment -- the enclosing function definition is outside
    # this view; svd, svd_neig, test, rmse_svd_all and mae_svd_all come from it.
    svd_neig.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)

    # Evaluate
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()  # metrics for the neighborhood-enhanced model
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print "Total Test ratings: %s" % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
            # nan signals "no prediction" -- keep it out of the metrics
            if pred_rating_svd_neig is not nan:
                rmse_svd_neig.add(rating, pred_rating_svd_neig)
                mae_svd_neig.add(rating, pred_rating_svd_neig)

            # trailing comma keeps the progress output on one line (Python 2)
            print "\rProcessed test rating %d" % i,
            sys.stdout.flush()

            i += 1
        except KeyError:
            continue

        # running metric values after each processed rating
        rmse_svd_all.append(rmse_svd.compute())
        mae_svd_all.append(mae_svd.compute())
Ejemplo n.º 24
0
                  MAX_RATING)  # predicted rating value
print svd.get_matrix().value(ITEMID, USERID)  # real rating value

print ''
print 'GENERATING RECOMMENDATION'
# top-5 items the user has not rated yet (is_row=False: user axis is columns)
print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False)

#Evaluation using prediction-based metrics
# NOTE(review): fragment -- svd and test are defined earlier, outside this view.
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()  # rank correlation between actual and predicted
kendall = KendallTau()    # Kendall's tau rank correlation
#decision = PrecisionRecallF1()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
        spearman.add(rating, pred_rating)
        kendall.add(rating, pred_rating)
    except KeyError:
        # unseen item/user -- no prediction possible
        continue

print ''
print 'EVALUATION RESULT'
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
print 'Spearman\'s rho=%s' % spearman.compute()
print 'Kendall-tau=%s' % kendall.compute()
#print decision.compute()
print ''