Example #1
def evaluate(data, count=5, K=100):
    results = []

    for i in range(count):
        train, test = data.split_train_test(percent=PERCENT_TRAIN)
        print len(data.get()), len(train.get()), len(test.get())
        #test_in_train(test, train)
        #print train.get()
        svd = SVD()
        svd.set_data(train)
        svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

        #Evaluation using prediction-based metrics
        rmse = RMSE()
        mae = MAE()
        for rating, item_id, user_id in test.get():
            try:
                pred_rating = svd.predict(item_id, user_id)
                rmse.add(rating, pred_rating)
                mae.add(rating, pred_rating)
            except KeyError:
                #print "keyerror: ===========================================================>"
                continue
        try:
            rsu = {}
            rsu["RMSE"] = rmse.compute()
            rsu["MAE"] = mae.compute()
            print rsu
            results.append(rsu)
        except ValueError:
            # compute() raises ValueError when no (rating, prediction) pairs were added
            print "could not compute RMSE/MAE for this run"
        

    return results
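Each run appends a {'RMSE': ..., 'MAE': ...} dict to results, so averaging across runs is straightforward. A minimal driver sketch (hypothetical: it assumes `data` is a loaded recsys Data instance and PERCENT_TRAIN is defined, as in Example #18):

# Hypothetical driver for evaluate(); averages the per-run metrics.
results = evaluate(data, count=5)
print 'mean RMSE=%.4f' % (sum(r['RMSE'] for r in results) / len(results))
print 'mean MAE=%.4f' % (sum(r['MAE'] for r in results) / len(results))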
Example #2
def calculate_stats_users(pct_train):
    dat_file = 'user_data_working.csv'
    data = Data()
    data.load(dat_file,
              sep=',',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    train, test = data.split_train_test(percent=pct_train)
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=100,
                min_values=2,
                pre_normalize=None,
                mean_center=True,
                post_normalize=False)
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #3
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::', format={'col':0, 'row':1, 'value':2,'ids':int})
       

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K=100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #4
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3  # Real Rating (ground truth)
        self.R_PRED = 2.1  # Predicted Rating
Example #5
def test_SVD(svd, train, test, pct_train):
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s\n' % mae.compute()
Example #6
 def eval_rmse(self):
     # Evaluation using prediction-based metrics
     rmse = RMSE()
     mae = MAE()
     for rating, item_id, user_id in self.test.get():
         try:
             pred_rating = self.svd.predict(item_id, user_id)
             rmse.add(rating, pred_rating)
             mae.add(rating, pred_rating)
         except KeyError:
             continue
     print 'RMSE=%s' % rmse.compute()
     print 'MAE=%s' % mae.compute()
Example #7
 def recsys_evaluate_matrices(self,
                              original_matrix,
                              imputed_matrix,
                              evaluator=MAE()):
     total_error = 0
     total_rows = 0
     errors = ()
     for row_i, row in enumerate(original_matrix):
         # For each row build its list of non-zero values
         # Build a corresponding list of values for the imputed matrix
         row_values = []
         imputed_values = []
         for col_i, col in enumerate(row):
             if row[col_i] > 0:
                 row_values.append(col)
                 imputed_values.append(imputed_matrix[row_i][col_i])
         if len(row_values) == 0 or len(imputed_values) == 0:
             continue
         evaluator.load_ground_truth(row_values)
         evaluator.load_test(imputed_values)
         row_error = evaluator.compute()
         errors = errors + (row_error, )
         total_error += row_error
         total_rows += 1
     mean_total_error = 0.0
     if total_rows > 0.0:
         mean_total_error = total_error / total_rows
     return errors, mean_total_error
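A minimal call sketch for the method above, with invented 3x3 matrices (`imputer` is a hypothetical instance exposing the method): rows of the original matrix with no positive entries are skipped, and every other row contributes one per-row error.

# Invented data; `imputer` is a stand-in for the hosting object.
original = [[5, 0, 3],
            [0, 0, 0],   # no observed ratings: skipped
            [4, 2, 0]]
imputed = [[4.5, 1.0, 3.5],
           [3.0, 3.0, 3.0],
           [4.0, 2.5, 1.0]]
errors, mean_error = imputer.recsys_evaluate_matrices(original, imputed)
# errors holds one MAE per evaluated row; mean_error averages them.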
Example #8
 def yeung_factor_matrix(self,
                         matrix=[],
                         steps=5000,
                         factors=10,
                         evaluator=MAE(),
                         verbose=True):
     if not matrix:
         matrix = self.load_lists_ui_matrix()
     R = numpy.array(matrix)
     N = len(R)
     M = len(R[0])
     K = factors
     P = numpy.random.rand(N, K)
     Q = numpy.random.rand(M, K)
     nP, nQ, e = self.yeung_matrix_factorization(R,
                                                 P,
                                                 Q,
                                                 K,
                                                 steps,
                                                 verbose=verbose)
     if verbose: print "Final error: {}".format(e)
     self.save_json_file(self.factors_matrix.format(K, steps), nP.tolist())
     self.save_json_file(self.weights_matrix.format(K, steps), nQ.tolist())
     nR = numpy.dot(nP, nQ.T)
     if verbose: print "Saving to JSON file..."
     self.save_json_file(self.reconstructed_matrix.format(K, steps),
                         nR.tolist())
     if verbose:
         print "Evaluation using {}...".format(evaluator.__class__.__name__)
     errors, mean_total_error = self.recsys_evaluate_matrices(
         R, nR, evaluator)
     if verbose: print "Mean total error: {}".format(mean_total_error)
     return nR, nP, nQ, errors, mean_total_error
Example #9
def eval_reco(model, test):
    """ Compute RMSE and MAE on test set
    """

    #Evaluation using prediction-based metrics
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = model.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    return rmse, mae
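Note that eval_reco returns the metric objects rather than final numbers, so the caller runs compute(). A usage sketch (model and test assumed to be set up as in the surrounding examples):

rmse, mae = eval_reco(model, test)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()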
Example #10
def evaluate(_svd, _testData, verbose=False):
    global rmse, mae, rating, item_id, user_id, pred_rating
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = _svd.predict(item_id, user_id, MIN_VALUE=0, MAX_VALUE=10)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except Exception as e:
            print 'ERROR occurred:', e.message

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #11
def ex1(dat_file=DATA_DIR + 'ml-1m-ratings.dat', pct_train=0.5):

    data = Data()
    data.load(dat_file,
              sep='::',
              format={
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': int
              })
    # About the format parameter:
    #   'row': 1   -> rows in the matrix come from column 1 of ratings.dat
    #   'col': 0   -> cols in the matrix come from column 0 of ratings.dat
    #   'value': 2 -> values (Mij) in the matrix come from column 2 of ratings.dat
    #   'ids': int -> ids (row and col ids) are integers (not strings)
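    #   Example: an ml-1m line "1::1193::5::978300760" has the layout
    #   user::movie::rating::timestamp, so with this format dict user 1
    #   becomes a column id, movie 1193 a row id, and 5 the matrix value
    #   (the timestamp column is ignored).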

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K,
                min_values=5,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    # MAE is the mean ABSOLUTE error; in this case it comes out around 1.09,
    # i.e. an average error of almost 1 point on the 5-point rating scale
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #12
def get_mae_rmse(step):

    data = Data()

    format = {'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}

    filename = 'second_train_test.dat.{step}'.format(step=step)

    data.load(filename, sep='::', format=format)

    train, test = data.split_train_test(percent=80)

    try:
        svd = SVD('svdn_model_{step}.zip'.format(step=step))
        print('Loading model... {step}'.format(step=step))
    except:
        return

    mae_predicted, rmse_predicted = [], []
    for rating, item_id, user_id in test:
        try:
            predicted = svd.predict(item_id, user_id)
            mae_predicted.append((rating, predicted))
            rmse_predicted.append((rating, predicted))
        except:
            pass

    mae_value, rmse_value = np.nan, np.nan

    if len(mae_predicted) > 0:
        mae = MAE(mae_predicted)
        mae_value = mae.compute()

    if len(rmse_predicted) > 0:
        rmse = RMSE(rmse_predicted)
        rmse_value = rmse.compute()

    return mae_value, rmse_value
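A hedged driver sketch for the function above (the step range is an assumption; get_mae_rmse returns None when a saved model fails to load):

# Hypothetical step range; adjust to however many models were saved.
for step in range(1, 6):
    result = get_mae_rmse(step)
    if result is not None:
        mae_value, rmse_value = result
        print('step={} MAE={} RMSE={}'.format(step, mae_value, rmse_value))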
Example #13
def evaluate(train_set, test_set):
    svd = SVD()
    svd.set_data(train_set)
    svd.compute(k=KKK, min_values=MIN_ITEM, pre_normalize=None, mean_center=True, post_normalize=True)

    mae = MAE()
    k_err = 0
    for rating, item_id, user_id in test_set.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            mae.add(rating, pred_rating)
        except KeyError:
            #print "keyerror: ===========================================================>"
            k_err += 1
            continue
    
    print "k_err", k_err, " -- ", "test-len: ", len(test_set.get()), "train-len: ", len(train_set.get())
    result = mae.compute()/2.0
    return result
Example #14
def test_random(data):

    mae_predicted, rmse_predicted = [], []
    for rating in data:
        random_predicted = float(random_score(review_percentages))
        mae_predicted.append((rating, random_predicted))
        rmse_predicted.append((rating, random_predicted))

    mae_value, rmse_value = np.nan, np.nan

    if len(mae_predicted) > 0:
        mae = MAE(mae_predicted)
        mae_value = mae.compute()

    if len(rmse_predicted) > 0:
        rmse = RMSE(rmse_predicted)
        rmse_value = rmse.compute()

    return mae_value, rmse_value
Example #15
def evaluate(clf, _testData, verbose=False):

    rmse = RMSE()
    mae = MAE()
    numErrors = 0

    for rating, item_id, user_id in _testData.get():
        try:
            pred_rating = clf.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

            if verbose:
                print item_id, user_id, rating, pred_rating
        except KeyError as e:
            if verbose:
                print 'ERROR occurred:', e.message
            numErrors += 1

    print '\n%i/%i data points raised errors.' % (numErrors, len(_testData))
    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #16
def ex1(dat_file='ml-1m/ratings.dat',
        pct_train=0.5):

    data = Data()
    data.load(dat_file, sep='::',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # About the format parameter:
    #   'row': 1   -> rows in the matrix come from column 1 of ratings.dat
    #   'col': 0   -> cols in the matrix come from column 0 of ratings.dat
    #   'value': 2 -> values (Mij) in the matrix come from column 2 of ratings.dat
    #   'ids': int -> ids (row and col ids) are integers (not strings)

    # create train/test split
    train, test = data.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(
        k=K, min_values=5, pre_normalize=None, mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)
        except KeyError:
            continue

    print 'RMSE=%s' % rmse.compute()
    print 'MAE=%s' % mae.compute()
Example #17
def mean_absolute_error(train_values, predicted_values):

    if len(train_values) != len(predicted_values):
        sys.stderr.write("mean_absolute_error: Invalid list lengths\n")
        sys.exit(1)

    mae = MAE()
    mae.load_ground_truth(train_values)
    mae.load_test(predicted_values)
    return mae.compute()
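A minimal usage sketch for this wrapper, with invented rating lists:

actual = [3.0, 1.0, 5.0, 2.0]     # ground-truth ratings
predicted = [2.5, 1.5, 4.0, 2.0]  # same-length predictions
print mean_absolute_error(actual, predicted)  # (0.5 + 0.5 + 1.0 + 0.0) / 4 = 0.5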
Example #18
#Dataset
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids':int})
#Train & Test data
train, test = data.split_train_test(percent=PERCENT_TRAIN)

svdlibc = SVDLIBC('./ml-1m/ratings.dat')
svdlibc.to_sparse_matrix(sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
svdlibc.compute(k=100)
svd = svdlibc.export()
svd.save_model('/tmp/svd-model', options={'k': 100})
#svd.similar(ITEMID1)  # results may differ from example 4, since no min_values=10 is set here


#Evaluation using prediction-based metrics
print 'Evaluating...'
rmse = RMSE()
mae = MAE()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id, 0.0, 5.0)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
    except KeyError:
        continue

print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
Example #19
 def test_PRED_MAE_different_list_sizes(self):
     mae = MAE()
     GT = [3, 1, 5, 2]
     # GT list has one element less than self.TEST_DATA
     mae.load(GT, self.TEST_DATA)
     assert_raises(ValueError, mae.compute)
Example #20
 def test_PRED_MAE_load_test_and_ground_truth(self):
     mae = MAE()
     mae.load_test(self.TEST_DATA)
     mae.load_ground_truth(self.GT_DATA)
     assert_equal(mae.compute(), 0.7)
Example #21
 def test_PRED_MAE_load_test(self):
     mae = MAE()
     mae.load_test(self.TEST_DATA)
     assert_equal(len(mae.get_test()), len(self.TEST_DATA))
     assert_equal(len(mae.get_ground_truth()), 0)
     assert_raises(ValueError, mae.compute)  #Raise: GT is empty!
Example #22
 def test_PRED_MAE_load(self):
     mae = MAE()
     mae.load(self.GT_DATA, self.TEST_DATA)
     assert_equal(mae.compute(), 0.7)
Example #23
    #Compute SVD
    svd.compute(k=K,
                min_values=None,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)
    svd_neig.compute(k=K,
                     min_values=None,
                     pre_normalize=None,
                     mean_center=True,
                     post_normalize=True)

    # Evaluate
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print 'Total Test ratings: %s' % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id,
                                                    user_id)  #Koren & co.
            if pred_rating_svd_neig is not nan:
Example #24
 def test_PRED_MAE_nan(self):
     mae = MAE()
     mae.add(2.0, nan)
     assert_equal(mae.get_test(), [])
     assert_equal(mae.get_ground_truth(), [])
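The test above documents that add() silently drops a pair whose predicted value is nan: both internal lists stay empty. An explicit caller-side guard, if you prefer filtering before add() (a sketch; assumes the usual svd/test loop variables from the other examples):

import numpy

pred_rating = svd.predict(item_id, user_id)
if not numpy.isnan(pred_rating):
    mae.add(rating, pred_rating)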
Example #25
 def evaluate_matrices_mae(self, original_matrix, imputed_matrix):
     return self.evaluate_matrices(original_matrix,
                                   imputed_matrix,
                                   evaluator=MAE())
Example #26
from recsys.evaluation.prediction import RMSE, MAE
from recsys.datamodel.data import Data

from baseline import Baseline #Import the test class we've just created

#Dataset
PERCENT_TRAIN = int(sys.argv[2])
data = Data()
data.load(sys.argv[1], sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
#Train & Test data
train, test = data.split_train_test(percent=PERCENT_TRAIN)

baseline = Baseline()
baseline.set_data(train)
baseline.compute() # In this case, it does nothing

# Evaluate
rmse = RMSE()
mae = MAE()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = baseline.predict(item_id, user_id, user_is_row=False)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
    except KeyError:
        continue

print 'RMSE=%s' % rmse.compute() # in my case (~80% train, ~20% test set) returns RMSE = 1.036374
print 'MAE=%s' % mae.compute()   # in my case (~80% train, ~20% test set) returns  MAE = 0.829024
Example #27
#Load SVD from /tmp
svd2 = SVD(filename='/tmp/movielens')  # Loading an already computed SVD model

#Predict user rating for a given user and movie:
USERID = 2
ITEMID = 1  # Toy Story
rating1 = svd2.predict(ITEMID, USERID, 0.0, 5.0)
print 'Predicted rating=%f' % rating1

#Retrieve the actual rating for that user and movie
flag = 0
for rating, item_id, user_id in data.get():
    if user_id == USERID and item_id == ITEMID:
        rat = rating
        flag = 1
        break

if flag == 1:
    print 'Actual rating=%f' % rat
else:
    sys.exit("No actual rating available")

#Evaluating the prediction (ground truth first, prediction second)
rmse = RMSE()
mae = MAE()
rmse.add(rat, rating1)
mae.add(rat, rating1)
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
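Worth noting: with a single (actual, predicted) pair both metrics collapse to the same number, so the two prints match:

# For one pair (gt, pred):
#   MAE  = |gt - pred|
#   RMSE = sqrt((gt - pred) ** 2) = |gt - pred|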
Example #28
class TestPrediction(Test):
    def __init__(self):
        super(TestPrediction, self).__init__()
        # Prediction-based metrics: MAE, RMSE, Pearson
        self.mae = MAE(self.DATA_PRED)
        self.rmse = RMSE(self.DATA_PRED)

        self.R = 3  # Real Rating (ground truth)
        self.R_PRED = 2.1  # Predicted Rating

    # test_PRED MAE
    def test_PRED_MAE_compute_one(self):
        assert_equal(self.mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_one_empty_datasets(self):
        mae = MAE()
        assert_equal(mae.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_MAE_compute_all(self):
        assert_equal(self.mae.compute(), 0.7)

    def test_PRED_MAE_nan(self):
        mae = MAE()
        mae.add(2.0, nan)
        assert_equal(mae.get_test(), [])
        assert_equal(mae.get_ground_truth(), [])

    def test_PRED_MAE_load(self):
        mae = MAE()
        mae.load(self.GT_DATA, self.TEST_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_load_test(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        assert_equal(len(mae.get_test()), len(self.TEST_DATA))
        assert_equal(len(mae.get_ground_truth()), 0)
        assert_raises(ValueError, mae.compute)  #Raise: GT is empty!

    def test_PRED_MAE_load_test_and_ground_truth(self):
        mae = MAE()
        mae.load_test(self.TEST_DATA)
        mae.load_ground_truth(self.GT_DATA)
        assert_equal(mae.compute(), 0.7)

    def test_PRED_MAE_add_entry(self):
        self.mae.add(1, 4)  #1: GT rating, 4: Predicted rating
        assert_equal(len(self.mae.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.mae.compute(), 1.083333)

    def test_PRED_MAE_different_list_sizes(self):
        mae = MAE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        mae.load(GT, self.TEST_DATA)
        assert_raises(ValueError, mae.compute)

    # test_PRED RMSE
    def test_PRED_RMSE_compute_one(self):
        #Even though rmse has data, we only compute these two param values
        assert_equal(self.rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_one_empty_datasets(self):
        rmse = RMSE()
        assert_equal(rmse.compute(self.R, self.R_PRED), 0.9)

    def test_PRED_RMSE_compute_all(self):
        assert_equal(self.rmse.compute(), 0.891067)

    def test_PRED_RMSE_load_test(self):
        rmse = RMSE()
        self.TEST_DATA = [2.3, 0.9, 4.9, 0.9, 1.5]
        rmse.load_test(self.TEST_DATA)
        assert_equal(len(rmse.get_test()), len(self.TEST_DATA))

    def test_PRED_RMSE_add_entry(self):
        self.rmse.add(1, 4)
        assert_equal(len(self.rmse.get_test()), len(self.DATA_PRED) + 1)
        assert_equal(self.rmse.compute(), 1.470261)

    def test_PRED_RMSE_different_list_sizes(self):
        rmse = RMSE()
        GT = [3, 1, 5, 2]
        # GT list has one element less than self.TEST_DATA
        rmse.load(GT, self.TEST_DATA)
        assert_raises(ValueError, rmse.compute)

    def test_PRED_RMSE_numpy_array(self):
        rmse = RMSE()
        rmse.load(array(self.GT_DATA), array(self.TEST_DATA))
        assert_equal(rmse.compute(), 0.891067)
Example #29
print 'GENERATING PREDICTION'
MIN_RATING = 0.0
MAX_RATING = 5.0
ITEMID = 1
USERID = 1
print svd.predict(ITEMID, USERID, MIN_RATING,
                  MAX_RATING)  # predicted rating value
print svd.get_matrix().value(ITEMID, USERID)  # real rating value

print ''
print 'GENERATING RECOMMENDATION'
print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False)

#Evaluation using prediction-based metrics
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()
kendall = KendallTau()
#decision = PrecisionRecallF1()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
        spearman.add(rating, pred_rating)
        kendall.add(rating, pred_rating)
    except KeyError:
        continue

print ''
print 'EVALUATION RESULT'
Example #30
 def test_PRED_MAE_compute_one_empty_datasets(self):
     mae = MAE()
     assert_equal(mae.compute(self.R, self.R_PRED), 0.9)
Example #31
                        artist_id = db.get_label(index=vec)
                        index = baseline._matrix.get().row_index(str(artist_id['artist_id']).encode('utf-8'))
                        artist_count = baseline._matrix.get_value(str(artist_id['artist_id']).encode('utf-8'), str(user["_id"]).encode('utf-8'))
                        counts[index] = artist_count

                    pred_items = baseline.recommend(user["_id"], n=10, only_unknowns=False, is_row=False, v_vectors=v_vectors, sparse_matrix_vector=s_matrix_vector[0]['array'])
                    for item_id, relevance in pred_items:
                        index = baseline._matrix.get().row_index(str(item_id).encode('utf-8'))
                        if index in counts:
                            GT_DECISION.append(int(counts[index]))
                            TEST_DECISION.append(relevance)

                    if len(GT_DECISION) == 0 and len(TEST_DECISION) == 0:
                        mae = 1.0
                    else:
                        evaluator = MAE()
                        evaluator.load(GT_DECISION, TEST_DECISION)
                        mae = evaluator.compute()
                        total_mae += float(mae)
                        mae_count += 1

                    print "UserID: %s,      Count: %s,      MAE: %s" % (str(user["_id"]).encode('utf-8'), user['artist_distinct_count'], mae)
                    myFile.write("UserID: " + str(user["_id"]).encode('utf-8') + ", " + "Count: " + str(user['artist_distinct_count']) + ", " + "MAE: " + str(mae) + "\n")

                    count += 1
                    if count % 10 == 0:
                        output_mae = float(total_mae) / float(mae_count)
                        print "Interval: %s,     Avg MAE: %s" % (start, output_mae)
Example #32
    isb = sc.broadcast(item_sim_dict)
    '''
    Calculate the top-N item recommendations for each user
    user_id -> [item1,item2,item3,...]
    '''
    user_item_recs = user_item_pairs.map(
        lambda p: topNRecommendations(p[0], p[1], isb.value, 500)).collect()
    '''
    Read in test data and calculate MAE
    '''

    test_ratings = defaultdict(list)

    # read in the test data
    f = open("tests/data/cftest.txt", 'rt')
    reader = csv.reader(f, delimiter='|')
    for row in reader:
        user = row[0]
        item = row[1]
        rating = row[2]
        test_ratings[user] += [(item, rating)]

    # create train-test rating tuples
    preds = []
    for (user, items_with_rating) in user_item_recs:
        for (rating, item) in items_with_rating:
            for (test_item, test_rating) in test_ratings[user]:
                if str(test_item) == str(item):
                    preds.append((rating, float(test_rating)))

    mae = MAE(preds)
    result = mae.compute()
    print "Mean Absolute Error: ", result
Example #33
RUNS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for run in RUNS:
    print "RUN(%d)" % run
    # Train & Test data
    train, test = data.split_train_test(percent=PERCENT_TRAIN)

    svd.set_data(train)
    svd_neig.set_data(train)

    # Compute SVD
    svd.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)
    svd_neig.compute(k=K, min_values=None, pre_normalize=None, mean_center=True, post_normalize=True)

    # Evaluate
    rmse_svd = RMSE()
    mae_svd = MAE()
    rmse_svd_neig = RMSE()
    mae_svd_neig = MAE()

    i = 1
    total = len(test.get())
    print "Total Test ratings: %s" % total
    for rating, item_id, user_id in test:
        try:
            pred_rating_svd = svd.predict(item_id, user_id)
            rmse_svd.add(rating, pred_rating_svd)
            mae_svd.add(rating, pred_rating_svd)

            pred_rating_svd_neig = svd_neig.predict(item_id, user_id)  # Koren & co.
            if pred_rating_svd_neig is not nan:
                rmse_svd_neig.add(rating, pred_rating_svd_neig)
Example #34
data.load(sys.argv[1],
          sep='::',
          format={
              'col': 0,
              'row': 1,
              'value': 2,
              'ids': int
          })
#Train & Test data
train, test = data.split_train_test(percent=PERCENT_TRAIN)

baseline = Baseline()
baseline.set_data(train)
baseline.compute()  # In this case, it does nothing

# Evaluate
rmse = RMSE()
mae = MAE()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = baseline.predict(item_id, user_id, user_is_row=False)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
    except KeyError:
        continue

print 'RMSE=%s' % rmse.compute()  # in my case (~80% train, ~20% test set) returns RMSE = 1.036374
print 'MAE=%s' % mae.compute()  # in my case (~80% train, ~20% test set) returns MAE = 0.829024