Beispiel #1
0
 def read_data(self):
     """Load one copy of the ratings file per fold, each with one slice held out.

     The file at ``self.surprise_file_path`` is read ``self.no_of_folds``
     times with a comma-separated "user item rating" reader whose rating
     scale is (0, 1). Copy number ``k`` then has the ratings at positions
     ``[k * fold_size, (k + 1) * fold_size)`` removed from its
     ``raw_ratings``, where ``fold_size = len(raw_ratings) // no_of_folds``.
     Because of the integer division, any trailing remainder of ratings is
     never held out and stays in every copy.

     The result is stored in ``self.datasets``; nothing is returned.
     """
     reader = dataset.Reader(line_format="user item rating", sep=',', rating_scale=(0, 1), skip_lines=0)
     self.datasets = [
         dataset.Dataset.load_from_file(self.surprise_file_path, reader=reader)
         for _ in range(self.no_of_folds)
     ]

     fold_size = len(self.datasets[0].raw_ratings) // self.no_of_folds
     # The loop variable must not be named `dataset`: binding that name
     # anywhere in this function makes it local and hides the imported
     # `dataset` module used above (UnboundLocalError).
     for fold_idx, fold_data in enumerate(self.datasets):
         held_out_start = fold_idx * fold_size
         held_out_stop = held_out_start + fold_size
         # Keep everything except this fold's held-out window.
         fold_data.raw_ratings = (fold_data.raw_ratings[:held_out_start]
                                  + fold_data.raw_ratings[held_out_stop:])
Beispiel #2
0
 def read_data(self):
     """Read the ratings file into ``self.data`` and split it into 5 folds.

     The file at ``self.surprise_file_path`` is parsed as comma-separated
     "user item rating" lines with ratings on a (0, 1) scale and no
     skipped header lines.
     """
     reader_options = {
         "line_format": "user item rating",
         "sep": ',',
         "rating_scale": (0, 1),
         "skip_lines": 0,
     }
     ratings_reader = dataset.Reader(**reader_options)

     self.data = dataset.Dataset.load_from_file(
         self.surprise_file_path, reader=ratings_reader)
     # Partition the loaded ratings in place for cross-validation.
     self.data.split(n_folds=5)
Beispiel #3
0
    rects1 = ax.bar(ind, val1, width, color='r', yerr=interval1)
    rects2 = ax.bar(ind + width, val2, width, color='y', yerr=interval2)

    ax.legend((rects1[0], rects2[0]), ('RMSE', 'MAE'))
    ax.set_ylabel('Error')
    ax.set_title('Error Rates of SVD')
    plt.show()
    plt.savefig("error.png")


if __name__ == "__main__":
    # Reference point for the elapsed-time report printed after evaluation.
    start_time = time.time()

    # Load the tab-separated MovieLens 100k ratings and split them into folds.
    folds = 5
    reader = dataset.Reader(sep='\t', line_format='user item rating')
    data = Dataset.load_from_file('movielens100k/ml-100k/u.data',
                                  reader=reader)
    data.split(n_folds=folds)

    # Run SVD over the folds, collecting RMSE and MAE.
    algo = SVD()
    perf = evaluate(algo, data, measures=['RMSE', 'MAE'])

    elapsed_seconds = time.time() - start_time
    print("\n\n--- Time Elapsed: %s seconds ---" % elapsed_seconds)

    # Score lists from `perf` as arrays, one per measure.
    rmse, mae = (np.array(perf[measure]) for measure in ('rmse', 'mae'))

    # GetStats is defined elsewhere in the file; presumably it returns the
    # mean and a confidence interval for the given scores -- confirm there.
    rmse_mean, rmse_conf_interval = GetStats("RMSE", rmse)
    mae_mean, mae_conf_interval = GetStats("MAE", mae)
Beispiel #4
0
from surprise import SVD
from surprise import dataset
from surprise import Dataset
import numpy as np
from surprise import evaluate, print_perf

# Load the training ratings.
file1 = '/home/ldua/DM/train_rating.txt'
train_df = pd.read_csv(file1)

# Load the testing ratings and report how many rows there are.
testfile = '/home/ldua/DM/test_rating.txt'
test_df = pd.read_csv(testfile)
print(len(test_df))

reader1 = dataset.Reader(rating_scale=(1, 5))

# Set every test row's rating to 0 before the frame is loaded below.
test_df['rating'] = 0

# Both frames are loaded as (user, item, rating) triples from these columns.
rating_columns = ['user_id', 'business_id', 'rating']
data = Dataset.load_from_df(train_df[rating_columns], reader1)
data_test = Dataset.load_from_df(test_df[rating_columns], reader1)

# Build the train set from all training ratings; the test set is obtained by
# building a full trainset from the test frame and converting it to a testset.
trainset = data.build_full_trainset()
testset = data_test.build_full_trainset()
testset2 = testset.build_testset()

# Set the parameters values for the model
    start_time = time.time()

    # Normalise dataset
    header = ['user', 'item', 'rating', 'timestamp']
    ratings_data = pd.read_csv('movielens100k/ml-100k/u.data',
                               sep='\t',
                               names=header)
    ratings_data.rating = (ratings_data.rating / 5.0)
    ratings_data.to_csv("./normalised_movielens.data",
                        sep='\t',
                        index=False,
                        header=False)

    folds = 5
    reader = dataset.Reader(line_format='user item rating',
                            sep='\t',
                            rating_scale=(0, 1))
    data = Dataset.load_from_file('./normalised_movielens.data', reader)
    data.split(n_folds=folds)

    # We'll use the famous SVD algorithm.
    algo = SVD()

    rsquared_folds = np.zeros(folds)
    rmse_folds = np.zeros(folds)
    mse_folds = np.zeros(folds)
    fold = 0
    for trainset, testset in data.folds():
        start_time2 = time.time()

        # train and test algorithm.