def read_data(self):
    """Load one copy of the ratings file per fold, each with one slice held out.

    Reads ``self.surprise_file_path`` ``self.no_of_folds`` times with a
    comma-separated ``user item rating`` reader (rating scale 0-1) and
    stores the resulting datasets in ``self.datasets``.  Copy number ``k``
    then has the ``k``-th contiguous slice of raw ratings removed, where each
    slice holds ``len(raw_ratings) // no_of_folds`` entries.

    Raises:
        ValueError: if ``self.no_of_folds`` is smaller than 1.
    """
    if self.no_of_folds < 1:
        raise ValueError("no_of_folds must be at least 1")

    reader = dataset.Reader(line_format="user item rating", sep=',',
                            rating_scale=(0, 1), skip_lines=0)
    self.datasets = [
        dataset.Dataset.load_from_file(self.surprise_file_path, reader=reader)
        for _ in range(self.no_of_folds)
    ]

    # Integer division: when the rating count is not a multiple of the fold
    # count, the trailing remainder is never held out of any copy.
    held_out_size = len(self.datasets[0].raw_ratings) // self.no_of_folds

    # The loop variable must not be called ``dataset``: that would make the
    # name local to this function and break the ``dataset.Reader`` lookup above.
    for fold_idx, fold_data in enumerate(self.datasets):
        start = fold_idx * held_out_size
        stop = start + held_out_size
        fold_data.raw_ratings = [
            rating
            for position, rating in enumerate(fold_data.raw_ratings)
            if not start <= position < stop
        ]
def read_data(self):
    """Load the ratings file into ``self.data`` and split it into 5 folds.

    The file at ``self.surprise_file_path`` is parsed as comma-separated
    ``user item rating`` lines with ratings on a 0-1 scale and no skipped
    header lines.
    """
    reader_options = {
        "line_format": "user item rating",
        "sep": ',',
        "rating_scale": (0, 1),
        "skip_lines": 0,
    }
    ratings_reader = dataset.Reader(**reader_options)

    self.data = dataset.Dataset.load_from_file(
        self.surprise_file_path,
        reader=ratings_reader,
    )
    # Fold count is fixed at five here.
    self.data.split(n_folds=5)
rects1 = ax.bar(ind, val1, width, color='r', yerr=interval1) rects2 = ax.bar(ind + width, val2, width, color='y', yerr=interval2) ax.legend((rects1[0], rects2[0]), ('RMSE', 'MAE')) ax.set_ylabel('Error') ax.set_title('Error Rates of SVD') plt.show() plt.savefig("error.png") if __name__ == "__main__": start_time = time.time() folds = 5 reader = dataset.Reader(line_format='user item rating', sep='\t') data = Dataset.load_from_file('movielens100k/ml-100k/u.data', reader) data.split(n_folds=folds) # We'll use the famous SVD algorithm. algo = SVD() # Evaluate performances of our algorithm on the dataset. perf = evaluate(algo, data, measures=['RMSE', 'MAE']) print("\n\n--- Time Elapsed: %s seconds ---" % (time.time() - start_time)) rmse = np.array(perf['rmse']) mae = np.array(perf['mae']) rmse_mean, rmse_conf_interval = GetStats("RMSE", rmse) mae_mean, mae_conf_interval = GetStats("MAE", mae)
import numpy as np
import pandas as pd  # required by the pd.read_csv calls below
from surprise import SVD
from surprise import dataset
from surprise import Dataset
from surprise import evaluate, print_perf

# Read the training set
file1 = '/home/ldua/DM/train_rating.txt'
train_df = pd.read_csv(file1)

# Read the testing set
testfile = '/home/ldua/DM/test_rating.txt'
test_df = pd.read_csv(testfile)
print(len(test_df))

reader1 = dataset.Reader(rating_scale=(1, 5))

# Fill the test frame's rating column with a constant so it offers the same
# three columns that load_from_df selects below (presumably the real test
# ratings are unknown -- confirm).
test_df['rating'] = 0

# Read the data in the form of customer, product, rating
data = Dataset.load_from_df(train_df[['user_id', 'business_id', 'rating']], reader1)
data_test = Dataset.load_from_df(test_df[['user_id', 'business_id', 'rating']], reader1)

# Build train set and test set
trainset = data.build_full_trainset()
# The test frame is first wrapped as a full trainset, then turned into a
# test set via build_testset().
testset = data_test.build_full_trainset()
testset2 = testset.build_testset()

# Set the parameters values for the model
start_time = time.time() # Normailise dataset header = ['user', 'item', 'rating', 'timestamp'] ratings_data = pd.read_csv('movielens100k/ml-100k/u.data', sep='\t', names=header) ratings_data.rating = (ratings_data.rating / 5.0) ratings_data.to_csv("./normalised_movielens.data", sep='\t', index=False, header=False) folds = 5 reader = dataset.Reader(line_format='user item rating', sep='\t', rating_scale=(0, 1)) data = Dataset.load_from_file('./normalised_movielens.data', reader) data.split(n_folds=folds) # We'll use the famous SVD algorithm. algo = SVD() rsquared_folds = np.zeros(folds) rmse_folds = np.zeros(folds) mse_folds = np.zeros(folds) fold = 0 for trainset, testset in data.folds(): start_time2 = time.time() # train and test algorithm.