# Demo script: fit an SVD recommender on the built-in MovieLens-100k ratings,
# report RMSE on a held-out split, inspect one individual prediction, and then
# fit a KNNWithMeans model on the same training split.
from surprise import Dataset, KNNWithMeans, SVD, accuracy
from surprise.model_selection import train_test_split

# Hold out 25% of the ratings for evaluation; the split is seeded so it is
# reproducible between runs.
data = Dataset.load_builtin("ml-100k")
train, test = train_test_split(data, test_size=0.25, random_state=10)

# Epoch count and RNG seed are supplied at construction time.
algo = SVD(n_epochs=20, random_state=15)
algo.fit(train)
predictions = algo.test(test)
accuracy.rmse(predictions)

# Spot-check a single (user, item) pair against its known rating.
uid = "196"  # raw user id
iid = "302"  # raw item id
r_ui = 4  # the true rating is already known to be 4, so we can compare
pred = algo.predict(uid, iid, r_ui=r_ui, verbose=True)
print(pred.est)

# Similarity settings for the neighbourhood model.
# "name" may be one of: cosine / msd / pearson / pearson_baseline.
# user_based=False asks for similarities between items rather than users.
sim_options = {
    "name": "msd",
    "min_support": 2,
    "user_based": False,
}
knn = KNNWithMeans(sim_options=sim_options)
knn.fit(train)
# *****************************************************************************
# %% Best Hyper-parameters Training:
# Training over whole training dataset, using best hyper-parameters.
#
# The tuned values are handed to the SVD constructor instead of being assigned
# as attributes afterwards. The constructor is where `lr_all` is expanded into
# the individual learning rates, so the tuned rate now also reaches the bias
# terms (lr_bu / lr_bi) rather than only lr_pu / lr_qi.
best_rmse_params = Train_CV.best_params['rmse']
alg = SVD(
    biased=best_rmse_params['biased'],
    n_epochs=best_rmse_params['n_epochs'],
    n_factors=best_rmse_params['n_factors'],
    reg_pu=best_rmse_params['reg_pu'],
    reg_qi=best_rmse_params['reg_qi'],
    reg_bu=best_rmse_params['reg_bu'],
    reg_bi=best_rmse_params['reg_bi'],
    lr_all=best_rmse_params['lr_all'],
    verbose=True,
    random_state=0,
)
alg.fit(data_train.build_full_trainset())

# *****************************************************************************
# %% Loading Test Data
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)

# *****************************************************************************
# %% Predicting test data labels
# For each test row, line[1] is passed as the raw user id and line[0] as the
# raw item id; only the estimated rating (`.est`) is kept.
Predict_Test = [
    alg.predict(str(line[1]), str(line[0])).est
    for line in data_test
]
# sample test set to get individual predictions.

# ## SVD

# In[ ]:


#SVD with baselines
# All hyper-parameters are passed to the constructor. `reg_all` and `lr_all`
# in particular must be given here: SVD.__init__ expands them into the
# per-parameter regularisation terms and learning rates, so assigning
# `algo.reg_all` / `algo.lr_all` after construction has no effect on training.
algo = SVD(
    n_factors=400,
    n_epochs=500,
    biased=True,
    lr_all=0.01,
    reg_all=0.1,
    random_state=seed,
    verbose=False,
)
print("Training SVD...")
algo.fit(trainset)
print("Computing predictions for SVD... \n")
test_predictions_svd = algo.test(
    testset)  #Get real predictions to append to big final matrix


# In[ ]:


# Convert the list of prediction tuples to an array and keep column 3,
# which is the `est` (estimated rating) field of Surprise's Prediction tuple.
test_predictions_svd = np.asarray(test_predictions_svd)
test_predictions_svd_filtered = test_predictions_svd[:, 3]


# ## SVD Without Baselines