from surprise import Dataset
from surprise import SVD, KNNWithMeans
from surprise import accuracy
from surprise.model_selection import train_test_split

# Load the built-in MovieLens-100k dataset (downloaded on first use) and
# hold out 25% of the ratings as a test set.
data = Dataset.load_builtin('ml-100k')
train, test = train_test_split(data, test_size=0.25, random_state=10)

# Configure SVD through the constructor rather than by mutating attributes
# after construction: SVD.__init__ derives its internal per-parameter
# settings from its arguments, so constructor kwargs are the supported
# (and only reliable) way to configure the estimator.
algo = SVD(n_epochs=20, random_state=15)
algo.fit(train)

# Evaluate on the held-out ratings.
predictions = algo.test(test)
accuracy.rmse(predictions)

uid = str(196)  # raw user id (surprise expects raw ids as strings here)
iid = str(302)  # raw item id
r_ui = 4  # known true rating, so the printed prediction can be compared

pred = algo.predict(uid, iid, r_ui=r_ui, verbose=True)
print(pred.est)

# Item-based KNN baseline using mean-centered ratings.
knn = KNNWithMeans(
    sim_options={
        "name": "msd",  # similarity measure: cosine / msd / pearson / pearson_baseline
        "min_support": 2,  # min common ratings for a non-zero similarity
        "user_based": False,  # item-item similarities
    })
knn.fit(train)
# *****************************************************************************
# %% Best Hyper-parameters Training:
# Training over whole training dataset, using best hyper-parameters
# Re-train SVD on the full training set using the best hyper-parameters
# found by the cross-validated search (Train_CV).
best = Train_CV.best_params['rmse']

alg = SVD()
# Copy each tuned value onto the estimator.
for param in ('biased', 'n_epochs', 'n_factors',
              'reg_pu', 'reg_qi', 'reg_bu', 'reg_bi'):
    setattr(alg, param, best[param])
# A single learning rate was searched; apply it to both factor matrices.
alg.lr_pu = best['lr_all']
alg.lr_qi = best['lr_all']
alg.verbose = True
alg.random_state = 0

alg.fit(data_train.build_full_trainset())

# *****************************************************************************
# %% Loading Test Data
# -----------------------------------------------------------------------------
# Load the test ratings to predict.
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)

# -----------------------------------------------------------------------------
# Predict an estimated rating for every (user, item) pair in the test data.
# NOTE(review): line[1] is passed as the raw user id and line[0] as the raw
# item id — confirm this column order against utils.load_data_desired.
Predict_Test = [alg.predict(str(line[1]), str(line[0])).est for line in data_test]
# -----------------------------------------------------------------------------
# Beispiel #3 (scraped-example separator; kept as a comment)
# sample test set to get individual predictions.

# ## SVD

# In[ ]:

#SVD with baselines

# SVD with baselines. All hyper-parameters MUST go through the constructor:
# assigning ``reg_all`` / ``lr_all`` as attributes after construction is a
# silent no-op, because SVD.__init__ expands them into the per-parameter
# rates (reg_bu/reg_bi/reg_pu/reg_qi and the lr_* counterparts) and never
# reads ``self.reg_all`` / ``self.lr_all`` again. The original code
# therefore trained with the default regularization and learning rate.
algo = SVD(
    n_factors=400,
    biased=True,
    reg_all=0.1,
    lr_all=0.01,
    n_epochs=500,
    random_state=seed,
    verbose=False,
)

print("Training SVD...")
algo.fit(trainset)

print("Computing predictions for SVD... \n")
# Real predictions to append to the big final matrix.
test_predictions_svd = algo.test(testset)

# Each Prediction is (uid, iid, r_ui, est, details); keep only the
# estimated rating in column 3.
test_predictions_svd = np.asarray(test_predictions_svd)
test_predictions_svd_filtered = test_predictions_svd[:, 3]

# ## SVD Without Baselines