# User-user collaborative-filtering evaluation script (collapsed onto one line).
# Loads ground-truth ratings and id maps, computes per-user mean ratings over
# *rated* entries only ((training != 0).sum(1) counts non-zero cells), then
# predicts and reports RMSE / Spearman / top-k precision, and finally starts
# the item-item pass by computing per-item means on training.T.
# NOTE(review): the two "'******'" spans are redaction artifacts — the code
# that built `user_dist` (presumably a pairwise_distances(..., metric='cosine')
# call on mean-centred float32 data) and part of a timing print were destroyed
# and cannot be reconstructed from this view; the line is left byte-identical.
# NOTE(review): `.as_matrix()` was removed in pandas 1.0 — needs `.to_numpy()`
# once the redacted code is restored. `training`/`testing`/`start_time_user`
# are assumed to be defined earlier in the original file — TODO confirm.
truth = testing['rating'].head(10000).as_matrix() user_mappings = recsys_utils.read_user_map() movie_mappings = recsys_utils.read_movie_map() user_means = np.squeeze(np.sum(np.array(training.todense()), axis=1)) user_means = np.divide(user_means, (np.array(training.todense()) != 0).sum(1)) print('collaborative filtering for User-User:'******'float32')), metric='cosine') print('distance calculation time:', time() - start_time_user) predictions = predict(training, user_dist, testing, user_mappings, movie_mappings, 10) print('Time for User-User:'******'RMSE:', evaluation.RMSE(predictions, truth)) print('spearman rank correlation:', evaluation.spearman_rank_correlation(predictions, truth)) print( 'top k precision:', evaluation.top_k_precision(predictions, testing, user_means, user_mappings, k=5)) print('Total time:', time() - start_time_user) # Item-item collaborative filtering it_means = np.squeeze(np.sum(np.array(training.T.todense()), axis=1)) it_means = np.divide(it_means, (np.array(training.T.todense()) != 0).sum(1))
# CUR-decomposition evaluation: build C, R and U, multiply them into an
# approximation of the ratings matrix, then score it on the test set.
# `C_frob` / `R_frob` hold (index, frobenius-weight) pairs for the sampled
# columns / rows — assumed produced earlier in the file; TODO confirm.
Matrix_C = formMat_C(C_frob, train, no_of_param, forbenius_norm_matrix_row)
print(len(Matrix_C), " , ", len(Matrix_C[0]))

# R: sampled rows of `train`, each rescaled by sqrt(no_of_param * weight).
Matrix_R = [
    [train[int(pair[0]), col] / (sqrt(no_of_param * pair[1]))
     for col in range(len(forbenius_norm_matrix_col))]
    for pair in R_frob
]

U_mat = Compute_U(train, C_frob, R_frob)
Cur_mat = Compute_Cur(Matrix_C, Matrix_R, U_mat)
end_Time = time()
print("Time taken for CUR : ", (end_Time - start_time_user))

# Look up the CUR-approximated rating for every (user, movie) in the test set.
pred_Ratings = [
    Cur_mat[user_map[row['userId']], movie_map[row['movieId']]]
    for idx, row in test.iterrows()
]
predictions = np.array(pred_Ratings)
print(len(predictions))

# Report the three evaluation metrics against the held-out truth.
print("RMSE ERROR:", evaluation.RMSE(predictions, truth))
print("Spearman Rank Correlation: ", evaluation.spearman_rank_correlation(predictions, truth))
print(
    "Top K rank Precision: ",
    evaluation.top_k_precision(predictions, test, np.squeeze(np.array(means_mat)), user_map))
def evaluate(self, predictions):
    """Return the RMSE between *predictions* and the preprocessed targets."""
    # preprocess_simple() yields (features, targets); only targets are needed.
    _, targets = self.preprocess_simple()
    return evaluation.RMSE(predictions, targets)
# Fragment of a per-epoch torch training loop (collapsed onto one line); the
# enclosing `for i ...` batch loop and the definitions of `net`, `criterion`,
# `optimizer`, `price_trues`, `price_pres`, `train_price`, `transformer`,
# `BATCHSIZE`, `Dimension`, `epoch` start before this view — do not edit blind.
# Flow: one optimisation step (forward, loss, zero_grad, backward, step), then
# true/predicted prices for the batch are converted back to price space and
# accumulated; train RMSE is recorded; the net is switched to eval() and each
# test descriptor is scored one at a time; finally test loss is recorded and
# per-epoch predictions are written to ./result/result_epoch<N>.csv.
# NOTE(review): presumably `transformer.convert2price` undoes a normalisation
# applied by the data loader — confirm against its definition.
price = torch.from_numpy(price).cuda() price_pre = net(desc) loss = criterion(price_pre, price) optimizer.zero_grad() loss.backward() optimizer.step() for j in range(BATCHSIZE): price_trues.append( transformer.convert2price(train_price[i * BATCHSIZE + j])) price_pres.append( transformer.convert2price( price_pre.cpu().detach().numpy()[j][0])) train_loss = evaluation.RMSE(price_trues, price_pres) train_loss_list.append(train_loss) net.eval() price_pres = [] for i in range(len(test_desc)): desc = (test_desc[i]).reshape(1, Dimension) desc = torch.from_numpy(desc).cuda() price_pre = net(desc) price_pres.append( transformer.convert2price(price_pre.cpu().detach().numpy()[0][0])) test_loss = evaluation.eval_test(price_pres) test_loss_list.append(test_loss) dataloader.write_csv(price_pres, './result/result_epoch' + str(epoch + 1) + '.csv')
def validation(model, device, val_loader, prediction_dir):
    """Run the model over the validation set and print its RMSE.

    Ground truth is taken from column 1 of the loader's label array;
    predictions come from the project-level test() helper.
    """
    labels = val_loader.dataset.labels[:, 1]
    predicted = test(model, device, val_loader, prediction_dir)['predict']
    print('validation test finish')
    # NOTE(review): this 3-arg RMSE signature differs from the 2-arg calls
    # elsewhere in the project — confirm against evaluation.RMSE's definition.
    score = evaluation.RMSE(labels, predicted, len(labels))
    print('validation : ', score)
# Baseline collaborative filtering (collapsed onto one line): subtract the
# user-bias and movie-bias terms from every non-zero rating in t2, then run
# user-user prediction with the baseline residuals and report metrics, and
# finally start the item-item pass (t2 is transposed for it).
# NOTE(review): the "'******'" span is a redaction artifact — a timing print
# and part of the RMSE print were destroyed and cannot be reconstructed from
# this view; the line is left byte-identical.
# `nz_val`, `t`, `t2`, `means_matrix`, `movie_matrix`, `training`, `testing`,
# `user_map`, `movie_map`, `truth` are assumed defined earlier — TODO confirm.
for val in nz_val: t2[val[0], val[1]] = t2[val[0], val[1]] - means_matrix[val[0]] - movie_matrix[val[1]] means_matrix = np.squeeze(means_matrix) movie_matrix = np.squeeze(movie_matrix) user_dist = 1 - pairwise_distances(sub_mean(t), metric='cosine') start_time_item = time() predictions_usr = predict_baseline(training, user_dist, testing, user_map, movie_map, 10, 'user', t2, means_matrix, movie_matrix) predictions_usr = np.squeeze(predictions_usr) print('Total time for User-User:'******'RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000])) print('spearman_rank_correlation:', evaluation.spearman_rank_correlation(predictions_usr, truth[0:10000])) print( 'Precision on top K:', evaluation.top_k_precision(predictions_usr, testing.head(10000), means_matrix, user_map)) print('collaborative filtering for....') start_time_item = time() item_dist = 1 - pairwise_distances(sub_mean(training.T), metric='cosine') print('Time taken to calculate distances:', time() - start_time_item) t2 = t2.T predictions_mov = predict_baseline(training.T, item_dist, testing, user_map, movie_map, 10, 'item', t2, means_matrix, movie_matrix)
# User-user / item-item collaborative-filtering evaluation.
# FIXES: Python-2 print statements converted to print() calls for consistency
# with the rest of the file; deprecated .as_matrix() (removed in pandas 1.0)
# replaced with .to_numpy(); commented-out duplicate lines deleted.
truth = test['rating'].head(10000).to_numpy()
user_map = recsys_utils.read_user_map()
movie_map = recsys_utils.read_movie_map()

# ---- User-user collaborative filtering ----
# Per-user mean over *rated* items only: zeros in the dense matrix mean
# "unrated", so divide the row sum by the count of non-zero entries.
user_means = np.squeeze(np.sum(np.array(train.todense()), axis=1))
user_means = np.divide(user_means, (np.array(train.todense()) != 0).sum(1))
print('User-user collaborative filtering....')
start_time_user = time()
# Cosine similarity between mean-centred user rows (1 - distance).
user_dist = 1 - pairwise_distances(subtract_mean(train.astype('float32')),
                                   metric='cosine')
print('Time taken to calculate distances:', time() - start_time_user)
predictions = predict(train, user_dist, test, user_map, movie_map, 10)
print('User-user-> Total time:', time() - start_time_user)
print('User-user-> RMSE:', evaluation.RMSE(predictions, truth))
print('spearman_rank_correlation',
      evaluation.spearman_rank_correlation(predictions, truth))
print('top k precision:',
      evaluation.top_k_precision(predictions, test, user_means, user_map, k=5))
print('Total time:', time() - start_time_user)

# ---- Item-item collaborative filtering ----
# Same mean computation on the transposed matrix: per-item means.
item_means = np.squeeze(np.sum(np.array(train.T.todense()), axis=1))
item_means = np.divide(item_means, (np.array(train.T.todense()) != 0).sum(1))
print('Item-item collaborative filtering....')
# SVD-based recommender evaluation.
# Read data
train = np.array(recsys_utils.read_train())
test = recsys_utils.read_test_table()
truth = test['rating'].to_numpy()  # .as_matrix() was removed in pandas 1.0
user_map = recsys_utils.read_user_map()
movie_map = recsys_utils.read_movie_map()
start_time = time()

# Subtract each user's mean rating (over rated entries only) in place,
# leaving unrated cells at zero.
user_means = np.squeeze(np.sum(train, axis=1))
user_means = np.divide(user_means, (train != 0).sum(1))
for i in range(train.shape[0]):
    train[i, :][train[i, :] != 0] -= user_means[i]

# Decomposition and Reconstruction of SVD (retain 90% of the energy).
U, V_t, sigma = SVD(train, retain_energy=90, save_factorized=True)
reconstructed = np.dot(np.dot(U, sigma), V_t)

# Predictions: add the user means back onto the low-rank reconstruction.
# BUG FIX: `train` was used here before, leaving `reconstructed` unused —
# the SVD result never influenced the predictions.
pred_matrix = reconstructed + np.reshape(user_means, [len(user_means), 1])
ro = [user_map[x] for x in test['userId']]
co = [movie_map[x] for x in test['movieId']]
predicted = pred_matrix[ro, co]
total_time_svd = time() - start_time

print('RMSE:', evaluation.RMSE(np.array(predicted), truth))
print('spearman_rank_correlation',
      evaluation.spearman_rank_correlation(np.array(predicted), truth))
print('Top k Precision(k=5):',
      evaluation.top_k_precision(predicted, test, user_means, user_map, 5))
print('Total SVD time:', total_time_svd)
# SVD recommender evaluation, 100%-energy variant.
# FIXES: Python-2 prints -> print(); .as_matrix() -> .to_numpy() (removed in
# pandas 1.0); `total_time_svd` was printed but never assigned (NameError);
# predictions now use `reconstructed`, which was previously computed and
# then ignored.
train = np.array(recsys_utils.read_train())
test = recsys_utils.read_test_table()
truth = test['rating'].to_numpy()
user_map = recsys_utils.read_user_map()
movie_map = recsys_utils.read_movie_map()
start_time = time()

# Subtract each user's mean (over rated entries only) from their ratings.
user_means = np.squeeze(np.sum(train, axis=1))
user_means = np.divide(user_means, (train != 0).sum(1))
for i in range(train.shape[0]):
    train[i, :][train[i, :] != 0] -= user_means[i]

# SVD Decomposition and Reconstruction (keep all energy).
U, V_t, sigma = SVD(train, percent_energy_retain=100, save_factorized=True)
print('Factorization Time:', time() - start_time)
reconstructed = np.dot(np.dot(U, sigma), V_t)
print('RMSE(reconstruction):', evaluation.RMSE_mat(train, reconstructed))

# Predictions: add user means back onto the reconstruction.
# BUG FIX: `train` was used here before, so the SVD output was unused.
pred_mat = reconstructed + np.reshape(user_means, [len(user_means), 1])
rows = [user_map[x] for x in test['userId']]
cols = [movie_map[x] for x in test['movieId']]
predictions = pred_mat[rows, cols]
total_time_svd = time() - start_time  # BUG FIX: was never assigned

print('RMSE:', evaluation.RMSE(np.array(predictions), truth))
print('spearman_rank_correlation',
      evaluation.spearman_rank_correlation(np.array(predictions), truth))
print('Top k Precision(k=5):',
      evaluation.top_k_precision(predictions, test, user_means, user_map, 5))
print('Total SVD time:', total_time_svd)
# Baseline CF evaluation: user-user pass then item-item pass.
# FIXES: Python-2 prints -> print(); the 'User-User-> Total time:' line was
# printed twice back-to-back (before and after the squeeze) — duplicate removed.
# `means_mat`, `movie_mat`, `temp`, `temp2`, `train`, `test`, `user_map`,
# `movie_map`, `truth`, `temp_start_time` are assumed defined earlier in the
# original file — TODO confirm.
print('means')
means_mat = np.squeeze(means_mat)
movie_mat = np.squeeze(movie_mat)
print(means_mat.shape)
print(movie_mat.shape)
print('Time taken:', time() - temp_start_time)

# User-user: cosine similarity on mean-centred rows.
user_dist = 1 - pairwise_distances(subtract_mean(temp), metric='cosine')
start_time_item = time()
predictions_usr = predict_baseline(train, user_dist, test, user_map, movie_map,
                                   10, 'user', temp2, means_mat, movie_mat)
predictions_usr = np.squeeze(predictions_usr)
print('User-User-> Total time:', time() - start_time_item)
print('User-User-> RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000]))
print('spearman_rank_correlation',
      evaluation.spearman_rank_correlation(predictions_usr, truth[0:10000]))
print('Precision on top K',
      evaluation.top_k_precision(predictions_usr, test.head(10000), means_mat, user_map))

# Item-item: same pipeline on the transposed matrices.
print('Item-item collaborative filtering....')
start_time_item = time()
item_dist = 1 - pairwise_distances(subtract_mean(train.T), metric='cosine')
print('Time taken to calculate distances:', time() - start_time_item)
temp2 = temp2.T
predictions_mov = predict_baseline(train.T, item_dist, test, user_map, movie_map,
                                   10, 'item', temp2, means_mat, movie_mat)
predictions = np.squeeze(predictions_mov)