movie_mappings = recsys_utils.read_movie_map() user_means = np.squeeze(np.sum(np.array(training.todense()), axis=1)) user_means = np.divide(user_means, (np.array(training.todense()) != 0).sum(1)) print('collaborative filtering for User-User:'******'float32')), metric='cosine') print('distance calculation time:', time() - start_time_user) predictions = predict(training, user_dist, testing, user_mappings, movie_mappings, 10) print('Time for User-User:'******'RMSE:', evaluation.RMSE(predictions, truth)) print('spearman rank correlation:', evaluation.spearman_rank_correlation(predictions, truth)) print( 'top k precision:', evaluation.top_k_precision(predictions, testing, user_means, user_mappings, k=5)) print('Total time:', time() - start_time_user) # Item-item collaborative filtering it_means = np.squeeze(np.sum(np.array(training.T.todense()), axis=1)) it_means = np.divide(it_means, (np.array(training.T.todense()) != 0).sum(1)) print('collaborative filtering for Item-Item:') start_time_item = time()
val[1]] = t2[val[0], val[1]] - means_matrix[val[0]] - movie_matrix[val[1]] means_matrix = np.squeeze(means_matrix) movie_matrix = np.squeeze(movie_matrix) user_dist = 1 - pairwise_distances(sub_mean(t), metric='cosine') start_time_item = time() predictions_usr = predict_baseline(training, user_dist, testing, user_map, movie_map, 10, 'user', t2, means_matrix, movie_matrix) predictions_usr = np.squeeze(predictions_usr) print('Total time for User-User:'******'RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000])) print('spearman_rank_correlation:', evaluation.spearman_rank_correlation(predictions_usr, truth[0:10000])) print( 'Precision on top K:', evaluation.top_k_precision(predictions_usr, testing.head(10000), means_matrix, user_map)) print('collaborative filtering for....') start_time_item = time() item_dist = 1 - pairwise_distances(sub_mean(training.T), metric='cosine') print('Time taken to calculate distances:', time() - start_time_item) t2 = t2.T predictions_mov = predict_baseline(training.T, item_dist, testing, user_map, movie_map, 10, 'item', t2, means_matrix, movie_matrix) predictions = np.squeeze(predictions_mov) print('Total time for Item-item:', time() - start_time_item)
# SVD-based rating prediction: mean-centre the training ratings per user,
# factorize, rebuild the matrix from the retained factors, and score the
# rebuilt ratings against the test table.

# Read data
train = np.array(recsys_utils.read_train())
test = recsys_utils.read_test_table()
# Series.as_matrix() was removed in pandas 1.0; .values works on old and new.
truth = test['rating'].values
user_map = recsys_utils.read_user_map()
movie_map = recsys_utils.read_movie_map()

start_time = time()

# Per-user mean over rated entries only (a 0 entry is treated as "unrated").
# Clamping the count to 1 gives users with no ratings a mean of 0 instead of
# a 0/0 NaN that would poison every later computation for that user.
rated = train != 0
rating_counts = rated.sum(axis=1)
user_means = np.divide(train.sum(axis=1), np.maximum(rating_counts, 1))

# Subtract each user's mean from that user's rated entries, in place.
# Unrated entries are multiplied by False (0) and therefore stay untouched.
train -= rated * user_means[:, np.newaxis]

# Decomposition and reconstruction with SVD
U, V_t, sigma = SVD(train, retain_energy=90, save_factorized=True)
reconstructed = np.dot(np.dot(U, sigma), V_t)

# Get predictions from the reconstruction, adding the user means back.
# The previous code read predictions from `train`, which left `reconstructed`
# unused, so the factorization never influenced the reported metrics.
pred_matrix = reconstructed + user_means[:, np.newaxis]
row_idx = [user_map[user_id] for user_id in test['userId']]
col_idx = [movie_map[movie_id] for movie_id in test['movieId']]
predicted = pred_matrix[row_idx, col_idx]

total_time_svd = time() - start_time

print('RMSE:', evaluation.RMSE(predicted, truth))
print('spearman_rank_correlation',
      evaluation.spearman_rank_correlation(predicted, truth))
print('Top k Precision(k=5):',
      evaluation.top_k_precision(predicted, test, user_means, user_map, 5))
print('Total SVD time:', total_time_svd)
user_map = recsys_utils.read_user_map() movie_map = recsys_utils.read_movie_map() # User-user collaborative filtering # user_means=np.squeeze(np.sum(np.array(train.todense()), axis=1)) user_means = np.squeeze(np.sum(np.array(train.todense()), axis=1)) user_means = np.divide(user_means, (np.array(train.todense()) != 0).sum(1)) print 'User-user collaborative filtering....' start_time_user = time() user_dist = 1 - pairwise_distances(subtract_mean(train.astype('float32')), metric='cosine') print 'Time taken to calculate distances:', time() - start_time_user predictions = predict(train, user_dist, test, user_map, movie_map, 10) print 'User-user-> Total time:', time() - start_time_user print 'User-user-> RMSE:', evaluation.RMSE(predictions, truth) print 'spearman_rank_correlation', evaluation.spearman_rank_correlation( predictions, truth) print 'top k precision:', evaluation.top_k_precision(predictions, test, user_means, user_map, k=5) print 'Total time:', time() - start_time_user # Item-item collaborative filtering # item_means=np.squeeze(np.sum(np.array(train.T.todense()), axis=1)) item_means = np.squeeze(np.sum(np.array(train.T.todense()), axis=1)) item_means = np.divide(item_means, (np.array(train.T.todense()) != 0).sum(1)) print 'Item-item collaborative filtering....' start_time_item = time() item_dist = 1 - pairwise_distances(
# SVD-based rating prediction: mean-centre the training ratings per user,
# factorize, rebuild the matrix from the factors, report the reconstruction
# error, and score the rebuilt ratings against the test table.
#
# Every print below is a single-argument call with %-formatting, so it emits
# the same text under Python 2 (which this script was written for) and also
# parses under Python 3.

train = np.array(recsys_utils.read_train())
test = recsys_utils.read_test_table()
# Series.as_matrix() was removed in pandas 1.0; .values works on old and new.
truth = test['rating'].values
user_map = recsys_utils.read_user_map()
movie_map = recsys_utils.read_movie_map()

start_time = time()

# Per-user mean over rated entries only (a 0 entry is treated as "unrated").
# Clamping the count to 1 gives users with no ratings a mean of 0 instead of
# a 0/0 NaN that would poison every later computation for that user.
rated = train != 0
rating_counts = rated.sum(axis=1)
user_means = np.divide(train.sum(axis=1), np.maximum(rating_counts, 1))

# Subtract each user's mean from that user's rated entries, in place.
# Unrated entries are multiplied by False (0) and therefore stay untouched.
train -= rated * user_means[:, np.newaxis]

# SVD decomposition and reconstruction
U, V_t, sigma = SVD(train, percent_energy_retain=100, save_factorized=True)
print('Factorization Time: %s' % (time() - start_time,))
reconstructed = np.dot(np.dot(U, sigma), V_t)
print('RMSE(reconstruction): %s' % (evaluation.RMSE_mat(train, reconstructed),))

# Get predictions from the reconstruction, adding the user means back.
# The previous code read predictions from `train`, so the factorization never
# influenced the reported prediction metrics.
pred_mat = reconstructed + user_means[:, np.newaxis]
rows = [user_map[user_id] for user_id in test['userId']]
cols = [movie_map[movie_id] for movie_id in test['movieId']]
predictions = pred_mat[rows, cols]

total_time_svd = time() - start_time

print('RMSE: %s' % (evaluation.RMSE(predictions, truth),))
print('spearman_rank_correlation %s'
      % (evaluation.spearman_rank_correlation(predictions, truth),))
print('Top k Precision(k=5): %s'
      % (evaluation.top_k_precision(predictions, test, user_means, user_map, 5),))
print('Total SVD time: %s' % (total_time_svd,))