# NOTE(review): this fragment is the tail of a routine whose header is not
# visible; `training`, `testing`, `truth` and `user_mappings` are defined
# outside the visible lines.
movie_mappings = recsys_utils.read_movie_map()

    # Per-user mean rating: each row sum of the densified training matrix is
    # divided by the number of non-zero entries in that row (zeros are
    # excluded from the count, i.e. treated as "no rating").
    user_means = np.squeeze(np.sum(np.array(training.todense()), axis=1))
    user_means = np.divide(user_means,
                           (np.array(training.todense()) != 0).sum(1))
    # NOTE(review): the next line is damaged - the text between the message
    # string and 'float32' was replaced by '******'. `start_time_user` and
    # `user_dist`, both used below, are not assigned anywhere in the visible
    # code, so their assignments were presumably part of the lost text.
    print('collaborative filtering for User-User:'******'float32')), metric='cosine')
    print('distance calculation time:', time() - start_time_user)
    # The literal 10 is passed through to predict(); its meaning is defined
    # there (presumably a neighbourhood size - TODO confirm).
    predictions = predict(training, user_dist, testing, user_mappings,
                          movie_mappings, 10)
    # NOTE(review): the next line is damaged in the same way; two separate
    # print calls (elapsed time, then RMSE) appear to have been fused.
    print('Time for User-User:'******'RMSE:', evaluation.RMSE(predictions, truth))
    print('spearman rank correlation:',
          evaluation.spearman_rank_correlation(predictions, truth))
    print(
        'top k precision:',
        evaluation.top_k_precision(predictions,
                                   testing,
                                   user_means,
                                   user_mappings,
                                   k=5))
    print('Total time:', time() - start_time_user)

    # Item-item collaborative filtering
    # Same mean computation on the transposed matrix, so each row is now an
    # item and the result is one mean per item.
    it_means = np.squeeze(np.sum(np.array(training.T.todense()), axis=1))
    it_means = np.divide(it_means,
                         (np.array(training.T.todense()) != 0).sum(1))
    print('collaborative filtering for Item-Item:')
    start_time_item = time()
       val[1]] = t2[val[0],
                    val[1]] - means_matrix[val[0]] - movie_matrix[val[1]]

# Drop singleton dimensions from the two mean arrays before they are passed
# to predict_baseline() and top_k_precision().
means_matrix = np.squeeze(means_matrix)
movie_matrix = np.squeeze(movie_matrix)

# User-user similarity: one minus the pairwise cosine distance of the output
# of sub_mean(t). `t` and `sub_mean` are defined outside the visible lines.
user_dist = 1 - pairwise_distances(sub_mean(t), metric='cosine')
# NOTE(review): although named `start_time_item`, this timer is started right
# before the user-user prediction step.
start_time_item = time()
predictions_usr = predict_baseline(training, user_dist, testing, user_map,
                                   movie_map, 10, 'user', t2, means_matrix,
                                   movie_matrix)
predictions_usr = np.squeeze(predictions_usr)
# NOTE(review): the next line is damaged - text between the two message
# strings was replaced by '******', apparently fusing an elapsed-time print
# with the RMSE print. It is not valid Python as it stands.
# The metrics below compare against the first 10000 entries of `truth` and
# the first 10000 rows of `testing`.
print('Total time for User-User:'******'RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000]))
print('spearman_rank_correlation:',
      evaluation.spearman_rank_correlation(predictions_usr, truth[0:10000]))
print(
    'Precision on top K:',
    evaluation.top_k_precision(predictions_usr, testing.head(10000),
                               means_matrix, user_map))

# Item-item pass: the timer is restarted, similarities are computed on the
# transposed training matrix, and t2 is transposed in place of the original
# before being handed to predict_baseline() in 'item' mode.
print('collaborative filtering for....')
start_time_item = time()
item_dist = 1 - pairwise_distances(sub_mean(training.T), metric='cosine')
print('Time taken to calculate distances:', time() - start_time_item)
t2 = t2.T
predictions_mov = predict_baseline(training.T, item_dist, testing, user_map,
                                   movie_map, 10, 'item', t2, means_matrix,
                                   movie_matrix)
# NOTE(review): the squeezed result is stored in `predictions`, not back into
# `predictions_mov` as the user-user pass does with `predictions_usr`.
predictions = np.squeeze(predictions_mov)
print('Total time for Item-item:', time() - start_time_item)
예제 #3
0
    # Read data: the rating matrix, the held-out test table and the
    # id -> matrix-index maps for users and movies.
    train = np.array(recsys_utils.read_train())
    test = recsys_utils.read_test_table()
    # `Series.as_matrix()` was removed in pandas 1.0; `.values` returns the
    # same ndarray on old and new releases alike.
    truth = test['rating'].values
    user_map = recsys_utils.read_user_map()
    movie_map = recsys_utils.read_movie_map()

    start_time = time()

    # Subtracting mean of data from train set.
    # A zero entry is treated as "not rated": it is left out of the mean and
    # is not shifted by the centring step.
    rated = train != 0
    # Clamp the rating count to 1 so that a user without any rating gets a
    # mean of 0 instead of a 0/0 NaN that would leak into the predictions.
    user_means = np.divide(np.squeeze(np.sum(train, axis=1)),
                           np.maximum(rated.sum(1), 1))
    mean_column = np.reshape(user_means, [len(user_means), 1])
    # In-place, masked subtraction: only the rated cells are centred.
    np.subtract(train, mean_column, out=train, where=rated)

    # Decomposition and Reconstruction of SVD
    U, V_t, sigma = SVD(train, retain_energy=90, save_factorized=True)
    reconstructed = np.dot(np.dot(U, sigma), V_t)

    # Get predicted ratings from the low-rank reconstruction (not from the
    # centred input matrix, which would bypass the factorization entirely),
    # then add the user means back. np.asarray keeps the fancy-indexed result
    # one-dimensional even if SVD() hands back np.matrix factors.
    pred_matrix = np.asarray(reconstructed) + mean_column
    ro = [user_map[x] for x in test['userId']]
    co = [movie_map[x] for x in test['movieId']]
    predicted = pred_matrix[ro, co]
    total_time_svd = time() - start_time
    print('RMSE:', evaluation.RMSE(np.array(predicted), truth))
    print('spearman_rank_correlation',
          evaluation.spearman_rank_correlation(np.array(predicted), truth))
    print('Top k Precision(k=5):',
          evaluation.top_k_precision(predicted, test, user_means, user_map, 5))
    print('Total SVD time:', total_time_svd)
예제 #4
0
    user_map = recsys_utils.read_user_map()
    movie_map = recsys_utils.read_movie_map()

    # User-user collaborative filtering
    # user_means=np.squeeze(np.sum(np.array(train.todense()), axis=1))
    user_means = np.squeeze(np.sum(np.array(train.todense()), axis=1))
    user_means = np.divide(user_means, (np.array(train.todense()) != 0).sum(1))
    print 'User-user collaborative filtering....'
    start_time_user = time()
    user_dist = 1 - pairwise_distances(subtract_mean(train.astype('float32')),
                                       metric='cosine')
    print 'Time taken to calculate distances:', time() - start_time_user
    predictions = predict(train, user_dist, test, user_map, movie_map, 10)
    print 'User-user-> Total time:', time() - start_time_user
    print 'User-user-> RMSE:', evaluation.RMSE(predictions, truth)
    print 'spearman_rank_correlation', evaluation.spearman_rank_correlation(
        predictions, truth)
    print 'top k precision:', evaluation.top_k_precision(predictions,
                                                         test,
                                                         user_means,
                                                         user_map,
                                                         k=5)
    print 'Total time:', time() - start_time_user

    # Item-item collaborative filtering
    # item_means=np.squeeze(np.sum(np.array(train.T.todense()), axis=1))
    item_means = np.squeeze(np.sum(np.array(train.T.todense()), axis=1))
    item_means = np.divide(item_means,
                           (np.array(train.T.todense()) != 0).sum(1))
    print 'Item-item collaborative filtering....'
    start_time_item = time()
    item_dist = 1 - pairwise_distances(
예제 #5
0
파일: SVD.py 프로젝트: stgstg27/recsys
	train=np.array(recsys_utils.read_train())
	test=recsys_utils.read_test_table()
	truth=test['rating'].as_matrix()
	user_map=recsys_utils.read_user_map()
	movie_map=recsys_utils.read_movie_map()

	start_time=time()

	# Subtract means from train
	user_means=np.squeeze(np.sum(train, axis=1))
	user_means=np.divide(user_means, (train!=0).sum(1))
	for i in range(train.shape[0]):
		train[i, :][train[i, :]!=0]-=user_means[i]

	# SVD Decomposition and Reconstruction
	U, V_t, sigma=SVD(train, percent_energy_retain=100, save_factorized=True)
	print 'Factorization Time:', time()-start_time
	reconstructed=np.dot(np.dot(U, sigma), V_t)
	print 'RMSE(reconstruction):', evaluation.RMSE_mat(train, reconstructed)

	# Get Predictions
	pred_mat=train+np.reshape(user_means, [len(user_means), 1])
	rows=[user_map[x] for x in test['userId']]
	cols=[movie_map[x] for x in test['movieId']]
	predictions=pred_mat[rows, cols]
	total_time_svd=time()-start_time
	print 'RMSE:', evaluation.RMSE(np.array(predictions), truth)
	print 'spearman_rank_correlation', evaluation.spearman_rank_correlation(np.array(predictions), truth)
	print 'Top k Precision(k=5):', evaluation.top_k_precision(predictions, 
		test, user_means, user_map, 5)
	print 'Total SVD time:', total_time_svd