# NOTE(review): indented fragment of a larger function (its `def` is not
# visible here); lines containing '******' are garbled by extraction and
# must be restored from the original source before this can run.
truth = testing['rating'].head(10000).as_matrix()
    user_mappings = recsys_utils.read_user_map()
    movie_mappings = recsys_utils.read_movie_map()

    # Per-user mean rating: row sums divided by the count of non-zero
    # (i.e. actually rated) entries in each row of the densified matrix.
    user_means = np.squeeze(np.sum(np.array(training.todense()), axis=1))
    user_means = np.divide(user_means,
                           (np.array(training.todense()) != 0).sum(1))
    # NOTE(review): garbled line — presumably announced the User-User run and
    # computed `user_dist` as 1 - cosine pairwise distances; confirm original.
    print('collaborative filtering for User-User:'******'float32')), metric='cosine')
    print('distance calculation time:', time() - start_time_user)
    # k=10 nearest neighbours for the user-user prediction.
    predictions = predict(training, user_dist, testing, user_mappings,
                          movie_mappings, 10)
    # NOTE(review): garbled line — likely two prints (elapsed time and RMSE).
    print('Time for User-User:'******'RMSE:', evaluation.RMSE(predictions, truth))
    print('spearman rank correlation:',
          evaluation.spearman_rank_correlation(predictions, truth))
    print(
        'top k precision:',
        evaluation.top_k_precision(predictions,
                                   testing,
                                   user_means,
                                   user_mappings,
                                   k=5))
    print('Total time:', time() - start_time_user)

    # Item-item collaborative filtering
    # Per-item means computed the same way on the transposed matrix.
    it_means = np.squeeze(np.sum(np.array(training.T.todense()), axis=1))
    it_means = np.divide(it_means,
                         (np.array(training.T.todense()) != 0).sum(1))
# --- Example #2 (0) --- scraped-listing separator; next snippet begins here
# CUR decomposition: Matrix_C holds rescaled sampled columns, Matrix_R
# rescaled sampled rows, and U_mat links them; Cur_mat = C @ U @ R then
# approximates the ratings matrix.
Matrix_C = formMat_C(C_frob, train, no_of_param, forbenius_norm_matrix_row)

print(len(Matrix_C), " , ", len(Matrix_C[0]))

# Build R: each sampled row y (index y[0], selection probability y[1]) is
# rescaled by 1/sqrt(no_of_param * p) — presumably the CUR
# importance-sampling rescale; confirm against formMat_C's convention.
Matrix_R = [[(train[int(y[0]), i]) / (sqrt(no_of_param * y[1]))
             for i in range(len(forbenius_norm_matrix_col))] for y in R_frob]

U_mat = Compute_U(train, C_frob, R_frob)

Cur_mat = Compute_Cur(Matrix_C, Matrix_R, U_mat)

end_Time = time()

print("Time taken for CUR : ", (end_Time - start_time_user))

# Look up the CUR-approximated rating for every (user, movie) pair in the
# held-out test set.
pred_Ratings = []
for idx, row in test.iterrows():
    pred_Ratings.append(Cur_mat[user_map[row['userId']],
                                movie_map[row['movieId']]])

predictions = np.array(pred_Ratings)
print(len(predictions))
print("RMSE ERROR:", evaluation.RMSE(predictions, truth))
print("Spearman Rank Correlation: ",
      evaluation.spearman_rank_correlation(predictions, truth))
print(
    "Top K rank Precision: ",
    evaluation.top_k_precision(predictions, test,
                               np.squeeze(np.array(means_mat)), user_map))
# --- Example #3 (0) --- scraped-listing separator; next snippet begins here
    def evaluate(self, predictions):
        """Return the RMSE between *predictions* and the preprocessed targets.

        The target vector is the second element returned by
        ``self.preprocess_simple()``; the feature half is discarded.
        """
        targets = self.preprocess_simple()[1]
        return evaluation.RMSE(predictions, targets)
# --- Example #4 (0) --- scraped-listing separator; next snippet begins here
        # NOTE(review): fragment begins mid training loop (batch index `i`);
        # the enclosing epoch/batch `for` headers are not visible here.
        price = torch.from_numpy(price).cuda()

        # Forward pass, loss, and one optimizer step for this batch.
        price_pre = net(desc)
        loss = criterion(price_pre, price)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Map targets and predictions back to price units via
        # transformer.convert2price (presumably undoing a normalisation —
        # confirm against the transformer implementation).
        for j in range(BATCHSIZE):
            price_trues.append(
                transformer.convert2price(train_price[i * BATCHSIZE + j]))
            price_pres.append(
                transformer.convert2price(
                    price_pre.cpu().detach().numpy()[j][0]))
    train_loss = evaluation.RMSE(price_trues, price_pres)
    train_loss_list.append(train_loss)

    # Evaluation pass over the test descriptions, one sample at a time.
    net.eval()
    price_pres = []
    for i in range(len(test_desc)):
        desc = (test_desc[i]).reshape(1, Dimension)
        desc = torch.from_numpy(desc).cuda()
        price_pre = net(desc)
        price_pres.append(
            transformer.convert2price(price_pre.cpu().detach().numpy()[0][0]))

    test_loss = evaluation.eval_test(price_pres)
    test_loss_list.append(test_loss)
    # Persist this epoch's predictions for later inspection.
    dataloader.write_csv(price_pres,
                         './result/result_epoch' + str(epoch + 1) + '.csv')
# --- Example #5 (0) --- scraped-listing separator; next snippet begins here
def validation(model, device, val_loader, prediction_dir):
    """Run inference on the validation loader and print its RMSE.

    Ground-truth values are column 1 of ``val_loader.dataset.labels``;
    predictions come from the 'predict' entry of ``test(...)``'s result.
    """
    labels = val_loader.dataset.labels[:, 1]
    outputs = test(model, device, val_loader, prediction_dir)
    preds = outputs['predict']
    print('validation test finish')
    rmse = evaluation.RMSE(labels, preds, len(labels))
    print('validation : ', rmse)
# Subtract the per-user and per-movie baselines from every non-zero entry,
# leaving baseline-adjusted ratings in t2.
for val in nz_val:
    t2[val[0],
       val[1]] = t2[val[0],
                    val[1]] - means_matrix[val[0]] - movie_matrix[val[1]]

means_matrix = np.squeeze(means_matrix)
movie_matrix = np.squeeze(movie_matrix)

# User-user similarity = 1 - cosine distance over mean-centred rows.
user_dist = 1 - pairwise_distances(sub_mean(t), metric='cosine')
start_time_item = time()
predictions_usr = predict_baseline(training, user_dist, testing, user_map,
                                   movie_map, 10, 'user', t2, means_matrix,
                                   movie_matrix)
predictions_usr = np.squeeze(predictions_usr)
# NOTE(review): line below is garbled by extraction — likely two prints
# (total time and RMSE); restore from the original source.
print('Total time for User-User:'******'RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000]))
print('spearman_rank_correlation:',
      evaluation.spearman_rank_correlation(predictions_usr, truth[0:10000]))
print(
    'Precision on top K:',
    evaluation.top_k_precision(predictions_usr, testing.head(10000),
                               means_matrix, user_map))

print('collaborative filtering for....')
start_time_item = time()
# Item-item variant: same pipeline on the transposed ratings matrix.
item_dist = 1 - pairwise_distances(sub_mean(training.T), metric='cosine')
print('Time taken to calculate distances:', time() - start_time_item)
t2 = t2.T
predictions_mov = predict_baseline(training.T, item_dist, testing, user_map,
                                   movie_map, 10, 'item', t2, means_matrix,
                                   movie_matrix)
    # NOTE(review): indented fragment (enclosing `def` not visible); uses
    # Python 2 `print` statements, unlike the Python 3 snippets above.
    truth = test['rating'].head(10000).as_matrix()
    user_map = recsys_utils.read_user_map()
    movie_map = recsys_utils.read_movie_map()

    # User-user collaborative filtering
    # user_means=np.squeeze(np.sum(np.array(train.todense()), axis=1))
    # Per-user mean = row sum / count of rated (non-zero) entries.
    user_means = np.squeeze(np.sum(np.array(train.todense()), axis=1))
    user_means = np.divide(user_means, (np.array(train.todense()) != 0).sum(1))
    print 'User-user collaborative filtering....'
    start_time_user = time()
    user_dist = 1 - pairwise_distances(subtract_mean(train.astype('float32')),
                                       metric='cosine')
    print 'Time taken to calculate distances:', time() - start_time_user
    # k=10 neighbours for the user-user prediction.
    predictions = predict(train, user_dist, test, user_map, movie_map, 10)
    print 'User-user-> Total time:', time() - start_time_user
    print 'User-user-> RMSE:', evaluation.RMSE(predictions, truth)
    print 'spearman_rank_correlation', evaluation.spearman_rank_correlation(
        predictions, truth)
    print 'top k precision:', evaluation.top_k_precision(predictions,
                                                         test,
                                                         user_means,
                                                         user_map,
                                                         k=5)
    print 'Total time:', time() - start_time_user

    # Item-item collaborative filtering
    # item_means=np.squeeze(np.sum(np.array(train.T.todense()), axis=1))
    item_means = np.squeeze(np.sum(np.array(train.T.todense()), axis=1))
    item_means = np.divide(item_means,
                           (np.array(train.T.todense()) != 0).sum(1))
    print 'Item-item collaborative filtering....'
# --- Example #8 (0) --- scraped-listing separator; next snippet begins here
    # Read data
    train = np.array(recsys_utils.read_train())
    test = recsys_utils.read_test_table()
    truth = test['rating'].as_matrix()
    user_map = recsys_utils.read_user_map()
    movie_map = recsys_utils.read_movie_map()

    start_time = time()

    # Subtracting mean of data from train set
    # Per-user mean = row sum / count of non-zero entries; then subtract it
    # in place from every rated entry of that user's row.
    user_means = np.squeeze(np.sum(train, axis=1))
    user_means = np.divide(user_means, (train != 0).sum(1))
    for i in range(train.shape[0]):
        train[i, :][train[i, :] != 0] -= user_means[i]

    # Decomposition and Reconstruction of SVD
    U, V_t, sigma = SVD(train, retain_energy=90, save_factorized=True)
    reconstructed = np.dot(np.dot(U, sigma), V_t)

    # Get predicted
    # NOTE(review): `reconstructed` is computed but never used below —
    # predictions are read from the mean-restored `train`, not from the SVD
    # reconstruction. Looks like a bug (the sibling snippet at least uses
    # `reconstructed` for a reconstruction-RMSE check); confirm intent.
    pred_matrix = train + np.reshape(user_means, [len(user_means), 1])
    ro = [user_map[x] for x in test['userId']]
    co = [movie_map[x] for x in test['movieId']]
    predicted = pred_matrix[ro, co]
    total_time_svd = time() - start_time
    print('RMSE:', evaluation.RMSE(np.array(predicted), truth))
    print('spearman_rank_correlation',
          evaluation.spearman_rank_correlation(np.array(predicted), truth))
    print('Top k Precision(k=5):',
          evaluation.top_k_precision(predicted, test, user_means, user_map, 5))
    print('Total SVD time:', total_time_svd)
# --- Example #9 (0) --- scraped-listing separator; next snippet begins here
	# NOTE(review): tab-indented Python 2 fragment; the enclosing `def` is
	# not visible here.
	train=np.array(recsys_utils.read_train())
	test=recsys_utils.read_test_table()
	truth=test['rating'].as_matrix()
	user_map=recsys_utils.read_user_map()
	movie_map=recsys_utils.read_movie_map()

	start_time=time()

	# Subtract means from train
	# Per-user mean = row sum / count of non-zero entries, removed in place.
	user_means=np.squeeze(np.sum(train, axis=1))
	user_means=np.divide(user_means, (train!=0).sum(1))
	for i in range(train.shape[0]):
		train[i, :][train[i, :]!=0]-=user_means[i]

	# SVD Decomposition and Reconstruction
	U, V_t, sigma=SVD(train, percent_energy_retain=100, save_factorized=True)
	print 'Factorization Time:', time()-start_time
	reconstructed=np.dot(np.dot(U, sigma), V_t)
	print 'RMSE(reconstruction):', evaluation.RMSE_mat(train, reconstructed)

	# Get Predictions
	# NOTE(review): predictions come from the mean-restored `train`, not from
	# `reconstructed` — confirm this is intentional.
	pred_mat=train+np.reshape(user_means, [len(user_means), 1])
	rows=[user_map[x] for x in test['userId']]
	cols=[movie_map[x] for x in test['movieId']]
	predictions=pred_mat[rows, cols]
	total_time_svd=time()-start_time
	print 'RMSE:', evaluation.RMSE(np.array(predictions), truth)
	print 'spearman_rank_correlation', evaluation.spearman_rank_correlation(np.array(predictions), truth)
	print 'Top k Precision(k=5):', evaluation.top_k_precision(predictions, 
		test, user_means, user_map, 5)
	print 'Total SVD time:', total_time_svd
# --- Example #10 (0) --- scraped-listing separator; next snippet begins here
# NOTE(review): top-level fragment mixing Python 2 `print` statements with
# one Python 3 `print(...)` call — scraped from inconsistent sources.
print 'means'
means_mat = np.squeeze(means_mat)
movie_mat = np.squeeze(movie_mat)
print means_mat.shape
print movie_mat.shape

print('Time taken:', time() - temp_start_time)

# User-user baseline predictions with k=10 neighbours; similarity is
# 1 - cosine distance over mean-centred rows.
user_dist = 1 - pairwise_distances(subtract_mean(temp), metric='cosine')
start_time_item = time()
predictions_usr = predict_baseline(train, user_dist, test, user_map, movie_map,
                                   10, 'user', temp2, means_mat, movie_mat)
print 'User-User-> Total time:', time() - start_time_item
predictions_usr = np.squeeze(predictions_usr)
# NOTE(review): duplicate timing print below — likely a copy/paste artifact.
print 'User-User-> Total time:', time() - start_time_item
print 'User-User-> RMSE:', evaluation.RMSE(predictions_usr, truth[0:10000])
print 'spearman_rank_correlation', evaluation.spearman_rank_correlation(
    predictions_usr, truth[0:10000])
print 'Precision on top K', evaluation.top_k_precision(predictions_usr,
                                                       test.head(10000),
                                                       means_mat, user_map)

print 'Item-item collaborative filtering....'
start_time_item = time()
# Item-item variant: same pipeline on the transposed ratings matrix.
item_dist = 1 - pairwise_distances(subtract_mean(train.T), metric='cosine')
print 'Time taken to calculate distances:', time() - start_time_item
temp2 = temp2.T
predictions_mov = predict_baseline(train.T, item_dist, test, user_map,
                                   movie_map, 10, 'item', temp2, means_mat,
                                   movie_mat)
predictions = np.squeeze(predictions_mov)