예제 #1
0
def example():
    """Load the MovieLens 100k ratings, train a BMF model, and time it.

    Splits the ratings 90/10 into train/validation, loads them into a
    ``BayesianMatrixFactorization`` model, and prints the wall-clock time
    spent loading/training.

    Returns:
        The fitted ``BayesianMatrixFactorization`` model.
    """
    # reviews = movielens_extractor.get_ml_1m_dataset()
    reviews = movielens_extractor.get_ml_100K_dataset()
    ratings = movielens_extractor.reviews_to_numpy_matrix(reviews)

    # Fixed seed for reproducible factor initialization; full float
    # precision in printed output for debugging comparisons.
    np.random.seed(0)
    np.set_printoptions(precision=16)

    # print(NormalRandom.generate_matrix(1, 10))
    # np.random.shuffle(ratings)

    # Split data into training & validation sets (90% / 10%).
    train_pct = 0.9
    train_size = int(train_pct * len(ratings))
    train = ratings[:train_size]
    validation = ratings[train_size:]

    bmf_model = BayesianMatrixFactorization()

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic high-resolution replacement for timing.
    start_time = time.perf_counter()
    bmf_model.load(ratings, train, validation)
    end_time = time.perf_counter()
    print("time spent = %.3f" % (end_time - start_time))

    return bmf_model
예제 #2
0
 def generate_report_ml100k():
     """Generate the dataset-analysis notebook report for MovieLens 100k."""
     dataset_name = 'MovieLens 100k'
     file_name = '/Users/fpena/UCC/Thesis/projects/yelp/notebooks/dataset_analysis_report_ml100k.ipynb'
     # Code snippet embedded in the generated notebook so it can reload
     # the same dataset on its own.
     load_reviews_code = (
         'from tripadvisor.fourcity import movielens_extractor\n'
         'reviews = movielens_extractor.get_ml_100K_dataset()')
     reviews = movielens_extractor.get_ml_100K_dataset()
     ReviewsDatasetAnalyzerReport.generate_report(
         reviews, dataset_name, file_name, load_reviews_code)
예제 #3
0
def test():
    """Factorize the MovieLens 100k ratings matrix and sample a prediction.

    Builds the dense user-item ratings matrix from the cleaned ML-100k
    reviews, factorizes it into K latent features with
    ``matrix_factorization``, and returns one sample prediction
    (user 0's predicted rating for item 4).

    Returns:
        float: the dot product of user-0 and item-4 latent factors.
    """
    # NOTE(review): the original version first assigned a small
    # hard-coded 5x5 matrix to R and then immediately overwrote it with
    # the real dataset below; that dead literal has been removed.
    my_reviews = movielens_extractor.clean_reviews(
        movielens_extractor.get_ml_100K_dataset())
    R = np.array(create_matrix(my_reviews).todense())

    N = len(R)     # number of users (rows)
    M = len(R[0])  # number of items (columns)
    K = 2          # latent-feature dimensionality

    # Random initial factor matrices.
    P = np.random.rand(N, K)
    Q = np.random.rand(M, K)

    nP, nQ = matrix_factorization(R, P, Q, K)

    # Sample prediction: user 0's rating of item 4. Returned (instead of
    # silently discarded) so callers can inspect it; original returned None.
    predicted = np.dot(nP[0], nQ[4])
    return predicted



# my_reviews = movielens_extractor.get_ml_100K_dataset()
# my_reviews = movielens_extractor.clean_reviews(movielens_extractor.get_ml_100K_dataset())

# sgd = StochasticGradientDescent(2)

# sgd.load(reviews_matrix_5)

# print(build_user_index_map(test_reviews))
# print(build_item_index_map(test_reviews))
#
# print(create_matrix(test_reviews))
# print(create_matrix(test_reviews).todense())
# print(create_matrix(test_reviews).nonzero())
#
# print(create_matrix(reviews_matrix_5))
# print(create_matrix(reviews_matrix_5).todense())
# print(create_matrix(reviews_matrix_5).nonzero())
# print(sgd.predict(4, 0))

# start_time = time.time()
# test()
# end_time = time.time() - start_time
# print('Total time', end_time)
예제 #4
0

__author__ = 'fpena'


# Script fragment: load the MovieLens 100k reviews and (below, in code
# not visible in this chunk) build a list of recommenders to evaluate.
# start_time is captured here; the elapsed-time print presumably happens
# further down the file — not visible in this chunk.
start_time = time.time()
# main()
# file_path = '/Users/fpena/tmp/filtered_reviews_multi.json'
# file_path = '/Users/fpena/tmp/filtered_reviews_multi_new.json'
# file_path = '/Users/fpena/tmp/filtered_reviews_multi_non_sparse_shuffled.json'
# file_path = '/Users/fpena/UCC/Thesis/datasets/TripHotelReviewXml/all_reviews.json'
file_path = '/Users/fpena/UCC/Thesis/datasets/TripHotelReviewXml/cleaned_reviews.json'
# reviews = ETLUtils.load_json_file(file_path)
# reviews = movielens_extractor.clean_reviews(movielens_extractor.get_ml_100K_dataset())
# reviews = extractor.pre_process_reviews()
# NOTE(review): file_path above is assigned but unused by the active
# line below — the dataset actually comes from movielens_extractor.
reviews = movielens_extractor.get_ml_100K_dataset()

# shuffle(reviews)
# ETLUtils.save_json_file('/Users/fpena/tmp/filtered_reviews_multi_non_sparse_shuffled.json', reviews)
# print(reviews[0])
# print(reviews[1])
# print(reviews[2])
# print(reviews[10])
# print(reviews[100])
#
# for review in reviews:
#     print(review)

my_recommender_list = [
    # SingleCF(),
예제 #5
0
from etl.reviews_dataset_analyzer import ReviewsDatasetAnalyzer
from tripadvisor.fourcity.recommender_evaluator import evaluate_recommenders

__author__ = 'fpena'

# Script fragment: load the MovieLens 100k reviews for recommender
# evaluation. start_time is captured here; the elapsed-time report
# presumably happens further down the file — not visible in this chunk.
start_time = time.time()
# main()
# file_path = '/Users/fpena/tmp/filtered_reviews_multi.json'
# file_path = '/Users/fpena/tmp/filtered_reviews_multi_new.json'
# file_path = '/Users/fpena/tmp/filtered_reviews_multi_non_sparse_shuffled.json'
# file_path = '/Users/fpena/UCC/Thesis/datasets/TripHotelReviewXml/all_reviews.json'
file_path = '/Users/fpena/UCC/Thesis/datasets/TripHotelReviewXml/cleaned_reviews.json'
# reviews = ETLUtils.load_json_file(file_path)
# reviews = movielens_extractor.clean_reviews(movielens_extractor.get_ml_100K_dataset())
# reviews = extractor.pre_process_reviews()
# NOTE(review): file_path above is assigned but unused by the active
# line below — the dataset actually comes from movielens_extractor.
reviews = movielens_extractor.get_ml_100K_dataset()

# shuffle(reviews)
# ETLUtils.save_json_file('/Users/fpena/tmp/filtered_reviews_multi_non_sparse_shuffled.json', reviews)
# print(reviews[0])
# print(reviews[1])
# print(reviews[2])
# print(reviews[10])
# print(reviews[100])
#
# for review in reviews:
#     print(review)


my_recommender_list = [
    # SingleCF(),
    # AdjustedWeightedSumRecommender(SingleSimilarityMatrixBuilder('euclidean')),