# Assumes the standard-library `traceback` module and the project's
# `recommender_evaluator` module are imported at module level.
def run_rmse_test_wrapper(args):
    try:
        return recommender_evaluator.perform_cross_validation(*args)
    except Exception:
        print('Caught exception in worker thread')

        # This prints the type, value, and stack trace of the
        # current exception being handled.
        traceback.print_exc()

        print()
        # A bare `raise` is the idiomatic way to propagate the current
        # exception with its original traceback intact.
        raise
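This kind of wrapper typically exists because worker pools report exceptions poorly, so the worker prints the full traceback itself before re-raising. The source does not show how the wrapper is driven; below is a minimal, purely illustrative sketch assuming a multiprocessing pool, where run_all and args_list are hypothetical names:

# Hypothetical driver: fan the cross-validation runs out to a process pool.
# Each element of args_list would be a tuple matching the positional
# parameters of perform_cross_validation.
from multiprocessing import Pool

def run_all(args_list):
    with Pool() as pool:
        # Any worker exception is printed inside the worker by the
        # wrapper, then re-raised here by pool.map.
        return pool.map(run_rmse_test_wrapper, args_list)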
Example #3
def run_rmse_test(records_file,
                  recommenders,
                  binary_reviews_file,
                  reviews_type=None):

    records = load_records(records_file)
    # records = extractor.remove_users_with_low_reviews(records, 2)
    with open(binary_reviews_file, 'rb') as read_file:
        binary_reviews = pickle.load(read_file)

    if len(records) != len(binary_reviews):
        raise ValueError("The records and reviews should have the same length")

    num_folds = 5

    dataset_info_map = {
        'dataset': records_file.split('/')[-1],
        'cache_reviews': binary_reviews_file.split('/')[-1],
        'num_records': len(records),
        'reviews_type': reviews_type,
        'cross_validation_folds': num_folds,
    }

    results_list = []
    results_log_list = []
    print('Total recommenders: %d' % len(recommenders))

    for count, recommender in enumerate(recommenders):

        print('\n**************\n%d/%d\n**************' %
              (count, len(recommenders)))
        results = recommender_evaluator.perform_cross_validation(
            records, recommender, num_folds, binary_reviews, reviews_type)

        results_list.append(results)

        # Exclude the recommender that just finished from the estimate
        # (the original multiplied by one run too many).
        remaining_time = results['Execution time'] * (
            len(recommenders) - count - 1)
        remaining_time /= 3600
        print('Estimated remaining time: %.2f hours' % remaining_time)

    for recommender, results in zip(recommenders, results_list):
        results_log_list.append(
            process_rmse_results(recommender, results, dataset_info_map))

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    file_name = 'recommender-rmse-results' + timestamp

    ETLUtils.save_csv_file(file_name + '.csv', results_log_list, RMSE_HEADERS,
                           '\t')
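For reference, a minimal invocation sketch. The file paths are hypothetical placeholders; BasicKNN(None) is borrowed from the later example, and a real run would pass several configured recommender objects:

# Hypothetical invocation of run_rmse_test; paths are placeholders.
my_recommenders = [BasicKNN(None)]
run_rmse_test(
    '/path/to/yelp_training_set_review_hotels_shuffled.json',
    my_recommenders,
    '/path/to/reviews_hotel_shuffled.pkl')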
Example #5
def main():
    # reviews_file = "/Users/fpena/tmp/yelp_training_set/yelp_training_set_review_hotels.json"
    reviews_file = "/Users/fpena/UCC/Thesis/datasets/context/yelp_training_set_review_hotels_shuffled.json"
    # reviews_file = "/Users/fpena/UCC/Thesis/datasets/context/yelp_training_set_review_restaurants_shuffled.json"
    # my_records = context_utils.load_reviews(reviews_file)
    my_records = load_data(reviews_file)
    print("records:", len(my_records))
    my_num_topics = 150

    print("\n***************************\n")

    # my_records = load_data(reviews_file)
    # my_records = extractor.remove_users_with_low_reviews(my_records, 200)
    # my_records = extractor.remove_users_with_low_reviews(my_records, 2)
    # shuffle(my_records)

    # my_index = 0
    # my_reviews = []
    # for record in my_records:
    #     my_index += 1
    #     my_reviews.append(Review(record))
    #     print('index', my_index)
    # my_file = '/Users/fpena/UCC/Thesis/datasets/context/reviews_context_restaurants_200.pkl'
    # my_file = '/Users/fpena/UCC/Thesis/datasets/context/reviews_context_hotel_2.pkl'
    my_file = '/Users/fpena/tmp/reviews_hotel_shuffled.pkl'
    # my_file = '/Users/fpena/tmp/reviews_restaurant_shuffled.pkl'
    # with open(my_file, 'wb') as write_file:
    #     pickle.dump(my_reviews, write_file, pickle.HIGHEST_PROTOCOL)

    with open(my_file, 'rb') as read_file:
        my_cache_reviews = pickle.load(read_file)

    print("reviews:", len(my_cache_reviews))
    context_knn = ContextKnn(my_num_topics)

    tknc = TopKNeighbourhoodCalculator()
    nc = ContextNeighbourhoodCalculator()
    ncc = NeighbourContributionCalculator()
    ubc = UserBaselineCalculator()
    usc = PBCSimilarityCalculator()

    # contextual_knn2 = ContextualKNN(my_num_topics, tknc, ncc, ubc, usc, my_reviews)

    bnc = BasicNeighbourhoodCalculator()
    bncc = BasicNeighbourContributionCalculator()
    bubc = BasicUserBaselineCalculator()
    busc = BasicUserSimilarityCalculator()

    snc = SimpleNeighbourhoodCalculator()
    chnc = ContextHybridNeighbourhoodCalculator()

    contextual_knn = ContextualKNN(my_num_topics, nc, ncc, ubc, usc, has_context=True)
    contextual_knn2 = ContextualKNN(my_num_topics, nc, ncc, ubc, busc, has_context=True)
    contextual_knn3 = ContextualKNN(my_num_topics, bnc, bncc, bubc, busc)
    # basic_contextual_knn = BasicContextualKNN(my_num_topics, bnc, bncc, bubc, busc)

    # contextual_knn.threshold1 = 0.8
    # contextual_knn.threshold2 = 0.8
    # contextual_knn.threshold3 = 0.8
    # contextual_knn.threshold4 = 0.8

    # print('Context KNN')
    # context_knn.load(my_records)
    # recommender_evaluator.perform_cross_validation(my_records, context_knn, 5, True)
    basic_knn_rec = BasicKNN(None)
    # print('Basic KNN')
    # recommender_evaluator.perform_cross_validation(my_records, basic_knn_rec, 5)
    # print('Context KNN')
    # recommender_evaluator.perform_cross_validation(my_records, context_knn, 5, my_cache_reviews)
    print('Contextual KNN')
    recommender_evaluator.perform_cross_validation(my_records, contextual_knn, 5, my_cache_reviews)
    print('Contextual KNN2')
    recommender_evaluator.perform_cross_validation(my_records, contextual_knn2, 5, my_cache_reviews)
    # print('Contextual KNN3')
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5)
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5)
    # recommender_evaluator.perform_cross_validation(my_records, contextual_knn3, 5, True, my_cache_reviews)
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_contextual_knn, 10, 65)
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 65)
    print('Basic KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 5, 5.0, my_cache_reviews)
    # precision_in_top_n.calculate_recall_in_top_n(my_records, basic_knn_rec, 10, 5, 5.0, False, my_cache_reviews)
    print('Context KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, context_knn, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN 2')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn2, 10, 5, 5.0, my_cache_reviews)
    print('Contextual KNN 3')
    precision_in_top_n.calculate_recall_in_top_n(my_records, contextual_knn3, 10, 5, 5.0, my_cache_reviews)
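Presumably this script is executed directly, in which case the standard Python entry-point guard would call main():

if __name__ == '__main__':
    main()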