Esempio n. 1
0
def get_ml_100K_dataset():
    """Load the ml-100k ratings file and return its records.

    The file is read with ``ETLUtils.load_csv_file`` (``'\\t'`` is passed as
    its second argument) and the ``overall_rating`` entry of every record is
    converted to ``float`` in place before the records are returned.
    """
    # Other copies of the data that have been used with this loader:
    #   /Users/fpena/tmp/bpmf/ml-1k.csv
    #   /Users/fpena/UCC/Thesis/datasets/uncompressed/ml-100k.csv
    dataset_path = '/Users/fpena/tmp/bpmf/ml-100k.csv'
    ratings = ETLUtils.load_csv_file(dataset_path, '\t')

    for rating in ratings:
        rating['overall_rating'] = float(rating['overall_rating'])

    return ratings
Esempio n. 2
0
def parse_dafevara_file():
    """Export each record's artist list as one line of space-separated tokens.

    Reads ``artists-names-by-userId.csv`` from the dafevara folder, rewrites
    the ``artists_names`` entry of every record so that spaces become
    underscores and semicolons become spaces, and then writes those entries,
    one per line, to ``user_artists.txt`` in the same folder.
    """
    artists_key = 'artists_names'
    base_dir = '/Users/fpena/tmp/dafevara/'
    rows = ETLUtils.load_csv_file(base_dir + 'artists-names-by-userId.csv', '|')

    # All records are rewritten before the output file is opened, so a record
    # that fails here does not leave a truncated output file behind.
    for row in rows:
        row[artists_key] = row[artists_key].replace(' ', '_').replace(';', ' ')

    with open(base_dir + 'user_artists.txt', 'w') as out:
        out.writelines('%s\n' % row[artists_key] for row in rows)
Esempio n. 3
0
def add_extra_column_to_csv():
    """Copy a results CSV to a new file with one extra column set to 10.

    The records are loaded with ``ETLUtils.load_csv_file``; the header row is
    read separately with ``csv.reader`` so that the new column
    (``Constants.FM_NUM_FACTORS_FIELD``) can be placed immediately after the
    ``Evaluation_Set`` column. The resulting header list is printed and the
    records are saved to ``/tmp/my_csv_file.csv``.
    """
    source_path = '/tmp/results/rival_yelp_restaurant_results_folds_4.csv'
    rows = ETLUtils.load_csv_file(source_path)

    # Only the first row of the file is needed to recover the column order.
    with open(source_path, 'r') as source:
        headers = next(csv.reader(source))

    position = headers.index('Evaluation_Set') + 1
    headers.insert(position, Constants.FM_NUM_FACTORS_FIELD)
    print(headers)

    for row in rows:
        row[Constants.FM_NUM_FACTORS_FIELD] = 10

    ETLUtils.save_csv_file('/tmp/my_csv_file.csv', rows, headers)
Esempio n. 4
0
def export_results(fold):
    """Write the predictions of one fold to a results file, one per line.

    The fold folder is ``Constants.RIVAL_RATINGS_FOLD_FOLDER % fold``. The
    input is ``<recommender>-rating-predictions.txt`` inside that folder and
    the output is ``carskit_<recommender>_results_<type>.txt``, where
    ``<type>`` is ``rating`` or ``ranking`` depending on
    ``Constants.RIVAL_EVALUATION_STRATEGY``. A strategy that is not in the
    mapping raises ``KeyError``.
    """
    algorithm = Constants.CARSKIT_RECOMMENDERS
    fold_dir = Constants.RIVAL_RATINGS_FOLD_FOLDER % fold

    strategy_to_type = {
        'user_test': 'rating',
        'test_items': 'rating',
        'rel_plus_n': 'ranking'
    }
    output_type = strategy_to_type[Constants.RIVAL_EVALUATION_STRATEGY]

    # An alternative input that has been used here:
    #   fold_dir + 'UserSplitting-BiasedMF-rating-predictions.txt'
    source_path = fold_dir + algorithm + '-rating-predictions.txt'
    target_path = (
        fold_dir + 'carskit_' + algorithm + '_results_' + output_type + '.txt')

    rows = ETLUtils.load_csv_file(source_path, '\t')
    predictions = [row['prediction'] for row in rows]

    with open(target_path, 'w') as target:
        target.writelines("%s\n" % value for value in predictions)
Esempio n. 5
0
def export_results(fold):
    """Dump the ``prediction`` column of a fold's predictions file.

    Reads ``<recommender>-rating-predictions.txt`` from the folder given by
    ``Constants.RIVAL_RATINGS_FOLD_FOLDER % fold`` and writes every
    ``prediction`` value on its own line to
    ``carskit_<recommender>_results_<type>.txt`` in the same folder.
    ``<type>`` is looked up from ``Constants.RIVAL_EVALUATION_STRATEGY``; an
    unknown strategy raises ``KeyError``.
    """
    recommender_name = Constants.CARSKIT_RECOMMENDERS
    folder = Constants.RIVAL_RATINGS_FOLD_FOLDER % fold

    type_by_strategy = {
        'user_test': 'rating',
        'test_items': 'rating',
        'rel_plus_n': 'ranking'
    }
    result_type = type_by_strategy[Constants.RIVAL_EVALUATION_STRATEGY]

    # Previously used input file:
    #   folder + 'UserSplitting-BiasedMF-rating-predictions.txt'
    input_path = folder + recommender_name + '-rating-predictions.txt'
    output_path = (
        folder + 'carskit_' + recommender_name + '_results_' + result_type +
        '.txt')

    loaded = ETLUtils.load_csv_file(input_path, '\t')
    values = [entry['prediction'] for entry in loaded]

    with open(output_path, 'w') as sink:
        sink.writelines("%s\n" % value for value in values)
Esempio n. 6
0
def get_ml_1m_dataset():
    """Load the ml-1m ratings file and return its records.

    The file is read with ``ETLUtils.load_csv_file`` (``'|'`` is passed as
    its second argument) and the ``overall_rating`` entry of every record is
    converted to ``float`` in place before the records are returned.
    """
    dataset_path = '/Users/fpena/UCC/Thesis/datasets/uncompressed/ml-1m.csv'
    ratings = ETLUtils.load_csv_file(dataset_path, '|')

    for rating in ratings:
        rating['overall_rating'] = float(rating['overall_rating'])

    return ratings
Esempio n. 7
0
                  'ids': str
              })

# NOTE(review): `svd` and `file_name_header` are defined before this fragment,
# outside the visible part of the file.

# `k` is forwarded as the `k` keyword of svd.compute() -- presumably the
# number of dimensions kept by the decomposition; confirm against the SVD
# library in use.
k = 100
svd.compute(k=k,
            min_values=10,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True)
# Disabled manual spot checks of single predictions:
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)

# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Load the records to evaluate and collect one absolute error per record.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []

for record in records:
    # Any KeyError raised while handling a record (a missing field, or one
    # raised inside the svd calls) makes the loop skip that record without
    # adding an error for it.
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # The trailing 1 and 5 are presumably the lower/upper bounds of the
        # predicted value -- TODO confirm with the library documentation.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        # The reference value is read back from the matrix held by `svd`,
        # not from the record itself.
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        continue
Esempio n. 8
0
# NOTE(review): `svd` is created before this snippet, outside the visible part
# of the file.

# Earlier variant of the load call ('::' separator, int ids), kept for
# reference:
# svd.load_data(filename=file_name, sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int})
# `file_name` is the data passed to svd.load_data(); `file_name_header` is
# loaded further down as records with 'user' and 'item' fields.
file_name = '/Users/fpena/tmp/reviews.csv'
file_name_header = '/Users/fpena/tmp/reviews-header.csv'
# Smaller alternative inputs:
# file_name = '/Users/fpena/tmp/small-reviews-matrix.csv'
# file_name_header = '/Users/fpena/tmp/small-reviews-header.csv'
# The format dict assigns field 0 to 'col', field 1 to 'row' and field 2 to
# 'value', with ids handled as str.
svd.load_data(filename=file_name, sep='|', format={'col':0, 'row':1, 'value':2, 'ids': str})

# `k` is forwarded as the `k` keyword of svd.compute() -- presumably the
# number of dimensions kept by the decomposition; confirm against the SVD
# library in use.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)
# Disabled manual spot checks of single predictions:
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)

# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Load the records to evaluate and collect one absolute error per record.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []

for record in records:
    # Any KeyError raised while handling a record (a missing field, or one
    # raised inside the svd calls) makes the loop skip that record without
    # adding an error for it.
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # The trailing 1 and 5 are presumably the lower/upper bounds of the
        # predicted value -- TODO confirm with the library documentation.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        # The reference value is read back from the matrix held by `svd`,
        # not from the record itself.
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        continue