def get_ml_100K_dataset(file_path='/Users/fpena/tmp/bpmf/ml-100k.csv'):
    """Load the MovieLens 100K ratings from a tab-separated CSV file.

    :param file_path: path to the tab-separated ratings file. Defaults to
     the previously hard-coded location so existing callers are unaffected.
    :return: a list of record dicts with 'overall_rating' cast to float
    """
    records = ETLUtils.load_csv_file(file_path, '\t')
    for record in records:
        # The CSV loader returns every field as a string; cast ratings once
        # here so downstream numeric code can use them directly.
        record['overall_rating'] = float(record['overall_rating'])
    return records
def parse_dafevara_file(folder='/Users/fpena/tmp/dafevara/'):
    """Convert a '|'-separated artists-by-user CSV into a plain-text corpus.

    Each line of the output file contains the artists listened to by one
    user: multi-word artist names are joined with underscores and the
    artists themselves are separated by single spaces.

    :param folder: directory containing 'artists-names-by-userId.csv'.
     The result is written to 'user_artists.txt' in the same directory.
     Defaults to the previously hard-coded location.
    """
    ARTISTS_NAMES_FIELD = 'artists_names'
    file_path = folder + 'artists-names-by-userId.csv'
    records = ETLUtils.load_csv_file(file_path, '|')

    for record in records:
        # 'Pink Floyd;Queen' -> 'Pink_Floyd Queen': underscores keep each
        # artist name a single token, spaces separate artists.
        artists = record[ARTISTS_NAMES_FIELD].replace(' ', '_')
        record[ARTISTS_NAMES_FIELD] = artists.replace(';', ' ')

    output_file = folder + 'user_artists.txt'
    with open(output_file, 'w') as of:
        # One writelines call instead of a write per record.
        of.writelines(
            '%s\n' % record[ARTISTS_NAMES_FIELD] for record in records)
def add_extra_column_to_csv(column_value=10,
                            output_file='/tmp/my_csv_file.csv'):
    """Add a constant-valued column to the fold-results CSV file.

    Inserts a Constants.FM_NUM_FACTORS_FIELD column immediately after the
    'Evaluation_Set' header, assigns ``column_value`` to it on every
    record, and saves the augmented table to ``output_file``.

    :param column_value: value stored in the new column for every row
     (default 10, matching the previous hard-coded behavior)
    :param output_file: destination path for the augmented CSV
     (default '/tmp/my_csv_file.csv', as before)
    """
    csv_file_name = '/tmp/results/rival_yelp_restaurant_results_folds_4.csv'
    records = ETLUtils.load_csv_file(csv_file_name)

    # Read the header row directly so the new column can be positioned
    # right after 'Evaluation_Set'; only the read needs the open file.
    with open(csv_file_name, 'r') as csvinput:
        reader = csv.reader(csvinput)
        headers = next(reader)
    index = headers.index('Evaluation_Set') + 1
    headers.insert(index, Constants.FM_NUM_FACTORS_FIELD)
    print(headers)

    for record in records:
        record[Constants.FM_NUM_FACTORS_FIELD] = column_value

    ETLUtils.save_csv_file(output_file, records, headers)
def export_results(fold):
    """Dump the CARSKit recommender's raw rating predictions for one fold.

    Reads the recommender's tab-separated predictions file for ``fold``
    and writes the 'prediction' column to a plain-text results file, one
    value per line.
    """
    recommender = Constants.CARSKIT_RECOMMENDERS
    ratings_fold_folder = Constants.RIVAL_RATINGS_FOLD_FOLDER % fold

    # Each RiVal evaluation strategy maps to the prediction-type tag used
    # in the results file name.
    prediction_type_map = {
        'user_test': 'rating',
        'test_items': 'rating',
        'rel_plus_n': 'ranking',
    }
    prediction_type = prediction_type_map[Constants.RIVAL_EVALUATION_STRATEGY]

    ratings_file = (
        ratings_fold_folder + recommender + '-rating-predictions.txt')
    results_file = (
        ratings_fold_folder + 'carskit_' + recommender + '_results_' +
        prediction_type + '.txt')

    rating_records = ETLUtils.load_csv_file(ratings_file, '\t')
    with open(results_file, 'w') as output:
        for rating_record in rating_records:
            output.write("%s\n" % rating_record['prediction'])
def get_ml_1m_dataset():
    """Load the MovieLens 1M ratings from a '|'-separated CSV file.

    :return: a list of record dicts with 'overall_rating' cast to float
    """
    file_path = '/Users/fpena/UCC/Thesis/datasets/uncompressed/ml-1m.csv'
    records = ETLUtils.load_csv_file(file_path, '|')
    for entry in records:
        # CSV fields arrive as strings; convert the rating to a number.
        entry['overall_rating'] = float(entry['overall_rating'])
    return records
'ids': str })  # NOTE(review): tail of an svd.load_data(...) call whose opening is outside this excerpt — confirm against the full file

# Number of latent factors kept in the decomposition.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
            post_normalize=True)
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)
# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Reload the ratings (file variant with a header row) and accumulate the
# absolute error between each SVD prediction and the stored matrix value.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []
for record in records:
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # Predict on a 1-5 rating scale; presumably the dataset's scale —
        # TODO confirm.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        # Skip users/items the fitted model has no entry for.
        continue
# svd.load_data(filename=file_name, sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int}) file_name = '/Users/fpena/tmp/reviews.csv' file_name_header = '/Users/fpena/tmp/reviews-header.csv' # file_name = '/Users/fpena/tmp/small-reviews-matrix.csv' # file_name_header = '/Users/fpena/tmp/small-reviews-header.csv' svd.load_data(filename=file_name, sep='|', format={'col':0, 'row':1, 'value':2, 'ids': str}) k = 100 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True) # predicted_rating = svd.predict(int(5), 'A1', 1, 10) # predicted_rating2 = svd.predict(int(1), 'A1', 1, 10) # print('Predicted rating', predicted_rating) # print('Predicted rating', predicted_rating2) records = ETLUtils.load_csv_file(file_name_header, '|') errors = [] for record in records: try: # print(record['user'], record['item'], record['rating']) user = record['user'] item = int(record['item']) predicted_rating = svd.predict(item, user, 1, 5) print(record['user'], record['item'], predicted_rating) # predicted_rating = round(predicted_rating) actual_rating = svd.get_matrix().value(item, user) error = abs(predicted_rating - actual_rating) errors.append(error) except KeyError: continue