* {userId: {movieId: rating}} * {movieId: {userId: rating}} """ urm = {} movie_ratings = {} with open('resources/train.csv', 'r') as urm_raw: reader = csv.reader(urm_raw) # create a nested dict; {userId: {movieId: rating}} for row in reader: if row[0] != 'userId': urm.setdefault(int(row[0]), {}).setdefault(int(row[1])) urm[int(row[0])][int(row[1])] = float(row[2]) movie_ratings.setdefault(int(row[1]), {}).setdefault(int(row[0])) movie_ratings[int(row[1])][int(row[0])] = float(row[2]) print(top_n_match(movie_ratings, 1, skr=10, similarity=cosine_sim)) time = datetime.now() movie_knn = {} user_knn = {} """ with open('resources/test.csv') as test_raw: reader = csv.reader(test_raw) # create a nested dict for test user for row in reader: if row[0] != 'userId': user_knn[int(row[0])] = top_n_match(urm, int(row[0]), skr=10, n=20, similarity=cosine_sim) print('fatto user: ' + row[0]) """ for movie in movie_ratings: movie_knn.setdefault(movie, {})
# Script: load resources/icm.csv into a nested dict, then, for every item id,
# store the result of top_n_match(..., n=25, similarity=item_sim) in `knn` and
# dump it to resources/knn_item_movie_25.csv as itemId,neighborId,similarity.

icm = {}
# The csv module asks for newline='' on every file object it reads or writes.
with open('resources/icm.csv', 'r', newline='') as icm_raw:
    # Nested dict {itemId: {id from the second column: 1}}.
    # NOTE(review): the second column is presumably a feature/attribute id --
    # confirm against the data set.
    for row in csv.reader(icm_raw):
        # csv.reader yields [] for blank lines; skip those and the header row.
        if not row or row[0] == 'itemId':
            continue
        icm.setdefault(int(row[0]), {})[int(row[1])] = 1

print('computing right now')
time = datetime.datetime.now()
# NOTE(review): nothing runs between taking the timestamp and printing the
# difference, so this prints a near-zero duration. Kept to preserve output.
print(datetime.datetime.now() - time)

knn = {}
time = datetime.datetime.now()
with open('resources/knn_item_movie_25.csv', 'w', newline='') as knn_raw:
    fieldnames = ['itemId', 'neighborId', 'similarity']
    # Header and data rows now go through the same writer so they share one
    # line terminator; '\n' keeps the data rows exactly as they were written
    # before (the header used to end in '\r\n' while the rows ended in '\n').
    w = csv.DictWriter(knn_raw, fieldnames=fieldnames, lineterminator='\n')
    w.writeheader()
    for movie in sorted(icm):
        neighbors = top_n_match(icm, movie, skr=5, n=25, similarity=item_sim)
        knn[movie] = neighbors
        print('fatto movie: ' + str(movie))
        # The result is indexed by the keys it iterates over, as before.
        for other in neighbors:
            w.writerow({
                'itemId': str(movie),
                'neighborId': str(other),
                'similarity': str(neighbors[other]),
            })
# Elapsed time for computing and writing all neighbour lists.
print(datetime.datetime.now() - time)
# Script: load resources/train.csv into a nested dict keyed by the second
# column, then, for every key, store the result of
# top_n_match(..., n=25, similarity=cosine_sim) in `knn` and dump it to
# resources/knn_movie_urm_25.csv as itemId,neighborId,similarity.

urm = {}
# The csv module asks for newline='' on every file object it reads or writes.
with open('resources/train.csv', 'r', newline='') as urm_raw:
    # Nested dict {row[1]: {row[0]: adjusted rating}}. row[0] is the userId
    # column (per the header check); row[1] is presumably the movie id, so the
    # shape is {movieId: {userId: value}} -- the transpose of what the old
    # comment claimed.
    for row in csv.reader(urm_raw):
        # csv.reader yields [] for blank lines; skip those and the header row.
        if not row or row[0] == 'userId':
            continue
        user_id = int(row[0])
        movie_id = int(row[1])
        # NOTE(review): both biases are *added* to the raw rating; confirm the
        # sign convention of item_bias / user_bias (defined elsewhere).
        urm.setdefault(movie_id, {})[user_id] = (
            float(row[2]) + item_bias[movie_id] + user_bias[user_id]
        )

print('computing right now')
time = datetime.datetime.now()
# NOTE(review): nothing runs between taking the timestamp and printing the
# difference, so this prints a near-zero duration. Kept to preserve output.
print(datetime.datetime.now() - time)

knn = {}
time = datetime.datetime.now()
with open('resources/knn_movie_urm_25.csv', 'w', newline='') as knn_raw:
    fieldnames = ['itemId', 'neighborId', 'similarity']
    # Header and data rows now go through the same writer so they share one
    # line terminator; '\n' keeps the data rows exactly as they were written
    # before (the header used to end in '\r\n' while the rows ended in '\n').
    w = csv.DictWriter(knn_raw, fieldnames=fieldnames, lineterminator='\n')
    w.writeheader()
    for movie in sorted(urm):
        neighbors = top_n_match(urm, movie, skr=5, n=25, similarity=cosine_sim)
        knn[movie] = neighbors
        print('fatto movie: ' + str(movie))
        # The result is indexed by the keys it iterates over, as before.
        for other in neighbors:
            w.writerow({
                'itemId': str(movie),
                'neighborId': str(other),
                'similarity': str(neighbors[other]),
            })
# Elapsed time for computing and writing all neighbour lists.
print(datetime.datetime.now() - time)
rec = sorted(rankings, key=lambda x: -x[0])[0:5] return rec urm = {} with open('resources/train.csv', 'r') as urm_raw: reader = csv.reader(urm_raw) # create a nested dict; {userId: {movieId: rating}} for row in reader: if row[0] != 'userId': urm.setdefault(int(row[0]), {}).setdefault(int(row[1])) urm[int(row[0])][int(row[1])] = float(row[2]) print('computing right now') time = datetime.datetime.now() print(top_n_match(urm, 4, 7, similarity=similarity_pearson)) print(datetime.datetime.now() - time) test = {} time = datetime.datetime.now() with open('resources/test.csv') as test_raw: reader = csv.reader(test_raw) # create a nested dict for test user for row in reader: if row[0] != 'userId': test[int(row[0])] = top_n_match(urm, int(row[0]), skr=10, similarity=cosine_sim) print('fatto user: ' + row[0]) print(datetime.datetime.now() - time) print(len(test)) """