def calculate_predicted_ratings_based_on_user_similarity(ratings_list, users_similarity_list):
    """Predict a rating for every (user, movie) pair and store the results.

    Users and movies are collected from ``ratings_list``. For a movie ``m``
    and a user ``u`` the prediction is::

        sum_v(rating[m, v] * similarity[v, u]) / sum_v(rated[m, v] * similarity[v, u])

    where ``rated[m, v]`` is 1 when user ``v`` has a non-zero rating for
    ``m`` and 0 otherwise. The quotient is rounded to one decimal. When it is
    0 or not a finite number, the value of ``average_rating`` stored in the
    ``average_movie_rating`` table for that movie is used instead.

    ``clear_rating_predictions_table()`` is called before anything is
    written (presumably it empties the target table). Predictions are
    bulk-inserted as ``RatingsPredictions`` mappings with one commit per user,
    and a progress percentage is printed after each user.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` rows. It is
            materialised into a list, so a one-shot database result is fine.
            A rating of 0 is treated as "not rated".
        users_similarity_list: Iterable of
            ``(user_id, compare_user_id, similarity)`` rows.

    Raises:
        KeyError: If a similarity row references a user id that does not
            occur in ``ratings_list``.
    """
    # The ratings are needed twice (index building, matrix filling) but a
    # database result can only be iterated once, so keep them in memory
    # instead of silently re-querying the whole table.
    ratings = list(ratings_list)

    # Map database ids to dense 0-based matrix positions (first-seen order).
    user_ids_to_real_position = {}
    movie_ids_to_real_position = {}
    for user_id, movie_id, _rating in ratings:
        user_ids_to_real_position.setdefault(user_id, len(user_ids_to_real_position))
        movie_ids_to_real_position.setdefault(movie_id, len(movie_ids_to_real_position))

    user_size = len(user_ids_to_real_position)
    movie_size = len(movie_ids_to_real_position)

    # Rows are indexed by compare_user_id, columns by user_id.
    user_user_similarity_matrix = np.zeros((user_size, user_size))
    for user_id, compare_user_id, similarity in users_similarity_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = user_ids_to_real_position[compare_user_id]
        user_user_similarity_matrix[row_number, column_number] = similarity

    # Rows are movies, columns are users.
    items_users_ratings_matrix = np.zeros((movie_size, user_size))
    for user_id, movie_id, rating in ratings:
        column_number = user_ids_to_real_position[user_id]
        row_number = movie_ids_to_real_position[movie_id]
        items_users_ratings_matrix[row_number, column_number] = rating

    unnormalized_predicted_ratings = np.matmul(
        items_users_ratings_matrix, user_user_similarity_matrix)

    # 1 where a rating exists, 0 elsewhere: multiplying it with the similarity
    # matrix sums the similarities of exactly those users who rated a movie.
    # NOTE(review): the similarities are summed with their sign, not as
    # absolute values - confirm this is the intended normalisation.
    rated_mask = (items_users_ratings_matrix != 0).astype(
        items_users_ratings_matrix.dtype)
    sum_of_similarities = np.matmul(rated_mask, user_user_similarity_matrix)

    # Divisions by zero are expected here (no similar user rated the movie);
    # the resulting nan/inf cells are replaced by the movie average below.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = np.divide(unnormalized_predicted_ratings, sum_of_similarities)

    session = Session()
    try:
        clear_rating_predictions_table()

        # The average of a movie does not depend on the user, so look it up
        # at most once per movie instead of once per (user, movie) pair.
        average_rating_by_movie = {}
        number_to_calculate = user_size

        for progress, (user_id, user_real_position) in enumerate(
                user_ids_to_real_position.items(), start=1):
            predictions = []
            for movie_id, movie_real_position in movie_ids_to_real_position.items():
                predicted_rating = round(z[movie_real_position, user_real_position], 1)
                if predicted_rating == 0 or not np.isfinite(predicted_rating):
                    if movie_id not in average_rating_by_movie:
                        average_movie_rating = session.execute(
                            "SELECT average_rating FROM average_movie_rating WHERE movie_id = :param_movie_id",
                            {'param_movie_id': movie_id}).fetchone()
                        average_rating_by_movie[movie_id] = average_movie_rating[0]
                    predicted_rating = average_rating_by_movie[movie_id]
                predictions.append(
                    {'user_id': user_id, 'movie_id': movie_id, 'rating': predicted_rating})

            # Flush one user at a time to keep the pending batch bounded.
            session.bulk_insert_mappings(RatingsPredictions, predictions)
            session.commit()
            print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
    finally:
        session.close()
def calculate_users_similarity_for_all_users():
    """Load every row of the ``ratings`` table and compute user similarities.

    The ``(user_id, movie_id, rating)`` query result is handed to
    ``calculate_users_similarity``. The session used for the query is closed
    once that call has finished, whether it succeeded or raised.
    """
    session = Session()
    try:
        ratings_list = session.execute(
            "SELECT user_id, movie_id, rating FROM ratings")
        calculate_users_similarity(ratings_list)
    finally:
        # Release the connection even when the calculation fails.
        session.close()
def calculate_users_similarity(ratings_list):
    """Compute the pairwise similarity of users and store it in ``users_similarity``.

    The ratings are first grouped per user. Every unordered pair of users is
    then compared once: ``prepare_vectors_for_comparison`` receives both
    users' ratings plus two empty dicts (presumably it fills them with the
    ratings to compare - confirm against the helper), the values of those
    dicts are passed through ``calculate_normalized_rating_vector`` and the
    similarity is ``1 - cosine(...)`` rounded to three decimals.

    Pairs whose similarity is NaN or below the ``similarity_range_factor``
    parameter are skipped. Every remaining pair is inserted twice, once in
    each direction. ``clear_users_similarity_table()`` is called before the
    first insert and the inserts are committed after each outer user.
    Progress messages are printed along the way.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` rows. User
            ids must be mutually orderable, because the ordering is used to
            visit each pair only once.
    """
    # user_id -> {movie_id: rating}
    user_ids_to_movie_ratings = {}
    for processed_ratings, (user_id, movie_id, rating) in enumerate(ratings_list, start=1):
        user_ids_to_movie_ratings.setdefault(user_id, {})[movie_id] = rating
        if processed_ratings % 100000 == 0:
            print('Mapping progress: ', processed_ratings / 1000, 'k')

    clear_users_similarity_table()

    insert_statement = (
        "INSERT INTO users_similarity(user_id, compare_user_id, similarity) VALUES (:user_id, :compare_user_id, :similarity)")

    session = Session()
    try:
        number_to_calculate = len(user_ids_to_movie_ratings)
        similarity_range_factor = get_parameter("similarity_range_factor").value

        for progress, (compared_user_id, list_of_compared_user_ratings) in enumerate(
                user_ids_to_movie_ratings.items(), start=1):
            for inner_progress, (id_of_user_for_comparision, list_of_user_for_comparison_ratings) in enumerate(
                    user_ids_to_movie_ratings.items(), start=1):
                if inner_progress % 10000 == 0:
                    print('Inner progress: ', inner_progress / 1000, 'k')

                # Visit each unordered pair exactly once (and never a user
                # with itself): only continue when the other id is greater.
                if not id_of_user_for_comparision > compared_user_id:
                    continue

                movie_ids_to_ratings_of_compared_user = {}
                movie_ids_to_ratings_of_user_for_comparison = {}
                prepare_vectors_for_comparison(
                    list_of_compared_user_ratings,
                    list_of_user_for_comparison_ratings,
                    movie_ids_to_ratings_of_compared_user,
                    movie_ids_to_ratings_of_user_for_comparison)

                normalized_rating_for_compared_user = calculate_normalized_rating_vector(
                    movie_ids_to_ratings_of_compared_user.values())
                normalized_rating_for_user_for_comparision = calculate_normalized_rating_vector(
                    movie_ids_to_ratings_of_user_for_comparison.values())

                users_similarity = round(
                    1 - cosine(normalized_rating_for_compared_user,
                               normalized_rating_for_user_for_comparision), 3)

                if isnan(users_similarity):
                    continue
                if users_similarity < similarity_range_factor:
                    continue

                # Similarity is symmetric: store the pair in both directions
                # so it can be looked up from either user.
                for first_user_id, second_user_id in (
                        (compared_user_id, id_of_user_for_comparision),
                        (id_of_user_for_comparision, compared_user_id)):
                    session.execute(
                        insert_statement,
                        {
                            'user_id': first_user_id,
                            'compare_user_id': second_user_id,
                            'similarity': users_similarity
                        })

            session.commit()
            print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
    finally:
        session.close()
def calculate_all_predictions():
    """Compute rating predictions from all stored ratings and similarities.

    Reads every row of the ``ratings`` and ``users_similarity`` tables and
    passes both results to
    ``calculate_predicted_ratings_based_on_user_similarity``. The session used
    for the two queries is closed once that call has finished, whether it
    succeeded or raised.
    """
    session = Session()
    try:
        ratings_list = session.execute(
            "SELECT user_id, movie_id, rating FROM ratings")
        users_similarity_list = session.execute(
            "SELECT user_id, compare_user_id, similarity FROM users_similarity")
        calculate_predicted_ratings_based_on_user_similarity(
            ratings_list, users_similarity_list)
    finally:
        # Release the connection even when the calculation fails.
        session.close()
def calculate_average_ratings_for_all_movies():
    """Load ``(movie_id, rating)`` for every rating and compute the averages.

    The query result is passed to ``calculate_average_rating``. The session
    used for the query is closed once that call has finished, whether it
    succeeded or raised.
    """
    session = Session()
    try:
        ratings = session.execute("SELECT movie_id, rating FROM ratings")
        calculate_average_rating(ratings)
    finally:
        # Release the connection even when the calculation fails.
        session.close()