def create_average_ratings_for_movies_with_ratings(global_average_rating, minimum_number_of_ratings, movie_ids_to_ratings):
    """Recompute and store one damped average rating per movie.

    Movies with fewer than ``minimum_number_of_ratings`` ratings are padded
    with ``global_average_rating`` up to that minimum before averaging, so
    sparsely rated movies are pulled toward the global average. Each result
    is rounded to one decimal and written as an ``AverageMovieRating`` row
    after the table has been cleared.

    Args:
        global_average_rating: Value used to pad sparsely rated movies.
        minimum_number_of_ratings: Number of ratings below which padding
            is applied.
        movie_ids_to_ratings: Mapping of movie id to a sized collection of
            numeric ratings.
    """
    # Compute everything before touching the database so that a failure in
    # the input data does not leave the table cleared but not repopulated.
    average_ratings = []
    for processed_movie, (movie_id, list_of_ratings) in enumerate(
            movie_ids_to_ratings.items(), start=1):
        average_movie_rating_value = _padded_average_rating(
            list_of_ratings, global_average_rating, minimum_number_of_ratings)
        average_ratings.append({
            'movie_id': movie_id,
            'average_rating': round(average_movie_rating_value, 1),
        })
        if processed_movie % 10000 == 0:
            print('Average calculation progress: ', processed_movie / 1000, 'k')

    clear_average_rating_table()
    session = Session()
    try:
        session.bulk_insert_mappings(AverageMovieRating, average_ratings)
        session.commit()
    finally:
        # close() rolls back anything uncommitted and releases the connection.
        session.close()


def _padded_average_rating(ratings, global_average_rating, minimum_number_of_ratings):
    """Return the mean of ``ratings``, padded with the global average when sparse."""
    number_of_ratings = len(ratings)
    if number_of_ratings < minimum_number_of_ratings:
        missing_ratings = minimum_number_of_ratings - number_of_ratings
        return (sum(ratings) + global_average_rating * missing_ratings
                ) / minimum_number_of_ratings
    if number_of_ratings == 0:
        # Only reachable when the minimum is <= 0; a plain mean would divide
        # by zero, so fall back to the global average.
        return global_average_rating
    return sum(ratings) / number_of_ratings
def calculate_predicted_ratings_based_on_user_similarity(ratings_list, users_similarity_list):
    """Predict a rating for every (user, movie) pair and store the results.

    A prediction is the similarity-weighted sum of the ratings other users
    gave the movie, divided by the sum of the similarities of the users who
    rated it, rounded to one decimal. When that value is 0 or not finite
    (no similar user rated the movie), the movie's stored average rating is
    used instead. Predictions are written as ``RatingsPredictions`` rows,
    one committed batch per user, after the table has been cleared.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` triples,
            used only to assign matrix positions to users and movies.
        users_similarity_list: Iterable of
            ``(user_id, compare_user_id, similarity)`` triples.

    Raises:
        KeyError: If a similarity triple or a row of the ``ratings`` table
            references a user or movie that is absent from ``ratings_list``.
        TypeError: If a fallback is needed for a movie that has no row in
            ``average_movie_rating``.
    """
    # Assign each user/movie a dense matrix index in first-seen order.
    user_ids_to_real_position = {}
    movie_ids_to_real_position = {}
    for user_id, movie_id, _ in ratings_list:
        user_ids_to_real_position.setdefault(
            user_id, len(user_ids_to_real_position))
        movie_ids_to_real_position.setdefault(
            movie_id, len(movie_ids_to_real_position))

    user_size = len(user_ids_to_real_position)
    movie_size = len(movie_ids_to_real_position)

    user_user_similarity_matrix = np.zeros((user_size, user_size))
    for user_id, compare_user_id, similarity in users_similarity_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = user_ids_to_real_position[compare_user_id]
        user_user_similarity_matrix[row_number, column_number] = similarity

    items_users_ratings_matrix = np.zeros((movie_size, user_size))
    session = Session()
    try:
        # The ratings are read again from the database rather than from the
        # argument -- presumably because ratings_list may be a one-shot
        # result cursor; TODO confirm with the caller.
        stored_ratings = session.execute(
            "SELECT user_id, movie_id, rating FROM ratings")
        for user_id, movie_id, rating in stored_ratings:
            column_number = user_ids_to_real_position[user_id]
            row_number = movie_ids_to_real_position[movie_id]
            items_users_ratings_matrix[row_number, column_number] = rating

        unnormalized_predicted_ratings = np.matmul(
            items_users_ratings_matrix, user_user_similarity_matrix)

        # Turn the ratings into a 0/1 "has rated" mask (in place) so the same
        # product yields the sum of similarities of the users who rated.
        items_users_ratings_matrix[np.nonzero(items_users_ratings_matrix)] = 1
        absolute_sum_of_similarities = np.matmul(
            items_users_ratings_matrix, user_user_similarity_matrix)

        # Zero denominators are expected; the resulting NaN/inf entries are
        # replaced by the movie average below, so silence the warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            z = np.divide(unnormalized_predicted_ratings,
                          absolute_sum_of_similarities)

        clear_rating_predictions_table()

        # One lookup per movie instead of one per (user, movie) pair.
        average_rating_by_movie_id = {}
        number_to_calculate = user_size
        for progress, (user_id, user_real_position) in enumerate(
                user_ids_to_real_position.items(), start=1):
            predictions = []
            for movie_id, movie_real_position in movie_ids_to_real_position.items():
                predicted_rating = round(
                    z[movie_real_position, user_real_position], 1)
                if predicted_rating == 0 or not np.isfinite(predicted_rating):
                    if movie_id not in average_rating_by_movie_id:
                        average_movie_rating = session.execute(
                            "SELECT average_rating FROM average_movie_rating WHERE movie_id = :param_movie_id",
                            {'param_movie_id': movie_id}).fetchone()
                        average_rating_by_movie_id[movie_id] = average_movie_rating[0]
                    predicted_rating = average_rating_by_movie_id[movie_id]
                predictions.append(
                    {'user_id': user_id, 'movie_id': movie_id, 'rating': predicted_rating})
            session.bulk_insert_mappings(RatingsPredictions, predictions)
            session.commit()
            print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
    finally:
        # close() rolls back anything uncommitted and releases the connection.
        session.close()
def save_parameter(description, value):
    """Create or update the ``Management`` record with the given description.

    The first record whose ``description`` matches is given the new
    ``value``; when none exists a new ``Management(description, value)`` is
    built. The record is then added to the session and committed.
    """
    db = Session()
    matching = db.query(Management).filter(
        Management.description == description)
    record = matching.first()
    if record is not None:
        record.value = value
    else:
        record = Management(description, value)
    db.add(record)
    # Persist the new or updated record.
    db.commit()
import sys import numpy as np import progressbar from dataAccess import Session, Ratings from dataAccess.entities import RatingsPredictionsBySVD, Sample session = Session() session.query(RatingsPredictionsBySVD).delete() session.commit() # Remove limit to use all ratings ratings_list = session.query(Ratings).limit(1000).all() # ratings_list = session.query(Ratings).all() samples = session.query(Sample).all() user_to_index = {} movie_to_index = {} for rating in ratings_list: if rating.user_id not in user_to_index: user_to_index[rating.user_id] = len(user_to_index) if rating.movie_id not in movie_to_index: movie_to_index[rating.movie_id] = len(movie_to_index) for sample in samples: if sample.user_id not in user_to_index: user_to_index[sample.user_id] = len(user_to_index) if sample.movie_id not in movie_to_index:
def clear_users_similarity_table():
    """Delete every ``UsersSimilarity`` row and commit the deletion."""
    db = Session()
    all_similarities = db.query(UsersSimilarity)
    all_similarities.delete()
    db.commit()
def calculate_users_similarity(ratings_list):
    """Compute pairwise user similarities and store them in ``users_similarity``.

    Ratings are grouped per user, then every unordered pair of users is
    compared once. The similarity is ``1 - cosine(...)`` of the two vectors
    produced by ``calculate_normalized_rating_vector``, rounded to three
    decimals. Pairs whose similarity is NaN or below the stored
    ``similarity_range_factor`` parameter are skipped. Every kept pair is
    inserted twice, once in each direction, and the inserts are committed
    after each outer user. The table is cleared before the computation.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` triples.
            User ids must be mutually orderable with ``>``.
    """
    user_ids_to_movie_ratings = {}
    for processed_ratings, (user_id, movie_id, rating) in enumerate(
            ratings_list, start=1):
        user_ids_to_movie_ratings.setdefault(user_id, {})[movie_id] = rating
        if processed_ratings % 100000 == 0:
            print('Mapping progress: ', processed_ratings / 1000, 'k')

    clear_users_similarity_table()
    session = Session()
    try:
        number_to_calculate = len(user_ids_to_movie_ratings)
        similarity_range_factor = get_parameter("similarity_range_factor").value
        insert_statement = "INSERT INTO users_similarity(user_id, compare_user_id, similarity) VALUES (:user_id, :compare_user_id, :similarity)"

        for progress, (compared_user_id, list_of_compared_user_ratings) in enumerate(
                user_ids_to_movie_ratings.items(), start=1):
            for inner_progress, (id_of_user_for_comparision, list_of_user_for_comparison_ratings) in enumerate(
                    user_ids_to_movie_ratings.items(), start=1):
                if inner_progress % 10000 == 0:
                    print('Inner progress: ', inner_progress / 1000, 'k')

                # Handle each unordered pair once: only when the other id is
                # strictly greater (which also excludes the user itself).
                # The former `a != b & b > a` test relied on `&` binding
                # tighter than the comparisons and only worked for int ids.
                if not id_of_user_for_comparision > compared_user_id:
                    continue

                # The helper receives two empty dicts and presumably fills
                # them with the ratings relevant to the comparison --
                # TODO confirm against prepare_vectors_for_comparison.
                movie_ids_to_ratings_of_compared_user = {}
                movie_ids_to_ratings_of_user_for_comparison = {}
                prepare_vectors_for_comparison(
                    list_of_compared_user_ratings,
                    list_of_user_for_comparison_ratings,
                    movie_ids_to_ratings_of_compared_user,
                    movie_ids_to_ratings_of_user_for_comparison)

                normalized_rating_for_compared_user = calculate_normalized_rating_vector(
                    movie_ids_to_ratings_of_compared_user.values())
                normalized_rating_for_user_for_comparision = calculate_normalized_rating_vector(
                    movie_ids_to_ratings_of_user_for_comparison.values())

                users_similarity = round(
                    1 - cosine(normalized_rating_for_compared_user,
                               normalized_rating_for_user_for_comparision), 3)
                if isnan(users_similarity):
                    continue
                if users_similarity < similarity_range_factor:
                    continue

                # Store the symmetric relation in both directions.
                for first_user_id, second_user_id in (
                        (compared_user_id, id_of_user_for_comparision),
                        (id_of_user_for_comparision, compared_user_id)):
                    session.execute(insert_statement, {
                        'user_id': first_user_id,
                        'compare_user_id': second_user_id,
                        'similarity': users_similarity,
                    })

            session.commit()
            print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
    finally:
        # close() rolls back anything uncommitted and releases the connection.
        session.close()
def clear_rating_predictions_table():
    """Delete every ``RatingsPredictions`` row and commit the deletion."""
    db = Session()
    all_predictions = db.query(RatingsPredictions)
    all_predictions.delete()
    db.commit()
def clear_average_rating_table():
    """Delete every ``AverageMovieRating`` row and commit the deletion."""
    db = Session()
    all_averages = db.query(AverageMovieRating)
    all_averages.delete()
    db.commit()