Exemple #1
0
def calculate_predicted_ratings_based_on_user_similarity(ratings_list, users_similarity_list):
    """Predict a rating for every (user, movie) pair and store the predictions.

    The prediction for movie ``m`` and user ``u`` is the similarity-weighted
    sum of the ratings given to ``m``, divided by the sum of the similarities
    of the users who actually rated ``m``.  When that value is 0 or not a
    finite number, the movie's stored average rating is used instead.

    The predictions table is cleared first, then rows are bulk-inserted and
    committed one user at a time while progress is printed.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` rows.  It is
            materialised into a list, so a one-shot DB result is accepted.
        users_similarity_list: Iterable of
            ``(user_id, compare_user_id, similarity)`` rows.

    Raises:
        KeyError: If ``users_similarity_list`` mentions a user id that does
            not occur in ``ratings_list``.
    """
    # The rows are walked twice (id mapping, then matrix fill).  Materialising
    # them lets the function work on exactly what the caller passed instead of
    # re-querying the whole ratings table a second time.
    ratings_list = list(ratings_list)

    # Map the external ids onto dense 0-based matrix indices, in order of
    # first appearance.
    user_ids_to_real_position = dict()
    movie_ids_to_real_position = dict()
    for user_id, movie_id, _rating in ratings_list:
        user_ids_to_real_position.setdefault(user_id, len(user_ids_to_real_position))
        movie_ids_to_real_position.setdefault(movie_id, len(movie_ids_to_real_position))

    user_size = len(user_ids_to_real_position)
    movie_size = len(movie_ids_to_real_position)

    # Rows are indexed by compare_user_id, columns by user_id.
    user_user_similarity_matrix = np.zeros((user_size, user_size))
    for user_id, compare_user_id, similarity in users_similarity_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = user_ids_to_real_position[compare_user_id]
        user_user_similarity_matrix[row_number, column_number] = similarity

    # Rows are movies, columns are users; 0 means "no rating".
    items_users_ratings_matrix = np.zeros((movie_size, user_size))
    for user_id, movie_id, rating in ratings_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = movie_ids_to_real_position[movie_id]
        items_users_ratings_matrix[row_number, column_number] = rating

    unnormalized_predicted_ratings = np.matmul(items_users_ratings_matrix, user_user_similarity_matrix)

    # 1 where a rating exists, 0 elsewhere: multiplying it by the similarity
    # matrix sums the similarities of only those users who rated each movie.
    rated_indicator_matrix = (items_users_ratings_matrix != 0).astype(items_users_ratings_matrix.dtype)
    sum_of_similarities = np.matmul(rated_indicator_matrix, user_user_similarity_matrix)

    # Zero denominators are expected here; the resulting nan/inf cells are
    # replaced by the movie average below, so the warnings are just noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = np.divide(unnormalized_predicted_ratings, sum_of_similarities)

    session = Session()
    clear_rating_predictions_table()

    # One lookup per movie instead of one per (user, movie) fallback.
    average_rating_by_movie_id = dict()
    predictions = []
    progress = 0
    number_to_calculate = user_size
    for user_id, user_real_position in user_ids_to_real_position.items():
        for movie_id, movie_real_position in movie_ids_to_real_position.items():
            predicted_rating = round(z[movie_real_position, user_real_position], 1)
            # isfinite also rejects +/-inf (non-zero numerator over a zero
            # similarity sum), which a plain isnan check would let through.
            if predicted_rating == 0 or not np.isfinite(predicted_rating):
                if movie_id not in average_rating_by_movie_id:
                    average_movie_rating = session.execute(
                        "SELECT average_rating FROM average_movie_rating WHERE movie_id = :param_movie_id",
                        {'param_movie_id': movie_id}).fetchone()
                    average_rating_by_movie_id[movie_id] = average_movie_rating[0]
                predicted_rating = average_rating_by_movie_id[movie_id]
            predictions.append(
                {'user_id': user_id, 'movie_id': movie_id,
                 'rating': predicted_rating})
        # Flush per user so the pending batch never holds more than one
        # user's worth of rows.
        session.bulk_insert_mappings(RatingsPredictions, predictions)
        session.commit()
        predictions.clear()
        progress += 1
        print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
Exemple #2
0
def calculate_users_similarity_for_all_users():
    """Read every row of the ``ratings`` table and compute user similarities from it."""
    all_ratings_query = "SELECT user_id, movie_id, rating FROM ratings"
    db_session = Session()
    # The query result is handed over as-is; the callee iterates it.
    calculate_users_similarity(db_session.execute(all_ratings_query))
Exemple #3
0
def calculate_users_similarity(ratings_list):
    """Compute pairwise user similarity and store it in ``users_similarity``.

    Ratings are grouped per user, the similarity table is cleared, and every
    unordered pair of users is compared once.  The similarity is
    ``1 - cosine(...)`` of the two normalised rating vectors, rounded to three
    decimals.  Pairs whose similarity is NaN or below the
    ``similarity_range_factor`` parameter are skipped; every other pair is
    inserted in both directions.  Inserts are committed once per outer user
    and progress is printed along the way.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` rows.
    """
    # Group ratings as {user_id: {movie_id: rating}}.
    user_ids_to_movie_ratings = dict()
    processed_ratings = 0
    for user_id, movie_id, rating in ratings_list:
        processed_ratings += 1
        user_ids_to_movie_ratings.setdefault(user_id, dict())[movie_id] = rating
        if processed_ratings % 100000 == 0:
            print('Mapping progress: ', processed_ratings / 1000, 'k')

    clear_users_similarity_table()
    session = Session()
    insert_similarity_statement = (
        "INSERT INTO users_similarity(user_id, compare_user_id, similarity) VALUES (:user_id, :compare_user_id, :similarity)"
    )
    progress = 0
    number_to_calculate = len(user_ids_to_movie_ratings)
    similarity_range_factor = get_parameter("similarity_range_factor").value
    for compared_user_id, list_of_compared_user_ratings in user_ids_to_movie_ratings.items():
        inner_progress = 0
        for id_of_user_for_comparision, list_of_user_for_comparison_ratings in user_ids_to_movie_ratings.items():
            inner_progress += 1
            if inner_progress % 10000 == 0:
                print('Inner progress: ', inner_progress / 1000, 'k')

            # Handle each unordered pair exactly once (and never a user with
            # itself).  A plain ordering test is used rather than mixing a
            # bitwise `&` into the comparison, which only worked by accident
            # for integer ids.
            if id_of_user_for_comparision <= compared_user_id:
                continue

            # The helper fills these two dicts in place.
            movie_ids_to_ratings_of_compared_user = dict()
            movie_ids_to_ratings_of_user_for_comparison = dict()
            prepare_vectors_for_comparison(
                list_of_compared_user_ratings,
                list_of_user_for_comparison_ratings,
                movie_ids_to_ratings_of_compared_user,
                movie_ids_to_ratings_of_user_for_comparison)
            normalized_rating_for_compared_user = calculate_normalized_rating_vector(
                movie_ids_to_ratings_of_compared_user.values())
            normalized_rating_for_user_for_comparision = calculate_normalized_rating_vector(
                movie_ids_to_ratings_of_user_for_comparison.values())
            users_similarity = round(
                1 - cosine(normalized_rating_for_compared_user,
                           normalized_rating_for_user_for_comparision), 3)

            # NaN presumably comes from a zero-norm vector -- TODO confirm.
            if isnan(users_similarity):
                continue
            if users_similarity < similarity_range_factor:
                continue

            # Similarity is symmetric, so store the pair in both directions.
            for first_user_id, second_user_id in (
                    (compared_user_id, id_of_user_for_comparision),
                    (id_of_user_for_comparision, compared_user_id)):
                session.execute(
                    insert_similarity_statement,
                    {
                        'user_id': first_user_id,
                        'compare_user_id': second_user_id,
                        'similarity': users_similarity
                    })
        session.commit()
        progress += 1
        print('Progress: ', round(100 * (progress / number_to_calculate), 2),
              '%')
Exemple #4
0
def calculate_all_predictions():
    """Load all ratings and all stored user similarities, then compute predictions from them."""
    ratings_query = "SELECT user_id, movie_id, rating FROM ratings"
    similarity_query = "SELECT user_id, compare_user_id, similarity FROM users_similarity"

    db_session = Session()
    # Both queries are issued before the prediction step, ratings first.
    all_ratings = db_session.execute(ratings_query)
    all_similarities = db_session.execute(similarity_query)
    calculate_predicted_ratings_based_on_user_similarity(all_ratings, all_similarities)
Exemple #5
0
def calculate_average_ratings_for_all_movies():
    """Read every ``(movie_id, rating)`` row from ``ratings`` and pass it to the averaging step."""
    movie_ratings_query = "SELECT movie_id, rating FROM ratings"
    db_session = Session()
    # The query result is handed over as-is; the callee iterates it.
    calculate_average_rating(db_session.execute(movie_ratings_query))