Exemple #1
0
def create_average_ratings_for_movies_with_ratings(global_average_rating,
                                                   minimum_number_of_ratings,
                                                   movie_ids_to_ratings):
    """Recompute the average rating of every movie and bulk-insert the rows.

    Calls ``clear_average_rating_table()`` first, then inserts one
    ``AverageMovieRating`` mapping per entry of ``movie_ids_to_ratings``
    and commits. A progress line is printed every 10000 movies.

    Args:
        global_average_rating: Value used to pad movies that have fewer
            than ``minimum_number_of_ratings`` ratings.
        minimum_number_of_ratings: Below this count the average is padded
            with ``global_average_rating`` (see ``_damped_average``).
        movie_ids_to_ratings: Mapping of movie id to a list of ratings.
    """
    session = Session()
    clear_average_rating_table()
    average_ratings = []
    for processed_movie, (movie_id, list_of_ratings) in enumerate(
            movie_ids_to_ratings.items(), start=1):
        average_movie_rating_value = _damped_average(
            list_of_ratings, global_average_rating,
            minimum_number_of_ratings)
        average_ratings.append({
            'movie_id': movie_id,
            'average_rating': round(average_movie_rating_value, 1),
        })
        if processed_movie % 10000 == 0:
            print('Average calculation progress: ', processed_movie / 1000,
                  'k')
    session.bulk_insert_mappings(AverageMovieRating, average_ratings)
    session.commit()


def _damped_average(list_of_ratings, global_average_rating,
                    minimum_number_of_ratings):
    """Return the mean of the ratings, padded up to the minimum count."""
    number_of_ratings = len(list_of_ratings)
    if number_of_ratings < minimum_number_of_ratings:
        # Pretend the missing ratings were all equal to the global average
        # so that sparsely rated movies are pulled towards it.
        missing_ratings = minimum_number_of_ratings - number_of_ratings
        padded_sum = (sum(list_of_ratings)
                      + global_average_rating * missing_ratings)
        return padded_sum / minimum_number_of_ratings
    if number_of_ratings == 0:
        # Only reachable when minimum_number_of_ratings <= 0; a plain mean
        # would divide by zero, so fall back to the global average.
        return global_average_rating
    return sum(list_of_ratings) / number_of_ratings
Exemple #2
0
def calculate_predicted_ratings_based_on_user_similarity(ratings_list, users_similarity_list):
    """Predict a rating for every (user, movie) pair and store the results.

    The prediction for a pair is the similarity-weighted sum of the ratings
    other users gave the movie, divided by the sum of the similarities of
    the users who rated it. Pairs whose rounded prediction is 0 or not
    finite fall back to the movie's row in ``average_movie_rating``.
    Predictions are inserted as ``RatingsPredictions`` mappings and
    committed once per user, after ``clear_rating_predictions_table()`` has
    been called.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` triples.
            Only the ids are used, to assign matrix positions; the rating
            values themselves are read again from the ``ratings`` table.
        users_similarity_list: Iterable of
            ``(user_id, compare_user_id, similarity)`` triples. Both ids
            must also occur in ``ratings_list``, otherwise ``KeyError``.
    """
    # Assign each id a dense matrix position in order of first appearance.
    user_ids_to_real_position = {}
    movie_ids_to_real_position = {}
    for user_id, movie_id, _ in ratings_list:
        user_ids_to_real_position.setdefault(
            user_id, len(user_ids_to_real_position))
        movie_ids_to_real_position.setdefault(
            movie_id, len(movie_ids_to_real_position))

    user_size = len(user_ids_to_real_position)
    movie_size = len(movie_ids_to_real_position)

    # Rows are the compared user, columns the user being predicted for.
    user_user_similarity_matrix = np.zeros((user_size, user_size))
    for user_id, compare_user_id, similarity in users_similarity_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = user_ids_to_real_position[compare_user_id]
        user_user_similarity_matrix[row_number, column_number] = similarity

    items_users_ratings_matrix = np.zeros((movie_size, user_size))
    session = Session()
    # NOTE(review): ratings are re-read from the database instead of reusing
    # the ``ratings_list`` argument -- presumably because the argument can
    # only be iterated once; confirm against the caller.
    stored_ratings = session.execute("SELECT user_id, movie_id, rating FROM ratings")
    for user_id, movie_id, rating in stored_ratings:
        column_number = user_ids_to_real_position[user_id]
        row_number = movie_ids_to_real_position[movie_id]
        items_users_ratings_matrix[row_number, column_number] = rating

    unnormalized_predicted_ratings = np.matmul(items_users_ratings_matrix, user_user_similarity_matrix)

    # 1 where a rating exists, 0 elsewhere: multiplying it with the
    # similarities sums the similarity of exactly the users who rated.
    # NOTE(review): the similarities are summed as-is, not as absolute
    # values; with negative similarities the denominator can cancel out.
    has_rating_matrix = (items_users_ratings_matrix != 0).astype(float)
    sum_of_similarities = np.matmul(has_rating_matrix, user_user_similarity_matrix)

    # Cells with a zero denominator become nan/inf and are replaced by the
    # fallback below, so the division warnings carry no information.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = np.divide(unnormalized_predicted_ratings, sum_of_similarities)

    clear_rating_predictions_table()
    predictions = []
    # The fallback depends on the movie only, so query it once per movie
    # instead of once per (user, movie) cell.
    fallback_rating_by_movie_id = {}
    number_to_calculate = user_size
    for progress, (user_id, user_real_position) in enumerate(
            user_ids_to_real_position.items(), start=1):
        for movie_id, movie_real_position in movie_ids_to_real_position.items():
            predicted_rating = round(z[movie_real_position, user_real_position], 1)
            # isfinite rejects nan *and* +/-inf (non-zero numerator over a
            # zero similarity sum), neither of which is a usable rating.
            if predicted_rating == 0 or not np.isfinite(predicted_rating):
                if movie_id not in fallback_rating_by_movie_id:
                    average_movie_rating = session.execute(
                        "SELECT average_rating FROM average_movie_rating WHERE movie_id = :param_movie_id",
                        {'param_movie_id': movie_id}).fetchone()
                    fallback_rating_by_movie_id[movie_id] = average_movie_rating[0]
                predicted_rating = fallback_rating_by_movie_id[movie_id]
            predictions.append(
                {'user_id': user_id, 'movie_id': movie_id,
                 'rating': predicted_rating})
        # Flush one user at a time to keep the pending batch bounded.
        session.bulk_insert_mappings(RatingsPredictions, predictions)
        session.commit()
        predictions.clear()
        print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
        user_features[:, rank] /= np.linalg.norm(user_features[:, rank])
        movie_features[:, rank] /= np.linalg.norm(movie_features[:, rank])

    return user_features, singular_values, movie_features


def get_predicted_rating(S, V, U, user, item):
    """Return the predicted rating of ``item`` for ``user``.

    Computes ``sum(U[u, r] * S[r] * V[m, r] for every index r of S)``,
    where ``u`` and ``m`` are the positions of ``user`` and ``item`` in the
    module-level ``user_to_index`` and ``movie_to_index`` lookups.

    Args:
        S: Sequence of per-component weights; its length sets the number
            of terms summed.
        V: 2-D array indexed as ``V[movie_position, r]``.
        U: 2-D array indexed as ``U[user_position, r]``.
        user: Key into ``user_to_index``.
        item: Key into ``movie_to_index``.
    """
    # Both positions are the same for every component, so resolve them once.
    user_position = user_to_index[user]
    movie_position = movie_to_index[item]
    return sum(
        U[user_position, r] * weight * V[movie_position, r]
        for r, weight in enumerate(S)
    )


# Fit the model, then score every sample and store the predictions.
U, S, V = train()
ratings = []

for sample in progressbar.progressbar(samples):
    user_id, movie_id = sample.user_id, sample.movie_id
    # get_predicted_rating takes its factors in (S, V, U) order.
    predicted_rating = get_predicted_rating(S, V, U, user_id, movie_id)
    ratings.append(
        {
            'user_id': user_id,
            'movie_id': movie_id,
            'rating': round(predicted_rating, 2),
        }
    )

# All rows are written in a single bulk insert followed by one commit.
print("Writing to DB...")
session.bulk_insert_mappings(RatingsPredictionsBySVD, ratings)
session.commit()