예제 #1
0
def calculate_and_save_global_average_rating():
    """Compute the mean of all stored ratings, persist it and return it.

    The mean of ``Ratings.rating`` is rounded to three decimal places and
    stored through ``save_parameter`` under the key ``"average_rating"``.

    Returns:
        The rounded global average rating.

    Raises:
        ValueError: If the database returns NULL for the average, i.e. there
            are no ratings to average.
    """
    session = Session()
    # ``Query.value()`` is deprecated in SQLAlchemy 1.4 and removed in 2.0;
    # selecting the aggregate and calling ``scalar()`` works on every version.
    raw_average = session.query(func.avg(Ratings.rating)).scalar()
    if raw_average is None:
        # Without this guard ``round(None, 3)`` fails with an opaque TypeError.
        raise ValueError(
            "cannot compute the global average rating: no ratings found")
    global_average_rating = round(raw_average, 3)
    # Save average_rating to DB
    save_parameter("average_rating", global_average_rating)
    return global_average_rating
예제 #2
0
def get_top_scored_movies(number_of_movies):
    """Return the movies with the highest stored average rating.

    The ``AverageMovieRating`` rows are ordered by ``average_rating``
    descending and limited to ``number_of_movies``; for each of them the
    first ``Movies`` row with the same ``movie_id`` is looked up.

    Returns:
        A list in ranking order. An entry is ``None`` when no ``Movies`` row
        matches the ranked ``movie_id``.
    """
    session = Session()
    best_first = AverageMovieRating.average_rating.desc()
    ranking = session.query(AverageMovieRating).order_by(best_first).limit(
        number_of_movies)
    return [
        session.query(Movies).filter(
            Movies.movie_id == entry.movie_id).first()
        for entry in ranking
    ]
예제 #3
0
def calculate_predicted_ratings_based_on_user_similarity(ratings_list, users_similarity_list):
    """Predict a rating for every (user, movie) pair and store the results.

    For each movie/user cell the prediction is the similarity-weighted sum of
    the ratings given to that movie, divided by the sum of the similarities
    of the users who rated it. Cells whose rounded prediction is 0 or not
    finite fall back to the movie's value in ``average_movie_rating``.

    Side effects: empties the predictions table through
    ``clear_rating_predictions_table()``, then bulk-inserts and commits one
    batch of ``RatingsPredictions`` rows per user, printing progress.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` triples.
            It is consumed once and copied, so a one-shot database result
            is acceptable.
        users_similarity_list: Iterable of
            ``(user_id, compare_user_id, similarity)`` triples. Every user id
            in it must also occur in ``ratings_list``.

    Raises:
        KeyError: If ``users_similarity_list`` mentions a user id that does
            not occur in ``ratings_list``.
    """
    # The ratings are traversed twice (index building, matrix filling). A
    # database result can only be iterated once, so copy it up front instead
    # of discarding the argument and re-querying the whole ratings table.
    ratings = list(ratings_list)

    # Map the real ids to dense 0-based matrix positions, in order of first
    # appearance.
    user_ids_to_real_position = dict()
    movie_ids_to_real_position = dict()
    for user_id, movie_id, _rating in ratings:
        if user_id not in user_ids_to_real_position:
            user_ids_to_real_position[user_id] = len(user_ids_to_real_position)
        if movie_id not in movie_ids_to_real_position:
            movie_ids_to_real_position[movie_id] = len(movie_ids_to_real_position)

    user_size = len(user_ids_to_real_position)
    movie_size = len(movie_ids_to_real_position)

    # Rows are indexed by compare_user_id, columns by user_id.
    user_user_similarity_matrix = np.zeros((user_size, user_size))
    for user_id, compare_user_id, similarity in users_similarity_list:
        column_number = user_ids_to_real_position[user_id]
        row_number = user_ids_to_real_position[compare_user_id]
        user_user_similarity_matrix[row_number, column_number] = similarity

    # Rows are movies, columns are users; 0 means "no rating".
    items_users_ratings_matrix = np.zeros((movie_size, user_size))
    for user_id, movie_id, rating in ratings:
        column_number = user_ids_to_real_position[user_id]
        row_number = movie_ids_to_real_position[movie_id]
        items_users_ratings_matrix[row_number, column_number] = rating

    unnormalized_predicted_ratings = np.matmul(items_users_ratings_matrix, user_user_similarity_matrix)

    # 1 where a rating exists, 0 elsewhere: multiplying it by the similarity
    # matrix sums the similarities of exactly the users who rated each movie.
    rated_indicator = (items_users_ratings_matrix != 0).astype(float)
    sum_of_similarities = np.matmul(rated_indicator, user_user_similarity_matrix)

    # Zero denominators are expected (no similar user rated the movie); the
    # resulting nan/inf cells are replaced below, so silence the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = np.divide(unnormalized_predicted_ratings, sum_of_similarities)

    session = Session()
    clear_rating_predictions_table()

    # The fallback average depends only on the movie, so fetch it at most once
    # per movie instead of once per (user, movie) pair.
    fallback_ratings = {}
    progress = 0
    number_to_calculate = len(user_ids_to_real_position)
    for user_id, user_real_position in user_ids_to_real_position.items():
        predictions = []
        for movie_id, movie_real_position in movie_ids_to_real_position.items():
            predicted_rating = round(z[movie_real_position, user_real_position], 1)
            # isfinite rejects nan and also +/-inf, which must not be stored
            # as a rating.
            if predicted_rating == 0 or not np.isfinite(predicted_rating):
                if movie_id not in fallback_ratings:
                    average_movie_rating = session.execute(
                        "SELECT average_rating FROM average_movie_rating WHERE movie_id = :param_movie_id",
                        {'param_movie_id': movie_id}).fetchone()
                    fallback_ratings[movie_id] = average_movie_rating[0]
                predicted_rating = fallback_ratings[movie_id]
            predictions.append(
                {'user_id': user_id, 'movie_id': movie_id,
                 'rating': predicted_rating})
        # One insert + commit per user keeps the pending batch bounded.
        session.bulk_insert_mappings(RatingsPredictions, predictions)
        session.commit()
        progress += 1
        print('Progress: ', round(100 * (progress / number_to_calculate), 2), '%')
예제 #4
0
def create_average_ratings_for_movies_with_ratings(global_average_rating,
                                                   minimum_number_of_ratings,
                                                   movie_ids_to_ratings):
    """Rebuild the average-rating table from per-movie rating lists.

    A movie with fewer than ``minimum_number_of_ratings`` ratings is padded:
    each missing rating counts as ``global_average_rating`` and the total is
    divided by ``minimum_number_of_ratings``. Other movies get the plain
    mean. Every average is rounded to one decimal place.

    Side effects: calls ``clear_average_rating_table()``, prints progress
    every 10000 movies, then bulk-inserts all ``AverageMovieRating`` rows
    and commits.

    Args:
        global_average_rating: Value substituted for each missing rating.
        minimum_number_of_ratings: Threshold below which padding is applied.
        movie_ids_to_ratings: Mapping of movie id to a list of ratings.
    """
    session = Session()
    clear_average_rating_table()

    rows_to_insert = []
    for movie_count, (movie_id, movie_ratings) in enumerate(
            movie_ids_to_ratings.items(), start=1):
        votes = len(movie_ratings)
        if votes < minimum_number_of_ratings:
            # Pad the sum with the global average for every missing vote.
            padding = global_average_rating * (minimum_number_of_ratings - votes)
            mean_rating = (sum(movie_ratings) + padding) / minimum_number_of_ratings
        else:
            mean_rating = sum(movie_ratings) / votes

        rows_to_insert.append(
            {'movie_id': movie_id, 'average_rating': round(mean_rating, 1)})

        if movie_count % 10000 == 0:
            print('Average calculation progress: ', movie_count / 1000, 'k')

    session.bulk_insert_mappings(AverageMovieRating, rows_to_insert)
    session.commit()
예제 #5
0
def save_parameter(description, value):
    """Create or update the ``Management`` row matching *description*.

    If a row with that description exists its ``value`` is overwritten;
    otherwise a new ``Management(description, value)`` is created. The row is
    added to the session and the change is committed.
    """
    session = Session()
    matching_rows = session.query(Management).filter(
        Management.description == description)
    record = matching_rows.first()
    if record is not None:
        record.value = value
    else:
        record = Management(description, value)
    session.add(record)
    # Persist the new or updated record.
    session.commit()
예제 #6
0
import sys
import csv

import numpy as np
import progressbar

from dataAccess import Session, Ratings

session = Session()
# Only the first 10000 ratings are loaded; remove the limit to use all ratings.
ratings_list = session.query(Ratings).limit(10000).all()

# Give every user id and movie id a dense 0-based index, numbered in order of
# first appearance in ratings_list.
user_to_index = {}
movie_to_index = {}
for rating in ratings_list:
    user_to_index.setdefault(rating.user_id, len(user_to_index))
    movie_to_index.setdefault(rating.movie_id, len(movie_to_index))


class Residual:
    """Slotted record holding a value with its current and previous error."""

    __slots__ = ('value', 'current_error', 'prev_error')

    def __init__(self, value, current_error, prev_error):
        """Store the three fields unchanged."""
        (self.value,
         self.current_error,
         self.prev_error) = value, current_error, prev_error
예제 #7
0
import sys

import numpy as np
import progressbar

from dataAccess import Session, Ratings
from dataAccess.entities import RatingsPredictionsBySVD, Sample

session = Session()

# Start from an empty SVD predictions table.
session.query(RatingsPredictionsBySVD).delete()
session.commit()

# Only the first 1000 ratings are loaded; drop the .limit(1000) call to use
# all ratings.
ratings_list = session.query(Ratings).limit(1000).all()
samples = session.query(Sample).all()

# Dense 0-based indices for user and movie ids, in order of first appearance.
user_to_index = {}
movie_to_index = {}

for rating in ratings_list:
    user_to_index.setdefault(rating.user_id, len(user_to_index))
    movie_to_index.setdefault(rating.movie_id, len(movie_to_index))

for sample in samples:
    if sample.user_id not in user_to_index:
        user_to_index[sample.user_id] = len(user_to_index)
    if sample.movie_id not in movie_to_index:
예제 #8
0
def get_parameter(description):
    """Return the first ``Management`` row whose description matches.

    Returns ``None`` when no row matches.
    """
    session = Session()
    matching_rows = session.query(Management).filter(
        Management.description == description)
    return matching_rows.first()
예제 #9
0
def clear_users_similarity_table():
    """Delete every ``UsersSimilarity`` row and commit."""
    session = Session()
    all_similarities = session.query(UsersSimilarity)
    all_similarities.delete()
    session.commit()
예제 #10
0
def calculate_users_similarity_for_all_users():
    """Load all rows of the ratings table and compute user similarities.

    The ``(user_id, movie_id, rating)`` result is passed straight to
    ``calculate_users_similarity``.
    """
    session = Session()
    all_ratings_query = "SELECT user_id, movie_id, rating FROM ratings"
    all_ratings = session.execute(all_ratings_query)
    calculate_users_similarity(all_ratings)
예제 #11
0
def calculate_users_similarity(ratings_list):
    """Compute pairwise user similarities and store them in users_similarity.

    Ratings are grouped per user, then every unordered pair of users is
    compared once. The similarity is ``1 - cosine(...)`` of the two users'
    normalized rating vectors, rounded to three decimals. Pairs whose
    similarity is NaN or below the ``similarity_range_factor`` parameter are
    skipped; every remaining pair is inserted in both directions.

    Side effects: empties the table through
    ``clear_users_similarity_table()``, inserts rows, commits once per outer
    user and prints progress.

    Args:
        ratings_list: Iterable of ``(user_id, movie_id, rating)`` triples.
            User ids must be mutually orderable (compared with ``>``).
    """
    # Group ratings as {user_id: {movie_id: rating}}.
    user_ids_to_movie_ratings = dict()
    for processed_ratings, (user_id, movie_id, rating) in enumerate(
            ratings_list, start=1):
        user_ids_to_movie_ratings.setdefault(user_id, {})[movie_id] = rating
        if processed_ratings % 100000 == 0:
            print('Mapping progress: ', processed_ratings / 1000, 'k')

    clear_users_similarity_table()
    session = Session()
    insert_similarity = (
        "INSERT INTO users_similarity(user_id, compare_user_id, similarity) "
        "VALUES (:user_id, :compare_user_id, :similarity)")
    progress = 0
    number_to_calculate = len(user_ids_to_movie_ratings)
    similarity_range_factor = get_parameter("similarity_range_factor").value
    for compared_user_id, list_of_compared_user_ratings in user_ids_to_movie_ratings.items():
        inner_progress = 0
        for id_of_user_for_comparision, list_of_user_for_comparison_ratings in user_ids_to_movie_ratings.items():
            inner_progress += 1
            if inner_progress % 10000 == 0:
                print('Inner progress: ', inner_progress / 1000, 'k')
            # Handle each unordered pair once: only when the other id is
            # strictly greater. This also excludes comparing a user with
            # itself. The previous test spelled the conjunction with bitwise
            # `&`, which gave this result only by accident of precedence.
            if not id_of_user_for_comparision > compared_user_id:
                continue

            # Presumably filled in place by prepare_vectors_for_comparison
            # with the ratings to compare -- confirm against that helper.
            movie_ids_to_ratings_of_compared_user = dict()
            movie_ids_to_ratings_of_user_for_comparison = dict()
            prepare_vectors_for_comparison(
                list_of_compared_user_ratings,
                list_of_user_for_comparison_ratings,
                movie_ids_to_ratings_of_compared_user,
                movie_ids_to_ratings_of_user_for_comparison)
            normalized_rating_for_compared_user = calculate_normalized_rating_vector(
                movie_ids_to_ratings_of_compared_user.values())
            normalized_rating_for_user_for_comparision = calculate_normalized_rating_vector(
                movie_ids_to_ratings_of_user_for_comparison.values())
            users_similarity = round(
                1 - cosine(normalized_rating_for_compared_user,
                           normalized_rating_for_user_for_comparision), 3)
            if isnan(users_similarity):
                continue
            if users_similarity < similarity_range_factor:
                continue

            # Store the pair in both directions. The rows used to be
            # accumulated in a list as well, which was never read and grew
            # with every stored pair; that list is gone.
            for first_id, second_id in (
                    (compared_user_id, id_of_user_for_comparision),
                    (id_of_user_for_comparision, compared_user_id)):
                session.execute(
                    insert_similarity,
                    {
                        'user_id': first_id,
                        'compare_user_id': second_id,
                        'similarity': users_similarity
                    })
        session.commit()
        progress += 1
        print('Progress: ', round(100 * (progress / number_to_calculate), 2),
              '%')
예제 #12
0
def calculate_all_predictions():
    """Load all ratings and user similarities and compute the predictions.

    Both query results are handed to
    ``calculate_predicted_ratings_based_on_user_similarity``.
    """
    session = Session()
    all_ratings = session.execute(
        "SELECT user_id, movie_id, rating FROM ratings")
    all_similarities = session.execute(
        "SELECT user_id, compare_user_id, similarity FROM users_similarity")
    calculate_predicted_ratings_based_on_user_similarity(
        all_ratings, all_similarities)
예제 #13
0
def clear_rating_predictions_table():
    """Delete every ``RatingsPredictions`` row and commit."""
    session = Session()
    all_predictions = session.query(RatingsPredictions)
    all_predictions.delete()
    session.commit()
예제 #14
0
def clear_average_rating_table():
    """Delete every ``AverageMovieRating`` row and commit."""
    session = Session()
    all_averages = session.query(AverageMovieRating)
    all_averages.delete()
    session.commit()
예제 #15
0
def calculate_average_ratings_for_all_movies():
    """Load every ``(movie_id, rating)`` row and compute the movie averages.

    The query result is passed straight to ``calculate_average_rating``.
    """
    session = Session()
    movie_ratings_query = "SELECT movie_id, rating FROM ratings"
    movie_ratings = session.execute(movie_ratings_query)
    calculate_average_rating(movie_ratings)