Ejemplo n.º 1
0
def analyze_user(user_id, overwrite):
    """Analyze a user, to find out which predictor works best for them.

    :param user_id: A user ID. The user for which an analysis is being
        performed.
    :param overwrite: If a user has already been analyzed, should the analysis
        be overwritten?
    :returns: Nothing.
    """
    with common.get_db_conn() as conn:
        # What if a user already has a predictor?
        if not overwrite and conn.execute(
                'SELECT * FROM predictors WHERE userId=?',
                (user_id,)
        ).fetchall():
            return

        # Create a personalized predictor for this user. SQLite added
        # support for UPSERT in version 3.24.0, which was released on
        # 2018-06-24. See: https://www.sqlite.org/lang_UPSERT.html
        sses = calc_sse(user_id)
        predictor = min_sse(sses)
        with conn:
            conn.execute(
                """
                INSERT INTO predictors VALUES (?, ?)
                ON CONFLICT (userId) DO UPDATE SET predictor=?
                """,
                (user_id, predictor, predictor)
            )
Ejemplo n.º 2
0
def similarity(movie_a, movie_b):
    """Return the similarity score for the two given movies.

    :param movie_a: A movie ID.
    :param movie_b: A movie ID.
    :return: The similarity score for the pair of movies.
    :raise movie_recommender.exceptions.MissingSimilarityError: If no
        similarity score has been computed for this pair of movies.
    """
    movies = [movie_a, movie_b]
    movies.sort()
    with common.get_db_conn() as conn:
        row = conn.execute(
            """
            SELECT similarity
            FROM similarities
            WHERE movieAId=? AND movieBId=?
            """,
            movies,
        ).fetchone()
    if not row:
        raise exceptions.MissingSimilarityError(f"""
            A similarity score hasn't been computed for movies {movies[0]} and
            {movies[1]}.
            """)
    return row[0]
Ejemplo n.º 3
0
def rating_pairs(movie_a, movie_b):
    """Count the number of rating pairs for the given movies.

    See :meth:`movie_recommender.db.read.rating_pairs`.

    :param movie_a: A movie ID.
    :param movie_b: A movie ID.
    :return: An integer. The number of movie rating pairs for the given movies.
    """
    if movie_a == movie_b:
        raise ValueError(f"""
            Fetching pairs of ratings for a movie and itself is disallowed.
            Movie IDs: {movie_a}, {movie_b}
            """)
    movies = [movie_a, movie_b]
    movies.sort()
    with common.get_db_conn() as conn:
        row = conn.execute(
            """
            SELECT COUNT (*)
            FROM (
                SELECT userId, rating
                FROM ratings
                WHERE movieId = ?
            ) movieARatings
            INNER JOIN (
                SELECT userId, rating
                FROM ratings
                WHERE movieId = ?
            ) movieBRatings
            WHERE movieARatings.userId = movieBRatings.userId
            """,
            movies,
        ).fetchone()
    return row[0]
Ejemplo n.º 4
0
def similar_movies_for_user(movie, user):
    """Yield movies similar to ``movie`` that ``user`` has rated.

    .. NOTE:: A "similar" movie is one with a non-zero similarity score. This
        includes negative similarity scores!

    :param movie: A movie ID.
    :param user: A user ID.
    :return: A generator yielding tuples of the form ``(movie_id,
        similarity)``.
    """
    rated_movies_ = rated_movies((user, ))
    with common.get_db_conn() as conn:
        # There's probably some clever technique for expressing the following
        # queries as a single SQL query.
        for row in conn.execute(
                """
                SELECT movieAId, similarity
                FROM similarities
                WHERE movieBId = ? AND similarity != 0
                """, (movie, )):
            if row[0] in rated_movies_:
                yield row
        for row in conn.execute(
                """
                SELECT movieBId, similarity
                FROM similarities
                WHERE movieAId = ? AND similarity != 0
                """, (movie, )):
            if row[0] in rated_movies_:
                yield row
Ejemplo n.º 5
0
def all_movies():
    """Yield the IDs of every movie.

    :return: A generator that yields movie IDs.
    """
    with common.get_db_conn() as conn:
        for row in conn.execute('SELECT DISTINCT movieId FROM movies'):
            yield row[0]
Ejemplo n.º 6
0
def user_ids():
    """Count the number of users in the current dataset.

    :return: An integer.
    """
    with common.get_db_conn() as conn:
        return conn.execute(
            'SELECT COUNT(DISTINCT userId) FROM ratings').fetchone()[0]
Ejemplo n.º 7
0
def avg_ratings():
    """Count the number of entries in the "avgRatings" table.

    :return: An integer.
    """
    with common.get_db_conn() as conn:
        return conn.execute(
            'SELECT COUNT(DISTINCT userId) FROM avgRatings').fetchone()[0]
Ejemplo n.º 8
0
def users():
    """Get the ID of every user.

    :return: A tuple of user IDs.
    """
    with common.get_db_conn() as conn:
        return {
            row[0]
            for row in conn.execute('SELECT DISTINCT userId FROM ratings')
        }
Ejemplo n.º 9
0
def users_in_avg_ratings():
    """Get the ID of every user in the avgRatings table.

    :return: A tuple of user IDs.
    """
    with common.get_db_conn() as conn:
        return {
            row[0]
            for row in conn.execute('SELECT DISTINCT userId FROM avgRatings')
        }
Ejemplo n.º 10
0
def genres(movie_id):
    """Get the genres of the given movie.

    :param movie_id: A movie ID.
    :return: An iterable of genres, as strings.
    """
    with common.get_db_conn() as conn:
        genres_strings = tuple(row[0] for row in conn.execute(
            'SELECT genres FROM movies WHERE movieId=?', (movie_id, )))
    assert len(genres_strings) == 1
    return genres_strings[0].split('|')
Ejemplo n.º 11
0
def unrated_movies(user_id):
    """Count the number of movies the given user hasn't rated.

    :param user_id: A user ID.
    :return: An integer.
    """
    with common.get_db_conn() as conn:
        return conn.execute(
            """
            SELECT COUNT(DISTINCT movieId) FROM movies WHERE movieId NOT IN
            (SELECT DISTINCT movieId FROM ratings WHERE userId=?)
            """, (user_id, )).fetchone()[0]
Ejemplo n.º 12
0
def title(movie_id):
    """Get the title of the given movie.

    :param movie_id: A movie ID.
    :return: The title of the given movie.
    """
    with common.get_db_conn() as conn:
        row = conn.execute('SELECT title FROM movies WHERE movieId=?',
                           (movie_id, )).fetchone()
    if not row:
        raise ValueError(f'Movie ID {movie_id} not in database.')
    return row[0]
Ejemplo n.º 13
0
def unrated_movies(user_id):
    """Yield the ID of each movie the given user hasn't rated.

    :param user_id: A user ID.
    :return: A generator that yields movie IDs.
    """
    with common.get_db_conn() as conn:
        for row in conn.execute(
                """
                SELECT DISTINCT movieId FROM movies WHERE movieId NOT IN
                (SELECT DISTINCT movieId FROM ratings WHERE userId=?)
                """, (user_id, )):
            yield row[0]
Ejemplo n.º 14
0
def rating(user_id, movie_id):
    """Get the rating that the given user gave to the given movie.

    :param user_id: A user ID.
    :param movie_id: A movie ID.
    :return: A movie rating. (A float.)
    """
    with common.get_db_conn() as conn:
        ratings = tuple(row[0] for row in conn.execute(
            'SELECT rating FROM ratings WHERE userId=? and movieId=?', (
                user_id, movie_id)))
    assert len(ratings) == 1
    return ratings[0]
Ejemplo n.º 15
0
def avg_user_rating(user):
    """Calculate the average of a user's movie ratings.

    :param user: A user ID. The user whose average ratings are being computed.
    :return: A value such as 3.5.
    """
    with common.get_db_conn() as conn:
        return conn.execute(
            """
            SELECT AVG(rating)
            FROM ratings
            WHERE userId=?
            """, (user, )).fetchone()[0]
Ejemplo n.º 16
0
def cpop_db(dataset):
    """Create and populate a new database.

    More specifically:

    * Create database tables for the datasets.
    * Create database tables for calculated data. (i.e. Create a table which
      maps userId → predictorName.)
    * Populate the dataset tables.

    :param dataset: The dataset to populate the new database with. Use one of
        the keys from :data:`movie_recommender.constants.DATASETS`.
    :return: Nothing
    :raises DatabaseAlreadyExistsError: If the target database already exists.
    :raises DatasetAbsentError: If the referenced dataset isn't installed.
    """
    # Check whether a conflicting database exists.
    save_path = Path(common.get_save_path())
    if save_path.exists():
        raise exceptions.DatasetAbsentError(
            "Can't create a new database, as a file already exists at: {}".
            format(save_path), )

    # Check whether the dataset is installed or not.
    installed_datasets = datasets.get_installed_datasets()
    if dataset not in installed_datasets:
        raise exceptions.DatabaseAlreadyExistsError(
            "Can't create a database from the {} dataset, as it isn't "
            'installed.'.format(dataset))

    # Create and populate a new database.
    with common.get_db_conn(save_path) as conn:
        cpop_links_table(
            conn,
            Path(installed_datasets[dataset], 'links.csv'),
        )
        cpop_movies_table(
            conn,
            Path(installed_datasets[dataset], 'movies.csv'),
        )
        cpop_ratings_table(
            conn,
            Path(installed_datasets[dataset], 'ratings.csv'),
        )
        cpop_tags_table(
            conn,
            Path(installed_datasets[dataset], 'tags.csv'),
        )
        c_predictors_table(conn)
        c_similarities_table(conn)
        c_avg_ratings_table(conn)
Ejemplo n.º 17
0
def rating_pairs(movie_a, movie_b):
    """Yield pairs of ratings for the given movies.

    Logically, this function generates a table in the following form. Notice
    that the table is perfectly dense.

    ====  =======  =======
    User  Movie A  Movie B
    ====  =======  =======
    1     1.0      0.5
    26    4.0      1.5
    2     5.0      0.5
    ====  =======  =======

    :param movie_a: A movie ID. A movie to get ratings for.
    :param movie_b: A movie ID. A movie to get ratings for.
    :rtype movie_recommender.db.RatingPair:
    :return: A generator that yields every pair of ratings for the two given
        movies.
    :raise: ``ValueError`` if ``movie_a`` and ``movie_b`` are equal.
    """
    if movie_a == movie_b:
        raise ValueError(f"""
            Fetching pairs of ratings for a movie and itself is disallowed.
            Movie IDs: {movie_a}, {movie_b}
            """)
    movies = [movie_a, movie_b]
    movies.sort()
    with common.get_db_conn() as conn:
        for row in conn.execute(
                """
                SELECT
                    movieARatings.userId,
                    movieARatings.rating,
                    movieBRatings.rating
                FROM (
                    SELECT userId, rating
                    FROM ratings
                    WHERE movieId = ?
                ) movieARatings
                INNER JOIN (
                    SELECT userId, rating
                    FROM ratings
                    WHERE movieId = ?
                ) movieBRatings
                WHERE movieARatings.userId = movieBRatings.userId
                """, movies):
            yield common.RatingPair(row[0], row[1], row[2])
Ejemplo n.º 18
0
def make_genre_predictor(genre, user_id, forbidden_movie=None):
    """Make a predictor for the given genre for the given user.

    :param genre: A genre name, as a string.
    :param user_id: A user ID. The user for which a predictor is being created.
    :param forbidden_movie: A movie ID. A movie to ignore when creating the
        predictor.
    :return: A function which accepts a movie ID and returns a predicted
        rating.
    """
    query = """
            SELECT movies.genres, ratings.rating
            FROM movies JOIN ratings USING (movieId)
            WHERE ratings.userId == ?
            """
    params = [user_id]
    if forbidden_movie:
        query += 'AND movieId != ?'
        params.append(forbidden_movie)

    # Iterate through movies this user has rated. For each movie, create a
    # Cartesian point, where X is whether the move has the given genre, and Y
    # is the rating this user has given to this movie.
    points = []
    with common.get_db_conn() as conn:
        for row in conn.execute(query, params):
            genres = row[0].split('|')
            genre_present = 1 if genre in genres else 0
            rating = row[1]
            points.append(Point(genre_present, rating))
    graph = Graph(points)

    def predictor(movie_id):
        """Predict a user's rating for the given movie.

        :param movie_id: A movie ID.
        :return: A predicted rating for the given movie.
        """
        genres = read.genres(movie_id)
        genre_present = 1 if genre in genres else 0
        try:
            rating = graph.predict_y(genre_present)
        except exceptions.VerticalLineOfBestFitGraphError:
            rating = graph.avg_point.y
        return clamp_rating(rating)

    return predictor
Ejemplo n.º 19
0
def avg_rating(user_id):
    """Get the average of a user's ratings, from the avgRatings table.

    :param user_id: A user ID. The user whose average rating is being fetched.
    :return: An average rating, such as 3.5.
    """
    with common.get_db_conn() as conn:
        row = conn.execute(
            """
            SELECT avgRating FROM avgRatings WHERE userId=?
            """,
            (user_id, ),
        ).fetchone()
    if not row:
        raise exceptions.MissingAverageRatingError(
            f'No average rating for user {user_id} has been calculated.')
    return row[0]
Ejemplo n.º 20
0
def predictor_name(user_id):
    """Get the personalized predictor name for the given user.

    :param user_id: A user ID. The user for which the personalized predictor
        name is being fetched.
    :return: The name of the personalized predictor for the given user.
    :raise movie_recommender.exceptions.NoPersonalizedPredictorError: If the
        given user doesn't have a personalized predictor.
    """
    with common.get_db_conn() as conn:
        row = conn.execute('SELECT predictor FROM predictors WHERE userId=?',
                           (user_id, )).fetchone()
    if not row:
        raise exceptions.NoPersonalizedPredictorError(
            f'User {user_id} has no personalized predictors. Please generate '
            'one with "mr-analyze".', )
    return row[0]
Ejemplo n.º 21
0
def similarities(similarities_):
    """Write movies similarity scores to the database.

    :param similarities_: An iterable of
        :class:`movie_recommender.db.common.Similarity` objects.
    """
    # SQLite added support for UPSERT in version 3.24.0, which was released on
    # 2018-06-24. See: https://www.sqlite.org/lang_UPSERT.html
    with common.get_db_conn() as conn:
        with conn:
            conn.executemany(
                """
                INSERT INTO similarities VALUES (?, ?, ?)
                ON CONFLICT (movieAId, movieBId) DO UPDATE SET similarity=?
                """,
                _similarities_values(similarities_),
            )
Ejemplo n.º 22
0
def to_user_id(arg):
    """Cast the given string argument to a user ID, if possible.

    An exception of some kind is raised if ``arg`` can't be cast to a user ID.
    The specific type of exception varies.

    :param arg: A string argument passed on the command line. Semantically, a
        user ID.
    :return: A user ID.
    """
    user_id = int(arg)
    with common.get_db_conn() as conn:
        user_ids = tuple(row[0] for row in conn.execute(
            'SELECT DISTINCT userId FROM ratings WHERE userId=?', (user_id, )))
    if user_id not in user_ids:
        raise ValueError(f'User ID {user_id} not in database.')
    return user_id
Ejemplo n.º 23
0
def avg_ratings(avg_ratings_):
    """Write user average ratings to the database.

    :param avg_ratings_: An iterable of
        :class:`movie_recommender.db.common.AvgRating` objects.
    """
    with common.get_db_conn() as conn:
        with conn:
            values = (avg_rating + (avg_rating.avg_rating, )
                      for avg_rating in avg_ratings_)
            conn.executemany(
                """
                INSERT INTO avgRatings VALUES (?, ?)
                ON CONFLICT (userId) DO UPDATE SET avgRating=?
                """,
                values,
            )
Ejemplo n.º 24
0
def rated_movies(user_ids):
    """Get the IDs of the movies the given users have rated.

    :param user_ids: An iterable of user IDs.
    :return: A set of movie IDs.
    """
    with common.get_db_conn() as conn:
        return {
            row[0]
            for row in conn.execute(
                f"""
                SELECT DISTINCT movieId FROM ratings WHERE userId IN ({
                ', '.join('?' for _ in range(len(user_ids)))
                })
                """,
                tuple(user_ids),
            )
        }
Ejemplo n.º 25
0
def avg_movie_rating(movie):
    """Calculate the average of a movie's ratings.

    :param movie: A movie ID. The movie whose average rating is being computed.
    :return: A value such as 3.5.
    :raise movie_recommender.exceptions.NoMovieRatingsError: If an average
        can't be calculated due to a lack of ratings.
    """
    with common.get_db_conn() as conn:
        avg = conn.execute(
            """
            SELECT AVG(rating)
            FROM ratings
            WHERE movieId=?
            """, (movie, )).fetchone()[0]
    if avg is None:
        raise exceptions.NoMovieRatingsError(
            f"Can't calculate the average rating for movie {movie}, as no "
            'ratings have been assigned to it.')
    return avg
Ejemplo n.º 26
0
def make_year_predictor(user_id, forbidden_movie=None):
    """Make a year-based predictor for the given user.

    If year information can't be extracted from a movie's title, then that
    movie is skipped when generating a predictor. This is done because so few
    movies have this issue. See:
    `class:`movie_recommender.exceptions.NoMovieYearError`.

    :param user_id: A user ID. The user for which a predictor is being created.
    :param forbidden_movie: A movie ID. A movie to ignore when creating the
        predictor.
    :return: A function which accepts a movie ID and returns a predicted
        rating.
    """
    query = """
            SELECT movies.title, ratings.rating
            FROM movies JOIN ratings USING (movieId)
            WHERE ratings.userId == ?
            """
    params = [user_id]
    if forbidden_movie:
        query += 'AND movieId != ?'
        params.append(forbidden_movie)

    # Iterate through movies this user has rated. For each movie, create a
    # Cartesian point, where X is the movie's year, and Y is the rating this
    # user has given to this movie.
    points = []
    with common.get_db_conn() as conn:
        for row in conn.execute(query, params):
            try:
                year = read.year(row[0])
            except exceptions.NoMovieYearError:
                continue
            rating = row[1]
            points.append(Point(year, rating))
    graph = Graph(points)

    def predictor(movie_id):
        """Predict a user's rating for the given movie.

        :param movie_id: A movie ID.
        :return: A predicted rating for the given movie.
        :raise movie_recommender.exceptions.NoMovieYearError: If the given
            movie's title doesn't include a year, and this predictor makes use
            of year data.
        :raise movie_recommender.exceptions.EmptyGraphError: If this predictor
            can't predict movie ratings at all, due to a lack of relevant data.
            For example, this will occur if the given movie's title does
            include a year, but all of the movies this user has rated lack a
            year.
        """
        title = read.title(movie_id)
        year = read.year(title)
        try:
            rating = graph.predict_y(year)
        except exceptions.VerticalLineOfBestFitGraphError:
            rating = graph.avg_point.y
        return clamp_rating(rating)

    return predictor