Python Data.ratings Examples

Programming Language: Python

Namespace/Package Name: util.data

Class/Type: Data

Method/Function: ratings

Examples at hotexamples.com: 3

Python Data.ratings - 3 examples found. These are the top-rated real-world Python examples of util.data.Data.ratings, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

movie_meta(21)

Data(14)

set_data(7)

ratings(3)

load(3)

add_timer(2)

add_to_array(2)

start_timer(2)

sample_timer(2)

reset_timers(2)

loadCoNLL(2)

add_arrays(2)

add_timers(2)

print_times(2)

save(1)

ratings_as_series(1)

predictWithFeature(1)

push(1)

init(1)

get_test_case(1)

get_sql(1)

get_keys(1)

async_save(1)

temp_save(1)

Example #1

Show file

def get_movies_with_similar_genres(movie_id: int, n: int = 5, popularity_bias: bool = False,
                                   user_bias: bool = False, movies: pd.DataFrame = None):
    """Score every other movie by how many genres it shares with ``movie_id``.

    The genres column is taken from ``movies`` when given, otherwise from
    ``Data.movie_meta()``.  The base movie is removed, the remaining rows are
    passed through ``count_elements_in_set`` and only rows with a result
    greater than zero are kept.

    :param movie_id: index label of the base movie in the genres column
    :param n: only used when ``user_bias`` is set, where the ``n * 10`` largest
        genre counts are kept before the ratings are merged in; ``None`` is
        treated as 5
    :param popularity_bias: only consulted when ``user_bias`` is set; selects
        the ``(mean ** 3) * count * genres`` formula instead of ``mean * genres``
    :param user_bias: combine the genre count with per-movie rating statistics
        from ``Data.ratings()``
    :param movies: optional frame to use instead of ``Data.movie_meta()``
    :return: the filtered genre counts (``user_bias`` off) or the result of
        evaluating the chosen formula on the merged frame (``user_bias`` on)
    """
    n = 5 if n is None else n

    # pick the caller-supplied frame when there is one, the full meta data otherwise
    source = Data.movie_meta() if movies is None else movies
    genre_column = source[Column.genres.value]

    # NOTE(review): eval() executes whatever text is stored in the genres cell.
    # Presumably the cell holds a Python literal written by the project itself;
    # if the data can come from outside, switch to a literal-only parser.
    base_genres = eval(genre_column.loc[movie_id])
    candidates = genre_column.drop(movie_id)

    # number of genres each remaining movie has in common with the base movie
    shared = candidates.apply(
        lambda row: count_elements_in_set(row, base_genres)
    )
    # movies without a single genre in common are dropped
    shared = shared[shared > 0]

    if not user_bias:
        return shared

    # limit the candidates to the n * 10 best genre matches
    shortlist = shared.nlargest(n * 10)

    # mean rating and number of ratings per movie; selecting the rating column
    # strips the outer level of the (rating: (mean, count)) column index
    per_movie = Data.ratings().groupby(str(Column.movie_id))
    stats: pd.DataFrame = per_movie.agg(['mean', 'count'])[str(Column.rating)]

    # one frame holding mean, count and the genre count
    combined = pd.merge(stats, pd.DataFrame(shortlist), left_index=True, right_index=True)

    # Both formulas multiply the genre count back in so that well rated movies
    # with little genre overlap do not dominate.  With popularity_bias the mean
    # is cubed (keeps a notion of good vs. bad) and weighted by the rating count.
    # The formulas refer to a column called `genres`, which assumes that
    # Column.genres.value == 'genres' -- TODO confirm.
    formula = '(mean ** 3) * count * genres' if popularity_bias else 'mean * genres'
    return combined.eval(formula)

Example #2

Show file

File: users_who_enjoy_this_also_like.py Project: madmini/recommender_systems_group4

def recommend_movies(movie_id: int, n: int = 5, filter_below_avg_ratings: bool = False, popularity_bias: bool = False) \
        -> List[int]:
    """Score movies rated by the users who also rated ``movie_id``.

    :param movie_id: the base movie
    :param n: not used inside this function
    :param filter_below_avg_ratings: only keep ratings of the base movie that
        are at least as high as its mean rating before looking up the users'
        other ratings
    :param popularity_bias: return ``(mean ** 3) * count`` per movie instead
        of the plain mean rating
    :return: per-movie scores as produced by the aggregated ratings frame.
        NOTE(review): the annotation says ``List[int]``, but the value comes
        straight from ``eval`` / column selection on that frame -- confirm
        what callers expect.
    :raises MissingDataException: if the base movie has no ratings, or if no
        other movie remains after the join
    """
    all_ratings = Data.ratings()

    # ratings that were given to the base movie
    base_ratings = all_ratings.query('movie_id == %s' % movie_id)
    if base_ratings.empty:
        raise MissingDataException('no ratings for movie_id %s' % movie_id)

    if filter_below_avg_ratings:
        # keep the at-least-average ratings only
        # (query was found to be faster than boolean-mask subscription here)
        threshold = base_ratings['rating'].mean()
        base_ratings = base_ratings.query('rating >= %f' % threshold)

    # Left join of the base movie's ratings against all ratings via user_id,
    # pulling in everything those users rated.
    # NOTE(review): DataFrame.join matches `on` against the index of the other
    # frame, so this presumably relies on the ratings being indexed by user_id.
    joined = base_ratings.join(all_ratings, on='user_id', lsuffix='_L')

    # keep the un-suffixed columns only (the overlapping ones from the left
    # side carry '_L') and drop the base movie itself
    others = joined[['movie_id', 'rating']].query('movie_id != %s' % movie_id)
    if others.empty:
        raise MissingDataException(
            'no other ratings from users that rated movie_id %s' % movie_id)

    # mean rating and number of ratings per movie; selecting 'rating' strips
    # the outer level of the (rating: (mean, count)) column index
    stats: pd.DataFrame = others.groupby('movie_id').agg(['mean', 'count'])['rating']

    if popularity_bias:
        # cube the mean (keeps a notion of good vs. bad ratings) and weight it
        # by the number of ratings (~popularity)
        return stats.eval('(mean ** 3) * count')
    return stats['mean']

Example #3

Show file

File: meta_mix.py Project: madmini/recommender_systems_group4

def recommend_movie_meta(movie_id: int,
                         n: int = 5,
                         popularity_bias: bool = False,
                         user_bias: bool = False):
    """Score movies against ``movie_id`` using several meta data columns.

    Candidates are first restricted to movies with the same ``tmdb_adult`` and
    ``imdb_color`` values as the base movie and then passed through
    ``genre_filter.get_movies_with_similar_genres``.  For the remaining movies
    ``calculate_column`` is applied for actors, directors, keywords,
    production countries and release year, and ``compute_score`` turns the
    result into a score.

    :param movie_id: index label of the base movie in ``Data.movie_meta()``
    :param n: forwarded to ``genre_filter.get_movies_with_similar_genres``
    :param popularity_bias: only consulted when ``user_bias`` is set; selects
        ``((mean * score) ** 3) * count`` instead of ``mean * score``
    :param user_bias: combine the score with per-movie rating statistics from
        ``Data.ratings()``
    :return: the output of ``compute_score`` (``user_bias`` off) or the result
        of evaluating the chosen formula on the merged frame (``user_bias`` on)
    """
    meta = Data.movie_meta()
    # meta data row of the base movie
    base = meta.loc[movie_id, :]

    # only movies that agree with the base movie on adult flag and color
    adult_filter = 'tmdb_adult == {}'.format(base['tmdb_adult'])
    candidates = meta.query(adult_filter)
    color_filter = 'imdb_color == "{}"'.format(base['imdb_color'])
    candidates = candidates.query(color_filter)

    # genre based filtering on the reduced candidate set
    genre_matches = genre_filter.get_movies_with_similar_genres(
        movie_id, n, movies=candidates)

    # attach the genre result to the candidates' meta data; after the merge the
    # '<genres>_x' column is renamed back to the plain genres column name
    genres_name = Column.genres.value
    merged = pd.merge(pd.DataFrame(genre_matches),
                      candidates,
                      left_index=True,
                      right_index=True)
    merged = merged.rename(columns={"{}_x".format(genres_name): genres_name})

    # prepare the columns used by the score: same call for each listed column,
    # in this order, and release_year additionally flagged with year=True
    for column in ('actors', 'directors', 'tmdb_keywords',
                   'tmdb_production_countries'):
        merged = calculate_column(merged, base, column)
    merged = calculate_column(merged, base, 'release_year', year=True)

    score = compute_score(merged)

    if not user_bias:
        return score

    # ratings of every movie except the base movie, limited to the candidates
    other_ratings = Data.ratings().query('movie_id != %s' % movie_id)
    rated_candidates = pd.merge(other_ratings,
                                merged,
                                left_on='movie_id',
                                right_index=True)

    # mean rating and number of ratings per movie; selecting 'rating' strips
    # the outer level of the (rating: (mean, count)) column index
    stats: pd.DataFrame = rated_candidates.groupby('movie_id').agg(
        ['mean', 'count'])['rating']

    # one frame holding mean, count and score; the score column arrives under
    # the label 0 and is renamed so the formulas below can refer to it
    combined = pd.merge(stats,
                        pd.DataFrame(score),
                        left_index=True,
                        right_index=True)
    combined = combined.rename(columns={0: 'score'})

    # Both formulas multiply the score in so that well rated movies with a low
    # meta data score do not dominate.  With popularity_bias the product is
    # cubed and weighted by the number of ratings (~popularity).
    formula = ('((mean * score) ** 3) * count'
               if popularity_bias else 'mean * score')
    return combined.eval(formula)