Example #1
0
def infer_true_movie_ratings(num_observations=-1):
    """
    For every movie, computes the posterior distribution and MAP estimate of
    the movie's true/inherent rating given the movie's observed ratings.

    Input
    -----
    - num_observations: integer that specifies how many available ratings to
        use per movie. Any negative value (including the default of -1)
        means that all available ratings are used; a non-negative value
        keeps only the first `num_observations` ratings of each movie.

    Output
    ------
    - posteriors: a 2D array consisting of the posterior distributions where
        the number of rows is the number of movies, and the number of columns
        is M, i.e., the number of possible ratings (remember ratings are
        0, 1, ..., M-1); posteriors[i] gives a length M vector that is the
        posterior distribution of the true/inherent rating of the i-th movie
        in `movie_data_helper.get_movie_id_list()`
    - MAP_ratings: a 1D array with length given by the number of movies;
        MAP_ratings[i] gives the true/inherent rating with the highest
        posterior probability in the distribution `posteriors[i]`
    """

    M = 11  # all of our ratings are between 0 and 10
    prior = np.array([1.0 / M] * M)  # uniform distribution
    likelihood = compute_movie_rating_likelihood(M)

    # get the list of all movie IDs to process
    movie_id_list = movie_data_helper.get_movie_id_list()
    num_movies = len(movie_id_list)

    # -------------------------------------------------------------------------
    # PART (d): per-movie posterior and MAP estimate
    #
    # These are the output variables, one row/entry per movie.
    posteriors = np.zeros((num_movies, M))
    MAP_ratings = np.zeros(num_movies)

    # Rows follow the position in movie_id_list, while ratings are fetched by
    # the actual movie ID, so the two never have to coincide.
    for row, movie_id in enumerate(movie_id_list):
        ratings = movie_data_helper.get_ratings(movie_id)

        # A plain `ratings[:-1]` would drop the last rating, so only slice
        # when an explicit (non-negative) limit was requested.
        if num_observations >= 0:
            ratings = ratings[:num_observations]

        posteriors[row, :] = compute_posterior(prior, likelihood, ratings)
        MAP_ratings[row] = posteriors[row, :].argmax()

    #
    # END OF PART (d)
    # -------------------------------------------------------------------------

    return posteriors, MAP_ratings
Example #2
0
def infer_true_movie_ratings(num_observations=-1):
    """
    For every movie, computes the posterior distribution and MAP estimate of
    the movie's true/inherent rating given the movie's observed ratings.

    Input
    -----
    - num_observations: integer that specifies how many available ratings to
        use per movie. Any negative value (including the default of -1)
        means that all available ratings are used; a non-negative value
        keeps only the first `num_observations` ratings of each movie.

    Output
    ------
    - posteriors: a 2D array consisting of the posterior distributions where
        the number of rows is the number of movies, and the number of columns
        is M, i.e., the number of possible ratings (remember ratings are
        0, 1, ..., M-1); posteriors[i] gives a length M vector that is the
        posterior distribution of the true/inherent rating of the i-th movie
        in `movie_data_helper.get_movie_id_list()`
    - MAP_ratings: a 1D array with length given by the number of movies;
        MAP_ratings[i] gives the true/inherent rating with the highest
        posterior probability in the distribution `posteriors[i]`
    """
    M = 11  # all of our ratings are between 0 and 10
    prior = np.array([1.0 / M] * M)  # uniform distribution
    likelihood = compute_movie_rating_likelihood(M)

    # get the list of all movie IDs to process
    movie_id_list = movie_data_helper.get_movie_id_list()
    num_movies = len(movie_id_list)

    # Allocate output variables.
    posteriors = np.zeros((num_movies, M))
    MAP_ratings = np.zeros(num_movies)

    for i, movie_id in enumerate(movie_id_list):
        ratings = movie_data_helper.get_ratings(movie_id)

        # Truncate the ratings only when a limit was requested. Slicing with
        # the default -1 (`ratings[:-1]`) would silently drop the last rating
        # instead of keeping all of them.
        if num_observations >= 0:
            ratings = ratings[:num_observations]

        # Compute the posterior probability.
        posteriors[i, :] = compute_posterior(prior, likelihood, ratings)

        # MAP Rating is simply the rating with maximum posterior probability.
        MAP_ratings[i] = np.argmax(posteriors[i, :])

    return posteriors, MAP_ratings
Example #3
0
def infer_true_movie_ratings(num_observations=-1):
    """
    For every movie, computes the posterior distribution and MAP estimate of
    the movie's true/inherent rating given the movie's observed ratings.

    Input
    -----
    - num_observations: integer that specifies how many available ratings to
        use per movie. A positive value keeps only the first
        `num_observations` ratings of each movie; any other value (including
        the default of -1) means that all available ratings are used.

    Output
    ------
    - posteriors: a 2D array consisting of the posterior distributions where
        the number of rows is the number of movies, and the number of columns
        is M, i.e., the number of possible ratings (remember ratings are
        0, 1, ..., M-1); posteriors[i] gives a length M vector that is the
        posterior distribution of the true/inherent rating of the i-th movie
        in `movie_data_helper.get_movie_id_list()`
    - MAP_ratings: a 1D array with length given by the number of movies;
        MAP_ratings[i] gives the true/inherent rating with the highest
        posterior probability in the distribution `posteriors[i]`
    """

    M = 11  # ratings take the values 0, 1, ..., 10
    prior = np.array([1.0 / M] * M)  # uniform distribution
    likelihood = compute_movie_rating_likelihood(M)

    movie_id_list = movie_data_helper.get_movie_id_list()
    num_movies = len(movie_id_list)

    posteriors = np.zeros((num_movies, M))
    MAP_ratings = np.zeros(num_movies)

    # Index the outputs by position in movie_id_list rather than by the movie
    # ID itself: using the ID as a row index is only valid when the IDs happen
    # to be exactly 0..num_movies-1 (otherwise rows are misplaced or the
    # index is out of bounds).
    for row, movie_id in enumerate(movie_id_list):
        ratings = movie_data_helper.get_ratings(movie_id)
        if num_observations > 0:
            ratings = ratings[:num_observations]
        movie_posterior = compute_posterior(prior, likelihood, ratings)
        posteriors[row] = movie_posterior
        MAP_ratings[row] = np.argmax(movie_posterior, axis=0)

    return posteriors, MAP_ratings
def compute_true_movie_rating_posterior_entropies_fast():
    """
    Plots the posterior entropy, averaged over all movies, as a function of
    the number of ratings used (1 through N = 200).

    Each movie's ratings are fetched once; for every prefix length nObs the
    first nObs ratings are passed to `infer_true_movie_ratings_fast` and the
    entropy of the returned posterior is stored.

    Output
    ------
    None. The mean entropy curve is handed to `plt.plot`; this function does
    not call `plt.show()` itself.
    """
    movie_id_list = movie_data_helper.get_movie_id_list()
    num_movies = len(movie_id_list)
    N = 200  # largest number of observations considered per movie

    # finAns[row, k] holds the entropy for the row-th movie of movie_id_list
    # when its first k + 1 ratings are used.
    finAns = np.zeros((num_movies, N))

    # Rows are addressed by position, not by movie ID, so the result does not
    # depend on the IDs being exactly 0..num_movies-1.
    for row, movie_id in enumerate(movie_id_list):
        print("Working on {0} of {1} movies".format(movie_id, num_movies))
        allRatings = movie_data_helper.get_ratings(movie_id)
        for nObs in range(1, N + 1):
            ratings = allRatings[:nObs]
            # NOTE(review): infer_true_movie_ratings_fast is defined outside
            # this view; it is assumed to return (posterior, <something>).
            posterior, _ = infer_true_movie_ratings_fast(
                movie_id_list, ratings)

            finAns[row, nObs - 1] = compute_entropy(posterior)

    # Average over movies (axis 0) and plot entropy vs. observation count.
    tmp = finAns.mean(axis=0)
    plt.plot(tmp)
def plot_entropies():
    """
    Plots the posterior entropy, averaged over all movies, against the number
    of ratings used.

    The ratings of every movie are read only once (at most the first 200 per
    movie). For each movie and each j in 0, 1, ..., 199 the posterior is
    computed from the first j ratings, so column 0 corresponds to using no
    observations at all and column 199 to using 199 observations.

    Output
    ------
    None. The mean entropy curve is drawn with matplotlib and shown via
    `plt.show()`.
    """
    M = 11  # all of our ratings are between 0 and 10
    max_observations = 200  # number of prefix lengths evaluated per movie
    prior = np.array([1.0 / M] * M)  # uniform distribution
    likelihood = compute_movie_rating_likelihood(M)

    # get the list of all movie IDs to process
    movie_id_list = movie_data_helper.get_movie_id_list()
    num_movies = len(movie_id_list)

    # Collect the rating data for all movies up front so each movie's ratings
    # are fetched exactly once.
    ratings = [
        movie_data_helper.get_ratings(movie_id)[0:max_observations]
        for movie_id in movie_id_list
    ]

    total_entropies = np.zeros((num_movies, max_observations))

    # Compute the posterior and its entropy for every ratings prefix.
    print(
        "Computing entropies for plotting (this should take about a minute) ... "
    )
    for i, movie_ratings in enumerate(ratings):
        for j in range(max_observations):
            post = compute_posterior(prior, likelihood, movie_ratings[0:j])
            total_entropies[i, j] = compute_entropy(post)

    # Average across movies so the curve depends only on observation count.
    plotting_data = np.mean(total_entropies, axis=0)
    print("Plotting now ... ")
    plt.plot(plotting_data)
    plt.axis([0, max_observations, 0, 4])
    plt.show()
Example #6
0
 def get_ratings(self, movie_id):
     """
     Return the ratings for `movie_id`, loading them at most once.

     Ratings already stored in `self.ratings` are returned directly;
     otherwise they are fetched through `movie_data_helper.get_ratings`,
     stored under `movie_id`, and then returned.
     """
     cache = self.ratings
     if movie_id in cache:
         return cache[movie_id]

     # First request for this movie: fetch and remember the result.
     cache[movie_id] = movie_data_helper.get_ratings(movie_id)
     return cache[movie_id]