Ejemplo n.º 1
0
def top_recommendations_poisson():
    """Evaluate top-1000 recommendations of the best "poisson" parameters.

    Each (user, movie) pair is scored as the dot product of the user's row
    of ``theta`` with the movie's row of ``beta``.  The 1000 highest-scoring
    movies form that user's recommendation set.  For every user with at
    least one nonzero test entry, the fraction of those test movies that
    appear in the recommendation set is computed.

    Returns:
        The mean of that per-user fraction over users with test entries.

    Raises:
        ZeroDivisionError: if no user has a nonzero test entry.
    """
    fitted = get_best_params("poisson")
    meta = getMeta()
    item_factors = fitted["beta"]
    user_factors = fitted["theta"]

    held_out = get_test_reviews()

    running_total = 0.0
    users_scored = 0
    for uid in xrange(meta["users"]):
        # Score every movie for this user, then rank ascending by score.
        scored = [(mid, np.dot(user_factors[uid, :], item_factors[mid, :]))
                  for mid in xrange(meta["movies"])]
        scored.sort(key=lambda pair: pair[1])
        recommended = set(pair[0] for pair in scored[-1000:])

        # assumes held_out[uid, :] is a 1-D row so nonzero()[0] yields
        # movie indices -- TODO confirm against get_test_reviews()
        seen = held_out[uid, :].nonzero()[0]
        hits = 0.0
        for mid in seen:
            if mid in recommended:
                hits += 1
        if len(seen) == 0:
            # Users without test entries do not contribute to the mean.
            continue
        users_scored += 1
        running_total += hits / len(seen)

    return running_total / users_scored
Ejemplo n.º 2
0
def top_recommendations_lda():
    """Evaluate top-1000 recommendations of the best "lda" parameters.

    A movie's topic is the argmax of its row of ``phi``.  The score of a
    (user, movie) pair is the dot product of the normalised slice
    ``kappa[:, user, topic]`` with the rating values 0..5.  The 1000
    highest-scoring movies form the user's recommendation set.  For every
    user with at least one nonzero test entry, the fraction of those test
    movies that appear in the recommendation set is computed.

    Returns:
        The mean of that per-user fraction over users with test entries.

    Raises:
        ZeroDivisionError: if no user has a nonzero test entry.
    """
    fitted = get_best_params("lda")
    meta = getMeta()
    movie_topics = fitted["phi"]
    rating_counts = fitted["kappa"]

    held_out = get_test_reviews()
    rating_scale = np.asarray([0, 1.0, 2.0, 3.0, 4.0, 5.0])
    running_total = 0.0
    users_scored = 0
    for uid in xrange(meta["users"]):
        scored = []
        for mid in xrange(meta["movies"]):
            best_topic = np.argmax(movie_topics[mid, :])
            # Normalise the per-rating slice so it sums to one before
            # weighting the rating values.
            weights = rating_counts[:, uid, best_topic]
            expected = np.dot(weights / np.sum(weights), rating_scale)
            scored.append((mid, expected))
        scored.sort(key=lambda pair: pair[1])
        recommended = set(pair[0] for pair in scored[-1000:])

        # assumes held_out[uid, :] is a 1-D row so nonzero()[0] yields
        # movie indices -- TODO confirm against get_test_reviews()
        seen = held_out[uid, :].nonzero()[0]
        hits = 0.0
        for mid in seen:
            if mid in recommended:
                hits += 1
        if len(seen) == 0:
            continue
        users_scored += 1
        running_total += hits / len(seen)
    return running_total / users_scored
Ejemplo n.º 3
0
def top_recommendations_poisson():
    """Measure how well the "poisson" model's top-1000 lists cover test data.

    The predicted score for a user and a movie is
    ``np.dot(theta[user, :], beta[movie, :])``.  Movies are ranked by that
    score and the last 1000 of the ascending ranking are kept.  A user's
    result is the share of their nonzero test movies found among those 1000.

    Returns:
        The average per-user share, taken over users that have at least one
        nonzero test entry.

    Raises:
        ZeroDivisionError: if there is no such user.
    """
    best = get_best_params("poisson")
    meta = getMeta()
    beta = best["beta"]
    theta = best["theta"]

    test_matrix = get_test_reviews()

    share_sum = 0.0
    counted_users = 0
    for u in xrange(meta["users"]):
        ranking = []
        for m in xrange(meta["movies"]):
            ranking.append((m, np.dot(theta[u, :], beta[m, :])))

        # Ascending sort: the best-scoring movies end up at the tail.
        ranking = sorted(ranking, key=lambda entry: entry[1])
        top_set = set(entry[0] for entry in ranking[-1000:])

        # assumes test_matrix[u, :] is a 1-D row so nonzero()[0] yields
        # movie indices -- TODO confirm against get_test_reviews()
        rated = test_matrix[u, :].nonzero()[0]
        matched = float(sum(1 for m in rated if m in top_set))

        if len(rated) > 0:
            counted_users += 1
            share_sum += matched / len(rated)

    return share_sum / counted_users
Ejemplo n.º 4
0
def top_recommendations_lda():
    """Measure how well the "lda" model's top-1000 lists cover test data.

    For each movie the topic with the largest entry in ``phi[movie, :]`` is
    chosen.  The predicted score for a user is the rating values 0..5
    weighted by ``kappa[:, user, topic]`` normalised to sum to one.  Movies
    are ranked by that score and the last 1000 of the ascending ranking are
    kept.  A user's result is the share of their nonzero test movies found
    among those 1000.

    Returns:
        The average per-user share, taken over users that have at least one
        nonzero test entry.

    Raises:
        ZeroDivisionError: if there is no such user.
    """
    best = get_best_params("lda")
    meta = getMeta()
    phi = best["phi"]
    kappa = best["kappa"]

    test_matrix = get_test_reviews()
    values = np.asarray([0, 1.0, 2.0, 3.0, 4.0, 5.0])
    share_sum = 0.0
    counted_users = 0
    for u in xrange(meta["users"]):
        ranking = []
        for m in xrange(meta["movies"]):
            k = np.argmax(phi[m, :])
            distribution = kappa[:, u, k] / np.sum(kappa[:, u, k])
            ranking.append((m, np.dot(distribution, values)))
        # Ascending sort: the best-scoring movies end up at the tail.
        ranking = sorted(ranking, key=lambda entry: entry[1])
        top_set = set(entry[0] for entry in ranking[-1000:])

        # assumes test_matrix[u, :] is a 1-D row so nonzero()[0] yields
        # movie indices -- TODO confirm against get_test_reviews()
        rated = test_matrix[u, :].nonzero()[0]
        matched = float(sum(1 for m in rated if m in top_set))
        if len(rated) > 0:
            counted_users += 1
            share_sum += matched / len(rated)
    return share_sum / counted_users
Ejemplo n.º 5
0
    def __init__(self, numTopics, alpha, beta, gamma):
        """Configure logging, load the ratings and randomly seed the sampler.

        Every nonzero (user, movie) entry of the first matrix returned by
        ``get_split_review_mats()`` is assigned a random topic, and all count
        tables are incremented accordingly.

        Args:
            numTopics: number of topics; sizes the topic axis of every count
                table.
            alpha: stored unchanged as ``self.alpha`` (not used here).
            beta: stored unchanged as ``self.beta`` (not used here).
            gamma: stored unchanged as ``self.gamma`` (not used here).
        """
        # Setup logger
        self.log = logging.getLogger("Gibbs")
        self.log.setLevel(logging.DEBUG)
        # logging.getLogger returns the same logger object for the same name,
        # so attach handlers only once; otherwise every new instance would
        # add another pair and each message would be emitted repeatedly.
        if not self.log.handlers:
            formatter = logging.Formatter("%(asctime)s %(message)s",
                                          datefmt="%m/%d/%Y %I:%M:%S %p")
            fh = logging.handlers.TimedRotatingFileHandler("logs/gibbs.log",
                                                           when="D",
                                                           interval=1,
                                                           backupCount=10)
            ch = logging.StreamHandler()
            fh.setFormatter(formatter)
            ch.setFormatter(formatter)
            self.log.addHandler(fh)
            self.log.addHandler(ch)

        self.numTopics = numTopics
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma

        self.info = getMeta()

        self.user_movies, _ = get_split_review_mats()
        user_indices, movie_indices = self.user_movies.nonzero()
        # Materialise the pairs: the attribute is kept on the instance, and a
        # bare zip() would be a one-shot iterator on Python 3, exhausted by
        # the initialisation loop below.
        self.user_movie_indices = list(zip(user_indices, movie_indices))

        # Builtin int is used as dtype; np.int was only an alias for it and
        # is no longer available in current NumPy releases.
        self.CountMT = np.zeros((self.info["movies"], numTopics), dtype=int)
        self.CountRUT = np.zeros((6, self.info["users"], numTopics),
                                 dtype=int)  # ratings 1-5 and 0
        self.CountUT = np.zeros((self.info["users"], numTopics), dtype=int)
        self.topic_assignments = np.zeros(
            (self.info["users"], self.info["movies"]), dtype=int)

        # Normalization factors
        self.CountT = np.zeros(numTopics, dtype=int)
        self.CountU = np.zeros(self.info["users"], dtype=int)
        self.CountRU = np.zeros((6, self.info["users"]), dtype=int)

        for userid, movieid in self.user_movie_indices:
            # NOTE(review): assumes randint has an inclusive upper bound
            # (random.randint) -- confirm which randint is imported.
            topic = randint(0, numTopics - 1)
            self.CountMT[movieid, topic] += 1
            # assumes user_movies holds integer ratings in 0..5, since the
            # value is used directly as an array index -- TODO confirm
            rating = self.user_movies[userid, movieid]
            self.CountRUT[rating, userid, topic] += 1
            self.CountUT[userid, topic] += 1
            self.topic_assignments[userid, movieid] = topic

            self.CountT[topic] += 1
            self.CountU[userid] += 1
            self.CountRU[rating, userid] += 1
Ejemplo n.º 6
0
    def __init__(self, numTopics, alpha, beta, gamma):
        """Set up logging, load training ratings and seed topic assignments.

        Each nonzero (user, movie) entry of the first matrix returned by
        ``get_split_review_mats()`` receives a random topic; the per-movie,
        per-user and per-rating count tables are updated to match.

        Args:
            numTopics: number of topics; sizes the topic axis of every count
                table.
            alpha: stored unchanged as ``self.alpha`` (not used here).
            beta: stored unchanged as ``self.beta`` (not used here).
            gamma: stored unchanged as ``self.gamma`` (not used here).
        """
        # Setup logger
        self.log = logging.getLogger("Gibbs")
        self.log.setLevel(logging.DEBUG)
        # The "Gibbs" logger is shared by name across instances, so handlers
        # are attached only the first time; re-adding them on every
        # construction would duplicate every log line.
        if not self.log.handlers:
            formatter = logging.Formatter("%(asctime)s %(message)s",
                                          datefmt="%m/%d/%Y %I:%M:%S %p")
            fh = logging.handlers.TimedRotatingFileHandler("logs/gibbs.log",
                                                           when="D",
                                                           interval=1,
                                                           backupCount=10)
            ch = logging.StreamHandler()
            fh.setFormatter(formatter)
            ch.setFormatter(formatter)
            self.log.addHandler(fh)
            self.log.addHandler(ch)

        self.numTopics = numTopics
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma

        self.info = getMeta()

        self.user_movies, _ = get_split_review_mats()
        user_indices, movie_indices = self.user_movies.nonzero()
        # Stored as a list so the pairs remain available after the loop
        # below (zip() is a single-use iterator on Python 3).
        self.user_movie_indices = list(zip(user_indices, movie_indices))

        # dtype=int replaces the removed np.int alias; the meaning is the same.
        self.CountMT = np.zeros((self.info["movies"], numTopics), dtype=int)
        self.CountRUT = np.zeros((6, self.info["users"], numTopics), dtype=int)  # ratings 1-5 and 0
        self.CountUT = np.zeros((self.info["users"], numTopics), dtype=int)
        self.topic_assignments = np.zeros((self.info["users"], self.info["movies"]), dtype=int)

        # Normalization factors
        self.CountT = np.zeros(numTopics, dtype=int)
        self.CountU = np.zeros(self.info["users"], dtype=int)
        self.CountRU = np.zeros((6, self.info["users"]), dtype=int)

        for userid, movieid in self.user_movie_indices:
            # NOTE(review): assumes randint has an inclusive upper bound
            # (random.randint) -- confirm which randint is imported.
            topic = randint(0, numTopics - 1)
            self.CountMT[movieid, topic] += 1
            # assumes user_movies holds integer ratings in 0..5, since the
            # value is used directly as an array index -- TODO confirm
            rating = self.user_movies[userid, movieid]
            self.CountRUT[rating, userid, topic] += 1
            self.CountUT[userid, topic] += 1
            self.topic_assignments[userid, movieid] = topic

            self.CountT[topic] += 1
            self.CountU[userid] += 1
            self.CountRU[rating, userid] += 1
Ejemplo n.º 7
0
def test_lda():
    """Compute the RMSE of the best "lda" parameters on the test reviews.

    For every nonzero test entry the movie's topic is the argmax of its row
    of ``phi``.  The estimated rating is the rating values 0..5 weighted by
    ``kappa[:, user, topic]`` normalised to sum to one.

    Returns:
        The square root of the mean squared difference between the test
        ratings and the estimates.

    Raises:
        ZeroDivisionError: if the test matrix has no nonzero entry.
    """
    fitted = get_best_params("lda")
    meta = getMeta()  # result is not used below; the call itself is kept
    movie_topics = fitted["phi"]
    rating_counts = fitted["kappa"]

    held_out = get_test_reviews()
    squared_error_sum = 0.0
    n_entries = 0

    rating_scale = np.asarray([0, 1.0, 2.0, 3.0, 4.0, 5.0])
    for uid, mid in izip(*held_out.nonzero()):
        best_topic = np.argmax(movie_topics[mid, :])
        weights = rating_counts[:, uid, best_topic]
        predicted = np.dot(weights / np.sum(weights), rating_scale)
        residual = held_out[uid, mid] - predicted
        squared_error_sum += residual ** 2
        n_entries += 1
    return math.sqrt(squared_error_sum / n_entries)
Ejemplo n.º 8
0
def test_lda():
    """Return the root-mean-square error of the "lda" model on test data.

    Each nonzero test entry (user, movie) is predicted as follows: take the
    topic with the largest value in ``phi[movie, :]``, normalise
    ``kappa[:, user, topic]`` to sum to one, and dot it with the rating
    values 0..5.

    Returns:
        ``sqrt(sum of squared errors / number of nonzero test entries)``.

    Raises:
        ZeroDivisionError: if the test matrix has no nonzero entry.
    """
    best = get_best_params("lda")
    meta = getMeta()  # result is not used below; the call itself is kept
    phi = best["phi"]
    kappa = best["kappa"]

    test_matrix = get_test_reviews()
    total_sq = 0.0
    seen = 0

    values = np.asarray([0, 1.0, 2.0, 3.0, 4.0, 5.0])
    for u, m in izip(*test_matrix.nonzero()):
        k = np.argmax(phi[m, :])
        distribution = kappa[:, u, k] / np.sum(kappa[:, u, k])
        guess = np.dot(distribution, values)
        actual = test_matrix[u, m]
        total_sq += (actual - guess)**2
        seen += 1
    return math.sqrt(total_sq / seen)
Ejemplo n.º 9
0
def top_recommendations_iid():
    """Evaluate a single, non-personalised top-1000 list on the test reviews.

    Movies are ranked by their average nonzero training rating and the 1000
    best are recommended to every user.  For each user with at least one
    nonzero test entry, the fraction of those test movies that appear in the
    list is computed.

    Returns:
        The mean of that per-user fraction over users with test entries.

    Raises:
        ZeroDivisionError: if no user has a nonzero test entry.
    """
    info = getMeta()
    train, reviews = get_split_review_mats()

    # Column sum divided by the count of nonzero entries gives the mean
    # nonzero rating per movie.  Builtin float replaces the removed np.float
    # alias; the meaning is the same.
    # NOTE(review): a movie with no training ratings yields 0/0 here; NaN
    # sort keys make the ranking unreliable -- confirm such movies cannot
    # occur or filter them out.
    avg_ratings = train.sum(axis=0) / (train != 0).sum(axis=0).astype(float)
    top_movies = sorted(((movie, rating) for movie, rating in enumerate(avg_ratings)),
                        key=lambda x: x[1])
    top_movies = set(movie for movie, rating in top_movies[-1000:])

    precision = 0.0
    num_users = 0
    for user in xrange(info["users"]):
        # assumes reviews[user, :] is a 1-D row so nonzero()[0] yields movie
        # indices -- TODO confirm against get_split_review_mats()
        movies = reviews[user, :].nonzero()[0]
        user_precision = 0.0
        for movie in movies:
            if movie in top_movies:
                user_precision += 1
        if len(movies) > 0:
            num_users += 1
            precision += (user_precision / len(movies))
    # The return sits outside the loop so that every user is evaluated;
    # inside the loop it would stop after the first user.
    return precision / num_users
Ejemplo n.º 10
0
def top_recommendations_iid():
    """Score one global top-1000 movie list against the test reviews.

    The list holds the 1000 movies with the highest average nonzero training
    rating and is the same for every user.  A user's result is the share of
    their nonzero test movies that appear in the list.

    Returns:
        The average per-user share, taken over users that have at least one
        nonzero test entry.

    Raises:
        ZeroDivisionError: if there is no such user.
    """
    info = getMeta()
    train, reviews = get_split_review_mats()

    # Mean nonzero rating per movie.  dtype float replaces the removed
    # np.float alias; the meaning is the same.
    # NOTE(review): a movie with no training ratings yields 0/0 here; NaN
    # sort keys make the ranking unreliable -- confirm such movies cannot
    # occur or filter them out.
    avg_ratings = train.sum(axis=0) / (train != 0).sum(axis=0).astype(float)
    top_movies = sorted(
        ((movie, rating) for movie, rating in enumerate(avg_ratings)),
        key=lambda x: x[1])
    top_movies = set(movie for movie, rating in top_movies[-1000:])

    precision = 0.0
    num_users = 0
    for user in xrange(info["users"]):
        # assumes reviews[user, :] is a 1-D row so nonzero()[0] yields movie
        # indices -- TODO confirm against get_split_review_mats()
        movies = reviews[user, :].nonzero()[0]
        user_precision = 0.0
        for movie in movies:
            if movie in top_movies:
                user_precision += 1
        if len(movies) > 0:
            num_users += 1
            precision += (user_precision / len(movies))
    # Returning after the loop (not inside it) averages over all users
    # instead of stopping at the first one.
    return precision / num_users