Beispiel #1
0
 def genMostLikelyMovies(self, num_topics=15, top_n=10):
     """Print the highest-weighted movies of each topic.

     For every reported topic, the rows of the matrix returned by
     ``self.calcPhi()`` are ranked by their value in that topic's column.
     The ``top_n`` largest are printed in ascending order (strongest
     last), one ``"<name>: <weight>"`` line per movie, after a
     ``"Topic: <n>"`` header and followed by an empty line.

     Args:
         num_topics: Upper bound on the number of topics to print.
             Defaults to 15, the value that used to be hard-coded. It is
             capped at the number of columns of phi, so a model with
             fewer topics no longer indexes past the last column.
         top_n: Number of movies listed per topic (default 10).
     """
     # movies[movieid][0] is printed as the label; presumably the title.
     movies = parseMovies()
     phi = self.calcPhi()
     # Never ask for a topic column that phi does not have.
     topic_count = min(num_topics, phi.shape[1])
     for topic in xrange(topic_count):
         # argsort is ascending, so the best-scoring rows sit at the end.
         ranking = np.argsort(phi[:, topic])
         # Explicit start index: a plain [-top_n:] slice would return
         # the whole ranking when top_n is 0.
         first = max(len(ranking) - top_n, 0)
         print("Topic: %d" % topic)
         print("\n".join("%s: %.4f" % (movies[movieid][0],
                                       phi[movieid, topic])
                         for movieid in ranking[first:]))
         print("")
Beispiel #2
0
def print_most_likely_movies(fname, collection_name):
    """Print the ten highest-weighted movies of every topic.

    The weight matrix comes from ``topic_distribution(fname,
    collection_name)``; it is indexed as ``[movie, topic]`` and its second
    dimension determines how many topics are printed. Each topic is
    written as a ``"Topic: <n>"`` header, ten ``"<name>: <weight>"`` lines
    in ascending weight order, and a trailing empty line.
    """
    weights = topic_distribution(fname, collection_name)
    catalogue = parseMovies()

    topic_count = weights.shape[1]
    for topic_idx in xrange(topic_count):
        # Ascending sort: the last ten indices are the strongest movies.
        ranked = np.argsort(weights[:, topic_idx])
        print("Topic: %d" % topic_idx)
        rows = ["%s: %.4f" % (catalogue[movie_idx][0],
                              weights[movie_idx, topic_idx])
                for movie_idx in ranked[-10:]]
        print("\n".join(rows))
        print("")
Beispiel #3
0
    def genMostLikelyTopic(self):
        """Group movies by the topic in which they score highest.

        Iterates over ``self.info["movies"]`` row indices of the matrix
        returned by ``self.calcPhi()`` and picks, for each row, the column
        index that sorts last under ``np.argsort``.

        Returns:
            A ``defaultdict(list)`` mapping that topic index to a list of
            ``(movies[movieid][0], phi[movieid, topic])`` tuples, in
            movie-index order.
        """
        phi = self.calcPhi()
        catalogue = parseMovies()
        by_topic = defaultdict(list)

        movie_count = self.info["movies"]
        for movie_idx in xrange(movie_count):
            # Last position of the ascending argsort = strongest topic.
            ranking = np.argsort(phi[movie_idx, :])
            best = ranking[-1]
            entry = (catalogue[movie_idx][0], phi[movie_idx, best])
            by_topic[best].append(entry)
        return by_topic
Beispiel #4
0
 def genMostLikelyMovies(self, num_topics=15, top_n=10):
     """Print, per topic, the movies with the largest phi values.

     ``self.calcPhi()`` supplies a matrix indexed as ``[movie, topic]``.
     For each reported topic a ``"Topic: <n>"`` header is printed, then
     ``top_n`` lines of the form ``"<name>: <weight>"`` in ascending
     weight order (strongest last), then an empty line.

     Args:
         num_topics: Maximum number of topics to report. The default of
             15 matches the previously hard-coded loop bound. The value
             is limited to ``phi.shape[1]``, so requesting more topics
             than phi has columns does not index out of range.
         top_n: How many movies to list for each topic (default 10).
     """
     # The first field of each movie record is used as the printed label.
     movies = parseMovies()
     phi = self.calcPhi()
     # Limit the loop to the topic columns that actually exist.
     available = min(num_topics, phi.shape[1])
     for topic in xrange(available):
         # Indices sorted by ascending weight; the tail holds the best.
         order = np.argsort(phi[:, topic])
         # Compute the slice start explicitly because order[-0:] would
         # select every movie rather than none.
         start = max(len(order) - top_n, 0)
         print("Topic: %d" % topic)
         print("\n".join("%s: %.4f" %
                         (movies[movieid][0], phi[movieid, topic])
                         for movieid in order[start:]))
         print("")
Beispiel #5
0
    def genMostLikelyTopic(self):
        """Bucket every movie under its highest-scoring topic.

        For each movie index below ``self.info["movies"]`` the row of the
        ``self.calcPhi()`` matrix is arg-sorted and the final index is
        taken as the movie's topic.

        Returns:
            ``defaultdict(list)`` keyed by topic index. Each value is a
            list of ``(movies[movieid][0], phi[movieid, topic])`` pairs.
        """
        phi = self.calcPhi()
        movie_table = parseMovies()
        grouped = defaultdict(list)

        for row in xrange(self.info["movies"]):
            # argsort is ascending, so element -1 is the largest entry.
            winner = np.argsort(phi[row, :])[-1]
            label = movie_table[row][0]
            score = phi[row, winner]
            grouped[winner].append((label, score))
        return grouped
Beispiel #6
0
def print_most_likely_movies(fname, collection_name):
    """Write the top ten movies of each topic to standard output.

    ``topic_distribution(fname, collection_name)`` yields a matrix indexed
    as ``[movie, topic]``; one section is printed per column. A section
    consists of a ``"Topic: <n>"`` line, ten ``"<name>: <weight>"`` lines
    ordered from weakest to strongest, and a blank line.
    """
    dist = topic_distribution(fname, collection_name)
    movie_table = parseMovies()

    for column in xrange(dist.shape[1]):
        order = np.argsort(dist[:, column])
        # Only the tail of the ascending order is reported.
        leaders = order[-10:]
        print("Topic: %d" % column)
        report = "\n".join(
            "%s: %.4f" % (movie_table[row][0], dist[row, column])
            for row in leaders)
        print(report)
        print("")
Beispiel #7
0
    def visualizePCA(self, samples=20):
        """Scatter-plot a random subset of movies in PCA space.

        ``PCA`` is applied to the matrix from ``self.calcPhi()``. The first
        two columns of its ``Y`` attribute are used as x/y coordinates for
        ``samples`` movie indices drawn with ``sample`` from
        ``range(len(movies))``. Each point is annotated with the movie's
        first field after non-ASCII characters have been dropped.

        Args:
            samples: Number of movies to draw and plot (default 20).

        Returns:
            The 10x8 inch figure holding the annotated scatter plot.
        """
        phi = self.calcPhi()
        movies = parseMovies()
        projection = PCA(phi)

        chosen = sample(xrange(len(movies)), samples)
        xs = projection.Y[chosen, 0]
        ys = projection.Y[chosen, 1]

        figure = plt.figure()
        figure.set_size_inches(10, 8)
        axes = figure.add_subplot(111)
        axes.scatter(xs, ys)

        for movie_idx, x_pos, y_pos in izip(chosen, xs, ys):
            raw_label = movies[movie_idx][0]
            # Round-trip through ASCII with 'ignore' to strip any
            # characters that are not plain ASCII before labelling.
            label = raw_label.decode('ascii', 'ignore').encode('ascii', 'ignore')
            axes.annotate(label, (x_pos, y_pos))
        return figure
Beispiel #8
0
    def visualizePCA(self, samples=20):
        """Plot randomly chosen movies on the first two PCA components.

        The matrix from ``self.calcPhi()`` is passed to ``PCA``; columns 0
        and 1 of the result's ``Y`` attribute give the coordinates.
        ``samples`` indices are drawn via ``sample`` from
        ``range(len(movies))`` and every plotted point is labelled with
        the ASCII-only form of ``movies[idx][0]``.

        Args:
            samples: How many movies to include in the plot (default 20).

        Returns:
            The figure (sized 10 by 8 inches) containing the scatter plot.
        """
        phi = self.calcPhi()
        movies = parseMovies()
        pca = PCA(phi)

        picked = sample(xrange(len(movies)), samples)
        coords_x = pca.Y[picked, 0]
        coords_y = pca.Y[picked, 1]

        fig = plt.figure()
        fig.set_size_inches(10, 8)
        ax = fig.add_subplot(111)
        ax.scatter(coords_x, coords_y)

        for idx, px, py in izip(picked, coords_x, coords_y):
            # Decode and re-encode with 'ignore' so non-ASCII characters
            # are removed from the annotation text.
            ascii_text = movies[idx][0].decode('ascii', 'ignore')
            caption = ascii_text.encode('ascii', 'ignore')
            ax.annotate(caption, (px, py))
        return fig