Ejemplo n.º 1
0
    def main(self, movies):
        """Predict a rating for every movie in *movies* and store the result.

        Each movie is vectorized, scored by ``self.k.classify`` once per
        feature group (actors, directors, writers, plot, genres), and the
        weighted sum of those scores is written back as the movie's
        ``'rating'``.  The updated movie is then inserted into
        ``self.classified``, which is indexed on ``'imdb_id'`` at the end.

        Args:
            movies: iterable of movie mappings; each one must provide the
                ``'imdb_id'`` and ``'title'`` keys.
        """
        vectorize = vectorization.vector()

        classify_movies = {}
        print("#################################")
        print("Movies to classify")
        for movie in movies:
            classify_movies[movie['imdb_id']] = movie
            print("Title: %s" % movie['title'])
        print("#################################")

        classify_movies_vectors = vectorize.vectorize(classify_movies.itervalues(), self.classified, 'classified')

        # Execute classifier algorithm

        errorsum = 0
        count = 0

        for movie in classify_movies_vectors:
            vector = classify_movies_vectors[movie]

            # Remember the stored rating for the error metric, then replace it
            # with the classifier's average rating before classifying.
            oldrating = vector['rating']
            vector['rating'] = self.k.avgrating

            actor_rating = self.k.classify(vector, 'actors')
            director_rating = self.k.classify(vector, 'directors')
            writer_rating = self.k.classify(vector, 'writers')
            plot_rating = self.k.classify(vector, 'plot')
            genre_rating = self.k.classify(vector, 'genres')
            count = count + 1

            # NOTE(review): the weights add up to 0.94, not 1.0 -- confirm
            # that this is intended.
            avg = (actor_rating*.14) + (director_rating*.2) + (plot_rating*.19) + (genre_rating*.28) + (writer_rating*.13)

            vector['rating'] = avg
            classify_movies[movie]['rating'] = avg

            self.classified.insert(classify_movies[movie])

            # The relative error is only an evaluation metric.  A missing or
            # zero stored rating used to abort the whole run here
            # (ZeroDivisionError / TypeError) after some movies had already
            # been inserted; such movies are now left out of the error sum.
            error = None
            if oldrating:
                error = math.fabs((oldrating - avg) / oldrating)
                # NOTE(review): error is a fraction (1.0 == 100%), so this
                # cutoff only drops errors above 9000% -- confirm intended.
                if error < 90:
                    errorsum = errorsum + error
            title = vector['title']

            # Per-movie debug output (disabled):
            # print(count)
            # print("Title: %s" % title)
            # print("Actor rating: %f" % (actor_rating))
            # print("Director rating: %f" % (director_rating))
            # print("Writer rating: %f" % (writer_rating))
            # print("Genre rating: %f" % (genre_rating))
            # print("Plot rating: %f" % (plot_rating))
            # print("----------------------")
            # print("Final Rating: %f" % (avg))
            # print("Actual Rating: %f" % (oldrating))
            # print("Error: %f" % (error*100))

        self.classified.create_index('imdb_id')
        
        """
Ejemplo n.º 2
0
 def train_classifier(self, training_movies):
     """Vectorize *training_movies* and train ``self.k`` on the result.

     The movies are vectorized against ``self.training`` (tagged
     ``'training'``), ``self.training`` is indexed on ``'imdb_id'``, the
     number of vectors is printed, and the vectors are handed to
     ``self.k.train``.
     """
     vectorizer = vectorization.vector()

     # Vectorize the movie data; the resulting vectors train the classifier.
     vectors = vectorizer.vectorize(training_movies, self.training, 'training')
     self.training.create_index('imdb_id')
     print("Training with %d movies" % len(vectors))

     self.k.train(vectors)
Ejemplo n.º 3
0
 def setUp(self):
     """Create the vectorizer, the vectorized training set and a ``knn.knn()`` instance."""
     vectorizer = vectorization.vector()
     self.vector = vectorizer
     # vectorize() is given an iterator over the module's training_movies.
     movie_iter = iter(training_movies)
     self.training = vectorizer.vectorize(movie_iter)
     self.knn = knn.knn()
Ejemplo n.º 4
0
 def setUp(self):
     """Create the vectorizer and vectorize ``training_movies`` into ``self.movie_vectors``."""
     vectorizer = vectorization.vector()
     self.vector = vectorizer
     # vectorize() is given an iterator over the module's training_movies.
     movie_iter = iter(training_movies)
     self.movie_vectors = vectorizer.vectorize(movie_iter)