def init_tagger(self):
    """Load the tagger stored at ``self.ser_path``, training one if needed.

    The result of ``Serializer.get_object(self.ser_path)`` is assigned to
    ``self.tagger``.  When that result equals ``False`` a new tagger is
    built with ``self.train_classifier_tagger()`` and written back to
    ``self.ser_path`` via ``Serializer.save_object``.

    Any ``Exception`` raised along the way is caught and its message is
    printed; nothing is re-raised, so ``self.tagger`` may be left unset
    (or set to ``False``) after a failure.
    """
    try:
        print('loading tagger...')
        self.tagger = Serializer.get_object(self.ser_path)
        print('DONE...')
        # Deliberately an explicit comparison with False rather than a
        # truthiness test, so a valid-but-falsy tagger is not retrained.
        # NOTE(review): presumably get_object returns False when nothing
        # is stored on disk -- confirm against Serializer.
        if self.tagger == False:
            print('tagger not found in disk. Training one')
            self.tagger = self.train_classifier_tagger()
            print('saving tagger to disk')
            Serializer.save_object(self.tagger, self.ser_path)
            print('DONE...')
    except Exception as exc:
        # Print the raised instance; printing the bare ``Exception`` class
        # would hide what actually went wrong.
        print(exc)
Beispiel #2
0
 def load_data(self, new=0):
     """Fill ``self.list_reviews`` in one of three ways selected by ``new``.

     :param new: ``0`` (default) loads the list previously stored in
         ``'../data/reviews.dat'``; ``2`` builds reviews from
         ``NUM_RANDOM`` zeros and ``NUM_RANDOM`` ones; any other value
         fetches data from the booking.com review URL, builds the
         reviews and saves them with ``self.save_reviews()``.

     A failed load (``new == 0``) is best-effort: a message is printed
     and ``self.list_reviews`` is left untouched.
     """
     if new == 0:
         try:
             self.list_reviews = Serializer.get_object('../data/reviews.dat')
         except Exception:
             # Still best-effort, but no longer swallows KeyboardInterrupt
             # or SystemExit the way a bare ``except:`` did.
             print('impossible to load')
     elif new == 2:
         self.list_reviews = self.create_reviews([0] * NUM_RANDOM, [1] * NUM_RANDOM)
     else:
         # grammar and tagger are only stored on the instance here; this
         # method itself uses just the parser.
         self.parser = Parser(self.lang)
         self.grammar = FeaturesGrammar()
         self.tagger = NGramTagger(self.lang)

         url = "http://www.booking.com/reviewlist.en.html"
         # NOTE(review): the meaning of the second argument (50) is defined
         # by Parser.get_from_url, which is not visible here.
         self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
         self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
         self.save_reviews()
Beispiel #3
0
    def load_data(self, new=0):
        """
        :method:: load_data
        :param new: selects how ``self.list_reviews`` is filled.
            ``new == 0`` tries to load the list already stored in
            ``"../data/reviews.dat"``; ``new == 2`` builds reviews from
            ``NUM_RANDOM`` zeros and ``NUM_RANDOM`` ones; any other value
            fetches data from the booking.com review URL, creates a new
            list and saves it with ``self.save_reviews()``.

        A failed load (``new == 0``) is best-effort: a message is printed
        and ``self.list_reviews`` is left untouched.
        """
        if new == 0:
            try:
                self.list_reviews = Serializer.get_object("../data/reviews.dat")
            except Exception:
                # Still best-effort, but no longer swallows
                # KeyboardInterrupt or SystemExit like a bare ``except:``.
                print("impossible to load")
        elif new == 2:
            self.list_reviews = self.create_reviews([0] * NUM_RANDOM, [1] * NUM_RANDOM)
        else:
            # grammar and tagger are only stored on the instance here; this
            # method itself uses just the parser.
            self.parser = Parser(self.lang)
            self.grammar = FeaturesGrammar()
            self.tagger = NGramTagger(self.lang)

            url = "http://www.booking.com/reviewlist.en.html"
            # NOTE(review): the meaning of the second argument (50) is
            # defined by Parser.get_from_url, which is not visible here.
            self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
            self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
            self.save_reviews()
Beispiel #4
0
 def save_reviews(self):
     """Write ``self.list_reviews`` to ``'../data/reviews.dat'``.

     Saving is best-effort: if ``Serializer.save_object`` (or reading
     ``self.list_reviews``) raises an ``Exception``, a message is printed
     and the method returns normally without saving.
     """
     try:
         Serializer.save_object(self.list_reviews, '../data/reviews.dat')
     except Exception:
         # Narrower than a bare ``except:`` so KeyboardInterrupt and
         # SystemExit still propagate.
         print('i/o error. Not saving')