def init_tagger(self):
    """Load the tagger stored at ``self.ser_path``, training one if absent.

    The loaded object is assigned to ``self.tagger``. When the loader
    returns ``False`` a new tagger is obtained from
    ``self.train_classifier_tagger()`` and saved back to ``self.ser_path``.

    Any ``Exception`` raised along the way is caught and reported on
    stdout; it is not re-raised.
    """
    try:
        print('loading tagger...')
        self.tagger = Serializer.get_object(self.ser_path)
        print('DONE...')
        # NOTE(review): presumably Serializer.get_object returns False when
        # nothing is stored on disk -- confirm. The equality comparison is
        # kept exactly as the original wrote it.
        if self.tagger == False:  # noqa: E712
            print('tagger not found in disk. Training one')
            self.tagger = self.train_classifier_tagger()
            print('saving tagger to disk')
            Serializer.save_object(self.tagger, self.ser_path)
            print('DONE...')
    except Exception as err:
        # Report the exception that was actually raised. Printing the
        # ``Exception`` class itself (as before) hid every error detail.
        print('%s: %s' % (type(err).__name__, err))
def load_data(self, new=0):
    """Populate ``self.list_reviews`` according to ``new``.

    NOTE(review): a second ``load_data`` with the same signature is defined
    right after this one; if both live in the same scope, the later
    definition replaces this one.

    :param new: ``0`` loads the list stored in ``../data/reviews.dat``;
        ``2`` builds a list from ``NUM_RANDOM`` placeholder zeros and ones;
        any other value fetches reviews from booking.com, builds the list
        and saves it with ``self.save_reviews()``.
    """
    if new == 0:
        try:
            self.list_reviews = Serializer.get_object('../data/reviews.dat')
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('impossible to load')
    elif new == 2:
        # Placeholder inputs: NUM_RANDOM zeros and NUM_RANDOM ones.
        self.list_reviews = self.create_reviews([0] * NUM_RANDOM,
                                                [1] * NUM_RANDOM)
    else:
        self.parser = Parser(self.lang)
        self.grammar = FeaturesGrammar()
        self.tagger = NGramTagger(self.lang)
        url = "http://www.booking.com/reviewlist.en.html"
        self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
        self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
        self.save_reviews()
def load_data(self, new=0):
    """
    :method:: load_data

    Fill ``self.list_reviews``.

    :param new: selects where the reviews come from:

        * ``0`` -- try to load the list already stored in
          ``../data/reviews.dat``; a failure is reported on stdout and
          not re-raised.
        * ``2`` -- build a list from ``NUM_RANDOM`` placeholder zeros and
          ``NUM_RANDOM`` placeholder ones.
        * anything else -- create a new list from reviews fetched from
          booking.com and save it with ``self.save_reviews()``.
    """
    if new == 0:
        try:
            self.list_reviews = Serializer.get_object("../data/reviews.dat")
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C and interpreter
            # shutdown are not mistaken for a load failure.
            print("impossible to load")
    elif new == 2:
        zeros = [0] * NUM_RANDOM
        ones = [1] * NUM_RANDOM
        self.list_reviews = self.create_reviews(zeros, ones)
    else:
        self.parser = Parser(self.lang)
        self.grammar = FeaturesGrammar()
        self.tagger = NGramTagger(self.lang)
        url = "http://www.booking.com/reviewlist.en.html"
        self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
        self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
        self.save_reviews()
def save_reviews(self):
    """Save ``self.list_reviews`` to ``../data/reviews.dat``.

    Saving is best-effort: a failure is reported on stdout and not
    re-raised.
    """
    try:
        Serializer.save_object(self.list_reviews, '../data/reviews.dat')
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        print('i/o error. Not saving')