def load_data(self, new=0):
    """
    :method:: load_data

    Fill ``self.list_reviews`` from one of three sources.

    :param new: selects the source of the reviews:

        * ``0`` -- try to load the list already saved in
          ``../data/reviews.dat``; on failure a message is printed and
          ``self.list_reviews`` is left untouched.
        * ``2`` -- create ``NUM_RANDOM`` reviews from placeholder input
          (a list of zeros and a list of ones).
        * any other value -- build the parser, grammar and tagger, fetch
          positive/negative data from the booking.com review list, create
          the reviews and save them with ``save_reviews``.
    """
    if new == 0:
        try:
            self.list_reviews = Serializer.get_object("../data/reviews.dat")
        except Exception:
            # Loading is best effort, but KeyboardInterrupt / SystemExit
            # must still propagate, hence no bare ``except:``.
            print("impossible to load")
    elif new == 2:
        # Only the number of items matters here; the values are placeholders.
        self.list_reviews = self.create_reviews(
            [0] * NUM_RANDOM, [1] * NUM_RANDOM
        )
    else:
        self.parser = Parser(self.lang)
        self.grammar = FeaturesGrammar()
        self.tagger = NGramTagger(self.lang)
        url = "http://www.booking.com/reviewlist.en.html"
        # NOTE(review): 50 is presumably the number of reviews to fetch --
        # confirm against Parser.get_from_url.
        self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
        self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
        self.save_reviews()
class ReviewsList:
    """
    :class::ReviewsList

    Holds a list of ``Review`` objects that can be loaded from disk,
    generated with random features, or built from data fetched from a URL.
    """

    # Feature names from which create_reviews() draws the random positive
    # and negative features of each review.
    _FEATURE_POOL = (
        "bathroom",
        "stuff",
        "location",
        "room",
        "bed",
        "breakfast",
        "clean",
        "food",
        "price",
        "friendly",
        "atmosphere",
        "size",
        "tranquility",
    )

    def __init__(self):
        """constructor

        :attribute::self.list_reviews:the list of reviews
        :attribute::self.lang:language code handed to Parser and NGramTagger
        """
        self.list_reviews = []
        self.lang = "en-us"
        # Mode 2: populate the list with NUM_RANDOM randomly generated reviews.
        self.load_data(2)

    def load_data(self, new=0):
        """
        :method:: load_data

        Fill ``self.list_reviews`` from one of three sources.

        :param new: selects the source of the reviews:

            * ``0`` -- try to load the list already saved in
              ``../data/reviews.dat``; on failure a message is printed and
              ``self.list_reviews`` is left untouched.
            * ``2`` -- create ``NUM_RANDOM`` reviews from placeholder input
              (a list of zeros and a list of ones).
            * any other value -- build the parser, grammar and tagger, fetch
              positive/negative data from the booking.com review list,
              create the reviews and save them with ``save_reviews``.
        """
        if new == 0:
            try:
                self.list_reviews = Serializer.get_object("../data/reviews.dat")
            except Exception:
                # Loading is best effort, but KeyboardInterrupt / SystemExit
                # must still propagate, hence no bare ``except:``.
                print("impossible to load")
        elif new == 2:
            # Only the number of items matters; the values are placeholders.
            self.list_reviews = self.create_reviews(
                [0] * NUM_RANDOM, [1] * NUM_RANDOM
            )
        else:
            self.parser = Parser(self.lang)
            self.grammar = FeaturesGrammar()
            self.tagger = NGramTagger(self.lang)
            url = "http://www.booking.com/reviewlist.en.html"
            # NOTE(review): 50 is presumably the number of reviews to fetch
            # -- confirm against Parser.get_from_url.
            self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
            self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
            self.save_reviews()

    def create_reviews(self, data_pos, data_neg):
        """
        Build one ``Review`` per (positive, negative) pair of input items.

        The items themselves are not inspected yet: tagging is not wired in,
        so each review gets empty tagged text and randomly drawn features.

        :param data_pos: iterable of positive review data
        :param data_neg: iterable of negative review data, paired with
            ``data_pos`` by position; surplus items of the longer iterable
            are ignored
        :returns: list of ``Review`` objects with ids ``0 .. n-1``
        """
        # todo: here will pass more parameters: name, age, etc
        review_list = []
        print("creating reviews...")
        for rev_id, _pair in enumerate(zip(data_pos, data_neg)):
            # Tagged text stays empty until the tagger/grammar are applied
            # to the review text.
            positive_tags = []
            negative_tags = []

            feats = list(self._FEATURE_POOL)
            random.shuffle(feats)
            qpos = random.randrange(1, 3)  # 1 or 2 positive features
            qneg = random.randrange(0, 3)  # 0, 1 or 2 negative features
            positive_feats = feats[:qpos]
            # Take exactly qneg features right after the positive ones. The
            # previous slice [qpos + 1 : qpos + qneg] skipped one feature
            # and returned at most qneg - 1 items.
            negative_feats = feats[qpos:qpos + qneg]

            positive = {"feats": positive_feats, "text": positive_tags}
            negative = {"feats": negative_feats, "text": negative_tags}
            review_list.append(Review(rev_id, positive, negative))
        return review_list

    def save_reviews(self):
        """
        Save ``self.list_reviews`` to ``../data/reviews.dat``.

        Saving is best effort: on failure a message is printed and the
        error is not propagated.
        """
        try:
            Serializer.save_object(self.list_reviews, "../data/reviews.dat")
        except Exception:
            # Narrowed from a bare ``except:`` so that KeyboardInterrupt /
            # SystemExit are not swallowed.
            print("i/o error. Not saving")

    def list_features(self):
        """
        This method prints all the reviews by id and its positive and
        negative features.
        """
        for review in self.list_reviews:
            print("---REVIEW---")
            # Two spaces after the colon reproduce the separator the
            # comma-style print statement emitted between its arguments.
            print("rev id:  %s" % (review.id,))
            print("pos:  %s" % (review.get_positive_feats(),))
            print("neg:  %s" % (review.get_negative_feats(),))

    def list_pos_tags(self):
        """
        This method prints all the reviews by id and its positives and
        negatives tagged text.
        """
        for review in self.list_reviews:
            print("---REVIEW---")
            # Two spaces after the colon reproduce the separator the
            # comma-style print statement emitted between its arguments.
            print("rev id:  %s" % (review.id,))
            print("pos:  %s" % (review.get_positive_text(),))
            print("neg:  %s" % (review.get_negative_text(),))

    def get_review(self, n):
        """
        :param n:the n-th element to retrieve
        :returns a review in the n-th position of list_reviews
        :raises IndexError: if ``n`` is out of range
        """
        return self.list_reviews[n]

    def get_all_reviews(self):
        """
        :returns all the list_reviews (list of reviews). The list itself is
            returned, not a copy.
        """
        return self.list_reviews