コード例 #1
0
ファイル: model.py プロジェクト: nicorotstein/juan_alberto
    def load_data(self, new=0):
        """Populate ``self.list_reviews`` from one of three sources.

        :param new: selects where the reviews come from:

            * ``0`` -- load the previously saved list through ``Serializer``.
              If loading fails a message is printed and the current list is
              left untouched.
            * ``2`` -- build ``NUM_RANDOM`` reviews from placeholder data.
            * any other value -- fetch positive/negative review data from
              booking.com, build the reviews and save them.
        """
        if new == 0:
            try:
                self.list_reviews = Serializer.get_object("../data/reviews.dat")
            except Exception:
                # Best effort: a missing or unreadable file must not abort the
                # caller. ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                print("impossible to load")
        elif new == 2:
            # Placeholder inputs: NUM_RANDOM zeros and NUM_RANDOM ones.
            self.list_reviews = self.create_reviews([0] * NUM_RANDOM, [1] * NUM_RANDOM)
        else:
            self.parser = Parser(self.lang)
            self.grammar = FeaturesGrammar()
            self.tagger = NGramTagger(self.lang)

            url = "http://www.booking.com/reviewlist.en.html"
            # NOTE(review): the second argument (50) is presumably a limit on
            # how much is fetched -- confirm against Parser.get_from_url.
            self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
            self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
            self.save_reviews()
コード例 #2
0
ファイル: model.py プロジェクト: nicorotstein/juan_alberto
class ReviewsList:
    """Holds a list of reviews that can be generated, fetched or loaded.

    Constructing an instance immediately fills the list with randomly
    generated reviews (``load_data(2)``).

    :attribute list_reviews: the list of reviews
    :attribute lang: language code handed to the parser and the tagger
    """

    # Path handed to Serializer for both loading and saving the review list.
    _REVIEWS_PATH = "../data/reviews.dat"

    # Pool of feature names from which random review features are drawn.
    # NOTE(review): "stuff" may be a typo for "staff" -- left unchanged because
    # it is runtime data.
    _FEATURE_NAMES = (
        "bathroom",
        "stuff",
        "location",
        "room",
        "bed",
        "breakfast",
        "clean",
        "food",
        "price",
        "friendly",
        "atmosphere",
        "size",
        "tranquility",
    )

    def __init__(self):
        """Create the list and populate it with random reviews."""
        self.list_reviews = []
        self.lang = "en-us"
        self.load_data(2)

    def load_data(self, new=0):
        """Populate ``self.list_reviews`` from one of three sources.

        :param new: selects where the reviews come from:

            * ``0`` -- load the previously saved list through ``Serializer``.
              If loading fails a message is printed and the current list is
              left untouched.
            * ``2`` -- build ``NUM_RANDOM`` reviews from placeholder data.
            * any other value -- fetch positive/negative review data from
              booking.com, build the reviews and save them.
        """
        if new == 0:
            try:
                self.list_reviews = Serializer.get_object(self._REVIEWS_PATH)
            except Exception:
                # Best effort: a missing or unreadable file must not abort the
                # caller. ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                print("impossible to load")
        elif new == 2:
            # Placeholder inputs: NUM_RANDOM zeros and NUM_RANDOM ones.
            self.list_reviews = self.create_reviews([0] * NUM_RANDOM, [1] * NUM_RANDOM)
        else:
            self.parser = Parser(self.lang)
            self.grammar = FeaturesGrammar()
            self.tagger = NGramTagger(self.lang)

            url = "http://www.booking.com/reviewlist.en.html"
            # NOTE(review): the second argument (50) is presumably a limit on
            # how much is fetched -- confirm against Parser.get_from_url.
            self.data_pos, self.data_neg = self.parser.get_from_url(url, 50)
            self.list_reviews = self.create_reviews(self.data_pos, self.data_neg)
            self.save_reviews()

    @staticmethod
    def _pick_random_features():
        """Return random, non-overlapping ``(positive, negative)`` name lists.

        One or two names go to the positive list and zero to two names to the
        negative list, all drawn from a shuffled copy of ``_FEATURE_NAMES``.
        """
        names = list(ReviewsList._FEATURE_NAMES)
        random.shuffle(names)
        qpos = random.randrange(1, 3)
        qneg = random.randrange(0, 3)
        # The negative slice starts right where the positive one ends, so it
        # holds exactly ``qneg`` names and never overlaps the positive ones.
        return names[:qpos], names[qpos:qpos + qneg]

    def create_reviews(self, data_pos, data_neg):
        """Build one ``Review`` per (positive, negative) pair of inputs.

        The pair contents are currently not inspected: only the number of
        pairs matters. Every review gets random features and empty tagged
        text lists.

        :param data_pos: sequence of positive review data
        :param data_neg: sequence of negative review data, paired by position
        :returns: list of ``Review`` objects with ids 0, 1, 2, ...
        """
        # TODO: pass more parameters here: name, age, etc.
        # TODO: tag the review texts with self.tagger and derive the features
        # with self.grammar instead of generating random placeholders.
        print("creating reviews...")

        review_list = []
        for rev_id, _pair in enumerate(zip(data_pos, data_neg)):
            positive_feats, negative_feats = self._pick_random_features()
            positive = {"feats": positive_feats, "text": []}
            negative = {"feats": negative_feats, "text": []}
            review_list.append(Review(rev_id, positive, negative))

        return review_list

    def save_reviews(self):
        """Persist ``self.list_reviews`` through ``Serializer``.

        Failures are reported on stdout and otherwise ignored.
        """
        try:
            Serializer.save_object(self.list_reviews, self._REVIEWS_PATH)
        except Exception:
            # Saving is best effort; interrupts and exits still propagate.
            print("i/o error. Not saving")

    def list_features(self):
        """Print every review's id with its positive and negative features."""
        # The two spaces after each colon reproduce the output of the former
        # ``print "label: ", value`` statements.
        for review in self.list_reviews:
            print("---REVIEW---")
            print("rev id:  %s" % (review.id,))

            print("pos:  %s" % (review.get_positive_feats(),))
            print("neg:  %s" % (review.get_negative_feats(),))

    def list_pos_tags(self):
        """Print every review's id with its positive and negative tagged text."""
        # The two spaces after each colon reproduce the output of the former
        # ``print "label: ", value`` statements.
        for review in self.list_reviews:
            print("---REVIEW---")
            print("rev id:  %s" % (review.id,))

            print("pos:  %s" % (review.get_positive_text(),))
            print("neg:  %s" % (review.get_negative_text(),))

    def get_review(self, n):
        """Return the review at position ``n`` of ``list_reviews``.

        :param n: index of the element to retrieve
        :returns: the review stored at that index
        """
        return self.list_reviews[n]

    def get_all_reviews(self):
        """Return the whole ``list_reviews`` list (not a copy)."""
        return self.list_reviews