def get_reviews(hotel_file):
    """Lazily yield one ``Review`` for each review in a hotel file.

    The file is loaded through ``TaHotel``. Every entry of the resulting
    ``hotel.reviews`` is copied into a fresh ``Review`` that carries the
    hotel's id, the source review's ratings, and the source review's
    ``entire_content()`` (passed to ``Review.assign_content`` together with
    the ``StopWords`` name from the enclosing scope).

    Args:
        hotel_file: Value handed unchanged to ``TaHotel(...)``.

    Yields:
        A populated ``Review`` per entry of ``hotel.reviews``, in iteration
        order.
    """
    hotel = TaHotel(hotel_file)
    for tareview in hotel.reviews:
        review = Review()
        review.business_id = hotel.id
        review.ratings = tareview.ratings
        # NOTE(review): ``run`` in this file performs the same step through
        # ``Review.assign_comment``; confirm which method name ``Review``
        # actually defines.
        review.assign_content(tareview.entire_content(), StopWords)
        yield review
def run(self):
    """Parse each file in ``self.allfiles`` and enqueue its reviews.

    For every file name a ``TaHotel`` is built, and each of its reviews is
    converted into a ``Review`` holding the hotel's id, the source review's
    ratings, and its ``entire_content()`` (passed to
    ``Review.assign_comment`` with ``self.stopwords``). The reviews of one
    file are put on ``self.queue`` as a single list; files that produce no
    reviews put nothing on the queue. A progress line is printed per file.
    """
    for index, fname in enumerate(self.allfiles):
        hotel = TaHotel(fname)

        reviews = []
        for tareview in hotel.reviews:
            review = Review()
            review.business_id = hotel.id
            review.ratings = tareview.ratings
            review.assign_comment(tareview.entire_content(), self.stopwords)
            reviews.append(review)

        # Only non-empty batches are handed to the queue.
        if reviews:
            self.queue.put(reviews)

        # NOTE(review): the original indentation was lost; this progress
        # line is assumed to run once per file (also for files with zero
        # reviews) -- confirm against the intended behavior.
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("[PARSER]: {}-th file<{}> parsed, {} reviews enqueued".format(
            index + 1, fname, len(reviews)))