def parse_init(self, hotel_url, is_hotel_page):
    """Load a hotel page and keep only its review section in ``self.soup``.

    The page at ``hotel_url`` is fetched with ``TAutil.open_page`` and
    parsed through a strainer restricted to the element whose id is
    ``REVIEWS``. The raw HTML is stored in ``self.hotel_html`` and the
    URL in ``self.hotel_ori_url``.

    When ``is_hotel_page`` is true, the link inside the first
    ``div.quote`` (the first review title) is followed so that the
    review entry is expanded, and ``self.soup`` is replaced by the
    review section of that page. ``self.hotel_html`` keeps the HTML of
    the original hotel page in either case.

    If the review section holds no ``div.quote`` with a link, there is
    nothing to follow and ``self.soup`` is left as the review section of
    the hotel page instead of failing on a ``None`` dereference.

    Args:
        hotel_url: URL of the page to load.
        is_hotel_page: Whether to follow the first review link after
            loading ``hotel_url``.
    """
    review_strainer = SoupStrainer(id='REVIEWS')
    self.hotel_html = TAutil.open_page(hotel_url)
    self.hotel_ori_url = hotel_url
    self.soup = BeautifulSoup(self.hotel_html, parse_only=review_strainer)

    if not is_hotel_page:
        return

    # find() returns None when nothing matches, so guard every step of
    # the quote -> anchor -> href chain before using it.
    quote = self.soup.find('div', class_='quote')
    anchor = quote.a if quote is not None else None
    href = anchor.get('href') if anchor is not None else None
    if href is None:
        return

    # Open the first review; its page replaces the soup being parsed.
    first_review = TAutil.wrap_tripadvisor(str(href))
    review_html = TAutil.open_page(first_review)
    self.soup = BeautifulSoup(review_html, parse_only=review_strainer)
def parse_init(self, city_page_url):
    """Build ``self.soup`` from the page at ``city_page_url``.

    The page is fetched with ``TAutil.open_page`` and parsed through a
    strainer restricted to the element whose id is ``ACCOM_OVERVIEW``.
    """
    page_markup = TAutil.open_page(city_page_url)
    overview_only = SoupStrainer(id="ACCOM_OVERVIEW")
    self.soup = BeautifulSoup(page_markup, parse_only=overview_only)