def _parse_restaurant(self, restaurant_soup):
    """Build a ``Restaurant`` from the markup of a single result entry.

    Every field is extracted independently: when the expected element,
    attribute or pattern is missing, that field is simply left unassigned
    and parsing continues with the next one.

    Args:
        restaurant_soup: parsed element for one restaurant, queried with
            ``find`` / ``find_all`` (presumably a BeautifulSoup tag --
            confirm against the caller).

    Returns:
        The populated ``Restaurant``. ``ranking``, ``review_count``,
        ``rating`` and ``postcode`` hold the matched text as strings; no
        numeric conversion is done here.
    """
    restaurant = Restaurant()

    # ranking: the "<n>." prefix inside the indexed name
    ranking_tag = restaurant_soup.find('span', class_='indexed-biz-name')
    if ranking_tag:
        ranking_match = re.search(r'(\d+)\.', ranking_tag.text)
        if ranking_match:
            restaurant.ranking = ranking_match.group(1)

    # name & link
    link_tag = restaurant_soup.find('a', class_='biz-name')
    if link_tag:
        # An anchor without href must not abort the whole entry.
        href = link_tag.get('href')
        if href is not None:
            restaurant.url = self.base_url + href
        name_tag = link_tag.find('span')
        if name_tag:
            restaurant.name = name_tag.text

    # reviews: first run of digits in the review-count text
    reviews_tag = restaurant_soup.find('span', class_='review-count')
    if reviews_tag:
        reviews_match = re.search(r'(\d+)', reviews_tag.text)
        if reviews_match:
            restaurant.review_count = reviews_match.group(1)

    # rating: "<d>" or "<d>.<d>" taken from the star image's title
    rating_tag = restaurant_soup.find('i', class_='star-img')
    if rating_tag:
        title = rating_tag.get('title')
        if title is not None:
            rating_match = re.search(r'(\d(\.\d)?)', title)
            if rating_match:
                restaurant.rating = rating_match.group(1)

    # cuisines: text of every link in the category list
    cuisines_tag = restaurant_soup.find('span', 'category-str-list')
    if cuisines_tag:
        restaurant.cuisines = [
            cuisine_tag.text for cuisine_tag in cuisines_tag.find_all('a')
        ]

    # address: split around a 5-digit postcode; the text before it and
    # everything after it are joined to form the address
    address_tag = restaurant_soup.find('address')
    if address_tag:
        address_match = re.search(
            r'(.*)(\d{5})([\s\S]*)', address_tag.text.strip())
        if address_match:
            restaurant.address = '%s, %s' % (
                address_match.group(1), address_match.group(3))
            restaurant.postcode = address_match.group(2)

    # price
    price_tag = restaurant_soup.find('span', class_='price-range')
    if price_tag:
        restaurant.price = price_tag.text

    return restaurant
def _parse_restaurant(self, restaurant_soup):
    """Build a ``Restaurant`` from the markup of a single result entry.

    Fields whose element or attribute is missing are left unassigned,
    except ``review_count`` (defaults to ``0``) and ``cuisines`` (always a
    list, possibly empty).

    Args:
        restaurant_soup: parsed element for one restaurant, queried with
            ``find`` / ``find_all`` (presumably a BeautifulSoup tag --
            confirm against the caller).

    Returns:
        The populated ``Restaurant``. Note that ``review_count`` is the
        int ``0`` when no count is found and otherwise a string of digits
        with the ``.`` group separators removed.
    """
    restaurant = Restaurant()

    # name and url
    title_tag = restaurant_soup.find('a', class_='property_title')
    if title_tag is not None:
        restaurant.name = title_tag.text.strip()
        # An anchor without href must not abort the whole entry.
        href = title_tag.get('href')
        if href is not None:
            restaurant.url = self.base_url + href.strip()

    # price
    price_tag = restaurant_soup.find('span', class_='price_range')
    if price_tag is not None:
        restaurant.price = price_tag.text.strip()

    # reviews
    restaurant.review_count = 0
    reviews_tag = restaurant_soup.find('span', class_='reviewCount')
    # The count is read from the link inside the span; tolerate its absence.
    reviews_link = reviews_tag.a if reviews_tag is not None else None
    if reviews_link is not None:
        # Leading number with optional "."-separated groups. The previous
        # pattern required at least two digits, so single-digit counts were
        # never recognised, and it stopped after the first separator group.
        count_match = re.match(r'\d+(?:\.\d+)*', reviews_link.text.strip())
        if count_match:
            restaurant.review_count = count_match.group(0).replace('.', '')

    # rating: both values are derived from the image's alt text
    rating_tag = restaurant_soup.find('img', class_='sprite-ratings')
    if rating_tag is not None:
        alt_text = rating_tag.get('alt')
        if alt_text is not None:
            rating = alt_text.strip()
            restaurant.rating = self._parse_rating(rating)
            restaurant.max_rating = self._parse_max_rating(rating)

    # ranking: dots are stripped before the text is handed to the helpers
    ranking_tag = restaurant_soup.find('div', class_='popIndexDefault')
    if ranking_tag is not None:
        ranking = ranking_tag.text.strip().replace('.', '')
        restaurant.ranking = self._parse_ranking(ranking)
        restaurant.ranking_len = self._parse_ranking_len(ranking)

    # cuisines: find_all yields an empty result (not None) when nothing
    # matches, so the list is always assigned
    restaurant.cuisines = [
        cuisine_tag.text.strip()
        for cuisine_tag in restaurant_soup.find_all('a', class_='cuisine')
    ]

    return restaurant