def parse_restaurant(result_soup):
    """Build a ``Restaurant`` from a single search-result element.

    Every field is optional: when the element that carries it is not found
    (or its text does not match the expected pattern) the corresponding
    attribute on the returned object is simply not assigned here.

    Args:
        result_soup: Parsed markup for one result. It must provide
            ``find(name, class_=...)`` returning either ``None`` or an
            object with a ``text`` attribute (presumably a BeautifulSoup
            tag -- confirm against the caller).

    Returns:
        A new ``Restaurant`` with whichever of ``name``, ``address``,
        ``postcode`` and ``cuisines`` could be extracted.
    """
    restaurant = Restaurant()

    # name
    name_soup = result_soup.find('a', class_='name')
    if name_soup is not None:
        restaurant.name = name_soup.text.strip()

    # address, first line: "<street> <5-digit postcode>" inside <strong>
    address1_soup = result_soup.find('strong')
    if address1_soup is not None:
        address_match = re.search(r'(.*) (\d{5})', address1_soup.text.strip())
        if address_match:
            restaurant.address = address_match.group(1)
            restaurant.postcode = address_match.group(2)

    # address, second line: appended to the first line when there is one.
    # The first line may be missing or unparseable, so do not assume
    # ``restaurant.address`` already holds a string; an unconditional ``+=``
    # would fail (or produce a leading space) in that case.
    address2_soup = result_soup.find('span', class_='address')
    if address2_soup is not None:
        address_line2 = address2_soup.text.strip()
        address_line1 = getattr(restaurant, 'address', None)
        if address_line1:
            restaurant.address = address_line1 + ' ' + address_line2
        else:
            restaurant.address = address_line2

    # cuisines: this markup exposes a single label, stored as a one-item list
    cuisine_soup = result_soup.find('span', class_='tipo-label')
    if cuisine_soup is not None:
        restaurant.cuisines = [cuisine_soup.text.strip()]

    return restaurant
def _parse_restaurant(self, restaurant_soup):
    """Build a ``Restaurant`` from a single listing element.

    Every field is optional: when the element or attribute that carries it
    is absent, or its text does not match the expected pattern, the
    corresponding attribute on the returned object is not assigned here.
    Numeric-looking fields (ranking, review count, rating, postcode) are
    stored as the matched strings, not converted to numbers.

    Args:
        restaurant_soup: Parsed markup for one listing. It must provide
            ``find`` and its results must provide ``text``, ``find``,
            ``find_all`` and ``get`` (presumably BeautifulSoup tags --
            confirm against the caller).

    Returns:
        A new ``Restaurant`` with whichever of ``ranking``, ``url``,
        ``name``, ``review_count``, ``rating``, ``cuisines``, ``address``,
        ``postcode`` and ``price`` could be extracted.
    """
    restaurant = Restaurant()

    # ranking: leading "<digits>." in the indexed name
    ranking_soup = restaurant_soup.find('span', class_='indexed-biz-name')
    if ranking_soup:
        ranking_match = re.search(r'(\d+)\.', ranking_soup.text)
        if ranking_match:
            restaurant.ranking = ranking_match.group(1)

    # name & link
    link_soup = restaurant_soup.find('a', class_='biz-name')
    if link_soup:
        # An anchor without href must not abort the whole parse.
        href = link_soup.get('href')
        if href is not None:
            restaurant.url = self.base_url + href
        restaurant_name_soup = link_soup.find('span')
        if restaurant_name_soup:
            restaurant.name = restaurant_name_soup.text

    # reviews: first run of digits in the review-count text
    reviews_soup = restaurant_soup.find('span', class_='review-count')
    if reviews_soup:
        reviews_match = re.search(r'(\d+)', reviews_soup.text)
        if reviews_match:
            restaurant.review_count = reviews_match.group(1)

    # rating: a digit with an optional single decimal, read from the
    # element's title attribute (skipped when the attribute is missing)
    rating_soup = restaurant_soup.find('i', class_='star-img')
    if rating_soup:
        rating_title = rating_soup.get('title')
        if rating_title is not None:
            rating_match = re.search(r'(\d(\.\d)?)', rating_title)
            if rating_match:
                restaurant.rating = rating_match.group(1)

    # cuisines: text of every link inside the category list
    cuisines_soup = restaurant_soup.find('span', 'category-str-list')
    if cuisines_soup:
        restaurant.cuisines = [
            cuisine_soup.text for cuisine_soup in cuisines_soup.find_all('a')
        ]

    # address: split around a 5-digit postcode; the text before and after
    # it are joined with ", " to form the address.
    # NOTE(review): the two captured parts are not stripped, so the stored
    # address can carry surrounding whitespace or a trailing ", " -- left
    # as-is to keep the output unchanged; confirm whether that is intended.
    address_soup = restaurant_soup.find('address')
    if address_soup:
        address_match = re.search(
            r'(.*)(\d{5})([\s\S]*)', address_soup.text.strip()
        )
        if address_match:
            restaurant.address = '%s, %s' % (
                address_match.group(1),
                address_match.group(3),
            )
            restaurant.postcode = address_match.group(2)

    # price
    price_soup = restaurant_soup.find('span', class_='price-range')
    if price_soup:
        restaurant.price = price_soup.text

    return restaurant