Beispiel #1
0
    def _parse_restaurant(self, restaurant_soup):
        """Build a ``Restaurant`` from the markup of one result entry.

        Every field is looked up independently and only assigned when its
        markup (and, where needed, its attribute or regex match) is present,
        so an incomplete entry yields a partially filled ``Restaurant``
        rather than an exception.

        Args:
            restaurant_soup: Parsed markup of a single entry. It must support
                ``find``/``find_all`` and attribute access via ``get`` as used
                below (presumably a BeautifulSoup ``Tag`` -- confirm against
                the caller).

        Returns:
            The populated ``Restaurant``. ``ranking``, ``review_count``,
            ``rating`` and ``postcode`` are stored as the matched strings;
            no numeric conversion is performed here.
        """
        restaurant = Restaurant()

        # ranking: digits immediately followed by a dot, e.g. "3."
        ranking_soup = restaurant_soup.find('span', class_='indexed-biz-name')
        if ranking_soup:
            ranking_match = re.search(r'(\d+)\.', ranking_soup.text)
            if ranking_match:
                restaurant.ranking = ranking_match.group(1)

        # name & link
        link_soup = restaurant_soup.find('a', class_='biz-name')
        if link_soup:
            # .get() keeps an anchor without href from raising KeyError; the
            # url is simply left unset, like every other missing field.
            href = link_soup.get('href')
            if href:
                restaurant.url = self.base_url + href
            restaurant_name_soup = link_soup.find('span')
            if restaurant_name_soup:
                restaurant.name = restaurant_name_soup.text

        # reviews: first run of digits in the review-count text
        reviews_soup = restaurant_soup.find('span', class_='review-count')
        if reviews_soup:
            reviews_match = re.search(r'(\d+)', reviews_soup.text)
            if reviews_match:
                restaurant.review_count = reviews_match.group(1)

        # rating: one digit with an optional single decimal, read from the
        # title attribute of the star icon
        rating_soup = restaurant_soup.find('i', class_='star-img')
        if rating_soup:
            rating_match = re.search(
                r'(\d(\.\d)?)', rating_soup.get('title') or '')
            if rating_match:
                restaurant.rating = rating_match.group(1)

        # cuisines: text of every link inside the category list
        cuisines_soup = restaurant_soup.find('span', 'category-str-list')
        if cuisines_soup:
            restaurant.cuisines = [
                cuisine_soup.text
                for cuisine_soup in cuisines_soup.find_all('a')
            ]

        # address
        address_soup = restaurant_soup.find('address')
        if address_soup:
            # Split around a 5-digit postcode. `.` does not cross newlines,
            # so group 1 only holds text from the line the postcode is on;
            # group 3 is everything after the postcode, newlines included.
            address_match = re.search(
                r'(.*)(\d{5})([\s\S]*)', address_soup.text.strip())
            if address_match:
                restaurant.address = '%s, %s' % (
                    address_match.group(1), address_match.group(3))
                restaurant.postcode = address_match.group(2)

        # price
        price_soup = restaurant_soup.find('span', class_='price-range')
        if price_soup:
            restaurant.price = price_soup.text

        return restaurant
    def _parse_restaurant(self, restaurant_soup):
        """Build a ``Restaurant`` from the markup of one listing entry.

        Fields whose markup is absent are left unset, except
        ``review_count`` (defaults to ``0``) and ``cuisines`` (always a
        list, possibly empty).

        Args:
            restaurant_soup: Parsed markup of a single entry. It must support
                ``find``/``find_all`` and attribute access via ``get`` as used
                below (presumably a BeautifulSoup ``Tag`` -- confirm against
                the caller).

        Returns:
            The populated ``Restaurant``. When a count is found,
            ``review_count`` is the digit string with thousands separators
            removed (e.g. ``"1234"``); otherwise it stays the integer ``0``.
        """
        restaurant = Restaurant()

        # name and url
        title_soup = restaurant_soup.find('a', class_='property_title')
        if title_soup is not None:
            restaurant.name = title_soup.text.strip()
            # .get() keeps an anchor without href from raising KeyError.
            href = title_soup.get('href')
            if href:
                restaurant.url = self.base_url + href.strip()

        # price
        price_soup = restaurant_soup.find('span', class_='price_range')
        if price_soup is not None:
            restaurant.price = price_soup.text.strip()

        # reviews
        restaurant.review_count = 0
        reviews_soup = restaurant_soup.find('span', class_='reviewCount')
        if reviews_soup is not None:
            # Prefer the nested link's text; fall back to the span itself
            # when there is no link instead of failing on None.
            count_soup = reviews_soup.a
            if count_soup is None:
                count_soup = reviews_soup
            # Leading number with any amount of '.'/',' grouping. The earlier
            # pattern required at least two digits, so a single-digit count
            # never matched and was reported as 0.
            reviews_match = re.match(
                r'\d+(?:[.,]\d+)*', count_soup.text.strip())
            if reviews_match:
                restaurant.review_count = (
                    reviews_match.group(0).replace('.', '').replace(',', ''))

        # rating: the alt text is handed to the rating helpers unchanged
        # apart from stripping surrounding whitespace
        rating_soup = restaurant_soup.find('img', class_='sprite-ratings')
        if rating_soup is not None:
            alt_text = rating_soup.get('alt')
            if alt_text is not None:
                rating = alt_text.strip()
                restaurant.rating = self._parse_rating(rating)
                restaurant.max_rating = self._parse_max_rating(rating)

        # ranking: dots are removed before the text reaches the helpers
        ranking_soup = restaurant_soup.find('div', class_='popIndexDefault')
        if ranking_soup is not None:
            ranking = ranking_soup.text.strip().replace('.', '')
            restaurant.ranking = self._parse_ranking(ranking)
            restaurant.ranking_len = self._parse_ranking_len(ranking)

        # cuisines: find_all yields an empty result when nothing matches,
        # so the attribute is always assigned a list
        restaurant.cuisines = [
            tag.text.strip()
            for tag in restaurant_soup.find_all('a', class_='cuisine')
        ]
        return restaurant