コード例 #1
0
ファイル: truelocal.py プロジェクト: dsantmajor/trolley
    def parse_item(self, response):
        """Populate a TrueLocalItem from the HTML carried by ``response``.

        ``response.body`` is parsed with BeautifulSoup (lxml parser). The
        page URL is stored as-is; the name, address parts, phone numbers
        and website are extracted with the ``utils.select_first_*`` helpers.

        Args:
            response: object exposing ``body`` (the HTML) and ``url``.

        Yields:
            The single populated TrueLocalItem.
        """
        soup = BeautifulSoup(response.body, 'lxml')
        item = TrueLocalItem()
        item["url"] = response.url

        item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

        # The address fields are searched only inside the address <li>.
        # A page without that element is the one expected failure, so test
        # for an empty result explicitly instead of using a bare ``except:``,
        # which would also hide unrelated errors and KeyboardInterrupt.
        address_nodes = soup.select("li#business-address")
        address = address_nodes[0] if address_nodes else None

        # NOTE(review): ``address`` may be None here. The helpers were
        # already handed None in that case, so they presumably tolerate it
        # -- confirm against utils.
        item["street"] = utils.select_first_text(
            address, "strong[itemprop=streetAddress]")
        item["suburb"] = utils.select_first_text(
            address, "span[itemprop=addressLocality]")
        item["state"] = utils.select_first_text(
            address, "span[itemprop=addressRegion]")

        # Phone numbers are read from the "phonenumber" attribute of the
        # anchor whose "phonetype" attribute matches the kind of number.
        item["local_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=local]", "phonenumber")
        item["mobile_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=mobile]", "phonenumber")
        item["fax_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=fax]", "phonenumber")

        item["website"] = utils.select_first_attribute(
            soup, "li#business-links a.url", "href")
        yield item
コード例 #2
0
    def parse_item(self, response):
        """Yield one AccommodationItem built from the HTML in ``response.body``.

        The "name" field comes from ``utils.select_first_text`` and the
        "email" field from the "href" attribute returned by
        ``utils.select_first_attribute``.
        """
        page = BeautifulSoup(response.body, 'lxml')
        record = AccommodationItem()

        name = utils.select_first_text(page, ".panel.product h1")
        record["name"] = name

        # NOTE(review): the raw href is stored unchanged; if the link is a
        # "mailto:" URL the prefix is kept -- confirm that is intended.
        email_href = utils.select_first_attribute(
            page, ".icon-link-email", "href")
        record["email"] = email_href

        yield record
コード例 #3
0
ファイル: southaustralia.py プロジェクト: johnjiang/trolley
    def parse_item(self, response):
        """Yield one AccommodationItem extracted from ``response.body``.

        "name" is taken from the ``.pagetitle`` selector; "email" is the
        "href" attribute of ``#PageContentUserControl_lnkContactEmail``.
        Both lookups go through the ``utils`` helpers.
        """
        document = BeautifulSoup(response.body, 'lxml')
        accommodation = AccommodationItem()

        title = utils.select_first_text(document, ".pagetitle")
        accommodation["name"] = title

        contact_href = utils.select_first_attribute(
            document, "#PageContentUserControl_lnkContactEmail", "href")
        accommodation["email"] = contact_href

        yield accommodation
コード例 #4
0
    def parse_item(self, response):
        """Parse ``response.body`` and yield a single AccommodationItem.

        Fields set: "name" (via ``utils.select_first_text``) and "email"
        (the "href" attribute via ``utils.select_first_attribute``).
        """
        html = BeautifulSoup(response.body, 'lxml')
        result = AccommodationItem()

        # Selectors are named so each assignment fits on one line.
        name_selector = ".pagetitle"
        email_selector = "#PageContentUserControl_lnkContactEmail"

        result["name"] = utils.select_first_text(html, name_selector)
        result["email"] = utils.select_first_attribute(html, email_selector, "href")
        yield result
コード例 #5
0
ファイル: westernaustralia.py プロジェクト: johnjiang/trolley
    def parse_item(self, response):
        """Yield one AccommodationItem for the page at ``response.url``.

        The URL is loaded through ``self.driver`` and the HTML that the
        driver reports (``page_source``) is parsed, not ``response.body``.
        """
        # NOTE(review): presumably the driver is used so that content
        # rendered by scripts is present in the parsed HTML -- confirm.
        self.driver.get(response.url)
        rendered = BeautifulSoup(self.driver.page_source, 'lxml')

        listing = AccommodationItem()

        title = utils.select_first_text(rendered, "#product_title")
        listing["name"] = title

        email_href = utils.select_first_attribute(rendered, "#email a", "href")
        listing["email"] = email_href

        yield listing
コード例 #6
0
ファイル: truelocal.py プロジェクト: johnjiang/trolley
    def parse_item(self, response):
        """Build and yield a TrueLocalItem from ``response``.

        The HTML in ``response.body`` is parsed with BeautifulSoup using the
        lxml parser. Each item field is filled through the
        ``utils.select_first_text`` / ``utils.select_first_attribute``
        helpers; "url" is copied from ``response.url``.

        Args:
            response: object providing ``body`` and ``url`` attributes.

        Yields:
            One TrueLocalItem.
        """
        soup = BeautifulSoup(response.body, 'lxml')
        item = TrueLocalItem()
        item["url"] = response.url

        item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

        # Only a missing address element should fall back to None. The
        # previous bare ``except:`` hid every other failure as well
        # (including KeyboardInterrupt/SystemExit), so check the match
        # list for emptiness directly.
        matches = soup.select("li#business-address")
        address = matches[0] if matches else None

        # NOTE(review): when ``address`` is None it is still passed to the
        # helpers, exactly as before -- presumably they handle None; verify.
        item["street"] = utils.select_first_text(address, "strong[itemprop=streetAddress]")
        item["suburb"] = utils.select_first_text(address, "span[itemprop=addressLocality]")
        item["state"] = utils.select_first_text(address, "span[itemprop=addressRegion]")

        # Each phone number is the "phonenumber" attribute of the anchor
        # tagged with the corresponding "phonetype".
        item["local_phone"] = utils.select_first_attribute(soup, "a[phonetype=local]", "phonenumber")
        item["mobile_phone"] = utils.select_first_attribute(soup, "a[phonetype=mobile]", "phonenumber")
        item["fax_phone"] = utils.select_first_attribute(soup, "a[phonetype=fax]", "phonenumber")

        item["website"] = utils.select_first_attribute(soup, "li#business-links a.url", "href")
        yield item
コード例 #7
0
 def parse_page(self, soup):
     """Yield a ShopItem for each '.wrapper.clearfix' element in ``soup``.

     For every matched element, ``name`` is looked up with the '.item a'
     selector and ``price`` with the '.price' selector, both through
     ``select_first_text``.
     """
     for entry in soup.select('.wrapper.clearfix'):
         # Keyword arguments are evaluated left to right, so the name is
         # still looked up before the price.
         yield ShopItem(
             name=select_first_text(entry, '.item a'),
             price=select_first_text(entry, '.price'),
         )