Example #1
0
    def parse_item(self, response):
        """Build one TrueLocalItem from a business detail page.

        Parses ``response.body`` with BeautifulSoup and fills the item's
        ``url``, ``name``, ``street``, ``suburb``, ``state``,
        ``local_phone``, ``mobile_phone``, ``fax_phone`` and ``website``
        fields using the ``utils`` selector helpers.

        Yields:
            The populated TrueLocalItem (exactly one per response).
        """
        soup = BeautifulSoup(response.body, 'lxml')
        item = TrueLocalItem()
        item["url"] = response.url

        item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

        # select() returns a list, so a page without an address block gives
        # an empty list. Check for that explicitly instead of using a bare
        # ``except:``, which would also hide unrelated errors and swallow
        # KeyboardInterrupt/SystemExit.
        address_matches = soup.select("li#business-address")
        address = address_matches[0] if address_matches else None

        # NOTE(review): ``address`` may be None here; this relies on
        # utils.select_first_text tolerating a None root -- confirm in utils.
        item["street"] = utils.select_first_text(
            address, "strong[itemprop=streetAddress]")
        item["suburb"] = utils.select_first_text(
            address, "span[itemprop=addressLocality]")
        item["state"] = utils.select_first_text(
            address, "span[itemprop=addressRegion]")

        item["local_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=local]", "phonenumber")
        item["mobile_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=mobile]", "phonenumber")
        item["fax_phone"] = utils.select_first_attribute(
            soup, "a[phonetype=fax]", "phonenumber")

        item["website"] = utils.select_first_attribute(
            soup, "li#business-links a.url", "href")
        yield item
Example #2
0
    def parse(self, response):
        """Yield a detail-page request per search result, then the next page.

        Each ``.searchResultHolder`` element contributes one Request whose
        URL is ``self.URL`` followed by the href of its
        ``.searchResultHeading a`` link, handled by ``self.parse_item``.
        If a "next" pagination link is found, a Request for it is yielded
        with this method as the callback.
        """
        soup = BeautifulSoup(response.body, 'lxml')

        for result in soup.select(".searchResultHolder"):
            href = utils.select_first_attribute(result, '.searchResultHeading a', 'href')
            # A result without a heading link would otherwise be formatted
            # into the URL as-is (e.g. "<URL>None") and requested.
            # Presumably the helper returns a falsy value when nothing
            # matches, as the pagination check below already relies on.
            if not href:
                continue
            yield Request("%s%s" % (self.URL, href), callback=self.parse_item)

        next_page = utils.select_first_attribute(soup, "#SearchResults_lvwPageLinks_lnkNext", 'href')
        if next_page:
            yield Request("%s%s" % (self.URL, next_page), callback=self.parse)
Example #3
0
    def parse(self, response):
        """Yield a detail-page request per listing, then the next page.

        Each ``.item`` element contributes one Request whose URL is
        ``VisitVictoria.URL`` followed by the href of its ``h2 a`` link,
        handled by ``self.parse_item``. If a ``.next a`` link is found, a
        Request for ``VisitVictoria.SEARCH`` followed by that href is
        yielded with this method as the callback.
        """
        soup = BeautifulSoup(response.body, 'lxml')

        for listing in soup.select(".item"):
            href = utils.select_first_attribute(listing, 'h2 a', 'href')
            # Skip listings with no title link; formatting a missing href
            # into the URL would request a bogus address such as
            # "<URL>None". Presumably the helper returns a falsy value when
            # nothing matches, as the pagination check below relies on.
            if not href:
                continue
            yield Request("%s%s" % (VisitVictoria.URL, href), callback=self.parse_item)

        next_page = utils.select_first_attribute(soup, '.next a', 'href')
        if next_page:
            yield Request("%s%s" % (VisitVictoria.SEARCH, next_page), callback=self.parse)
Example #4
0
    def parse(self, response):
        """Crawl one search-results page.

        Yields a Request (callback ``self.parse_item``) for every
        ``.searchResultHolder`` element that has a heading link, built as
        ``self.URL`` + the link's href, and finally a Request for the next
        results page (callback ``self.parse``) when a "next" link exists.
        """
        soup = BeautifulSoup(response.body, 'lxml')

        for result in soup.select(".searchResultHolder"):
            href = utils.select_first_attribute(
                result, '.searchResultHeading a', 'href')
            # Guard against results lacking a heading link: without it the
            # missing value would be interpolated into the URL and a
            # request for a non-existent page would be scheduled.
            # Presumably the helper returns a falsy value on no match, as
            # the ``if next_page`` check below already assumes.
            if not href:
                continue
            yield Request("%s%s" % (self.URL, href),
                          callback=self.parse_item)

        next_page = utils.select_first_attribute(
            soup, "#SearchResults_lvwPageLinks_lnkNext", 'href')
        if next_page:
            yield Request("%s%s" % (self.URL, next_page), callback=self.parse)
Example #5
0
    def parse_item(self, response):
        """Yield one AccommodationItem built from the response body.

        ``name`` is the result of utils.select_first_text for
        ".panel.product h1"; ``email`` is the result of
        utils.select_first_attribute for the "href" of ".icon-link-email".
        """
        page = BeautifulSoup(response.body, 'lxml')
        record = AccommodationItem()

        # Look both values up first, then store them in the same order as
        # before (name, then email).
        name = utils.select_first_text(page, ".panel.product h1")
        email = utils.select_first_attribute(page, ".icon-link-email", "href")

        record["name"] = name
        record["email"] = email
        yield record
Example #6
0
    def parse_item(self, response):
        """Yield one AccommodationItem for the page in ``response``.

        The item's ``name`` is taken via utils.select_first_text from
        ".pagetitle" and its ``email`` via utils.select_first_attribute
        from the "href" of "#PageContentUserControl_lnkContactEmail".
        """
        document = BeautifulSoup(response.body, 'lxml')

        accommodation = AccommodationItem()

        title = utils.select_first_text(document, ".pagetitle")
        contact_href = utils.select_first_attribute(
            document, "#PageContentUserControl_lnkContactEmail", "href")

        accommodation["name"] = title
        accommodation["email"] = contact_href
        yield accommodation
Example #7
0
    def parse_item(self, response):
        """Extract a single AccommodationItem from a detail page.

        Parses ``response.body`` and yields an item with two fields:
        ``name`` (utils.select_first_text on ".pagetitle") and ``email``
        (utils.select_first_attribute "href" on
        "#PageContentUserControl_lnkContactEmail").
        """
        markup = BeautifulSoup(response.body, 'lxml')

        result = AccommodationItem()

        email_selector = "#PageContentUserControl_lnkContactEmail"
        result["name"] = utils.select_first_text(markup, ".pagetitle")
        result["email"] = utils.select_first_attribute(markup, email_selector, "href")
        yield result
Example #8
0
    def parse_item(self, response):
        """Yield one AccommodationItem using markup fetched via ``self.driver``.

        The response body is not parsed: the URL is loaded through
        ``self.driver`` and its ``page_source`` is parsed instead.
        ``name`` comes from utils.select_first_text on "#product_title" and
        ``email`` from utils.select_first_attribute "href" on "#email a".
        """
        # NOTE(review): self.driver looks like a Selenium WebDriver
        # (get / page_source) -- confirm where it is created.
        driver = self.driver
        driver.get(response.url)
        rendered = BeautifulSoup(driver.page_source, 'lxml')

        entry = AccommodationItem()

        title = utils.select_first_text(rendered, "#product_title")
        email_href = utils.select_first_attribute(rendered, "#email a", "href")

        entry["name"] = title
        entry["email"] = email_href
        yield entry
Example #9
0
    def parse(self, response):
        """Yield a detail request per result, then the next results page.

        Each ``.resultWrapper`` element contributes a Request for the href
        of its ``.resultLeft h3 a`` link (used as-is, without a base URL),
        handled by ``self.parse_item``. When the bottom paging block
        contains a "next" control, the href of its last ``span a`` link is
        appended to ``self.URL`` and requested with this method as the
        callback.
        """
        soup = BeautifulSoup(response.body, 'lxml')

        for result in soup.select(".resultWrapper"):
            href = utils.select_first_attribute(result, '.resultLeft h3 a', 'href')
            # Request needs a real URL; a missing link would otherwise
            # abort this generator and drop every remaining result as well
            # as pagination. Presumably the helper returns a falsy value
            # when nothing matches -- confirm in utils.
            if not href:
                continue
            yield Request(href, callback=self.parse_item)

        # select() returns a list; pages without a paging block (e.g. a
        # single page of results) must not raise IndexError here.
        paging_blocks = soup.select("[id*=pagingInfo_Bottom]")
        if not paging_blocks:
            return
        page_info = paging_blocks[0]

        if not page_info.select("[id*=Results_cmdNext_Bottom]"):
            return

        # The last link inside the paging block is used as the "next" link.
        page_links = page_info.select('span a')
        next_href = page_links[-1].get('href') if page_links else None
        if next_href:
            yield Request("%s%s" % (self.URL, next_href), callback=self.parse)
Example #10
0
    def parse_item(self, response):
        """Build one TrueLocalItem from a business detail page.

        Parses ``response.body`` with BeautifulSoup and fills the item's
        ``url``, ``name``, address fields (``street``, ``suburb``,
        ``state``), phone fields (``local_phone``, ``mobile_phone``,
        ``fax_phone``) and ``website`` using the ``utils`` selector helpers.

        Yields:
            The populated TrueLocalItem (exactly one per response).
        """
        soup = BeautifulSoup(response.body, 'lxml')
        item = TrueLocalItem()
        item["url"] = response.url

        item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

        # An absent address block simply yields an empty list from
        # select(); test for that directly rather than catching everything
        # with a bare ``except:`` (which also traps KeyboardInterrupt and
        # masks genuine bugs).
        address_matches = soup.select("li#business-address")
        address = address_matches[0] if address_matches else None

        # NOTE(review): ``address`` can be None below; this assumes
        # utils.select_first_text accepts a None root -- confirm in utils.
        item["street"] = utils.select_first_text(address, "strong[itemprop=streetAddress]")
        item["suburb"] = utils.select_first_text(address, "span[itemprop=addressLocality]")
        item["state"] = utils.select_first_text(address, "span[itemprop=addressRegion]")

        item["local_phone"] = utils.select_first_attribute(soup, "a[phonetype=local]", "phonenumber")
        item["mobile_phone"] = utils.select_first_attribute(soup, "a[phonetype=mobile]", "phonenumber")
        item["fax_phone"] = utils.select_first_attribute(soup, "a[phonetype=fax]", "phonenumber")

        item["website"] = utils.select_first_attribute(soup, "li#business-links a.url", "href")
        yield item