def parse_item(self, response):
    """Populate a ``TrueLocalItem`` from a fetched page and yield it.

    Stores the page URL, the name heading, the address parts (street,
    suburb, state), the local/mobile/fax phone numbers and the website
    link on the item.

    Args:
        response: The downloaded page. ``response.body`` is parsed with
            BeautifulSoup (lxml) and ``response.url`` is stored on the item.

    Yields:
        A single ``TrueLocalItem``.
    """
    soup = BeautifulSoup(response.body, 'lxml')
    item = TrueLocalItem()
    item["url"] = response.url
    item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

    # select() returns a list, so test for emptiness explicitly instead of
    # indexing under a bare ``except:``, which would also hide unrelated
    # errors (including KeyboardInterrupt).
    address_matches = soup.select("li#business-address")
    address = address_matches[0] if address_matches else None

    # NOTE(review): when the address block is missing, ``address`` is None
    # and is handed to the helper exactly as before; this assumes
    # utils.select_first_text tolerates None -- confirm.
    item["street"] = utils.select_first_text(
        address, "strong[itemprop=streetAddress]")
    item["suburb"] = utils.select_first_text(
        address, "span[itemprop=addressLocality]")
    item["state"] = utils.select_first_text(
        address, "span[itemprop=addressRegion]")

    # Phone numbers are read from the ``phonenumber`` attribute of the
    # anchor whose ``phonetype`` matches.
    item["local_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=local]", "phonenumber")
    item["mobile_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=mobile]", "phonenumber")
    item["fax_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=fax]", "phonenumber")

    item["website"] = utils.select_first_attribute(
        soup, "li#business-links a.url", "href")
    yield item
def parse(self, response):
    """Yield a request per search result, then one for the next page.

    Args:
        response: The downloaded search-results page; ``response.body`` is
            parsed with BeautifulSoup (lxml).

    Yields:
        ``Request`` objects: one per ``.searchResultHolder`` entry that has
        a heading link (handled by ``self.parse_item``), followed by a
        request for the next results page (handled by ``self.parse``) when
        a next-page link is present.
    """
    soup = BeautifulSoup(response.body, 'lxml')

    for result in soup.select(".searchResultHolder"):
        href = utils.select_first_attribute(
            result, '.searchResultHeading a', 'href')
        # The next-page lookup below already treats a falsy helper result
        # as "no link"; apply the same guard here so a result without a
        # heading link does not produce a bogus "<URL>None" request.
        if not href:
            continue
        yield Request("%s%s" % (self.URL, href), callback=self.parse_item)

    next_page = utils.select_first_attribute(
        soup, "#SearchResults_lvwPageLinks_lnkNext", 'href')
    if next_page:
        yield Request("%s%s" % (self.URL, next_page), callback=self.parse)
def parse(self, response):
    """Yield a request per listed item, then one for the next page.

    Item links are prefixed with ``VisitVictoria.URL``; the next-page link
    is prefixed with ``VisitVictoria.SEARCH``.

    Args:
        response: The downloaded listing page; ``response.body`` is parsed
            with BeautifulSoup (lxml).

    Yields:
        ``Request`` objects: one per ``.item`` entry that has an ``h2 a``
        link (handled by ``self.parse_item``), followed by a request for
        the next page (handled by ``self.parse``) when a ``.next a`` link
        is present.
    """
    soup = BeautifulSoup(response.body, 'lxml')

    for entry in soup.select(".item"):
        href = utils.select_first_attribute(entry, 'h2 a', 'href')
        # The next-page lookup below already treats a falsy helper result
        # as "no link"; apply the same guard here so an entry without a
        # title link does not produce a bogus "<URL>None" request.
        if not href:
            continue
        yield Request("%s%s" % (VisitVictoria.URL, href),
                      callback=self.parse_item)

    next_page = utils.select_first_attribute(soup, '.next a', 'href')
    if next_page:
        yield Request("%s%s" % (VisitVictoria.SEARCH, next_page),
                      callback=self.parse)
def parse(self, response):
    """Walk a search-results page: follow each result, then paginate.

    Args:
        response: The downloaded search-results page; ``response.body`` is
            parsed with BeautifulSoup (lxml).

    Yields:
        ``Request`` objects: one per ``.searchResultHolder`` entry whose
        heading link has an href (callback ``self.parse_item``), then one
        for the next results page (callback ``self.parse``) if the
        next-page link is present.
    """
    soup = BeautifulSoup(response.body, 'lxml')

    for holder in soup.select(".searchResultHolder"):
        link = utils.select_first_attribute(
            holder, '.searchResultHeading a', 'href')
        if link:
            # Only follow results that actually expose a link; formatting
            # a missing value into the URL would request "<URL>None".
            yield Request("%s%s" % (self.URL, link),
                          callback=self.parse_item)

    next_page = utils.select_first_attribute(
        soup, "#SearchResults_lvwPageLinks_lnkNext", 'href')
    if next_page:
        yield Request("%s%s" % (self.URL, next_page), callback=self.parse)
def parse_item(self, response):
    """Extract the name and e-mail link from a page.

    Args:
        response: The downloaded page; ``response.body`` is parsed with
            BeautifulSoup (lxml).

    Yields:
        A single ``AccommodationItem`` with ``name`` taken from the text of
        ``.panel.product h1`` and ``email`` taken from the ``href`` of
        ``.icon-link-email``.
    """
    document = BeautifulSoup(response.body, 'lxml')
    record = AccommodationItem()

    name = utils.select_first_text(document, ".panel.product h1")
    record["name"] = name

    email = utils.select_first_attribute(
        document, ".icon-link-email", "href")
    record["email"] = email

    yield record
def parse_item(self, response):
    """Extract the name and contact e-mail link from a page.

    Args:
        response: The downloaded page; ``response.body`` is parsed with
            BeautifulSoup (lxml).

    Yields:
        A single ``AccommodationItem`` with ``name`` taken from the text of
        ``.pagetitle`` and ``email`` taken from the ``href`` of
        ``#PageContentUserControl_lnkContactEmail``.
    """
    document = BeautifulSoup(response.body, 'lxml')
    record = AccommodationItem()

    name = utils.select_first_text(document, ".pagetitle")
    record["name"] = name

    email = utils.select_first_attribute(
        document, "#PageContentUserControl_lnkContactEmail", "href")
    record["email"] = email

    yield record
def parse_item(self, response):
    """Build an ``AccommodationItem`` from the page title and e-mail link.

    Args:
        response: The downloaded page; ``response.body`` is parsed with
            BeautifulSoup (lxml).

    Yields:
        One ``AccommodationItem`` whose ``name`` is the text of
        ``.pagetitle`` and whose ``email`` is the ``href`` of
        ``#PageContentUserControl_lnkContactEmail``.
    """
    parsed = BeautifulSoup(response.body, 'lxml')
    result = AccommodationItem()

    # Field values come straight from the shared selector helpers.
    title_text = utils.select_first_text(parsed, ".pagetitle")
    result["name"] = title_text

    email_href = utils.select_first_attribute(
        parsed, "#PageContentUserControl_lnkContactEmail", "href")
    result["email"] = email_href

    yield result
def parse_item(self, response):
    """Extract the name and e-mail link from a driver-rendered page.

    The URL is loaded through ``self.driver`` and the markup is taken from
    the driver's ``page_source`` instead of ``response.body``.
    NOTE(review): presumably this is a browser driver used because the
    page needs rendering -- confirm.

    Args:
        response: The downloaded page; only ``response.url`` is used.

    Yields:
        A single ``AccommodationItem`` with ``name`` taken from the text of
        ``#product_title`` and ``email`` taken from the ``href`` of
        ``#email a``.
    """
    browser = self.driver
    browser.get(response.url)
    document = BeautifulSoup(browser.page_source, 'lxml')

    record = AccommodationItem()

    name = utils.select_first_text(document, "#product_title")
    record["name"] = name

    email = utils.select_first_attribute(document, "#email a", "href")
    record["email"] = email

    yield record
def parse(self, response):
    """Yield a request per search result, then one for the next page.

    Result links are requested as-is (no base URL is prepended); the
    next-page link is prefixed with ``self.URL``.

    Args:
        response: The downloaded results page; ``response.body`` is parsed
            with BeautifulSoup (lxml).

    Yields:
        ``Request`` objects: one per ``.resultWrapper`` entry that has a
        title link (handled by ``self.parse_item``), followed by a request
        for the next page (handled by ``self.parse``) when the bottom
        paging block contains a "next" control.
    """
    soup = BeautifulSoup(response.body, 'lxml')

    for result in soup.select(".resultWrapper"):
        href = utils.select_first_attribute(
            result, '.resultLeft h3 a', 'href')
        # Skip results without a link rather than building a Request from
        # a missing URL, which would abort the rest of this generator.
        if not href:
            continue
        yield Request(href, callback=self.parse_item)

    # select() returns a list; a page without the bottom paging block
    # yields an empty one, so stop here instead of raising IndexError.
    paging_blocks = soup.select("[id*=pagingInfo_Bottom]")
    if not paging_blocks:
        return
    page_info = paging_blocks[0]

    # The "next" control is only used as a presence check; the link that
    # is followed is the last anchor inside the paging block's spans.
    if not page_info.select("[id*=Results_cmdNext_Bottom]"):
        return

    page_links = page_info.select('span a')
    next_href = page_links[-1].get('href') if page_links else None
    if next_href:
        yield Request("%s%s" % (self.URL, next_href), callback=self.parse)
def parse_item(self, response):
    """Populate a ``TrueLocalItem`` from a fetched page and yield it.

    Stores the page URL, the name heading, the address parts (street,
    suburb, state), the local/mobile/fax phone numbers and the website
    link on the item.

    Args:
        response: The downloaded page. ``response.body`` is parsed with
            BeautifulSoup (lxml) and ``response.url`` is stored on the item.

    Yields:
        A single ``TrueLocalItem``.
    """
    soup = BeautifulSoup(response.body, 'lxml')
    item = TrueLocalItem()
    item["url"] = response.url
    item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

    # An explicit emptiness check replaces the former bare ``except:``
    # around ``[0]``, which silently swallowed every kind of exception.
    address_matches = soup.select("li#business-address")
    address = address_matches[0] if address_matches else None

    # NOTE(review): ``address`` may be None here, as in the original flow;
    # assumes utils.select_first_text accepts None -- confirm.
    address_fields = (
        ("street", "strong[itemprop=streetAddress]"),
        ("suburb", "span[itemprop=addressLocality]"),
        ("state", "span[itemprop=addressRegion]"),
    )
    for field, selector in address_fields:
        item[field] = utils.select_first_text(address, selector)

    # Each phone number is the ``phonenumber`` attribute of the anchor
    # whose ``phonetype`` matches the field.
    phone_fields = (
        ("local_phone", "a[phonetype=local]"),
        ("mobile_phone", "a[phonetype=mobile]"),
        ("fax_phone", "a[phonetype=fax]"),
    )
    for field, selector in phone_fields:
        item[field] = utils.select_first_attribute(
            soup, selector, "phonenumber")

    item["website"] = utils.select_first_attribute(
        soup, "li#business-links a.url", "href")
    yield item