def parse_item(self, response):
    """Scrape one business listing page into a ``TrueLocalItem``.

    The response body is parsed with BeautifulSoup (``lxml`` parser) and the
    item's fields are filled through the ``utils.select_first_*`` helpers.

    Args:
        response: Object exposing ``url`` and ``body``; presumably a Scrapy
            response -- confirm against the spider that registers this
            callback.

    Yields:
        A single ``TrueLocalItem`` with ``url``, ``name``, ``street``,
        ``suburb``, ``state``, ``local_phone``, ``mobile_phone``,
        ``fax_phone`` and ``website`` set.
    """
    soup = BeautifulSoup(response.body, 'lxml')
    item = TrueLocalItem()
    item["url"] = response.url
    item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

    # A page without an address block makes ``select`` return an empty list,
    # so indexing raises IndexError. Catch only that: a bare ``except`` would
    # also hide parser failures and swallow KeyboardInterrupt/SystemExit.
    try:
        address = soup.select("li#business-address")[0]
    except IndexError:
        address = None

    # NOTE(review): ``address`` may be None here; the utils helpers are
    # assumed to tolerate a None root, as the original code relied on that.
    item["street"] = utils.select_first_text(
        address, "strong[itemprop=streetAddress]")
    item["suburb"] = utils.select_first_text(
        address, "span[itemprop=addressLocality]")
    item["state"] = utils.select_first_text(
        address, "span[itemprop=addressRegion]")

    # Phone numbers are read from the ``phonenumber`` attribute of the
    # anchor whose ``phonetype`` matches.
    item["local_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=local]", "phonenumber")
    item["mobile_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=mobile]", "phonenumber")
    item["fax_phone"] = utils.select_first_attribute(
        soup, "a[phonetype=fax]", "phonenumber")

    item["website"] = utils.select_first_attribute(
        soup, "li#business-links a.url", "href")
    yield item
def parse_item(self, response):
    """Build an ``AccommodationItem`` from a product panel page.

    Args:
        response: Object exposing ``body``; presumably a Scrapy response --
            confirm against the owning spider.

    Yields:
        One ``AccommodationItem`` whose ``name`` comes from the
        ``.panel.product h1`` selector and whose ``email`` is the ``href``
        looked up with the ``.icon-link-email`` selector.
    """
    page = BeautifulSoup(response.body, 'lxml')
    record = AccommodationItem()

    title = utils.select_first_text(page, ".panel.product h1")
    record["name"] = title

    # The raw href is stored as-is; no prefix stripping happens here.
    email_href = utils.select_first_attribute(
        page, ".icon-link-email", "href")
    record["email"] = email_href

    yield record
def parse_item(self, response):
    """Build an ``AccommodationItem`` from a page with a ``.pagetitle`` heading.

    Args:
        response: Object exposing ``body``; presumably a Scrapy response --
            confirm against the owning spider.

    Yields:
        One ``AccommodationItem`` with ``name`` taken via the ``.pagetitle``
        selector and ``email`` taken from the ``href`` of the element with
        id ``PageContentUserControl_lnkContactEmail``.
    """
    document = BeautifulSoup(response.body, 'lxml')
    result = AccommodationItem()

    result["name"] = utils.select_first_text(document, ".pagetitle")

    # The contact link is addressed by its element id.
    contact_selector = "#PageContentUserControl_lnkContactEmail"
    result["email"] = utils.select_first_attribute(
        document, contact_selector, "href")

    yield result
def parse_item(self, response):
    """Extract the accommodation name and contact e-mail link from a page.

    Args:
        response: Object exposing ``body``; presumably a Scrapy response --
            confirm against the owning spider.

    Yields:
        One ``AccommodationItem`` with two fields set: ``name`` (looked up
        with ``.pagetitle``) and ``email`` (the ``href`` looked up with
        ``#PageContentUserControl_lnkContactEmail``).
    """
    parsed = BeautifulSoup(response.body, 'lxml')
    accommodation = AccommodationItem()

    heading = utils.select_first_text(parsed, ".pagetitle")
    accommodation["name"] = heading

    mail_link = utils.select_first_attribute(
        parsed, "#PageContentUserControl_lnkContactEmail", "href")
    accommodation["email"] = mail_link

    yield accommodation
def parse_item(self, response):
    """Load the page through ``self.driver`` and scrape name and e-mail.

    Unlike a plain body parse, the markup comes from
    ``self.driver.page_source`` after ``self.driver.get(response.url)``.
    NOTE(review): this looks like a Selenium WebDriver used to render the
    page -- confirm where ``self.driver`` is created.

    Args:
        response: Object exposing ``url``; its body is not read here.

    Yields:
        One ``AccommodationItem`` with ``name`` (selector ``#product_title``)
        and ``email`` (the ``href`` found with selector ``#email a``).
    """
    # Navigate first so that page_source reflects the requested URL.
    self.driver.get(response.url)
    rendered_html = self.driver.page_source
    page = BeautifulSoup(rendered_html, 'lxml')

    entry = AccommodationItem()
    entry["name"] = utils.select_first_text(page, "#product_title")
    entry["email"] = utils.select_first_attribute(page, "#email a", "href")
    yield entry
def parse_item(self, response):
    """Turn a business listing response into a populated ``TrueLocalItem``.

    Parses ``response.body`` with BeautifulSoup (``lxml`` parser), then reads
    each field through the ``utils.select_first_text`` /
    ``utils.select_first_attribute`` helpers.

    Args:
        response: Object exposing ``url`` and ``body``; presumably a Scrapy
            response -- confirm against the owning spider.

    Yields:
        One ``TrueLocalItem`` carrying ``url``, ``name``, the address parts
        (``street``, ``suburb``, ``state``), the three phone fields and
        ``website``.
    """
    soup = BeautifulSoup(response.body, 'lxml')
    item = TrueLocalItem()
    item["url"] = response.url
    item["name"] = utils.select_first_text(soup, "h1[itemprop=name]")

    # The address container is optional. An empty result list raises
    # IndexError on ``[0]``; that is the only error handled here, so real
    # failures (and KeyboardInterrupt/SystemExit) are no longer silenced the
    # way the previous bare ``except`` did.
    try:
        address = soup.select("li#business-address")[0]
    except IndexError:
        address = None

    # NOTE(review): when ``address`` is None the helpers below are assumed to
    # cope with a None root; the original code depended on the same thing.
    item["street"] = utils.select_first_text(address, "strong[itemprop=streetAddress]")
    item["suburb"] = utils.select_first_text(address, "span[itemprop=addressLocality]")
    item["state"] = utils.select_first_text(address, "span[itemprop=addressRegion]")

    # Each phone number is taken from the ``phonenumber`` attribute of the
    # anchor with the matching ``phonetype``.
    item["local_phone"] = utils.select_first_attribute(soup, "a[phonetype=local]", "phonenumber")
    item["mobile_phone"] = utils.select_first_attribute(soup, "a[phonetype=mobile]", "phonenumber")
    item["fax_phone"] = utils.select_first_attribute(soup, "a[phonetype=fax]", "phonenumber")

    item["website"] = utils.select_first_attribute(soup, "li#business-links a.url", "href")
    yield item
def parse_page(self, soup):
    """Yield a ``ShopItem`` for every ``.wrapper.clearfix`` element in *soup*.

    Args:
        soup: Object with a ``select(css_selector)`` method returning an
            iterable of elements; presumably a BeautifulSoup tree -- confirm
            against the caller.

    Yields:
        ``ShopItem(name=..., price=...)`` per matched element, where ``name``
        is looked up with the ``.item a`` selector and ``price`` with the
        ``.price`` selector, both through ``select_first_text``.
    """
    product_rows = soup.select('.wrapper.clearfix')
    for row in product_rows:
        yield ShopItem(
            name=select_first_text(row, '.item a'),
            price=select_first_text(row, '.price'),
        )