def parse_item(self, response):
    """Populate an ``IeBusinessItem`` from ``response`` and return it.

    Values are collected through an ``IeBusinessLoader``. Some fields
    (``address``, ``business_type``) are given more than one candidate
    value; how those candidates are combined is decided by the loader's
    processors, which are defined outside this method.

    Args:
        response: The response the loader runs its XPath queries against;
            its ``url`` attribute is stored as the item's ``url``.

    Returns:
        The item produced by ``loader.load_item()``.
    """
    loader = IeBusinessLoader(item=IeBusinessItem(), response=response)
    loader.add_value("source", self.name)
    loader.add_value("url", response.url)

    # Core details taken from the table cells / meta tags.
    loader.add_xpath("name", '//td[@itemprop="name"]/text()')
    loader.add_xpath("business_type", '//meta[@property="og:type"]/@content')
    # Second candidate for business_type, added after the og:type lookup.
    loader.add_value("business_type", "company")

    # Two alternative locations for the address; both are offered to the loader.
    loader.add_xpath(
        "address", '//td[text()="Address:"]/following-sibling::td[1]/text()'
    )
    loader.add_xpath("address", '//td[@itemprop="location"]/text()')

    # Coordinates are pulled out of <script> elements with a regex.
    # Raw strings keep the backslashes literal without relying on Python
    # passing unknown escape sequences (\s, \d, ...) through unchanged.
    loader.add_xpath("latitude", "//script", re=r'Latitude"\s*:\s*([\d\-\.]+)')
    loader.add_xpath("longitude", "//script", re=r'Longitude"\s*:\s*([\d\-\.]+)')

    loader.add_xpath("phone_number", '//td[@itemprop="phone"]/text()')
    loader.add_xpath(
        "website", '//td[text()="Web:"]/following-sibling::td[1]/a/text()'
    )

    # NOTE: postal code, e-mail, fax number and country are not extracted
    # by this method.
    return loader.load_item()
def parse_item(self, response):
    """Populate an ``IeBusinessItem`` from ``response`` and return it.

    Values are collected through an ``IeBusinessLoader`` from three kinds
    of markup: ``og:*`` meta tags, an ``itemprop="telephone"`` div, and
    elements carrying ``v:*`` property attributes. Several fields receive
    more than one candidate value; candidates are added in a fixed order
    (``og:*`` first, ``v:*`` last) and how they are combined is decided by
    the loader's processors, which are defined outside this method.

    Args:
        response: The response the loader runs its XPath queries against;
            its ``url`` attribute is stored as the item's ``source``.

    Returns:
        The item produced by ``loader.load_item()``.
    """
    loader = IeBusinessLoader(item=IeBusinessItem(), response=response)
    loader.add_value("source", response.url)

    # Standard fields: (item field, meta property) pairs read from
    # <meta property="..." content="...">.
    meta_fields = (
        ("name", "og:title"),
        ("business_type", "og:type"),
        # og:url is looked up once; repeating the same XPath would only
        # append duplicate values to the "url" field.
        ("url", "og:url"),
        ("address", "og:street-address"),
        ("latitude", "og:latitude"),
        ("longitude", "og:longitude"),
        ("postal_code", "og:postal-code"),
        ("email", "og:email"),
        ("phone_number", "og:phone_number"),
        ("fax_number", "og:fax_number"),
        ("country", "og:country-name"),
    )
    for field, prop in meta_fields:
        loader.add_xpath(field, f'//meta[@property="{prop}"]/@content')

    # Additional phone candidate, added after the og:phone_number lookup.
    loader.add_xpath("phone_number", '//div[@itemprop="telephone"]/text()')

    # Pages that have a dedicated business layout expose v:* properties.
    loader.add_xpath("country", '//span[@property="v:name"]/text()')
    # Presumably every listing is a company; added after the og:type lookup.
    loader.add_value("business_type", "company")
    loader.add_xpath("address", '//span[@property="v:street-address"]/text()')
    loader.add_xpath("website", '//a[@property="v:url"]/text()')
    loader.add_xpath("phone_number", '//span[@property="v:tel"]/text()')

    return loader.load_item()