def parse(self, response):
    """Yield one GeojsonPointItem per entry of the JSON ``restaurants`` list.

    The response body is decoded as JSON; every restaurant record supplies
    the address parts, coordinates and phone number for a single item.
    """
    # ``response.text`` is the supported replacement for the deprecated
    # ``TextResponse.body_as_unicode()``.
    data = json.loads(response.text)
    for store in data['restaurants']:
        addr_full = "{}, {}, {} {}".format(
            store['streetaddress'], store['city'], store['state'],
            store['zip'])
        properties = {
            "ref": store['id'],
            "name": store['name'],
            "addr_full": addr_full,
            "city": store['city'],
            "state": store['state'],
            "postcode": store['zip'],
            "country": store['country'],
            "lon": float(store['longitude']),
            "lat": float(store['latitude']),
            "phone": store['telephone'],
        }
        yield GeojsonPointItem(**properties)
def parse_location(self, response):
    """Yield a GeojsonPointItem built from a location page.

    Address data comes from the page's ld+json script (the one mentioning
    ``streetAddress``); coordinates are scraped from the second script inside
    the ``clearfix map_equipment`` div; ``ref`` is the last path segment of
    the URL (an optional ``.html`` suffix or trailing slash is ignored).

    Raises AttributeError when the URL or the map script does not match the
    expected patterns.
    """
    ref = re.search(r'.+/(.+?)/?(?:\.html|$)', response.url).group(1)
    data = json.loads(response.xpath(
        '//script[@type="application/ld+json" and contains(text(), "streetAddress")]/text()'
    ).extract_first())
    mapdata = response.xpath(
        '//div[@class="clearfix map_equipment"]/script[2]').extract_first()

    # Search once (the original ran the same pattern twice), use a raw string
    # so ``\d`` is not an invalid str escape, and escape the decimal point in
    # the longitude group.
    coords = re.search(
        r'(?:lat":)(-?\d+\.\d+),.*(?:long":)(-?\d*\.\d*)', mapdata)
    lat, lon = coords.group(1), coords.group(2)

    properties = {
        'ref': ref,
        'name': data["name"],
        'addr_full': data["address"]["streetAddress"],
        'city': data["address"]["addressLocality"],
        'state': data["address"]["addressRegion"],
        'postcode': data["address"]["postalCode"],
        'phone': data.get("telephone"),
        'lat': float(lat),
        'lon': float(lon),
        'website': data.get("url")
    }
    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per entry of ``content.restaurants``.

    Each record's ``name`` field is an HTML fragment: the text before the
    first ``<br`` / ``</br`` marker is used as the name, and the tail that
    starts at ``<small>`` is used as the address.
    """
    # The original called ``data.replace(...)`` twice without keeping the
    # result; ``str.replace`` returns a new string, so those calls never had
    # any effect.  Whitespace around JSON tokens is insignificant to
    # ``json.loads`` anyway, so the dead statements are dropped.
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    results = json.loads(response.text)["content"]["restaurants"]

    for data in results:
        raw_name = data['name']
        properties = {
            'ref': data['id'],
            'lon': float(data['longitude']),
            'lat': float(data['latitude']),
        }

        # ``partition`` returns the whole string when the marker is absent,
        # whereas slicing with ``find()`` (-1 when absent) silently dropped
        # the last character.
        contact_info = raw_name.partition("<br")[0]
        properties["name"] = contact_info.partition("</br")[0]

        # From ``<small>`` up to 8 characters before the end, then skip the
        # first 8 characters (presumably the tag plus one separator
        # character, and the closing ``</small>`` -- confirm against live
        # data).
        properties["addr_full"] = raw_name[raw_name.find("<small>"):-8][8:]

        yield GeojsonPointItem(**properties)
def parse_info(self, response):
    """Yield one GeojsonPointItem per row of the JSON array in the response.

    The response body must be a JSON list of objects; the keys read from
    each row are exactly those referenced below.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    data = json.loads(response.text)

    for row in data:
        properties = {
            "ref": row['community_id'],
            "name": row['name'],
            "lat": row['latitude'],
            "lon": row['longitude'],
            "addr_full": row['address1'],
            "city": row['city'],
            "state": row['state'],
            "country": row['country_code'],
            "postcode": row['zip_postal_code'],
            "website": row['website'],
            "phone": row['contact_center_phone'],
        }
        yield GeojsonPointItem(**properties)
def parse_details(self, response):
    """Yield a GeojsonPointItem scraped from a store detail page.

    Name and address parts come from ``itemprop`` microdata; latitude and
    longitude are pulled out of the raw page text with regular expressions
    and default to the empty string when absent.  Opening hours are added
    only when the page lists any.
    """
    # The original pre-initialised every local to "" and then immediately
    # overwrote most of them; it also shadowed the builtin ``zip``.
    name = response.xpath('//h1[@itemprop="name"]/text()').get()
    street = response.xpath('//span[@itemprop="streetAddress"]/text()').get()
    postcode = response.xpath('//span[@itemprop="postalCode"]/text()').get()
    city = response.xpath('//span[@itemprop="addressLocality"]/text()').get()
    phone = response.xpath('//li[@itemprop="telephone"]/a/span/text()').get()
    website = response.xpath('//li[@itemprop="url"]/a/span/text()').get()

    lat_match = re.search(r'lat":([-+]?[0-9]*\.?[0-9]*)', response.text)
    latitude = lat_match.group(1) if lat_match else ""
    lon_match = re.search(r'lng":([-+]?[0-9]*\.?[0-9]*)', response.text)
    longitude = lon_match.group(1) if lon_match else ""

    hours = response.xpath(
        '//p[@itemprop="openingHoursSpecification"]/text()').getall()

    properties = {
        'ref': response.request.url,
        'name': name,
        'city': city,
        'street': street,
        'postcode': postcode,
        'phone': phone,
        'website': website,
        'lat': latitude,
        'lon': longitude
    }
    if hours:
        properties['opening_hours'] = self.process_hours(hours)

    yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem for one store page, including opening hours.

    Coordinates are read from the map container's data attributes, address
    parts from ``itemprop`` microdata, and hours from the
    ``#LocalMapAreaOpenHourBanner`` list.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    properties = {
        'ref': response.url,
        'lat': first('//div[@class="map-container"]/div/@data-latitude'),
        'lon': first('//div[@class="map-container"]/div/@data-longitude'),
        'phone': first('//a[@class="phone-link"]/span/text()'),
        'addr_full': first('//span[@itemprop="streetAddress"]/text()').strip(),
        'name': first('//meta[@itemprop="name legalName"]/@content'),
        # The last character of the locality text is dropped unconditionally.
        'city': first('//span[@itemprop="addressLocality"]/text()')[:-1],
        'state': first('//span[@itemprop="addressRegion"]/text()').strip(),
        'postcode': first('//span[@itemprop="postalCode"]/text()').strip(),
    }

    hours = OpeningHours()
    for row in response.css('#LocalMapAreaOpenHourBanner li.h-day'):
        # Two-letter day abbreviation taken from the start of the day label.
        day = row.xpath('em/span/text()').extract_first().strip()[:2]
        span = row.xpath('em/text()').extract_first().strip(':').strip()
        opens, closes = span.split(' - ')
        hours.add_range(day, opens, closes, '%I:%M %p')
    properties['opening_hours'] = hours.as_opening_hours()

    yield GeojsonPointItem(**properties)
def parse_location(self, response):
    """Return a GeojsonPointItem for a location page.

    Opening hours are decoded from the JSON in the ``data-days`` attribute of
    ``.js-location-hours`` and passed through ``self.normalize_hours`` when
    the attribute is present; otherwise the raw (falsy) value is kept.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    raw_days = response.css('.js-location-hours').xpath(
        '@data-days').extract_first()
    if raw_days:
        opening_hours = self.normalize_hours(json.loads(raw_days))
    else:
        opening_hours = raw_days

    props = {
        'addr_full': first('//meta[@itemprop="streetAddress"]/@content'),
        'lat': float(first('//meta[@itemprop="latitude"]/@content')),
        'lon': float(first('//meta[@itemprop="longitude"]/@content')),
        'city': first('//span[@class="c-address-city"]/text()'),
        'postcode': first('//span[@class="c-address-postal-code"]/text()'),
        'state': first('//abbr[@class="c-address-state"]/text()'),
        'phone': first(
            '//span[@class="c-phone-number-span c-phone-main-number-span"]/text()'),
        'ref': response.url,
        'website': response.url,
        'opening_hours': opening_hours
    }
    return GeojsonPointItem(**props)
def parse(self, response):
    """Yield one GeojsonPointItem per store row on a listing page.

    Street, postcode and city are extracted from the HTML of the address
    ``<p>`` element; coordinates come from the ``lng=``/``lat=`` query
    parameters of the action link.  Fields that cannot be found stay as the
    empty string.  ``ref`` is ``"<lat>_<lon>"``.
    """
    for store in response.xpath('//div[@class="item"]//div[@class="row"]'):
        # ``postcode`` replaces the original local named ``zip``, which
        # shadowed the builtin.
        street = city = postcode = lat = lon = ''

        # default='' keeps re.search() from being handed None (TypeError)
        # when a row has no address paragraph.
        address = store.xpath(
            './/div[@class="col-xs-12 col-sm-6 col-md-3 col-lg-3"]//p'
        ).get(default='')

        match = re.search(r'<p>(.*?)<br>', address)
        if match:
            street = match.group(1)

        match = re.search(r'(\d{5}) (.*?)<\/p>', address)
        if match:
            postcode = match.group(1)
            city = match.group(2)

        position = store.xpath(
            './/div[@class="col-xs-12 col-sm-6 col-md-2 col-lg-2 actions"]'
            '//a/@href').get()
        if position:
            match = re.search(r'lng=(.*?)&lat=(.*?)&', position)
            if match:
                lat = match.group(2)
                lon = match.group(1)

        properties = {
            'ref': f"{lat}_{lon}",
            'street': street,
            'city': city,
            'postcode': postcode,
            'country': 'DE',
            'lat': lat,
            'lon': lon,
        }

        hours = self.parse_hours(store)
        if hours:
            properties["opening_hours"] = hours

        yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page described by meta microdata.

    ``ref`` is the final URL path segment; the name is the ``About-title``
    heading with its leading ``About`` word removed.
    """
    ref = re.search(r'.+/(.+)', response.url).group(1)

    title = response.xpath('//h2[@class="About-title"]/text()').extract_first()
    # The original used ``.strip('About ')``.  ``str.strip`` treats its
    # argument as a *set of characters*, so it also ate any of
    # "A", "b", "o", "u", "t" or spaces from both ends of the real name
    # (e.g. "About Albert" -> "lber").  Remove only the literal prefix.
    name = re.sub(r'^About\s+', '', title.strip()) if title is not None else None

    def meta(prop):
        # Content of the <meta itemprop=...> tag, or None when absent.
        return response.xpath(
            '//meta[@itemprop="%s"]/@content' % prop).extract_first()

    properties = {
        'ref': ref.strip('/'),
        'name': name,
        'addr_full': meta('streetAddress'),
        'city': meta('addressLocality'),
        'state': meta('addressRegion'),
        'postcode': meta('postalCode'),
        'country': meta('addressCountry'),
        'phone': meta('telephone'),
        'website': response.url,
        'lat': float(meta('latitude')),
        'lon': float(meta('longitude')),
    }
    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield items for one page of JSON results and request the next page.

    Every entry of ``results`` becomes a GeojsonPointItem.  While the
    payload's ``next`` field is not ``None`` the spider's ``page_number`` is
    incremented and a request for that page (built from ``start_urls[0]``)
    is yielded.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    data = json.loads(response.text)

    for store in data['results']:
        open_hours = self.parse_hours(store['opening_hours'])
        # Skip empty/None address lines: a plain join raised TypeError on
        # None and left doubled or trailing spaces for empty strings.
        street_lines = [
            store['street_address'],
            store['street_address2'],
            store['street_address3'],
        ]
        properties = {
            "ref": store['url'],
            "name": store['name'],
            "opening_hours": open_hours,
            "website": "https://www.coop.co.uk" + store['url'],
            "addr_full": " ".join(filter(None, street_lines)),
            "city": store['town'],
            "postcode": store['postcode'],
            "country": 'United Kingdom',
            "lon": float(store['position']['x']),
            "lat": float(store['position']['y']),
            "phone": store["phone"],
        }
        yield GeojsonPointItem(**properties)

    if data['next'] is not None:
        self.page_number = self.page_number + 1
        yield scrapy.Request(
            self.start_urls[0] + '&page=' + str(self.page_number))
def parse(self, response):
    """Yield an item for every result on the page, then follow pagination.

    Coordinates are taken from the ``?daddr=<lat>,<lon>`` query of each
    result's "Driving Directions" link.  A request for the ``next`` link is
    yielded after the items when that link exists.
    """
    next_page = response.xpath(
        '//li[@class="next"]/a/@href').extract_first()

    def text(node, query):
        # First XPath match below ``node`` as a string, or None.
        return node.xpath(query).extract_first()

    for store in response.xpath('//ol[contains(@class,"results")]/li'):
        directions = text(
            store, './/a[contains(.,"Driving Directions")]/@href')
        position = re.search(r'\?daddr=(.*),(.*)', directions)

        phone = text(store, './/span[contains(@class,"tel")]/text()')
        if phone:
            phone = phone.replace('- Main', '').strip()

        yield GeojsonPointItem(
            lat=float(position[1]),
            lon=float(position[2]),
            phone=phone,
            website=text(store, './/a[contains(.,"View Our Website")]/@href'),
            ref=text(store, './/div[contains(@class,"org")]/text()'),
            addr_full=text(
                store, './/div[contains(@class,"street-address")]/text()'),
            city=text(
                store, './/span[contains(@class,"locality")]/text()'
            ).rstrip(','),
            state=text(
                store, './/span[contains(@class,"region")]/text()').strip(),
            postcode=text(
                store, './/span[contains(@class,"postal-code")]/text()'
            ).strip(),
            country='USA',
        )

    if next_page:
        yield scrapy.Request(response.urljoin(next_page))
def parse(self, response):
    """Yield one GeojsonPointItem per ``<poi>`` element of the response.

    ``state`` falls back to ``province``; the address falls back from
    ``address1`` to ``address2`` to ``dsply_adr``.  The name is cleaned of
    ``<br>``, ``®`` and ``;`` before use.
    """

    def first_text(selector, *tags):
        # Text of the first listed tag that is present, else None.
        for tag in tags:
            value = selector.xpath(f'//{tag}/text()').get()
            if value is not None:
                return value
        return None

    for poi in Selector(response).xpath('//poi').extract():
        # Parse the fragment once; the original rebuilt ``Selector(text=poi)``
        # for every single field lookup.
        poi_sel = Selector(text=poi)

        state = first_text(poi_sel, 'state', 'province')
        addr = first_text(poi_sel, 'address1', 'address2', 'dsply_adr')

        name = first_text(poi_sel, 'name')
        if name is not None:  # a missing name used to raise AttributeError
            name = name.replace('<br>', '')
            name = name.replace('®', ' ')
            name = name.replace(';', '')
            # As written the original replaced a single space with a single
            # space (a no-op); collapsing the doubled space left behind by
            # the ``®`` substitution is the evident intent.
            name = name.replace('  ', ' ')

        properties = {
            'ref': first_text(poi_sel, 'clientkey'),
            'name': name,
            'addr_full': addr,
            'city': first_text(poi_sel, 'city'),
            'state': state,
            'postcode': first_text(poi_sel, 'postalcode'),
            'country': first_text(poi_sel, 'country'),
            'lat': first_text(poi_sel, 'latitude'),
            'lon': first_text(poi_sel, 'longitude'),
            'phone': first_text(poi_sel, 'phone'),
            'extras': {
                'brand': "Timberland"
            }
        }

        yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page.

    Address parts come from ``itemprop`` microdata, coordinates from
    ``<meta>`` tags.  Opening hours are decoded from the ``data-days`` JSON
    attribute and converted by ``self.store_hours``; they are attached only
    when that conversion returns a truthy value.
    """
    properties = {
        'name': response.xpath(
            '//span[@class="LocationName-geo"]/text()').extract_first(),
        'website': response.url,
        'ref': response.url,
        'addr_full': response.xpath(
            '//span[@itemprop="streetAddress"]/span/text()').extract_first(),
        'city': response.xpath(
            '//span[@itemprop="addressLocality"]/text()').extract_first(),
        'state': response.xpath(
            '//abbr[@itemprop="addressRegion"]/text()').extract_first(),
        'postcode': response.xpath(
            '//span[@itemprop="postalCode"]/text()').extract_first().strip(),
        'lat': float(response.xpath(
            '//meta[@itemprop="latitude"]/@content').extract_first()),
        'lon': float(response.xpath(
            '//meta[@itemprop="longitude"]/@content').extract_first()),
    }

    raw_hours = response.xpath(
        '//div[@class="c-location-hours-details-wrapper js-location-hours"]/@data-days'
    ).extract_first()
    # Decode only when the attribute exists: ``json.loads(None)`` raised
    # TypeError before the ``if hours`` guard below could take effect.
    hours = json.loads(raw_hours) if raw_hours else None

    opening_hours = self.store_hours(hours) if hours else None
    if opening_hours:
        properties['opening_hours'] = opening_hours

    yield GeojsonPointItem(**properties)
def parse_entity(self, ldjson, page_ref):
    """Merge the geo-bearing entities of an ld+json array into one item.

    Each feature has a BankOrCreditUnion and then either a FinancialService
    or an AutomatedTeller; entities are folded in order, so later ones
    overwrite the fields they share with earlier ones.

    ``ldjson`` is the JSON text; ``page_ref`` is stored as ``ref``.
    """
    properties = {"extras": {}}

    for ent in json.loads(ldjson):
        if "geo" not in ent:
            continue

        geo = ent["geo"]
        address = ent["address"]
        entity_type = ent["@type"]

        properties["ref"] = page_ref
        properties["website"] = ent["url"]
        properties["lat"] = geo["latitude"]
        properties["lon"] = geo["longitude"]
        properties["addr_full"] = address["streetAddress"]
        properties["city"] = address["addressLocality"]
        properties["state"] = address["addressRegion"]
        properties["postcode"] = address["postalCode"]
        properties["country"] = address["addressCountry"]["name"]

        if entity_type != "AutomatedTeller":
            # Skip over ATM customer service and the shorter name
            properties["name"] = ent["name"]
            properties["phone"] = ent["telephone"]

        if "openingHours" in ent:
            properties["opening_hours"] = ent["openingHours"]

        if entity_type != "BankOrCreditUnion":
            # i.e. the interesting of the two types
            properties["extras"]["type"] = entity_type

    return GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per row of the store search result table.

    All address fields are derived from the text nodes of the row's
    ``store-result-address`` cell: node 0 is used as both ref and name,
    node 1 as the street address, node 2 is split into city/state/postcode by
    the spider's helpers, and the second-to-last node holds the hours.
    """
    rows = response.xpath(
        '//table[@id="store-search-result"]/tbody/tr[@class="" or @class="store-grey"]'
    )
    for row in rows:
        address_nodes = row.xpath('td[@class="store-result-address"]/text()')
        phone_nodes = row.xpath('td[@class="store-result-phone"]/strong/text()')
        hours_text = row.xpath(
            'td[@class="store-result-address"]/text()[last()-1]'
        ).extract_first()

        properties = {
            "ref": address_nodes.extract_first(),
            "name": address_nodes.extract_first(),
            "opening_hours": self.store_hours(hours_text),
            "addr_full": address_nodes[1].extract(),
            "city": self.city(address_nodes[2].extract()),
            "state": self.state(address_nodes[2].extract()),
            "postcode": self.postCode(address_nodes[2].extract()),
            "phone": self.phone(phone_nodes[0].extract()),
        }
        yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem built from a page's Open Graph meta tags.

    The phone number is the first ``href`` containing ``tel:`` with that
    scheme removed (``None`` when the page has no such link); opening hours
    are the concatenated text of ``.oh-wrapper``.
    """

    def og(prop):
        # Content of <meta property="og:<prop>">, or None when absent.
        return response.xpath(
            '//meta[@property="og:%s"]/@content' % prop).extract_first()

    tel_href = response.xpath('//@href[contains(.,"tel:")]').extract_first()
    # A page without a tel: link used to raise AttributeError on
    # ``None.replace`` and lose the whole item.
    phone = tel_href.replace('tel:', '') if tel_href is not None else None

    properties = {
        'name': og('title'),
        'ref': response.url,
        'addr_full': og('street_address'),
        'city': og('locality'),
        'state': og('region'),
        'postcode': og('postal_code'),
        'country': og('country_name'),
        'phone': phone,
        'website': response.url,
        'opening_hours': ''.join(response.css('.oh-wrapper ::text').extract()),
        'lon': og('longitude'),
        'lat': og('latitude'),
    }
    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per entry of the JSON ``outlets`` list.

    The ``address`` string is split on ``", "``: the first part is the
    street address, the last part the postcode and, when there are at least
    four parts, the second-to-last part the state/county.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    data = json.loads(response.text)

    for place in data["outlets"]:
        city = place["town"].split(',')
        addr = place["address"].split(', ')

        # The original enumerated lengths 3-6 explicitly; for any other
        # length the three variables were left unset (NameError on the first
        # record) or, worse, silently kept the previous outlet's values.
        # Indexing from the end gives the same result for lengths 3-6 and a
        # defined result for every other length.
        str_addr = addr[0]
        postal = addr[-1] if len(addr) >= 2 else ""
        state = addr[-2] if len(addr) >= 4 else ""

        properties = {
            'ref': place["phc"],
            'name': place["name"],
            'addr_full': str_addr,
            'city': city[0],
            'state': state,
            'postcode': postal,
            'country': "GB",
            'lat': place["lat"],
            'lon': place["lng"],
            'phone': place["tel"],
            'website': place["url"]
        }
        yield GeojsonPointItem(**properties)
def parse_location(self, response):
    """Yield a GeojsonPointItem for a location page.

    Address and phone come from ``itemprop`` microdata, coordinates from the
    ``place:location:*`` meta tags, and the request URL doubles as both ref
    and website.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    page_url = response.request.url
    longitude = first('//meta[@name="place:location:longitude"]/@content')
    latitude = first('//meta[@name="place:location:latitude"]/@content')

    properties = {
        'name': first('//h1[@class="lp-yellow-text"]/text()'),
        'addr_full': first('//span[@itemprop="streetAddress"]/text()'),
        'city': first('//span[@itemprop="addressLocality"]/text()'),
        'state': first('//span[@itemprop="addressRegion"]/text()'),
        'postcode': first('//span[@itemprop="postalCode"]/text()'),
        'phone': first('//a[@itemprop="telephone"]/text()'),
        'opening_hours': first('//tr[@itemprop="openingHours"]/@datetime'),
        'ref': page_url,
        'website': page_url,
        'lon': float(longitude),
        'lat': float(latitude),
    }
    yield GeojsonPointItem(**properties)
def parse_location(self, response):
    """Yield a GeojsonPointItem for a location page.

    Address data is read from the ld+json script that mentions
    ``streetAddress``.  ``ref`` and the coordinates are scraped from the
    inline JavaScript that mentions ``formattedAddress``: the text from the
    first ``name`` onward (to end of line) is split on commas, the first
    token gives the ref and the first ``"latitude":`` / ``"longitude":``
    tokens give the coordinates (``0.0`` when absent).
    """
    store_js = response.xpath(
        '//script[@type="text/javascript" and contains(text(), "formattedAddress")]/text()'
    ).extract()
    storetext = store_js[0]
    data = json.loads(response.xpath(
        '//script[@type="application/ld+json" and contains(text(), "streetAddress")]/text()'
    ).extract_first())

    json_data = re.search('name(.*)', storetext).group().split(",")

    # The original probed fixed positions 5/6 and 6/7 before falling back to
    # a scan; those probes raised IndexError on short token lists, so the
    # scan never ran.  One scan covers every layout.
    lat = 0.0
    lon = 0.0
    lat_token = next((t for t in json_data if t.startswith('"lati')), None)
    if lat_token is not None:
        lat = float(lat_token.replace('"latitude":', ''))
    lon_token = next((t for t in json_data if t.startswith('"longit')), None)
    if lon_token is not None:
        lon = float(lon_token.replace('"longitude":', ''))

    properties = {
        'ref': (json_data[0].replace('name":', '').strip('"')),
        'name': data['description'],
        'addr_full': data['address']['streetAddress'],
        'city': data['address']['addressLocality'],
        'state': data['address']['addressRegion'],
        'postcode': data['address']['postalCode'],
        'country': data['address']['addressCountry'],
        'phone': data.get("telephone"),
        'lat': lat,
        'lon': lon,
    }
    yield GeojsonPointItem(**properties)
def parse_stores(self, response):
    """Yield a GeojsonPointItem for a store page unless it is delivery-only.

    A page whose hours heading reads ``DELIVERY HOURS`` produces nothing.
    Coordinates are looked up in ``self.locations`` by the location id taken
    from the URL.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    heading = first('//div[@class="hours-wrapper"]/p/strong/text()')
    if heading == 'DELIVERY HOURS':
        return

    locationid = re.search(r'.*/locations/(.*)/$', response.url).group(1)
    candidates = [
        entry for entry in self.locations
        if entry["LocationId"] == locationid
    ]
    location = candidates[0]

    properties = {
        'addr_full': first(
            '//span[@itemprop="streetAddress"]/text()').strip(','),
        'phone': first('//span[@itemprop="telephone"]/text()'),
        'name': first('//h2[@class="ppb_title"]/text()'),
        'city': first(
            '//span[@itemprop="addressLocality"]/text()').strip(','),
        'state': first('//span[@itemprop="addressRegion"]/text()'),
        'postcode': first('//span[@itemprop="postalCode"]/text()'),
        'ref': locationid,
        'website': response.url,
        'lat': float(location["lat"]),
        'lon': float(location["lng"]),
    }
    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per entry of the JSON ``stores`` list.

    The three address lines are joined with single spaces (empty lines are
    skipped and runs of spaces collapsed); the store ``type`` is kept under
    ``extras``.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    data = json.loads(response.text)

    for store in data["stores"]:
        address_lines = [
            store["address_1"], store["address_2"], store["address_3"]]
        addr_full = re.sub(
            ' +', ' ', ' '.join(filter(None, address_lines)).strip())

        properties = {
            'name': store["name"],
            'ref': store["id"],
            'addr_full': addr_full,
            'city': store["city"],
            'state': store["state"],
            'postcode': store["postal_code"],
            'country': store["country_code"],
            'phone': store.get("phone_number"),
            'website': response.url,
            'lat': float(store["latitude"]),
            'lon': float(store["longitude"]),
            'extras': {
                'store_type': store["type"],
            },
        }
        yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per shop in the page's inline ``t=[[...]]`` array.

    The array literal is cut out of the page text, its quoting is rewritten
    so it can be read as JSON, and each row is mapped by position: 0 ref,
    1 address string, 2 lat, 3 lon, 4 phone, 8-14 opening-hours data.
    The address is split into street / city / state / ZIP when it matches
    the expected pattern; otherwise all four parts are left empty.
    """
    stri = response.text
    begin_st = stri.find('t=[[') + 2
    end_st = stri[begin_st:].find(']]') + begin_st + 2
    # Escape existing double quotes, then turn single quotes into double
    # quotes so that the JavaScript literal parses as JSON.
    shops = json.loads(
        stri[begin_st:end_st].replace('"', '\\"').replace("\'", '"'))

    for shop in shops:
        address_parts = re.search(
            r"(.*),\s*(.*)\s*,\s*(\D{2})\s*(\d{5})", shop[1])
        # A single explicit check replaces four ``try/except Exception``
        # blocks whose only purpose was to survive ``address_parts is None``.
        if address_parts:
            address, city, state, zip_code = address_parts.groups()
        else:
            address = city = state = zip_code = ''

        yield GeojsonPointItem(
            lat=float(shop[2]),
            lon=float(shop[3]),
            phone=shop[4],
            ref=shop[0],
            opening_hours=self.store_hours(shop[8:15]),
            addr_full=address,
            city=city,
            state=state,
            postcode=zip_code,
            country='US',
        )
def parse(self, response):
    """Yield one GeojsonPointItem per entry of the page's inline ``markers`` array.

    The JSON array is cut out of the script that contains ``markers =``.
    Each address string is matched against three progressively looser
    patterns; whichever matches first supplies street and city and, when the
    pattern defines them, state, ZIP and country.
    """
    address_patterns = (
        r"(.*),\s*(\D{2,}\s?\D{2,}?\s?\D*)\s*,\s*(\D{2})\s*(\d{5})?,(\D{4,})?",
        r"(.*),\s*(\D{2,}\s?\D{2,}?\s?\D*)\s(\D{2})",
        r"(.*),\s?(\D*)",
    )

    def group_or_none(match, index):
        # Value of group ``index``; None when there is no match, the pattern
        # has no such group, or the group did not participate.
        if match is None or index > match.re.groups:
            return None
        return match.group(index)

    washers_str = response.xpath(
        '//script[contains(.,"markers =")]').extract_first()
    j_beg = washers_str.find('markers =') + 10
    j_end = washers_str.find('\n\t', j_beg)
    wash_list = json.loads(washers_str[j_beg:j_end].strip().rstrip(';'))

    for wash in wash_list:
        address = wash['address']
        address_parts = None
        for pattern in address_patterns:
            address_parts = re.match(pattern, address)
            if address_parts:
                break

        zip_code = group_or_none(address_parts, 4) or ''
        state = group_or_none(address_parts, 3) or ''
        # The original read group 6, which none of the patterns define, so
        # the lookup always failed and the country was always 'US'.  The
        # trailing country text is group 5 of the first pattern.
        country = (group_or_none(address_parts, 5) or '').strip() or 'US'

        # An address that matches no pattern used to raise TypeError when
        # indexing ``None``; fall back to the raw address string instead.
        if address_parts:
            street, city = address_parts[1], address_parts[2]
        else:
            street, city = address, ''

        # The phone number sits between the ``<b>Phone:</b>`` label (13
        # characters long) and the character before the next ``/div``.
        info = wash['infoContent']
        label_at = info.find('<b>Phone:</b>')
        phone = self.phone_normalize(
            info[label_at + 13:info.find('/div', label_at) - 1])

        yield GeojsonPointItem(
            lat=float(wash['lat']),
            lon=float(wash['lng']),
            phone=phone,
            website='http://mistercarwash.com/locations/'
                    + wash['name'].lower().replace(' ', '-'),
            ref=wash['loc_id'],
            opening_hours=self.store_hours(wash['loc_hours']),
            addr_full=street,
            city=city,
            state=state,
            postcode=zip_code,
            country=country,
        )
def parse_stores(self, response):
    """Yield a GeojsonPointItem for a store page.

    Coordinates are taken from the first ``"Latitude":"..."`` and
    ``"Longitude":"..."`` pairs in the raw page text; the remaining fields
    come from whitespace-normalised ``itemprop`` microdata.

    Raises IndexError when the page text contains no coordinates.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()`` and is
    # read once; a capture group replaces the second regex pass that
    # stripped the key prefix from the first match.
    body = response.text
    lat = re.findall(r'"Latitude":"([0-9.-]+)', body)[0]
    lng = re.findall(r'"Longitude":"([0-9.-]+)', body)[0]

    def normalized(expression):
        # String value of ``normalize-space(<expression>)``.
        return response.xpath(
            'normalize-space(%s)' % expression).extract_first()

    properties = {
        'addr_full': normalized(
            '//span[@itemprop="streetAddress"]/text()').replace(',', ''),
        'phone': normalized('//span[@itemprop="telephone"]/text()'),
        'city': normalized(
            '//span[@itemprop="addressLocality"]/text()').replace(',', ''),
        'state': normalized('//span[@itemprop="addressRegion"]/text()'),
        'postcode': normalized('//span[@itemprop="postalCode"]/text()'),
        'ref': normalized('//div[@class="store-number"]/text()'),
        'website': response.url,
        'lat': float(lat),
        'lon': float(lng),
    }
    yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page.

    The carry-out schedule text nodes are handed to ``self.parse_hours``;
    address parts come from ``itemprop`` microdata and coordinates from
    ``<meta>`` tags.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    schedule_text = response.xpath(
        '//div[@class="hours-carryout"]/p[starts-with(@class, "schedule")]//text()'
    ).extract()
    opening_hours = self.parse_hours(schedule_text)

    props = {
        'ref': first('//p[@class="store-number"]/strong/text()'),
        'website': response.url,
        'addr_full': first('//div[@class="streetAddress"]/text()'),
        'phone': first('//span[@itemprop="telephone"]/a/text()'),
        'city': first('//span[@itemprop="addressLocality"]/text()'),
        'postcode': first('//span[@itemprop="postalCode"]/text()'),
        'state': first('//span[@itemprop="addressRegion"]/text()'),
        'opening_hours': opening_hours,
        'lat': float(first('//meta[@itemprop="latitude"]/@content')),
        'lon': float(first('//meta[@itemprop="longitude"]/@content')),
    }
    yield GeojsonPointItem(**props)
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page.

    The address line inside the first ``mcm-logo-address`` block is split
    into street, city, two-letter state and five-digit ZIP; ``ref`` is taken
    from the request meta.

    Raises IndexError when the address block is missing and TypeError when
    the address line is missing or does not match the expected layout.
    """
    # Look the container up once.  The original also extracted a Google Maps
    # link into an unused local (``google_pos``), which could itself raise
    # IndexError on pages without such a link; it is dropped.
    container = response.xpath('//div[@class="mcm-logo-address"]')[0]

    address_full = container.xpath('.//a/p/text()').extract_first()
    address_parts = re.match(
        r"(.{3,}),\s?(.{3,}),\s?(\w{2}) (\d{5})", address_full)

    yield GeojsonPointItem(
        ref=response.meta.get('ref'),
        website=response.url,
        addr_full=address_parts[1].strip(),
        city=address_parts[2].strip(),
        state=address_parts[3].strip(),
        postcode=address_parts[4].strip(),
        phone=self.phone_normalize(
            container.xpath('.//ul/li/a/@href').extract_first()),
        opening_hours=self.store_hours(
            response.xpath(
                '//div[@id="MainContent_hoursText"]/p/text()').extract()),
    )
def parse_store(self, response):
    """Yield a GeojsonPointItem from the first entity of the page's ld+json.

    The store name doubles as ``ref``; the phone number is read from inside
    the ``address`` object.
    """
    raw = response.xpath(
        '//script[@type="application/ld+json"]/text()').extract_first()
    # The quotes around "Chic" are removed before decoding -- presumably the
    # embedded JSON contains that word in unescaped quotes, which would break
    # parsing (confirm against a live page).
    entity = json.loads(raw.replace('"Chic"', 'Chic'))[0]

    yield GeojsonPointItem(
        name=entity['name'],
        ref=entity['name'],
        addr_full=entity['address']['streetAddress'],
        city=entity['address']['addressLocality'],
        state=entity['address']['addressRegion'],
        postcode=entity['address']['postalCode'],
        phone=entity['address']['telephone'],
        website=entity['url'],
        opening_hours=entity['openingHours'],
        lat=float(entity['geo']['latitude']),
        lon=float(entity['geo']['longitude']),
    )
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page.

    ``ref`` is the last path segment of the URL.  Opening hours are attached
    only when ``self.parse_hours`` returns a truthy value for the page's
    ``<dl class="hours">`` block.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).extract_first()

    store_ref = re.search(r'.+/(.+)$', response.url).group(1)

    properties = dict(
        addr_full=first('//div[@id="location-left"]/p/text()'),
        phone=first('//span[@itemprop="telephone"]/text()'),
        city=first('//span[@itemprop="addressLocality"]/text()'),
        state=first('//span[@itemprop="addressRegion"]/text()'),
        postcode=first('//span[@itemprop="postalCode"]/text()'),
        ref=store_ref,
        website=response.url,
        lat=float(first('//dt[@itemprop="latitude"]/text()')),
        lon=float(first('//dt[@itemprop="longitude"]/text()')),
        name=first('//h1[@itemprop="name"]/text()'),
    )

    opening_hours = self.parse_hours(response.xpath('//dl[@class="hours"]'))
    if opening_hours:
        properties['opening_hours'] = opening_hours

    yield GeojsonPointItem(**properties)
def parse(self, response):
    """Yield one GeojsonPointItem per entry of the JSON ``stores`` list.

    The full address is address line 1 followed by address line 2 (when
    present), separated by a single space.
    """
    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    jsonresponse = json.loads(response.text)

    # The original serialised every record with ``json.dumps`` and parsed it
    # straight back, which only produced an identical copy; the record is
    # used directly instead.
    for store_data in jsonresponse["stores"]:
        address = store_data["storeAddress"]
        # The original concatenated ``addressLine2`` twice, duplicating that
        # line in every address.  Empty/None lines are skipped.
        addr_full = ' '.join(
            filter(None, [address["addressLine1"], address["addressLine2"]]))

        properties = {
            'ref': store_data["storeNo8Digit"],
            'name': store_data["storeNameExternal"],
            'addr_full': addr_full.strip(),
            'city': address["city"],
            'postcode': address["postCode"],
            'country': address["country"],
            'lat': float(store_data["latitude"]),
            'lon': float(store_data["longitude"])
        }
        yield GeojsonPointItem(**properties)
def parse_store(self, response):
    """Yield a GeojsonPointItem for a store page.

    The name is the first ``<h4>`` text without its ``Target – `` prefix; the
    street address is the bold header (if any) followed by the remaining
    street-address text nodes; hours are parsed from the first ``<dl>``;
    ``ref`` is the last path segment of the URL.
    """

    def first(query):
        # First XPath match as a string, or None when nothing matches.
        return response.xpath(query).get()

    store_name = first("//h4/text()").replace("Target – ", "")

    address_header = first("//span[@itemprop='streetAddress']/strong/text()")
    street_lines = response.xpath(
        "//span[@itemprop='streetAddress']/text()").getall()
    address = " ".join(street_lines).strip()
    if address_header:
        address = address_header + " " + address

    locality = first("//span[@itemprop='addressLocality']/text()")
    region = first("//span[@itemprop='addressRegion']/text()")
    post_code = first("//span[@itemprop='postalCode']/text()")
    phone_number = first("//span[@itemprop='telephone']/text()")

    opening_hours = self.parse_hours(response.xpath("(//dl)[1]")[0])

    lat = first("//div[@data-embedded-json='store-content-data']//@data-lat")
    lon = first("//div[@data-embedded-json='store-content-data']//@data-lng")

    properties = {
        'lat': lat,
        'lon': lon,
        'name': store_name,
        'addr_full': address,
        'city': locality,
        'state': region,
        'postcode': post_code,
        'country': "AU",
        'phone': phone_number,
        'website': response.url,
        'opening_hours': opening_hours,
        'ref': response.url.split("/")[-1],
    }
    yield GeojsonPointItem(**properties)