예제 #1
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every entry of the JSON ``restaurants`` list.

        The response body is parsed as JSON. Each store record supplies its
        id, name, address parts, phone number and coordinates (converted to
        ``float``).
        """
        # ``response.text`` is the supported replacement for the deprecated
        # ``TextResponse.body_as_unicode()``.
        data = json.loads(response.text)

        for store in data['restaurants']:
            # One-line address in the form "street, city, state zip".
            addr_full = "{}, {}, {} {}".format(store['streetaddress'],
                                               store['city'], store['state'],
                                               store['zip'])
            properties = {
                "ref": store['id'],
                "name": store['name'],
                "addr_full": addr_full,
                "city": store['city'],
                "state": store['state'],
                "postcode": store['zip'],
                "country": store['country'],
                "lon": float(store['longitude']),
                "lat": float(store['latitude']),
                "phone": store['telephone'],
            }

            yield GeojsonPointItem(**properties)
예제 #2
0
    def parse_location(self, response):
        """Yield a ``GeojsonPointItem`` for a single location page.

        Name, address, phone and website come from the page's JSON-LD script
        (the one mentioning ``streetAddress``). The coordinates are read from
        the second script inside the ``clearfix map_equipment`` div, and the
        reference is taken from the trailing part of the response URL.

        Raises ``AttributeError`` when the URL or the map script does not match
        the expected pattern, exactly as before.
        """
        ref = re.search(r'.+/(.+?)/?(?:\.html|$)', response.url).group(1)
        data = json.loads(response.xpath('//script[@type="application/ld+json" and contains(text(), "streetAddress")]/text()').extract_first())
        mapdata = response.xpath('//div[@class="clearfix map_equipment"]/script[2]').extract_first()

        # Raw string: ``\d`` / ``\.`` in a plain literal are invalid escape
        # sequences. The pattern is unchanged and is now evaluated once for
        # both coordinates instead of once per coordinate.
        coords = re.search(r'(?:lat":)(-?\d+\.\d+),.*(?:long":)(-?\d*.\d*)', mapdata)
        lat, lon = coords.group(1, 2)

        properties = {
            'ref': ref,
            'name': data["name"],
            'addr_full': data["address"]["streetAddress"],
            'city': data["address"]["addressLocality"],
            'state': data["address"]["addressRegion"],
            'postcode': data["address"]["postalCode"],
            'phone': data.get("telephone"),
            'lat': float(lat),
            'lon': float(lon),
            'website': data.get("url")
        }

        yield GeojsonPointItem(**properties)
예제 #3
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` per entry of ``content.restaurants``.

        Each record's ``name`` field is an HTML snippet: the text before the
        first ``<br`` is used as the store name and the content of the
        ``<small>`` element as the address.
        """
        raw = response.text
        # ``str.replace`` returns a new string; the results used to be
        # discarded, so the whitespace next to quotes was never removed.
        raw = raw.replace('" ', '"').replace(' "', '"')
        restaurants = json.loads(raw)["content"]["restaurants"]

        for store in restaurants:
            label = store['name']

            # ``partition`` keeps the whole text when the marker is absent.
            # Slicing with ``find()`` used -1 in that case and silently cut
            # off the last character of the name.
            name = label.partition("<br")[0].partition("</br")[0].strip()

            properties = {
                'ref': store['id'],
                'lon': float(store['longitude']),
                'lat': float(store['latitude']),
                'name': name,
                # From "<small>" onwards, minus the last 8 characters
                # (presumably the closing "</small>" tag), minus the first 8.
                'addr_full': label[label.find("<small>"):-8][8:],
            }

            yield GeojsonPointItem(**properties)
예제 #4
0
    def parse_info(self, response):
        """Yield a ``GeojsonPointItem`` for every row of the JSON array response.

        Field values are copied from each row unchanged; coordinates are not
        converted and keep whatever type the JSON provides.
        """
        # ``response.text`` is the supported replacement for the deprecated
        # ``TextResponse.body_as_unicode()``.
        data = json.loads(response.text)

        for row in data:
            properties = {
                "ref": row['community_id'],
                "name": row['name'],
                "lat": row['latitude'],
                "lon": row['longitude'],
                "addr_full": row['address1'],
                "city": row['city'],
                "state": row['state'],
                "country": row['country_code'],
                "postcode": row['zip_postal_code'],
                "website": row['website'],
                "phone": row['contact_center_phone'],
            }

            yield GeojsonPointItem(**properties)
예제 #5
0
    def parse_details(self, response):
        """Scrape one detail page and yield its ``GeojsonPointItem``.

        Text fields are read from ``itemprop`` nodes. Latitude and longitude
        are pulled from ``lat&quot;:`` / ``lng&quot;:`` fragments of the raw
        page text and default to an empty string when no fragment is found.
        Opening hours are added only when the page lists any.
        """
        def first_text(query):
            # Value of the first matching node, or None when nothing matches.
            return response.xpath(query).get()

        lat_match = re.search(r'lat&quot;:([-+]?[0-9]*\.?[0-9]*)', response.text)
        lng_match = re.search(r'lng&quot;:([-+]?[0-9]*\.?[0-9]*)', response.text)

        properties = {
            'ref': response.request.url,
            'name': first_text('//h1[@itemprop="name"]/text()'),
            'city': first_text('//span[@itemprop="addressLocality"]/text()'),
            'street': first_text('//span[@itemprop="streetAddress"]/text()'),
            'postcode': first_text('//span[@itemprop="postalCode"]/text()'),
            'phone': first_text('//li[@itemprop="telephone"]/a/span/text()'),
            'website': first_text('//li[@itemprop="url"]/a/span/text()'),
            'lat': lat_match.group(1) if lat_match else "",
            'lon': lng_match.group(1) if lng_match else "",
        }

        schedule = response.xpath(
            '//p[@itemprop="openingHoursSpecification"]/text()').getall()
        if schedule:
            properties['opening_hours'] = self.process_hours(schedule)

        yield GeojsonPointItem(**properties)
예제 #6
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for one store page, including opening hours.

        Address parts come from ``itemprop`` spans, coordinates from the map
        container's data attributes, and the hours from the
        ``#LocalMapAreaOpenHourBanner`` list.
        """
        def first(query):
            # First XPath match on the page, or None.
            return response.xpath(query).extract_first()

        properties = dict(
            ref=response.url,
            lat=first('//div[@class="map-container"]/div/@data-latitude'),
            lon=first('//div[@class="map-container"]/div/@data-longitude'),
            phone=first('//a[@class="phone-link"]/span/text()'),
            addr_full=first('//span[@itemprop="streetAddress"]/text()').strip(),
            name=first('//meta[@itemprop="name legalName"]/@content'),
            # Drops the final character of the locality text
            # (presumably a trailing comma -- confirm against the site).
            city=first('//span[@itemprop="addressLocality"]/text()')[:-1],
            state=first('//span[@itemprop="addressRegion"]/text()').strip(),
            postcode=first('//span[@itemprop="postalCode"]/text()').strip(),
        )

        hours = OpeningHours()
        for row in response.css('#LocalMapAreaOpenHourBanner li.h-day'):
            # Only the first two characters of the day label are passed on.
            weekday = row.xpath('em/span/text()').extract_first().strip()[:2]
            span = row.xpath('em/text()').extract_first().strip(':').strip()
            opens, closes = span.split(' - ')
            hours.add_range(weekday, opens, closes, '%I:%M %p')
        properties['opening_hours'] = hours.as_opening_hours()

        yield GeojsonPointItem(**properties)
예제 #7
0
    def parse_location(self, response):
        """Return the ``GeojsonPointItem`` described by a location page.

        Opening hours are taken from the ``data-days`` attribute of the
        ``.js-location-hours`` element. When present, the attribute is decoded
        as JSON and passed through ``self.normalize_hours``; when absent, the
        empty value is stored as-is.
        """
        def first(query):
            # First XPath match on the page, or None.
            return response.xpath(query).extract_first()

        raw_days = response.css('.js-location-hours').xpath(
            '@data-days').extract_first()
        if raw_days:
            hours = self.normalize_hours(json.loads(raw_days))
        else:
            hours = raw_days

        return GeojsonPointItem(
            addr_full=first('//meta[@itemprop="streetAddress"]/@content'),
            lat=float(first('//meta[@itemprop="latitude"]/@content')),
            lon=float(first('//meta[@itemprop="longitude"]/@content')),
            city=first('//span[@class="c-address-city"]/text()'),
            postcode=first('//span[@class="c-address-postal-code"]/text()'),
            state=first('//abbr[@class="c-address-state"]/text()'),
            phone=first(
                '//span[@class="c-phone-number-span c-phone-main-number-span"]/text()'
            ),
            ref=response.url,
            website=response.url,
            opening_hours=hours,
        )
예제 #8
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every store row on the page.

        Street, postcode and city are cut out of the address paragraph's HTML;
        the coordinates come from the ``lng=``/``lat=`` query parameters of the
        row's action link. Anything that cannot be found is left as an empty
        string, so a store with no coordinates gets the reference ``"_"``.
        """
        for store in response.xpath('//div[@class="item"]//div[@class="row"]'):
            street = city = postcode = lat = lon = ''

            # A row without the address paragraph used to raise TypeError in
            # re.search() and end the whole generator. Fall back to an empty
            # string so the row is handled like a row without a position.
            address = store.xpath(
                './/div[@class="col-xs-12 col-sm-6 col-md-3 col-lg-3"]//p'
            ).get() or ''
            match = re.search(r'<p>(.*?)<br>', address)
            if match:
                street = match.group(1)

            match = re.search(r'(\d{5}) (.*?)<\/p>', address)
            if match:
                postcode = match.group(1)
                city = match.group(2)

            position = store.xpath(
                './/div[@class="col-xs-12 col-sm-6 col-md-2 col-lg-2 actions"]'
                '//a/@href').get()
            if position:
                match = re.search(r'lng=(.*?)&lat=(.*?)&', position)
                if match:
                    lat = match.group(2)
                    lon = match.group(1)

            properties = {
                'ref': f"{lat}_{lon}",
                'street': street,
                'city': city,
                'postcode': postcode,
                'country': 'DE',
                'lat': lat,
                'lon': lon,
            }

            hours = self.parse_hours(store)
            if hours:
                properties["opening_hours"] = hours

            yield GeojsonPointItem(**properties)
예제 #9
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for one store page.

        The reference is the last path segment of the URL. The name is the
        ``About-title`` heading with its leading "About " removed. The
        remaining fields come from ``itemprop`` meta tags.
        """
        ref = re.search(r'.+/(.+)', response.url).group(1)

        # ``str.strip('About ')`` treats its argument as a *set of characters*
        # and removed any of "A", "b", "o", "u", "t", " " from both ends, so
        # "About Toronto" became "Toron". Remove the literal prefix instead.
        prefix = 'About'
        name = response.xpath(
            '//h2[@class="About-title"]/text()').extract_first().strip()
        if name == prefix or name.startswith(prefix + ' '):
            name = name[len(prefix):].strip()

        properties = {
            'ref':
            ref.strip('/'),
            'name':
            name,
            'addr_full':
            response.xpath(
                '//meta[@itemprop="streetAddress"]/@content').extract_first(),
            'city':
            response.xpath('//meta[@itemprop="addressLocality"]/@content').
            extract_first(),
            'state':
            response.xpath(
                '//meta[@itemprop="addressRegion"]/@content').extract_first(),
            'postcode':
            response.xpath(
                '//meta[@itemprop="postalCode"]/@content').extract_first(),
            'country':
            response.xpath(
                '//meta[@itemprop="addressCountry"]/@content').extract_first(),
            'phone':
            response.xpath(
                '//meta[@itemprop="telephone"]/@content').extract_first(),
            'website':
            response.url,
            'lat':
            float(
                response.xpath(
                    '//meta[@itemprop="latitude"]/@content').extract_first()),
            'lon':
            float(
                response.xpath(
                    '//meta[@itemprop="longitude"]/@content').extract_first()),
        }

        yield GeojsonPointItem(**properties)
예제 #10
0
    def parse(self, response):
        """Yield store items from one page of JSON results, then request the next page.

        Every entry of ``results`` becomes a ``GeojsonPointItem``. When the
        payload's ``next`` field is not ``None``, ``self.page_number`` is
        incremented and a request for ``start_urls[0] + '&page=<n>'`` follows.
        """
        # ``response.text`` is the supported replacement for the deprecated
        # ``TextResponse.body_as_unicode()``.
        data = json.loads(response.text)

        for store in data['results']:
            open_hours = self.parse_hours(store['opening_hours'])

            # Skip empty or missing address lines: joining them unconditionally
            # raised TypeError on None and left double spaces on ''.
            street_lines = (store['street_address'], store['street_address2'],
                            store['street_address3'])
            addr_full = " ".join(line for line in street_lines if line)

            properties = {
                "ref": store['url'],
                "name": store['name'],
                "opening_hours": open_hours,
                "website": "https://www.coop.co.uk" + store['url'],
                "addr_full": addr_full,
                "city": store['town'],
                "postcode": store['postcode'],
                "country": 'United Kingdom',
                "lon": float(store['position']['x']),
                "lat": float(store['position']['y']),
                "phone": store["phone"],
            }

            yield GeojsonPointItem(**properties)

        if data['next'] is not None:
            self.page_number = self.page_number + 1
            yield scrapy.Request(self.start_urls[0] + '&page=' +
                                 str(self.page_number))
예제 #11
0
    def parse(self, response):
        """Yield one ``GeojsonPointItem`` per result entry, then follow pagination.

        Coordinates are parsed from the ``?daddr=<lat>,<lon>`` part of each
        entry's "Driving Directions" link. When the page has a ``next`` link,
        a request for it is yielded after all stores.
        """
        def first(node, query):
            # First XPath match below ``node``, or None.
            return node.xpath(query).extract_first()

        next_page = first(response, '//li[@class="next"]/a/@href')

        for store in response.xpath('//ol[contains(@class,"results")]/li'):
            directions = first(
                store, './/a[contains(.,"Driving Directions")]/@href')
            position = re.search(r'\?daddr=(.*),(.*)', directions)

            phone = first(store, './/span[contains(@class,"tel")]/text()')
            if phone:
                phone = phone.replace('- Main', '').strip()

            fields = {
                'lat': float(position[1]),
                'lon': float(position[2]),
                'phone': phone,
                'website': first(
                    store, './/a[contains(.,"View Our Website")]/@href'),
                'ref': first(store, './/div[contains(@class,"org")]/text()'),
                'addr_full': first(
                    store, './/div[contains(@class,"street-address")]/text()'),
                'city': first(
                    store,
                    './/span[contains(@class,"locality")]/text()').rstrip(','),
                'state': first(
                    store, './/span[contains(@class,"region")]/text()').strip(),
                'postcode': first(
                    store,
                    './/span[contains(@class,"postal-code")]/text()').strip(),
                'country': 'USA',
            }
            yield GeojsonPointItem(**fields)

        if next_page:
            yield scrapy.Request(response.urljoin(next_page))
예제 #12
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every ``<poi>`` element in the response.

        ``state`` falls back to ``province``; the address falls back from
        ``address1`` to ``address2`` to ``dsply_adr``. ``<br>``, ``&reg``,
        ``;`` and double spaces are removed from the name.
        """
        def first_text(node, *queries):
            # Result of the first query that matches anything, else None.
            for query in queries:
                value = node.xpath(query).get()
                if value is not None:
                    return value
            return None

        for poi in Selector(response).xpath('//poi').extract():
            # Parse the fragment once per POI; it used to be re-parsed for
            # every single field lookup.
            node = Selector(text=poi)

            state = first_text(node, '//state/text()', '//province/text()')
            addr = first_text(node, '//address1/text()', '//address2/text()',
                              '//dsply_adr/text()')

            name = node.xpath('//name/text()').get()
            name = name.replace('<br>', '')
            name = name.replace('&reg', ' ')
            name = name.replace(';', '')
            name = name.replace('  ', ' ')

            properties = {
                'ref': node.xpath('//clientkey/text()').get(),
                'name': name,
                'addr_full': addr,
                'city': node.xpath('//city/text()').get(),
                'state': state,
                'postcode': node.xpath('//postalcode/text()').get(),
                'country': node.xpath('//country/text()').get(),
                'lat': node.xpath('//latitude/text()').get(),
                'lon': node.xpath('//longitude/text()').get(),
                'phone': node.xpath('//phone/text()').get(),
                'extras': {
                    'brand': "Timberland"
                }
            }

            yield GeojsonPointItem(**properties)
예제 #13
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for one store page.

        Address fields come from ``itemprop`` nodes and coordinates from
        ``itemprop`` meta tags. Opening hours are decoded from the hours
        wrapper's ``data-days`` attribute and converted with
        ``self.store_hours``. They are added only when a non-empty result is
        available.
        """
        properties = {
            'name':
            response.xpath(
                '//span[@class="LocationName-geo"]/text()').extract_first(),
            'website':
            response.url,
            'ref':
            response.url,
            'addr_full':
            response.xpath('//span[@itemprop="streetAddress"]/span/text()'
                           ).extract_first(),
            'city':
            response.xpath(
                '//span[@itemprop="addressLocality"]/text()').extract_first(),
            'state':
            response.xpath(
                '//abbr[@itemprop="addressRegion"]/text()').extract_first(),
            'postcode':
            response.xpath('//span[@itemprop="postalCode"]/text()').
            extract_first().strip(),
            'lat':
            float(
                response.xpath(
                    '//meta[@itemprop="latitude"]/@content').extract_first()),
            'lon':
            float(
                response.xpath(
                    '//meta[@itemprop="longitude"]/@content').extract_first()),
        }

        raw_hours = response.xpath(
            '//div[@class="c-location-hours-details-wrapper js-location-hours"]/@data-days'
        ).extract_first()
        # Decode only when the attribute exists: json.loads(None) raises
        # TypeError, which made the "if hours" guard below unreachable for
        # pages without hours.
        hours = json.loads(raw_hours) if raw_hours else None
        opening_hours = self.store_hours(hours) if hours else None
        if opening_hours:
            properties['opening_hours'] = opening_hours

        yield GeojsonPointItem(**properties)
예제 #14
0
    def parse_entity(self, ldjson, page_ref):
        """Merge the JSON-LD entities of one page into a single ``GeojsonPointItem``.

        ``ldjson`` is a JSON string holding a list of entities and ``page_ref``
        becomes the item's ``ref``. Entities without a ``geo`` key are ignored.
        Later entities overwrite the fields set by earlier ones.
        """
        # Each feature has a BankOrCreditUnion and then either a
        # FinancialService or an AutomatedTeller; fold them into one item.
        properties = {"extras": {}}
        for ent in json.loads(ldjson):
            if "geo" not in ent:
                continue

            properties.update(
                ref=page_ref,
                website=ent["url"],
                lat=ent["geo"]["latitude"],
                lon=ent["geo"]["longitude"],
                addr_full=ent["address"]["streetAddress"],
                city=ent["address"]["addressLocality"],
                state=ent["address"]["addressRegion"],
                postcode=ent["address"]["postalCode"],
                country=ent["address"]["addressCountry"]["name"],
            )

            kind = ent["@type"]
            if kind != "AutomatedTeller":
                # Skip over ATM customer service and the shorter name
                properties.update(name=ent["name"], phone=ent["telephone"])
            if "openingHours" in ent:
                properties["opening_hours"] = ent["openingHours"]
            if kind != "BankOrCreditUnion":
                # i.e. the interesting of the two types
                properties["extras"]["type"] = kind
        return GeojsonPointItem(**properties)
예제 #15
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for each row of the store search result table.

        All address data lives in the text nodes of the row's
        ``store-result-address`` cell: the first node is used as both ref and
        name, the second as the street address, and the third is handed to
        ``self.city``, ``self.state`` and ``self.postCode``.
        """
        rows = response.xpath(
            '//table[@id="store-search-result"]/tbody/tr[@class="" or @class="store-grey"]'
        )
        for row in rows:
            address_texts = row.xpath(
                'td[@class="store-result-address"]/text()')
            title = address_texts.extract_first()

            # The second-to-last text node is passed to store_hours().
            hours = self.store_hours(
                row.xpath(
                    'td[@class="store-result-address"]/text()[last()-1]').
                extract_first())

            street = address_texts[1].extract()
            locality_line = address_texts[2].extract()

            properties = {
                "ref": title,
                "name": title,
                "opening_hours": hours,
                "addr_full": street,
                "city": self.city(locality_line),
                "state": self.state(locality_line),
                "postcode": self.postCode(locality_line),
                "phone": self.phone(
                    row.xpath('td[@class="store-result-phone"]/strong/text()'
                              )[0].extract()),
            }

            yield GeojsonPointItem(**properties)
예제 #16
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` built from the page's Open Graph meta tags.

        The phone number is the first ``href`` containing ``tel:`` with that
        prefix removed. Opening hours are the concatenated text of the
        ``.oh-wrapper`` element.
        """
        def first(query):
            # First XPath match on the page, or None.
            return response.xpath(query).extract_first()

        tel_href = '//@href[contains(.,"tel:")]'
        hours_text = response.css('.oh-wrapper ::text').extract()

        properties = dict(
            name=first('//meta[@property="og:title"]/@content'),
            ref=response.url,
            addr_full=first('//meta[@property="og:street_address"]/@content'),
            city=first('//meta[@property="og:locality"]/@content'),
            state=first('//meta[@property="og:region"]/@content'),
            postcode=first('//meta[@property="og:postal_code"]/@content'),
            country=first('//meta[@property="og:country_name"]/@content'),
            phone=first(tel_href).replace('tel:', ''),
            website=response.url,
            opening_hours=''.join(hours_text),
            lon=first('//meta[@property="og:longitude"]/@content'),
            lat=first('//meta[@property="og:latitude"]/@content'),
        )

        yield GeojsonPointItem(**properties)
예제 #17
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every entry of the JSON ``outlets`` list.

        The ``address`` field is a ``", "``-separated string. Its first part
        is the street and its last part the postcode. With four or more parts,
        the second-to-last part is used as the state. Addresses with fewer
        than three parts keep only the street.
        """
        # ``response.text`` is the supported replacement for the deprecated
        # ``TextResponse.body_as_unicode()``.
        data = json.loads(response.text)

        for place in data["outlets"]:
            city = place["town"].split(',')
            addr = place["address"].split(', ')

            # The old if/elif chain handled only 3 to 6 parts. Any other length
            # left these names unbound on the first outlet, or silently reused
            # the previous outlet's values. Positions counted from the end give
            # the same result for 3 to 6 parts and stay defined otherwise.
            str_addr = addr[0]
            postal = addr[-1] if len(addr) >= 3 else ""
            state = addr[-2] if len(addr) >= 4 else ""

            properties = {
                'ref': place["phc"],
                'name': place["name"],
                'addr_full': str_addr,
                'city': city[0],
                'state': state,
                'postcode': postal,
                'country': "GB",
                'lat': place["lat"],
                'lon': place["lng"],
                'phone': place["tel"],
                'website': place["url"]
            }

            yield GeojsonPointItem(**properties)
예제 #18
0
    def parse_location(self, response):
        """Yield a ``GeojsonPointItem`` for one location page.

        Text fields are the first match of their XPath query (``None`` when
        nothing matches). Longitude and latitude come from
        ``place:location:*`` meta tags and are converted to ``float``.
        """
        text_queries = (
            ('name', '//h1[@class="lp-yellow-text"]/text()'),
            ('addr_full', '//span[@itemprop="streetAddress"]/text()'),
            ('city', '//span[@itemprop="addressLocality"]/text()'),
            ('state', '//span[@itemprop="addressRegion"]/text()'),
            ('postcode', '//span[@itemprop="postalCode"]/text()'),
            ('phone', '//a[@itemprop="telephone"]/text()'),
            ('opening_hours', '//tr[@itemprop="openingHours"]/@datetime'),
        )
        properties = {
            field: response.xpath(query).extract_first()
            for field, query in text_queries
        }

        # The request URL serves as both identifier and website.
        properties['ref'] = response.request.url
        properties['website'] = response.request.url

        properties['lon'] = float(
            response.xpath(
                '//meta[@name="place:location:longitude"]/@content').
            extract_first())
        properties['lat'] = float(
            response.xpath(
                '//meta[@name="place:location:latitude"]/@content').
            extract_first())

        yield GeojsonPointItem(**properties)
예제 #19
0
    def parse_location(self, response):
        """Yield a ``GeojsonPointItem`` for one location page.

        Address, description and phone come from the page's JSON-LD script
        (the one mentioning ``streetAddress``). The reference and the
        coordinates are cut out of the inline JavaScript that mentions
        ``formattedAddress``.
        """
        # Text of every inline script mentioning "formattedAddress"; only the
        # first one is used (IndexError when there is none).
        store_js = response.xpath(
            '//script[@type="text/javascript" and contains(text(), "formattedAddress")]/text()'
        ).extract()
        storetext = store_js[0]
        data = json.loads(
            response.xpath(
                '//script[@type="application/ld+json" and contains(text(), "streetAddress")]/text()'
            ).extract_first())
        # From the first occurrence of "name" to the end of that line, then
        # split on commas into rough key/value fragments.
        json_prelim_data = re.search('name(.*)', storetext).group()
        json_data = json_prelim_data.split(",")
        # Defaults used when the fallback scan below finds no coordinates.
        lat = 0.0
        lon = 0.0
        # Probe the two positions tried first (latitude at index 5 or 6, with
        # longitude right after it) before scanning every fragment.
        if json_data[5].startswith('"lati'):
            lat = float(json_data[5].replace('"latitude":', ''))
            lon = float(json_data[6].replace('"longitude":', ''))
        elif json_data[6].startswith('"lati'):
            lat = float(json_data[6].replace('"latitude":', ''))
            lon = float(json_data[7].replace('"longitude":', ''))
        else:
            # Fallback: take coordinates from whichever fragments carry them;
            # the last matching fragment wins.
            for i in json_data:
                if i.startswith('"lati'):
                    lat = float(i.replace('"latitude":', ''))
                elif i.startswith('"longit'):
                    lon = float(i.replace('"longitude":', ''))

        properties = {
            # First fragment with the leading 'name":' and surrounding quotes
            # removed.
            'ref': (json_data[0].replace('name":', '').strip('"')),
            'name': data['description'],
            'addr_full': data['address']['streetAddress'],
            'city': data['address']['addressLocality'],
            'state': data['address']['addressRegion'],
            'postcode': data['address']['postalCode'],
            'country': data['address']['addressCountry'],
            'phone': data.get("telephone"),
            'lat': lat,
            'lon': lon,
        }

        yield GeojsonPointItem(**properties)
예제 #20
0
    def parse_stores(self, response):
        """Yield a ``GeojsonPointItem`` for a store page, unless it is delivery-only.

        Pages whose hours heading reads ``DELIVERY HOURS`` produce nothing.
        Coordinates are looked up in ``self.locations`` via the location id
        taken from the URL.
        """
        def first(query):
            # First XPath match on the page, or None.
            return response.xpath(query).extract_first()

        heading = first('//div[@class="hours-wrapper"]/p/strong/text()')
        if heading == 'DELIVERY HOURS':
            return

        locationid = re.search(r'.*/locations/(.*)/$', response.url).group(1)
        candidates = [
            entry for entry in self.locations
            if entry["LocationId"] == locationid
        ]
        location = candidates[0]

        properties = dict(
            addr_full=first(
                '//span[@itemprop="streetAddress"]/text()').strip(','),
            phone=first('//span[@itemprop="telephone"]/text()'),
            name=first('//h2[@class="ppb_title"]/text()'),
            city=first(
                '//span[@itemprop="addressLocality"]/text()').strip(','),
            state=first('//span[@itemprop="addressRegion"]/text()'),
            postcode=first('//span[@itemprop="postalCode"]/text()'),
            ref=locationid,
            website=response.url,
            lat=float(location["lat"]),
            lon=float(location["lng"]),
        )

        yield GeojsonPointItem(**properties)
예제 #21
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every entry of the JSON ``stores`` list.

        The three address lines are joined with single spaces, skipping empty
        ones. The store's ``type`` is kept under ``extras.store_type``.
        """
        # ``response.text`` is the supported replacement for the deprecated
        # ``TextResponse.body_as_unicode()``.
        data = json.loads(response.text)

        for store in data["stores"]:
            address_lines = [
                store["address_1"], store["address_2"], store["address_3"]
            ]
            # Drop empty lines, join, then collapse runs of spaces.
            addr_full = re.sub(
                ' +', ' ', ' '.join(filter(None, address_lines)).strip())

            properties = {
                'name': store["name"],
                'ref': store["id"],
                'addr_full': addr_full,
                'city': store["city"],
                'state': store["state"],
                'postcode': store["postal_code"],
                'country': store["country_code"],
                'phone': store.get("phone_number"),
                'website': response.url,
                'lat': float(store["latitude"]),
                'lon': float(store["longitude"]),
                'extras': {
                    'store_type': store["type"],
                },
            }

            yield GeojsonPointItem(**properties)
예제 #22
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for each shop in the page's inline ``t=[[...]]`` array.

        The array literal is cut out of the page text, its quoting is
        rewritten so that it parses as JSON, and each shop's address string
        (index 1) is split into street, city, state and ZIP code. Address
        parts are empty strings when the address does not match the pattern.
        """
        page = response.text
        begin = page.find('t=[[') + 2
        end = page[begin:].find(']]') + begin + 2
        # Escape existing double quotes, then turn the single-quoted strings
        # into double-quoted ones.
        shops = json.loads(
            page[begin:end].replace('"', '\\"').replace("\'", '"'))

        for shop in shops:
            address_parts = re.search(
                r"(.*),\s*(.*)\s*,\s*(\D{2})\s*(\d{5})", shop[1])

            # All four groups are mandatory, so a match always supplies every
            # part. Checking the match replaces four broad ``except Exception``
            # blocks that were only ever triggered by ``None[...]``.
            if address_parts:
                address, city, state, zip_code = address_parts.groups()
            else:
                address = city = state = zip_code = ''

            yield GeojsonPointItem(
                lat=float(shop[2]),
                lon=float(shop[3]),
                phone=shop[4],
                ref=shop[0],
                opening_hours=self.store_hours(shop[8:15]),
                addr_full=address,
                city=city,
                state=state,
                postcode=zip_code,
                country='US',
            )
예제 #23
0
    def parse(self, response):
        """Yield a ``GeojsonPointItem`` for every entry of the page's ``markers =`` array.

        The address string is matched against three progressively looser
        patterns. Whatever the matching pattern does not provide falls back to
        an empty string, or to ``'US'`` for the country. An address that
        matches none of the patterns is kept whole as ``addr_full``.
        """
        washers_str = response.xpath('//script[contains(.,"markers =")]').extract_first()
        j_beg = washers_str.find('markers =') + 10
        j_end = washers_str.find('\n\t', j_beg)
        wash_list = json.loads(washers_str[j_beg:j_end].strip().rstrip(';'))

        # Ordered from most to least specific; groups are
        # (street, city, state, zip, country) as far as each pattern goes.
        patterns = (
            r"(.*),\s*(\D{2,}\s?\D{2,}?\s?\D*)\s*,\s*(\D{2})\s*(\d{5})?,(\D{4,})?",
            r"(.*),\s*(\D{2,}\s?\D{2,}?\s?\D*)\s(\D{2})",
            r"(.*),\s?(\D*)",
        )

        for wash in wash_list:
            address = wash['address']

            groups = ()
            for pattern in patterns:
                address_parts = re.match(pattern, address)
                if address_parts:
                    groups = address_parts.groups()
                    break

            # Pad to five slots so absent groups read as None.
            # The country is group 5. The old code asked for group 6, which no
            # pattern defines, so the country was always 'US'. An unmatched
            # address used to raise on ``address_parts[1]``.
            street, city, state, zip_code, country = (groups + (None,) * 5)[:5]

            info = wash['infoContent']
            phone_at = info.find('<b>Phone:</b>')
            phone = self.phone_normalize(
                info[phone_at + 13:info.find('/div', phone_at) - 1])

            yield GeojsonPointItem(
                lat=float(wash['lat']),
                lon=float(wash['lng']),
                phone=phone,
                website='http://mistercarwash.com/locations/' + wash['name'].lower().replace(' ', '-'),
                ref=wash['loc_id'],
                opening_hours=self.store_hours(wash['loc_hours']),
                addr_full=street if groups else address,
                city=city or '',
                state=state or '',
                postcode=zip_code or '',
                country=(country or '').strip() or 'US',
            )
예제 #24
0
 def parse_stores(self, response):
     """Build a single ``GeojsonPointItem`` from a store detail page.

     Coordinates are taken from the first ``"Latitude":"…`` and
     ``"Longitude":"…`` fragments found in the page text; the remaining
     fields come from ``itemprop``-annotated ``<span>`` elements and the
     ``store-number`` ``<div>``.

     :param response: response for the store page.
     :returns: generator yielding one ``GeojsonPointItem``.
     :raises IndexError: if either coordinate fragment is absent.
     """
     page_text = response.body_as_unicode()

     def first_fragment(pattern):
         # First occurrence of *pattern* in the page text.
         return re.findall(pattern, page_text)[0]

     def trailing_number(fragment):
         # Trailing run of digits, dots and dashes of *fragment*.
         return re.findall(r"[0-9.-]+$", fragment)[0]

     def text_of(query):
         # First result of an XPath query, as a string.
         return response.xpath(query).extract_first()

     lat_fragment = first_fragment(r'\"Latitude\":\"[0-9-.]+')
     lng_fragment = first_fragment(r'\"Longitude\":\"[0-9-.]+')
     latitude = trailing_number(lat_fragment)
     longitude = trailing_number(lng_fragment)

     # Commas are stripped from the street and city values only.
     street = text_of(
         'normalize-space(//span[@itemprop="streetAddress"]/text())'
     ).replace(',', '')
     telephone = text_of(
         'normalize-space(//span[@itemprop="telephone"]/text())')
     locality = text_of(
         'normalize-space(//span[@itemprop="addressLocality"]/text())'
     ).replace(',', '')
     region = text_of(
         'normalize-space(//span[@itemprop="addressRegion"]/text())')
     postal_code = text_of(
         'normalize-space(//span[@itemprop="postalCode"]/text())')
     store_number = text_of(
         'normalize-space(//div[@class="store-number"]/text())')

     yield GeojsonPointItem(
         addr_full=street,
         phone=telephone,
         city=locality,
         state=region,
         postcode=postal_code,
         ref=store_number,
         website=response.url,
         lat=float(latitude),
         lon=float(longitude),
     )
예제 #25
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` describing the store on this page.

        Opening hours are produced by ``self.parse_hours`` from the text nodes
        of the ``schedule*`` paragraphs inside the ``hours-carryout`` block;
        coordinates come from the ``latitude``/``longitude`` ``<meta>`` tags.

        :param response: response for the store page.
        :returns: generator yielding one ``GeojsonPointItem``.
        """
        def first(query):
            # First match of an XPath query, or None when nothing matches.
            return response.xpath(query).extract_first()

        schedule_texts = response.xpath(
            '//div[@class="hours-carryout"]/p[starts-with(@class, "schedule")]//text()'
        ).extract()
        store_hours = self.parse_hours(schedule_texts)

        store_number = first('//p[@class="store-number"]/strong/text()')
        street = first('//div[@class="streetAddress"]/text()')
        telephone = first('//span[@itemprop="telephone"]/a/text()')
        locality = first('//span[@itemprop="addressLocality"]/text()')
        postal_code = first('//span[@itemprop="postalCode"]/text()')
        region = first('//span[@itemprop="addressRegion"]/text()')
        latitude = float(first('//meta[@itemprop="latitude"]/@content'))
        longitude = float(first('//meta[@itemprop="longitude"]/@content'))

        yield GeojsonPointItem(
            ref=store_number,
            website=response.url,
            addr_full=street,
            phone=telephone,
            city=locality,
            postcode=postal_code,
            state=region,
            opening_hours=store_hours,
            lat=latitude,
            lon=longitude,
        )
예제 #26
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for the store shown on this page.

        Address, phone and opening hours are read from the first
        ``mcm-logo-address`` block and the ``MainContent_hoursText`` element.
        The one-line address is split into street, city, state and ZIP by a
        regular expression; when it cannot be split, the raw text is kept as
        ``addr_full`` and the other address fields are left as ``None``.

        No coordinates are emitted: the page's Google Maps link is not parsed.

        :param response: response for the store page; ``response.meta['ref']``
            supplies the item reference.
        :returns: generator yielding one ``GeojsonPointItem``.
        :raises IndexError: if the page has no ``mcm-logo-address`` block.
        """
        # Look the container up once; every address field hangs off it.
        address_block = response.xpath('//div[@class="mcm-logo-address"]')[0]

        address_full = address_block.xpath('.//a/p/text()').extract_first()
        # ``or ''`` keeps re.match from raising when the text node is absent.
        address_parts = re.match(r"(.{3,}),\s?(.{3,}),\s?(\w{2}) (\d{5})",
                                 address_full or '')
        if address_parts:
            street, city, state, postcode = (
                part.strip() for part in address_parts.groups())
        else:
            street, city, state, postcode = address_full, None, None, None

        yield GeojsonPointItem(
            ref=response.meta.get('ref'),
            website=response.url,
            addr_full=street,
            city=city,
            state=state,
            postcode=postcode,
            phone=self.phone_normalize(
                address_block.xpath('.//ul/li/a/@href').extract_first()),
            opening_hours=self.store_hours(
                response.xpath(
                    '//div[@id="MainContent_hoursText"]/p/text()').extract()),
        )
예제 #27
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` built from the page's JSON-LD block.

        The first ``application/ld+json`` script is decoded and the first
        element of the resulting list supplies every field.  The store name
        doubles as the item reference.

        :param response: response for the store page.
        :returns: generator yielding one ``GeojsonPointItem``.
        """
        raw_ld_json = response.xpath(
            '//script[@type="application/ld+json"]/text()').extract_first()
        # The quotes around "Chic" are dropped before decoding -- presumably
        # they are unescaped inside a JSON string on the page (TODO confirm).
        records = json.loads(raw_ld_json.replace('"Chic"', 'Chic'))
        store = records[0]

        store_name = store['name']
        address = store['address']

        yield GeojsonPointItem(
            name=store_name,
            ref=store_name,
            addr_full=address['streetAddress'],
            city=address['addressLocality'],
            state=address['addressRegion'],
            postcode=address['postalCode'],
            # The telephone is read from the address object, not the top level.
            phone=address['telephone'],
            website=store['url'],
            opening_hours=store['openingHours'],
            lat=float(store['geo']['latitude']),
            lon=float(store['geo']['longitude']),
        )
예제 #28
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for the store page in *response*.

        The reference is the last path segment of the URL.  ``opening_hours``
        is only set when ``self.parse_hours`` returns a truthy value for the
        ``<dl class="hours">`` selection.

        :param response: response for the store page.
        :returns: generator yielding one ``GeojsonPointItem``.
        """
        def first(query):
            # First match of an XPath query, or None when nothing matches.
            return response.xpath(query).extract_first()

        store_ref = re.search(r'.+/(.+)$', response.url).group(1)

        item_fields = dict(
            addr_full=first('//div[@id="location-left"]/p/text()'),
            phone=first('//span[@itemprop="telephone"]/text()'),
            city=first('//span[@itemprop="addressLocality"]/text()'),
            state=first('//span[@itemprop="addressRegion"]/text()'),
            postcode=first('//span[@itemprop="postalCode"]/text()'),
            ref=store_ref,
            website=response.url,
            lat=float(first('//dt[@itemprop="latitude"]/text()')),
            lon=float(first('//dt[@itemprop="longitude"]/text()')),
            name=first('//h1[@itemprop="name"]/text()'),
        )

        opening = self.parse_hours(response.xpath('//dl[@class="hours"]'))
        if opening:
            item_fields['opening_hours'] = opening

        yield GeojsonPointItem(**item_fields)
예제 #29
0
    def parse(self, response):
        """Yield one ``GeojsonPointItem`` per entry of the ``stores`` array.

        The response body is a JSON document; each store carries a nested
        ``storeAddress`` object plus top-level ``latitude``/``longitude``.

        :param response: response whose body is the JSON store list.
        :returns: generator of ``GeojsonPointItem`` objects.
        """
        payload = json.loads(response.body_as_unicode())

        for store_data in payload["stores"]:
            address = store_data["storeAddress"]

            # Join each non-empty address line exactly once; absent or null
            # lines are skipped instead of breaking string concatenation.
            # NOTE(review): if the feed carries further lines (e.g. an
            # "addressLine3"), add them here -- confirm against the payload.
            address_lines = (
                address.get("addressLine1"),
                address.get("addressLine2"),
            )
            addr_full = ' '.join(
                line.strip() for line in address_lines
                if line and line.strip())

            properties = {
                'ref': store_data["storeNo8Digit"],
                'name': store_data["storeNameExternal"],
                'addr_full': addr_full,
                'city': address["city"],
                'postcode': address["postCode"],
                'country': address["country"],
                'lat': float(store_data["latitude"]),
                'lon': float(store_data["longitude"]),
            }

            yield GeojsonPointItem(**properties)
예제 #30
0
    def parse_store(self, response):
        """Yield a ``GeojsonPointItem`` for the store page in *response*.

        The name is the first ``<h4>`` text with the "Target – " prefix
        removed.  The street address is the joined text of the
        ``streetAddress`` span, prefixed by its ``<strong>`` heading when one
        exists.  Opening hours come from ``self.parse_hours`` applied to the
        first ``<dl>`` of the page.  Latitude and longitude are passed through
        as the extracted attribute strings, and the country is fixed to "AU".

        :param response: response for the store page.
        :returns: generator yielding one ``GeojsonPointItem``.
        :raises IndexError: if the page contains no ``<dl>`` element.
        """
        def first(query):
            # First match of an XPath query, or None when nothing matches.
            return response.xpath(query).get()

        store_name = first("//h4/text()").replace("Target – ", "")

        street_heading = first(
            "//span[@itemprop='streetAddress']/strong/text()")
        street_lines = response.xpath(
            "//span[@itemprop='streetAddress']/text()").getall()
        street = " ".join(street_lines).strip()
        if street_heading:
            street = street_heading + " " + street

        locality = first("//span[@itemprop='addressLocality']/text()")
        region = first("//span[@itemprop='addressRegion']/text()")
        post_code = first("//span[@itemprop='postalCode']/text()")
        phone_number = first("//span[@itemprop='telephone']/text()")

        first_dl = response.xpath("(//dl)[1]")[0]
        opening_hours = self.parse_hours(first_dl)

        latitude = first(
            "//div[@data-embedded-json='store-content-data']//@data-lat")
        longitude = first(
            "//div[@data-embedded-json='store-content-data']//@data-lng")

        item_fields = {
            "lat": latitude,
            "lon": longitude,
            "name": store_name,
            "addr_full": street,
            "city": locality,
            "state": region,
            "postcode": post_code,
            "country": "AU",
            "phone": phone_number,
            "website": response.url,
            "opening_hours": opening_hours,
            "ref": response.url.split("/")[-1],
        }
        yield GeojsonPointItem(**item_fields)