def parse(self, response):
        """Yield one store item built from ``response.meta`` plus page hours.

        Address fields are copied from the request meta. Opening hours are
        read from the ``store-hours`` block only when
        ``response.meta["comming_soon"]`` is the empty string; otherwise the
        hours are left empty.
        """
        meta = response.meta
        open_hours = ""
        if meta["comming_soon"] == "":
            day_texts = response.xpath(
                "//div[@class='store-hours']//p[@class='day']/text()"
            ).extract()
            hour_texts = response.xpath(
                "//div[@class='store-hours']//p[@class='hour']/text()"
            ).extract()
            # zip() stops at the shorter list, so a day without a matching
            # hour entry no longer raises IndexError and loses the store.
            open_hours = "; ".join(
                day + hours for day, hours in zip(day_texts, hour_texts))

        item = ChainItem()
        item['store_name'] = meta["store_name"]
        item['store_number'] = ""
        item['address'] = meta["address"]
        item['phone_number'] = meta["phone_number"]
        item['city'] = meta["city"]
        item['state'] = meta["state"]
        item['zip_code'] = meta["zip_code"]
        item['country'] = meta["country"]
        item['latitude'] = ""
        item['longitude'] = ""
        item['store_hours'] = open_hours
        item['other_fields'] = ""
        item['coming_soon'] = meta["comming_soon"]

        yield item
 def parse_page(self, response):
     """Yield one US store item scraped from ``itemprop`` spans.

     Each field is the first text node matched by its XPath, passed through
     ``self.validate``. Nothing is yielded when the store name is empty or
     when extraction fails; failures are logged instead of being silently
     swallowed.
     """
     import logging

     def first_text(query):
         return self.validate(response.xpath(query).extract_first())

     try:
         item = ChainItem()
         item['store_name'] = first_text('//span[@itemprop="name"]/text()')
         # The street address uses ``//text()`` so nested text nodes count.
         item['address'] = first_text(
             '//span[@itemprop="streetAddress"]//text()')
         item['city'] = first_text(
             '//span[@itemprop="addressLocality"]/text()')
         item['state'] = first_text(
             '//span[@itemprop="addressRegion"]/text()')
         item['zip_code'] = first_text(
             '//span[@itemprop="postalCode"]/text()')
         item['country'] = 'United States'
         item['phone_number'] = first_text(
             '//span[@itemprop="telephone"]/text()')
     except Exception:
         logging.getLogger(__name__).exception(
             'Failed to parse store page %s', response.url)
         return
     # Yield outside the try block so exceptions raised at the yield point
     # (e.g. generator shutdown) are never swallowed.
     if item['store_name'] != '':
         yield item
Ejemplo n.º 3
0
    def parse_kensas(self, response):
        """Yield one US store item parsed from the page's ``<address>`` block.

        The text of the first link inside ``<address>`` is tagged with
        ``usaddress.parse``; tokens tagged PlaceName, StateName and ZipCode
        fill city, state and zip, and every other token goes to the street.
        """
        fragments = []
        for text in response.xpath('.//address/a[1]/text()').extract():
            if text.strip() != "":
                fragments.append(text.strip().replace('\n', ''))

        street_tokens = []
        city_tokens = []
        state = zip_code = ''
        for token, label in usaddress.parse(" ".join(fragments)):
            token = token.replace(',', '')
            if label == 'PlaceName':
                city_tokens.append(token)
            elif label == 'StateName':
                state = token
            elif label == 'ZipCode':
                zip_code = token
            else:
                street_tokens.append(token)

        item = ChainItem()
        item['store_number'] = ''
        item['coming_soon'] = "0"
        item['store_name'] = response.xpath(
            './/a[@class="standard-logo"]/img/@alt').extract_first()
        # Each token is followed by a single space, trailing one included.
        item['address'] = ''.join(token + ' ' for token in street_tokens)
        item['country'] = 'United States'
        item['city'] = ''.join(token + ' ' for token in city_tokens)
        item['state'] = state
        item['zip_code'] = zip_code
        item['phone_number'] = response.xpath(
            './/address/a/text()').extract_first()
        item['latitude'] = ''
        item['longitude'] = ''
        item['store_hours'] = ""
        item['other_fields'] = ""

        yield item
Ejemplo n.º 4
0
    def parse_store(self, response):
        """Yield a Canadian store item for each new entry under ``Data``.

        Store numbers already present in ``self.uid_list`` are skipped; new
        ones are appended to it. The state comes from
        ``response.meta['state_mine']``.
        """
        payload = json.loads(response.body)
        try:
            stores_list = payload['Data']
        except:
            stores_list = []
            print("+++++++++++++++++++++ no store lists")

        temp_state = response.meta['state_mine']
        if not stores_list:
            print('+++++++++++++++++++++++++ there are no any stores')
            return

        for store in stores_list:
            number = store['Number']
            if number in self.uid_list:
                print('+++++++++++++++++++++++++++++ already scraped')
                continue

            self.uid_list.append(number)
            main_address = store['AddressMain']
            item = ChainItem()
            item['store_name'] = store['Name']
            item['store_number'] = number
            item['address'] = main_address['Line']
            item['city'] = main_address['City']
            item['state'] = temp_state
            item['zip_code'] = main_address['PostalCode']
            item['country'] = 'Canada'
            item['phone_number'] = store['PhoneNumberHome']['Number']
            coordinates = store['Coordinates']
            item['latitude'] = coordinates['Latitude']
            item['longitude'] = coordinates['Longitude']
            item['store_hours'] = store['OpeningHours']

            yield item
Ejemplo n.º 5
0
 def parse_page(self, response):
     """Yield one Canadian store item scraped from ``itemprop`` spans.

     Each address field is the first text node matched by its XPath, passed
     through ``self.validate``. The phone number is the last text fragment
     of the ``opentimes_box`` block that contains a ``-``. Extraction
     failures are logged and the page is skipped instead of being silently
     swallowed.
     """
     import logging

     def first_text(query):
         return self.validate(response.xpath(query).extract_first())

     try:
         item = ChainItem()
         item['store_name'] = first_text('//span[@itemprop="name"]/text()')
         item['address'] = first_text(
             '//span[@itemprop="streetAddress"]/text()')
         item['city'] = first_text(
             '//span[@itemprop="addressLocality"]/text()')
         item['state'] = first_text(
             '//span[@itemprop="addressRegion"]/text()')
         item['zip_code'] = first_text(
             '//span[@itemprop="postalCode"]/text()')
         item['country'] = 'Canada'
         detail = self.eliminate_space(
             response.xpath(
                 '//div[contains(@class, "opentimes_box")]//text()'
             ).extract())
         item['phone_number'] = ''
         for fragment in detail:
             if '-' in fragment:
                 item['phone_number'] = fragment
     except Exception:
         logging.getLogger(__name__).exception(
             'Failed to parse store page %s', response.url)
         return
     # Yield outside the try block so exceptions raised at the yield point
     # are never swallowed.
     yield item
Ejemplo n.º 6
0
 def body(self, response):
     """Yield a US store item for each new entry of the ``features`` list.

     Stores whose number is already in ``self.history`` are skipped; new
     numbers are appended to it. A malformed payload or record is logged
     and skipped. The previous handler dropped into ``pdb``, which blocks
     an unattended crawl.
     """
     import logging

     log = logging.getLogger(__name__)
     print("=========  Checking.......")
     try:
         store_list = json.loads(response.body)['features']
     except (ValueError, KeyError, TypeError):
         log.exception('Unexpected store payload from %s', response.url)
         return

     for store in store_list:
         try:
             props = store['properties']
             item = ChainItem()
             item['store_name'] = self.validate(props['name'])
             item['store_number'] = self.validate(str(props['nid']))
             item['address'] = self.validate(props['thoroughfare'])
             # NOTE(review): the key is spelled 'localty'; confirm it
             # matches the feed rather than being a typo for 'locality'.
             item['city'] = self.validate(props['localty'])
             item['state'] = self.validate(props['state_code'])
             item['zip_code'] = self.validate(str(props['postal_code']))
             if len(item['zip_code']) > 6:
                 # Insert a dash before the last four characters.
                 item['zip_code'] = (
                     item['zip_code'][:-4] + '-' + item['zip_code'][-4:])
             item['country'] = 'United States'
             item['phone_number'] = self.validate(str(props['phone']))
             if item['phone_number'] == 'None':
                 # str(None) from a missing phone value.
                 item['phone_number'] = ''
             coordinates = store['geometry']['coordinates']
             # NOTE(review): GeoJSON orders coordinates as [lon, lat];
             # confirm index 0 really is the latitude for this feed.
             item['latitude'] = self.validate(str(coordinates[0]))
             item['longitude'] = self.validate(str(coordinates[1]))
         except Exception:
             log.exception('Skipping malformed store record')
             continue
         if item['store_number'] not in self.history:
             self.history.append(item['store_number'])
             yield item
Ejemplo n.º 7
0
 def parse_store(self, response):
     """Yield one Canadian store item from the page's address wrapper.

     The first non-blank text fragment of the ``address_wrapper`` span is
     the street; the second holds ``city,state`` on its first line and the
     postal code on its second line.
     """
     item = ChainItem()
     item['store_number'] = ''
     item['country'] = 'Canada'
     item['latitude'] = ''
     item['longitude'] = ''
     item['store_name'] = self.validate(
         response.xpath(
             './/div[@class="get_in_touch"]/h3/text()').extract_first())
     item['other_fields'] = ""
     item['coming_soon'] = "0"

     fragments = []
     for text in response.xpath(
             './/span[@class="address_wrapper"]//text()').extract():
         if text.replace('\n', '').strip() != "":
             fragments.append(text.strip())

     item['address'] = fragments[0]
     item['address2'] = ''
     locality_rows = fragments[1].split('\n')
     city_state = locality_rows[0].split(',')
     item['city'] = city_state[0]
     item['state'] = city_state[1]
     item['zip_code'] = locality_rows[1]
     item['phone_number'] = response.xpath(
         './/li[@class="phone"]/p/a/text()').extract_first()
     item['store_hours'] = ''
     yield item
Ejemplo n.º 8
0
 def parseStore(self, response):
     """Yield one US store item per ``<li>`` of the ``vlist results`` list.

     The store name is the part after the first ``.`` of the ``fn org``
     text; the phone is the first two whitespace-separated tokens of the
     ``tel`` span joined together; latitude and longitude come from the
     ``hlDirections`` link (text after the first ``=``, split on ``,``).
     """
     for store in response.xpath('//ol[@class="vlist results"]/li'):
         item = ChainItem()
         raw_name = self.validate(
             store.xpath('.//div[@class="fn org"]/text()'))
         item['store_name'] = raw_name.split('.')[1].strip()
         item['store_number'] = ""
         item['address'] = self.validate(
             store.xpath('.//div[@class="street-address"]/text()'))
         item['address2'] = ""

         tel_tokens = store.xpath(
             './/span[@class="tel"]/text()').extract_first().split()
         item['phone_number'] = tel_tokens[0] + tel_tokens[1]

         locality = self.validate(
             store.xpath('.//span[@class="locality"]/text()'))
         item['city'] = locality.split(',')[0]
         item['state'] = self.validate(
             store.xpath('.//span[@class="region"]/text()'))
         item['zip_code'] = self.validate(
             store.xpath('.//span[@class="postal-code"]/text()'))
         item['country'] = "United States"

         directions_href = store.xpath(
             './/a[contains(@id,"hlDirections")]/@href').extract_first()
         lat_lng = directions_href.split('=')[1].split(',')
         item['latitude'] = lat_lng[0]
         item['longitude'] = lat_lng[1]

         item['store_hours'] = ""
         item['other_fields'] = ""
         item['coming_soon'] = 0
         yield item
  def parse_detail(self, response):
    """Yield one US store item from a store detail page.

    Pages without an ``h2.h2top`` heading are reported as deleted and yield
    nothing. The first ``moreLandaddress`` entry supplies the street and a
    ``city, state, zip`` line; the second supplies the opening hours, which
    are concatenated with a trailing ``'; '`` after each entry.
    """
    shop_name = response.xpath('//h2[@class="h2top"]/text()').extract_first()
    if not shop_name:
      print('++++++++++++++++++++++++++++++++++++++++++ deleted')
      return

    sections = response.xpath('//div[@class="addresses"]//ul//li[@class="moreLandaddress"]')

    addr_lines = sections[0].xpath('.//p/text()').extract()
    addr = addr_lines[0].replace('\n', ' ')
    city_state_zip = addr_lines[1].split(', ')
    city = city_state_zip[0]
    state = city_state_zip[1]
    zip_code = city_state_zip[2]

    # Replace the en dash directly on the unicode text. The previous
    # encode('raw-unicode-escape') round trip produced bytes and raised
    # TypeError on Python 3.
    hour = ''.join(
      text.strip().replace(u'\u2013', ' - ') + '; '
      for text in sections[1].xpath('.//p/text()').extract())

    item = ChainItem()
    item['store_name'] = shop_name
    item['address'] = addr
    item['city'] = city
    item['state'] = state
    item['zip_code'] = zip_code
    item['store_hours'] = hour
    item['country'] = 'United States'

    yield item
 def body(self, response):
     """Yield a US store item for each entry of the page's ``locations`` object.

     The object is read from the last inline ``<script>`` containing
     ``var locations = ``, converted to JSON via ``self.fixLazyJson`` and
     iterated by key; the key becomes the store number. Pages with an empty
     body or without the locations object yield nothing.
     """
     import re

     if not response.body:
         return

     data = ''
     for script in response.xpath('//script/text()').extract():
         if 'var locations = ' in script:
             data = script
     if 'var locations = {' not in data:
         # No embedded locations object; previously this raised IndexError.
         return

     data = data.split('var locations = {')[1].split('};')[0].strip()
     data = '{' + data + '}'
     # Rewrite only the LatLng(...) constructor calls as JSON arrays. The
     # old blanket ')' -> ']' replacement also mangled every other closing
     # parenthesis in the data (e.g. "(555) 123-4567").
     data = re.sub(
         r'new google\.maps\.LatLng\(([^()]*)\)', r'[\1]', data)
     store_list = json.loads(self.fixLazyJson(data))

     for store_id in store_list:
         store = store_list[store_id]
         # Build a fresh item per store: reusing one mutable item meant
         # every yielded reference pointed at the same object.
         item = ChainItem()
         item['store_number'] = store_id
         item['address'] = store['address']
         item['city'] = store['city']
         item['state'] = store['state']
         item['zip_code'] = store['zip']
         item['phone_number'] = store['phone']
         item['coming_soon'] = '0'
         item['country'] = 'United States'
         item['latitude'] = store['point'][0]
         item['longitude'] = store['point'][1]
         yield item
Ejemplo n.º 11
0
    def body(self, response):
        """Walk the ``store-p`` entries, yielding items or follow-up requests.

        The link text is split on ``-`` when it contains one, otherwise on
        ``,``. Entries with a status mentioning "coming" or "opening" yield
        a coming-soon item; entries with any other status yield nothing;
        entries without a status yield a request to the store page with the
        guessed country in ``meta``.
        """
        print("=========  Checking.......")
        for store in response.xpath('//p[@class="store-p"]'):
            status = store.xpath(
                './/span[@class="store-sub-title"]/text()').extract_first()
            label = store.xpath('.//a/text()').extract_first()
            separator = '-' if '-' in label else ','
            detail = label.split(separator)

            if not status:
                region = detail[1]
                if 'UK' in region or 'Isle of Man' in region:
                    country = 'United Kingdom'
                elif 'Manitoba' in region or 'Ontario' in region:
                    country = 'Canada'
                else:
                    country = 'United States'
                yield scrapy.Request(
                    url=store.xpath('.//a/@href').extract_first(),
                    callback=self.parse_page,
                    meta={'country': country})
                continue

            lowered = status.lower()
            if 'coming' not in lowered and 'opening' not in lowered:
                continue

            item = ChainItem()
            item['city'] = self.validate(detail[0])
            item['state'] = self.validate(detail[1])
            item['country'] = 'United States'
            if item['state'] == 'Canada':
                item['country'] = 'Canada'
                item['state'] = ''
            item['coming_soon'] = '1'

            yield item
Ejemplo n.º 12
0
    def parse_store(self, response):
        """Yield a Canadian store item for each ``wpseo-result`` block.

        Latitude and longitude are cut out of the route link's ``onclick``
        attribute (second and third ``, '``-delimited arguments). Stores are
        de-duplicated on phone number through ``self.uid_list``.
        """
        for store in response.xpath('//div[contains(@class, "wpseo-result")]'):

            def prop(name):
                return self.validate(
                    store.xpath('.//span[@itemprop="%s"]/text()' % name))

            item = ChainItem()
            item['store_name'] = store.xpath(
                './/span[@itemprop="name"]/text()').extract_first().strip()
            pos = store.xpath(
                './/div[@class="wpseo-sl-route"]/a/@onclick').extract_first()

            item['store_number'] = ''
            item['address'] = prop('streetAddress')
            item['address2'] = ''
            item['city'] = prop('addressLocality')
            item['state'] = prop('addressRegion')
            item['zip_code'] = prop('postalCode')
            item['country'] = 'Canada'
            item['phone_number'] = prop('telephone')

            onclick_args = pos.split(", '")
            item['latitude'] = onclick_args[1].split("'")[0]
            item['longitude'] = onclick_args[2].split("'")[0]

            # Every entry is followed by ' ; ', the last one included.
            item['store_hours'] = ''.join(
                hour.replace(u'\xa0', '') + ' ; '
                for hour in store.xpath(
                    './/span[@itemprop="openingHours"]/@content').extract())

            item['store_type'] = ''
            item['other_fields'] = ''
            item['coming_soon'] = '0'

            phone = item['phone_number']
            if phone in self.uid_list:
                continue
            self.uid_list.append(phone)
            yield item
 def body(self, response):
     """Yield a store item for each new entry of the ``Location`` list.

     Every value is passed through ``self.validate``. Hours are built from
     ``meetingsForDay`` as "<dayOfWeek> <first meeting> <openHours>" joined
     with ", ". Store numbers already in ``self.history`` are skipped.
     """
     print("=========  Checking.......")
     for store in json.loads(response.body)['Location']:
         address = store['address']
         item = ChainItem()
         item['store_name'] = self.validate(store['name'])
         item['store_number'] = self.validate(str(store['id']))
         item['address'] = self.validate(address['address1'])
         item['address2'] = self.validate(address['address2'])
         item['city'] = self.validate(address['city'])
         item['state'] = self.validate(address['state'])
         # NOTE(review): the zip code is read from a nested 'address'
         # object, unlike the sibling fields; confirm against the feed.
         item['zip_code'] = self.validate(address['address']['zipCode'])
         item['country'] = self.validate(address['country'])
         item['latitude'] = self.validate(
             address['gpsCoordinates']['latitude'])
         item['longitude'] = self.validate(
             address['gpsCoordinates']['longitude'])

         schedule = []
         for entry in store['meetingsForDay']:
             schedule.append(
                 self.validate(entry['dayOfWeek']) + ' ' +
                 self.validate(entry['meetings'][0]) + ' ' +
                 self.validate(entry['openHours']))
         item['store_hours'] = ', '.join(schedule)

         number = item['store_number']
         if number in self.history:
             continue
         self.history.append(number)
         yield item
	def parse_page(self, response):
		"""Yield one US store item from a location detail page.

		A heading containing 'Coming Soon' marks the store as coming soon and
		the name is cut at that marker. The second address paragraph is
		split as "city, state zip"; the zip code is left unset when it
		cannot be read.
		"""
		item = ChainItem()
		name = self.validate(response.xpath('//div[@id="content"]/h2/text()').extract_first())
		is_coming_soon = 'Coming Soon' in name
		if is_coming_soon:
			name = name.split('Coming Soon')[0].strip()
		item['store_name'] = name
		item['coming_soon'] = '1' if is_coming_soon else '0'

		detail = response.xpath('//div[@id="location_information"]')
		address = detail.xpath('.//div[@id="address"]//p/text()').extract()
		item['address'] = self.validate(address[0])
		item['address2'] = ''
		city_and_rest = address[1].strip().split(',')
		item['city'] = self.validate(city_and_rest[0].strip())
		state_zip = city_and_rest[1].strip().split(' ')
		item['state'] = self.validate(state_zip[0].strip())
		try:
			item['zip_code'] = self.validate(state_zip[1].strip())
		except:
			# Best effort: the zip code field stays unset on any failure.
			pass
		item['country'] = 'United States'
		item['phone_number'] = self.validate(detail.xpath('.//p/span[@class="phone"]/text()').extract_first())

		hour_rows = []
		for row in detail.xpath('.//div[@id="hours"]//li'):
			texts = row.xpath('.//text()').extract()
			hour_rows.append(self.validate(texts[0]) + ' ' + self.validate(texts[1]))
		item['store_hours'] = ', '.join(hour_rows)
		item['store_type'] = ''
		item['other_fields'] = ''
		yield item
 def parse_store(self, response):
     """Yield a store item for each table body not seen before.

     Row 0 holds the store name, row 1 the address (tagged with
     ``usaddress.parse``) and row 2 the country. Addresses already in
     ``self.history`` are skipped. When no table body is found, the
     request's ``search_zip`` is flagged in ``self.flag_end``.
     """
     # NOTE(review): `tree` is not assigned anywhere in this method;
     # presumably a parsed document defined elsewhere. Confirm it exists.
     stores = tree.xpath('//li//div[@class="text"]//table//tbody')
     if not stores:
         search_zip = response.meta['search_zip']
         self.flag_end[search_zip] = 1
         print('============================== end pagination')
         return

     for store in stores:
         rows = store.xpath('.//tr')
         store_name = rows[0].xpath('.//td')[1].xpath(
             './a/text()')[0].strip()
         item = ChainItem()
         item['store_name'] = store_name

         address_texts = rows[1].xpath('.//td')[1].xpath('./text()')
         full_address = ''.join(
             text.strip() + ' ' for text in address_texts)

         item['city'] = ''
         item['address'] = ''
         for token, label in usaddress.parse(full_address):
             token = token.replace(',', '')
             if label == 'PlaceName':
                 item['city'] += token + ' '
             elif label == 'StateName':
                 item['state'] = token
             elif label == 'ZipCode':
                 item['zip_code'] = token
             else:
                 item['address'] += token + ' '

         street = item['address'].strip()
         if street in self.history:
             continue
         self.history.append(item['address'].strip())
         item['country'] = rows[2].xpath('.//td')[1].xpath(
             './text()')[0].strip()
         yield item
 def body(self, response):
     """Yield a store item for each ``section group store`` block.

     City and zip code are the two comma-separated parts of the second
     address span; state and country come from the block's ``data-region``
     and ``data-country`` attributes. A block that fails to parse is logged
     and skipped.
     """
     import logging

     log = logging.getLogger(__name__)
     print("=========  Checking.......")
     for store in response.xpath('//div[@class="section group store"]'):
         try:
             item = ChainItem()
             item['store_name'] = self.validate(
                 store.xpath(
                     './/h3[@class="store_name"]/text()').extract_first())
             item['address'] = self.validate(
                 store.xpath(
                     './/p[@class="address"]//span[1]/text()'
                 ).extract_first())
             locality = self.validate(
                 store.xpath(
                     './/p[@class="address"]//span[2]/text()'
                 ).extract_first()).split(',')
             item['city'] = locality[0]
             item['state'] = self.validate(
                 store.xpath('./@data-region').extract_first())
             item['zip_code'] = locality[1]
             item['country'] = self.validate(
                 store.xpath('./@data-country').extract_first())
             item['phone_number'] = self.validate(
                 store.xpath('.//p[@class="phone"]/text()').extract_first())
         except Exception:
             log.exception('Skipping store block that failed to parse')
             continue
         # Yield outside the try block: a bare except around a yield inside
         # a loop also caught GeneratorExit and then yielded again.
         yield item
Ejemplo n.º 17
0
    def body(self, response):
        """Yield one store item per entry of the JSON list in the response.

        The phone number is set only when the ``contact`` object has a
        ``store`` key. Hours are the ``hours`` entries joined with ", ".
        """
        print("=========  Checking.......")

        for store in json.loads(response.body):
            address = store['address']
            item = ChainItem()
            item['store_name'] = store['name']
            item['store_number'] = store['id']
            item['address'] = address['address']
            item['address2'] = ''
            item['city'] = address['city']
            item['state'] = address['province']
            item['zip_code'] = address['postal_code']
            item['country'] = address['country']

            contact = store['contact']
            if 'store' in contact:
                item['phone_number'] = contact['store']

            coordinates = store['coordinates']
            item['latitude'] = coordinates['latitude']
            item['longitude'] = coordinates['longitude']
            item['store_hours'] = ', '.join(store['hours'])
            item['store_type'] = ''
            item['other_fields'] = ''
            item['coming_soon'] = ''
            yield item
Ejemplo n.º 18
0
    def parse_store(self, response):
        """Yield a US store item for each ``VWStoreInfo`` block with a name.

        The first paragraph is split on commas: first part is the street,
        second the city, last the phone number. Stores are de-duplicated on
        phone number through ``self.uid_list``; state and coordinates come
        from ``response.meta``.
        """
        for store in response.xpath('//div[@class="VWStoreInfo"]'):
            name = store.xpath('.//h3/text()').extract_first()
            if name is None:
                continue

            item = ChainItem()
            item['store_name'] = name
            item['store_number'] = ''

            address = store.xpath(".//p/text()").extract_first().split(',')
            item['address'] = address[0]
            phone = address[-1]
            item['phone_number'] = phone

            if phone in self.uid_list:
                continue
            self.uid_list.append(phone)

            item['address2'] = ""
            item['city'] = address[1].strip()
            item['state'] = response.meta['state']
            item['country'] = "United States"
            item['latitude'] = response.meta['lat']
            item['longitude'] = response.meta['lng']
            item['other_fields'] = ""
            item['store_hours'] = ";".join(
                store.xpath(".//p[2]/text()").extract())
            item['coming_soon'] = 0

            yield item
Ejemplo n.º 19
0
    def parse_store(self, response):
        """Yield one US store item from a shop detail page.

        The text nodes of the ``shop-full-description`` paragraphs are read
        positionally: [0] street, [1] "city, state zip", [2] opening hours.
        Coordinates come from ``response.meta``.
        """
        item = ChainItem()

        item['store_number'] = ''
        item['store_name'] = response.xpath(
            './/div[@class="page-title"]/h1/text()').extract_first()

        description = response.xpath(
            './/div[@class="shop-full-description"]/p/text()').extract()
        item['address'] = description[0].strip()
        item['address2'] = ''

        locality = description[1].split(',')
        item['city'] = locality[0].strip()
        # The last space-separated token is the zip; the rest is the state.
        state_zip = locality[1].strip().split(' ')
        item['state'] = "".join(state_zip[:-1])
        item['zip_code'] = state_zip[-1]

        item['country'] = 'United States'
        item['phone_number'] = response.xpath(
            './/div[@class="shop-full-description"]/p/a/text()'
        ).extract_first()
        item['latitude'] = response.meta['lat']
        item['longitude'] = response.meta['lng']
        item['store_hours'] = description[2].strip()
        item['other_fields'] = ""
        item['coming_soon'] = "0"

        yield item
Ejemplo n.º 20
0
	def body(self, response):
		"""Yield one US store item per location card on the page.

		The ``<address>`` text nodes are read positionally: [0] street,
		[1] "city, state zip", [2] phone. Non-empty ``<p>`` texts are joined
		with ", " as the hours; if they mention "coming" the store is marked
		coming soon and the hours are blanked.
		"""
		store_list = response.xpath('//div[contains(@class, "cardgrid__card location__card")]')
		print("=========  Checking.......", len(store_list))
		for store in store_list:
			item = ChainItem()
			item['store_name'] = self.validate(store.xpath('.//a[1]/text()').extract_first())
			detail = store.xpath('.//address/text()').extract()
			item['address'] = self.validate(detail[0])
			city_and_rest = detail[1].split(',')
			item['city'] = self.validate(city_and_rest[0].strip())
			state_zip = city_and_rest[1].strip().split(' ')
			item['state'] = self.validate(state_zip[0].strip())
			item['zip_code'] = self.validate(state_zip[1].strip())
			item['country'] = 'United States'
			item['phone_number'] = self.validate(detail[2])

			hour_texts = store.xpath('.//p/text()').extract()
			hours = ', '.join([
				self.validate(text)
				for text in hour_texts
				if self.validate(text) != ''
			])
			if 'coming' in hours.lower():
				item['store_hours'] = ''
				item['coming_soon'] = '1'
			else:
				item['store_hours'] = hours
				item['coming_soon'] = '0'
			yield item
Ejemplo n.º 21
0
	def parse_store(self, temp):
		"""Convert one list of scraped text fragments into a ChainItem.

		``temp`` is read positionally. Which slot holds the street address,
		the "city, ST zip" line and the phone depends on ``len(temp)`` and on
		whether ``self.contain_check(temp)`` is true; that branch marks the
		store as coming soon. For other stores the country is 'United
		States' when the zip code parses as an integer and 'Canada'
		otherwise (including when no zip code was set).

		Returns the populated item, or ``None`` if any field fails to parse.
		"""
		# len(temp) -> (address index, "city, ST zip" index, phone index)
		coming_soon_layouts = {4: (2, 3, None)}
		open_layouts = {
			3: (1, 2, None),
			4: (1, 2, 3),
			5: (2, 3, 4),
		}
		try:
			item = ChainItem()
			coming_soon = self.contain_check(temp)
			layouts = coming_soon_layouts if coming_soon else open_layouts
			layout = layouts.get(len(temp))

			if layout is not None:
				address_idx, locality_idx, phone_idx = layout
				item['store_name'] = self.validate(temp[0])
				item['address'] = self.validate(temp[address_idx])
				locality = temp[locality_idx].split(',')
				item['city'] = self.validate(locality[0])
				state_zip = locality[1].strip().split(' ')
				item['state'] = self.validate(state_zip[0])
				item['zip_code'] = self.validate(state_zip[1])
				if phone_idx is not None:
					item['phone_number'] = self.validate(temp[phone_idx])
			elif coming_soon:
				# Unrecognised coming-soon layouts still carry a name.
				item['store_name'] = self.validate(temp[0])

			if coming_soon:
				item['country'] = 'United States'
				item['coming_soon'] = '1'
				return item

			try:
				int(item['zip_code'])
			except (KeyError, ValueError, TypeError):
				# Missing or non-numeric zip code.
				item['country'] = 'Canada'
			else:
				item['country'] = 'United States'
			item['coming_soon'] = '0'
			return item
		except Exception:
			# Best effort: an unparseable record is dropped, but
			# KeyboardInterrupt/SystemExit are no longer swallowed.
			return None
Ejemplo n.º 22
0
    def parse(self, response):
        """Yield one US store item per entry of the JSON list in the response.

        ``store_address`` is an HTML fragment split on ``<br />`` (or on
        ``</p>\\n<p>`` when no ``<br />`` is present). The last part is split
        on spaces and interpreted as city / state / zip by the heuristics
        commented below.
        """
        store_list = json.loads(response.body)
        for store_info in store_list:
            item = ChainItem()

            item['store_number'] = store_info['id']
            item['store_name'] = store_info['title']
            item['address2'] = ''
            address1 = ''
            # Pick the separator actually used by this store's address HTML.
            if store_info['store_address'].find('<br />') != -1:
                address = store_info['store_address'].split('<br />')
            else:
                address = store_info['store_address'].split('</p>\n<p>')
            # Three parts: street, second address line, locality line.
            # Otherwise: street followed by the locality line.
            if len(address) == 3:
                item['address'] = self.validate(address[0])
                item['address2'] = self.validate(address[1])
                address1 = address[2].split(' ')
            else:
                item['address'] = self.validate(address[0])
                address1 = address[1].split(' ')

            # Four tokens: two-word city, then state, then zip.
            if len(address1) == 4:
                item['city'] = self.validate(
                    address1[0]) + ' ' + self.validate(address1[1])
                item['state'] = self.validate(address1[2])
                item['zip_code'] = self.validate(address1[3])
            else:
                # Tentatively treat the third token as the zip code.
                item['zip_code'] = self.validate(address1[2])
                try:
                    # int() is used only as a numeric check; `val` is unused.
                    val = int(item['zip_code'])
                    item['city'] = self.validate(address1[0])
                    item['state'] = self.validate(address1[1])
                    # A comma inside the second token means it still belongs
                    # (at least partly) to the city.
                    if address1[1].find(',') != -1:
                        if address1[1].split(',')[1] == '':
                            # Trailing comma: both tokens are the city and
                            # no state is present.
                            item['city'] = self.validate(address1[0] + ' ' +
                                                         address1[1])
                            item['state'] = ''
                        else:
                            # "Word,ST": text before the comma joins the
                            # city, text after it is the state.
                            item['city'] = self.validate(
                                address1[0]) + ' ' + self.validate(
                                    address1[1].split(',')[0])
                            item['state'] = address1[1].split(',')[1]
                except ValueError:
                    # Third token is not numeric: two-word city, third token
                    # is the state, and no zip code is recorded.
                    item['city'] = self.validate(
                        address1[0]) + ' ' + self.validate(address1[1])
                    item['state'] = self.validate(address1[2])
                    item['zip_code'] = ''

            item['country'] = 'United States'
            item['phone_number'] = store_info['store_telephone']
            item['latitude'] = store_info['lat']
            item['longitude'] = store_info['lng']

            # Remove the '<br />' markup from the hours text.
            item['store_hours'] = self.validate(
                store_info['store_hours']).replace('<br />', '')
            item['other_fields'] = ""
            item['coming_soon'] = "0"

            yield item
Ejemplo n.º 23
0
    def parse(self, response):
        """Yield a ``ChainItem`` for every entry under ``features`` in the JSON body.

        ``response.body`` is decoded with ``json.loads``. Each feature supplies
        a ``properties`` mapping (address lines, identifiers, hours, filter
        types, open status) and ``geometry.coordinates``, read here as
        ``[longitude, latitude]``. ``response.meta['chain_id']`` is copied onto
        every item; items whose chain id is the empty string are not yielded.

        Missing required keys still raise ``KeyError`` as before. Only the
        optional phone number and opening hours are tolerated when absent.
        """
        stores = json.loads(response.body)
        if 'features' not in stores:
            return

        # (label used in the output string, key inside ``restauranthours``)
        weekdays = (
            ('Mon', 'hoursMonday'),
            ('Tue', 'hoursTuesday'),
            ('Wed', 'hoursWednesday'),
            ('Thu', 'hoursThursday'),
            ('Fri', 'hoursFriday'),
            ('Sat', 'hoursSaturday'),
            ('Sun', 'hoursSunday'),
        )

        for store in stores['features']:
            item = ChainItem()
            props = store['properties']

            item['chain_id'] = response.meta['chain_id']
            item['store_number'] = props['identifiers']['gblnumber']

            item['address'] = props['addressLine1'].strip()

            # The phone number is optional: leave the field unset when the
            # feed does not carry it instead of hiding every possible error.
            if 'telephone' in props:
                item['phone_number'] = props['telephone']

            item['latitude'] = store['geometry']['coordinates'][1]
            item['longitude'] = store['geometry']['coordinates'][0]
            item['city'] = props['addressLine3'].strip()
            item['state'] = props['subDivision'].strip()
            item['zip_code'] = props['postcode'].strip()
            item['country'] = props['addressLine4'].strip()
            item['geo_accuracy'] = "Exact"

            # Collect days in order and stop at the first missing/invalid one,
            # keeping whatever was gathered so far (same result as the former
            # incremental string building, but only data errors are ignored).
            hours = []
            try:
                opening = props['restauranthours']
                for label, key in weekdays:
                    hours.append(label + ': ' + opening[key])
            except (KeyError, TypeError):
                pass
            if hours:
                item['store_hours'] = '; '.join(hours)

            item['other_fields'] = ", ".join(props['filterType'])

            # The last filter type decides the name/type; with an empty list
            # both fields stay unset.
            for ft in props['filterType']:
                brand = "Walmart" if ft == 'WALMARTLOCATION' else "McDonalds"
                item['store_name'] = brand
                item['store_type'] = brand

            item['coming_soon'] = (
                "1" if props['openstatus'] == 'COMINGSOON' else "0")
            if item['chain_id'] != '':
                yield item
 def body(self, response):
     """Yield a ``ChainItem`` for each store in a JSONP store-list response.

     The payload is whatever follows the fixed jQuery callback prefix, minus
     its last two characters, decoded with ``raw-unicode-escape`` and parsed
     as JSON. Stores whose ``storeNumber`` is already in
     ``self.store_number`` are skipped; new numbers are appended to it.

     A response that cannot be decoded, or a store record that cannot be
     converted, is logged and skipped. Previously both cases dropped into
     ``pdb``, and the bare ``except`` also intercepted ``GeneratorExit``
     raised at the ``yield``.
     """
     import logging

     log = logging.getLogger(__name__)
     callback_prefix = 'jQuery224004805107136324005_1498327407126('
     # (label used in the output string, key of the per-day hours mapping)
     weekdays = (
         ('Mon', 'mondayHours'),
         ('Tue', 'tuesdayHours'),
         ('Wed', 'wednesdayHours'),
         ('Thu', 'thursdayHours'),
         ('Fri', 'fridayHours'),
         ('Sat', 'saturdayHours'),
         ('Sun', 'sundayHours'),
     )

     try:
         raw = response.body
         # Split with a separator of the same type as the body so the code
         # works for byte strings and text alike.
         if isinstance(raw, bytes) and not isinstance(callback_prefix, bytes):
             callback_prefix = callback_prefix.encode('ascii')
         data = raw.split(callback_prefix)[1].strip()[:-2]
         if isinstance(data, bytes):
             data = data.decode('raw-unicode-escape')
         store_list = json.loads(data)
     except (IndexError, ValueError, TypeError):
         log.exception('could not decode the JSONP store list')
         return

     for store in store_list or []:
         try:
             item = ChainItem()
             item['phone_number'] = store['phoneNumber']
             item['store_number'] = store['storeNumber']
             item['country'] = 'United States'
             item['latitude'] = store['latitude']
             item['longitude'] = store['longitude']
             item['store_name'] = store['name']
             item['other_fields'] = ""
             item['coming_soon'] = "0"
             item['address'] = store['streetAddress']
             item['address2'] = ''
             item['city'] = store['city']
             item['state'] = self.validate(store['province'])
             item['zip_code'] = self.validate(store['postalCode'])
             # Every day, including the last one, is terminated by '; '.
             item['store_hours'] = ''.join(
                 label + ':' + self.parse_time(store[key]['openTime']) +
                 ' - ' + self.parse_time(store[key]['closeTime']) + '; '
                 for label, key in weekdays)
         except Exception:
             # Per-record boundary: one bad store must not stop the crawl.
             log.exception('skipping a store record that could not be parsed')
             continue

         # The yield stays outside the try block so generator shutdown
         # (GeneratorExit) is never swallowed.
         if item['store_number'] in self.store_number:
             continue
         self.store_number.append(item['store_number'])
         yield item
Ejemplo n.º 25
0
    def parse(self, response):
        """Yield one ``ChainItem`` per MasterCuts salon page listed in the sitemap.

        The sitemap is fetched with ``urllib2``; every line containing both
        ``/mastercuts-`` and ``.html`` contributes the URL found after
        ``<loc>``. Each salon page is then fetched and scanned line by line for
        fixed HTML fragments. ``response`` itself is not read.

        Differences from the previous version:
        * a fresh item is created per page (one shared item used to be
          mutated and re-yielded, leaking fields between stores);
        * both HTTP handles are closed when done;
        * an hours marker on the last line, or one not followed by a
          ``content="`` attribute, is ignored instead of raising.
        """
        sitemap_url = 'https://www.signaturestyle.com/content/dam/sitemaps/signaturestyle/sitemap_signaturestyle_en_us.xml'
        sitemap = urllib2.urlopen(sitemap_url)
        try:
            urls = [
                line.split('<loc>')[1].split('<')[0]
                for line in sitemap
                if '/mastercuts-' in line and '.html' in line
            ]
        finally:
            sitemap.close()

        for url in urls:
            item = ChainItem()
            sh = ''
            # Tracked locally so the country fix-ups below never read an
            # item field that this page did not provide.
            state = ''
            zip_code = ''
            page_text = urllib2.urlopen(url)
            try:
                for line in page_text:
                    if 'var salonDetailSalonID = "' in line:
                        item['store_number'] = line.split(
                            'var salonDetailSalonID = "')[1].split('"')[0]
                    if '<h2 class="hidden-xs salontitle_salonlrgtxt">' in line:
                        item['store_name'] = line.split(
                            '<h2 class="hidden-xs salontitle_salonlrgtxt">'
                        )[1].split('<')[0]
                    if '<span itemprop="streetAddress">' in line:
                        item['address'] = line.split(
                            '<span itemprop="streetAddress">')[1].split('<')[0]
                        item['address2'] = ''
                        item['country'] = 'United States'
                    if 'itemprop="addressLocality">' in line:
                        item['city'] = line.split(
                            'op="addressLocality">')[1].split('<')[0]
                    if 'itemprop="addressRegion">' in line:
                        state = line.split(
                            'itemprop="addressRegion">')[1].split('<')[0]
                        item['state'] = state
                    if '"postalCode">' in line:
                        zip_code = line.split('"postalCode">')[1].split('<')[0]
                        item['zip_code'] = zip_code
                    if 'id="sdp-phone" href="">' in line:
                        item['phone_number'] = line.split(
                            'id="sdp-phone" href="">')[1].split('<')[0]
                    if 'itemprop="latitude" content="' in line:
                        item['latitude'] = line.split(
                            'itemprop="latitude" content="')[1].split('"')[0]
                    if 'itemprop="longitude" content="' in line:
                        item['longitude'] = line.split(
                            'itemprop="longitude" content="')[1].split('"')[0]
                    if '<span class="' in line and 'day">' in line:
                        # The hours value sits on the line after the day
                        # label; consume it from the same iterator.
                        hours_line = next(page_text, '')
                        if 'content="' in hours_line:
                            hours = hours_line.split('content="')[1].split(
                                '"')[0]
                            sh = hours if sh == '' else sh + ';' + hours
            finally:
                page_text.close()

            item['store_type'] = "MasterCuts"
            if state == 'PR':
                item['country'] = 'Puerto Rico'
            # A space inside the postal code is treated as a Canadian code.
            if ' ' in zip_code:
                item['country'] = 'Canada'
            item['other_fields'] = ''
            item['store_hours'] = sh
            item['coming_soon'] = '0'
            yield item
Ejemplo n.º 26
0
 def parse_stores(self, response):
     """Yield a ``ChainItem`` for every marker embedded in the response body.

     The body is treated as plain text: the span between ``"markers":[{``
     and ``}],`` is cut out, two escaped fragments are removed, and the rest
     is split on ``},{`` into one chunk per store. Each field is then found
     by splitting on a marker and taking the text up to a terminator, and
     the working chunk shrinks to the text after each marker in turn. An
     empty body only prints a notice.
     """

     def after(text, marker):
         # Segment that follows the first occurrence of ``marker``.
         return text.split(marker)[1]

     def upto(text, stop):
         # Text in front of the first occurrence of ``stop``.
         return text.split(stop)[0]

     if not response.body:
         print('+++++++++++++++++++++++++ no response')
         return

     markers = upto(after(response.body, '"markers":[{'), '}],')
     markers = markers.replace('\\u0022\\u003E', '').replace('\\u003C\\/', '')

     for chunk in markers.split('},{'):
         item = ChainItem()

         chunk = after(chunk, '"latitude":')
         item['latitude'] = upto(chunk, ',')

         chunk = after(chunk, '"longitude":')
         item['longitude'] = upto(chunk, ',')

         chunk = after(chunk, '"markername":"')
         item['store_name'] = upto(chunk, '",')

         chunk = after(chunk, 'streetAddress')
         # The trailing character left after stripping is dropped as well.
         item['address'] = upto(chunk, 'span').strip()[:-1]

         chunk = after(chunk, 'postalCode')
         item['zip_code'] = upto(chunk, 'span').strip()

         chunk = after(chunk, 'addressLocality')
         item['city'] = upto(chunk, 'span').replace('\\n', '').strip()

         chunk = after(chunk, 'addressRegion')
         item['state'] = upto(chunk, 'span').strip()

         chunk = after(chunk, 'Store #')
         item['store_number'] = upto(chunk, 'div').strip()

         chunk = after(chunk, 'Phone:')
         item['phone_number'] = upto(chunk, 'div').replace('a\\u003E', '').strip()

         chunk = after(chunk, 'Hours')
         hours = upto(chunk, 'div')
         hours = hours.replace('a\\u003Eem\\u003Ep\\u003E\\n\\n\\u003Cp\\u003E', '')
         hours = hours.replace('p\\u003E\\n', '')
         hours = hours.replace('\\u003Cbr \\/\\u003E\\n', '; ')
         hours = hours.replace('\u0026nbsp;', ' ')
         hours = hours.replace('\n\u003Cp\u003E', '')
         hours = hours.strip()
         # When the marker is still present, the last 15 characters are cut.
         item['store_hours'] = hours[:-15] if '\u003E' in hours else hours

         item['country'] = 'United States'
         yield item
 def body(self, response):
     """Yield a ``ChainItem`` for each entry of ``Stores`` in the JSON body.

     Opening hours are built from ``OperatingHours[0]`` (labelled
     "Sun - Thu") and ``OperatingHours[6]`` (labelled "Fri - Sat"). A store
     is yielded only when its address + phone number combination is not yet
     in ``self.history``, and that key is then appended to it.

     An undecodable response or a malformed store record is logged and
     skipped. When only the hours cannot be parsed the store is still
     yielded, with an empty ``store_hours``. Previously these paths either
     dropped into ``pdb`` or were silently ignored, and the bare ``except``
     also intercepted ``GeneratorExit`` raised at the ``yield``.
     """
     import logging

     log = logging.getLogger(__name__)

     def clock(stamp):
         # [6:-2] keeps the millisecond count between a 6-character prefix
         # and a 2-character suffix (presumably "/Date(" and ")/" -- TODO
         # confirm against the feed).
         moment = datetime.datetime.utcfromtimestamp(
             int(int(stamp[6:-2]) / 1000))
         return moment.strftime('%I:%M%p')

     try:
         store_list = json.loads(response.body)['Stores']
     except (ValueError, KeyError, TypeError):
         log.exception('could not decode the store list response')
         return

     for store in store_list or []:
         try:
             item = ChainItem()
             item['store_name'] = store['Description']
             item['store_number'] = ''
             item['address'] = store['Address1']
             item['address2'] = store['Address2']
             item['city'] = store['City']
             item['state'] = store['State']
             item['zip_code'] = store['Zip']
             item['country'] = 'United States'
             item['phone_number'] = store['Phone']
             item['latitude'] = store['Latitude']
             item['longitude'] = store['Longitude']
             try:
                 first_day = store['OperatingHours'][0]
                 last_day = store['OperatingHours'][6]
                 time1 = "Sun - Thu : " + clock(
                     first_day['OpeningTime']) + ' to ' + clock(
                         first_day['ClosingTime'])
                 time2 = "Fri - Sat : " + clock(
                     last_day['OpeningTime']) + ' to ' + clock(
                         last_day['ClosingTime'])
                 item['store_hours'] = time1 + ", " + time2
             except (KeyError, IndexError, TypeError, ValueError):
                 log.exception('could not parse opening hours')
                 item['store_hours'] = ''
             item['store_type'] = store['RestType']
             history_key = item['address'] + item['phone_number']
         except Exception:
             # Per-record boundary: one bad store must not stop the crawl.
             log.exception('skipping a store record that could not be parsed')
             continue

         # The yield stays outside the try block so generator shutdown
         # (GeneratorExit) is never swallowed.
         if history_key not in self.history:
             self.history.append(history_key)
             yield item
Ejemplo n.º 28
0
	def parse_detail(self, response):
		"""Yield a single ``ChainItem`` describing the product on this page.

		The product name is the joined text of the title ``h1``. Every node
		whose class contains ``data-pair-item`` is read as a label/value text
		pair; when a known label fragment occurs (case-insensitively) in the
		label, the value is stored in the matching item field. Any error while
		handling one pair abandons the rest of that pair only.
		"""
		# (label fragment, item field), checked in this order. Fragments are
		# substring matches, so one label may fill several fields.
		# NOTE(review): "Enhet" also matches the label "Antal per enhet" --
		# confirm that overwriting ``Unit`` there is intended.
		label_fields = (
			('Bruttovikt', 'Weight'),
			('Enhet', 'Unit'),
			('Antal per enhet', 'Number_Per_Unit'),
			('Lagringsform', 'Storage_Form'),
			('Antal/hel', 'Number_Whole_Package'),
			('Art.nr leveran', 'Art_Nr_Supplier'),
			('Artikelnr', 'Article_Number'),
			('Land', 'Country'),
			('GTIN', 'GTIN'),
			('Kategori', 'Category'),
		)

		item = ChainItem()

		title_texts = response.xpath('//div[contains(@class, "content-top")]//h1[contains(@class, "product-title")]//text()').extract()
		item['Product_Name'] = ''.join(self.eliminate_space(title_texts))

		for pair in response.xpath('//*[contains(@class, "data-pair-item")]'):
			try:
				texts = self.eliminate_space(pair.xpath('.//text()').extract())
				for fragment, field in label_fields:
					if fragment.lower() in texts[0].lower():
						item[field] = texts[1]
			except:
				# Best effort: a pair without both label and value is ignored.
				pass

		yield item
    def parse_body(self, response):
        """Yield a ``ChainItem`` for each store described in ``response.meta['store_list']``.

        The list is a flat sequence of text lines. Lines are buffered until one
        containing "Click here to get directions" is seen; the buffer is then
        interpreted as one store and reset:

        * lines before the name is complete are appended to ``store_name``;
        * a line whose last word contains "Center" or "Marketplace", or which
          contains "Newport Square", completes the name;
        * following lines are accumulated as the address;
        * a line containing "Phone" closes the address, which is split into
          street/city/state/zip with ``usaddress.parse``. The next line is
          taken as a second phone number when its text before the first "-"
          is numeric, and the remaining lines become the store hours.

        Compared with the previous version, the leftover ``pdb.set_trace()``
        breakpoint is gone, and a phone line at the very end of the buffer no
        longer raises ``IndexError``.
        """
        store = []
        for value in response.meta['store_list']:
            if 'Click here to get directions' not in value:
                store.append(value)
                continue

            item = ChainItem()
            item['store_name'] = ''
            item['store_number'] = ''
            item['country'] = 'United States'
            item['latitude'] = ''
            item['longitude'] = ''
            item['other_fields'] = ""
            item['coming_soon'] = "0"
            item['address2'] = ''
            name_done = False
            address_done = False
            address = ''
            for x, line in enumerate(store):
                last_word = line.split(' ')[-1]
                if 'Phone' in line:
                    address_done = True
                    # Drop a leading parenthesised part of the address.
                    if ')' in address:
                        address = address.split(')')[1]
                    city = state = zip_code = street = ''
                    for token, label in usaddress.parse(address):
                        token = token.replace(',', '')
                        if label == 'PlaceName':
                            city += token + ' '
                        elif label == 'StateName':
                            state = token
                        elif label == 'ZipCode':
                            zip_code = token
                        else:
                            street += token + ' '

                    item['address'] = street
                    item['city'] = city
                    item['state'] = state
                    item['zip_code'] = zip_code
                    item['phone_number'] = line.replace('Phone:', '').strip()
                    # The phone line may be the last one in the buffer.
                    following = store[x + 1] if x + 1 < len(store) else ''
                    if following.split('-')[0].isdigit():
                        item['store_hours'] = self.validate(
                            " ".join(store[x + 2:]))
                        item['phone_number'] += ' ' + following
                    else:
                        item['store_hours'] = self.validate(
                            " ".join(store[x + 1:]))
                elif ('Center' in last_word or 'Marketplace' in last_word
                      or 'Newport Square' in line):
                    item['store_name'] += line
                    name_done = True
                elif not name_done and not address_done:
                    item['store_name'] += line + ' '
                elif name_done and not address_done:
                    address += line + ' '

            yield item
            store = []
Ejemplo n.º 30
0
    def parse_store(self, response):
        """Yield a ``ChainItem`` for every ``resource_locations_location`` div.

        The address text is split on commas and handled in three layouts:

        * three parts, last part a single token:
          ``street | city, state, zip``
        * three parts, last part with several tokens:
          ``street, city, state zip``
        * anything else: ``street | city, state zip``

        When a store cannot be fully parsed the error is logged and the item
        is yielded with the fields gathered so far. Previously this path
        dropped into ``pdb``, and because the item was created inside the
        ``try`` a failure there could re-yield the previous store's item.
        """
        import logging

        log = logging.getLogger(__name__)

        store_list = response.xpath(
            './/div[@class="resource_locations_location"]')
        for store in store_list:
            # Created outside the try block so the yield below always refers
            # to this store's own item.
            item = ChainItem()
            try:
                item['store_number'] = ''
                item['store_name'] = store.xpath(
                    './/div[@class="resource_locations_location_content_title"]/text()'
                ).extract_first().replace(u'\u2013', '')
                address = store.xpath(
                    './/div[@class="resource_locations_location_content_address"]/text()'
                ).extract_first()

                parts = [part.strip() for part in address.split(',')]
                street_city = parts[0].split('|')
                if len(parts) == 3 and len(parts[2].split(' ')) == 1:
                    item['address'] = street_city[0].strip()
                    item['city'] = street_city[1].strip()
                    item['state'] = parts[1]
                    item['zip_code'] = parts[2]
                elif len(parts) == 3:
                    item['address'] = parts[0]
                    item['city'] = parts[1]
                    state_zip = parts[2].split(' ')
                    item['state'] = state_zip[0].strip()
                    item['zip_code'] = state_zip[1].strip()
                else:
                    item['address'] = street_city[0].strip()
                    item['city'] = street_city[1].strip()
                    state_zip = parts[1].split(' ')
                    item['state'] = state_zip[0].strip()
                    item['zip_code'] = state_zip[1].strip()

                item['address2'] = ''
                item['country'] = 'United States'

                item['phone_number'] = store.xpath(
                    './/div[@class="resource_locations_location_content_phone"]/text()'
                ).extract_first().replace('Ph', '').strip().split('|')[0]
                item['latitude'] = ''
                item['longitude'] = ''

                item['store_hours'] = store.xpath(
                    './/div[@class="resource_locations_location_content_hours"]/text()'
                ).extract_first()
                item['other_fields'] = ""
                item['coming_soon'] = "0"
            except Exception:
                # Per-record boundary: log and keep the partially filled item.
                log.exception('store location could not be fully parsed')
            yield item