} session = SgRequests(retry_behavior=None) url = 'https://www.picknsave.com/stores/api/graphql' search = DynamicZipSearch(country_codes=[SearchableCountries.USA]) for postal in search: data = { "query": "\n query storeSearch($searchText: String!, $filters: [String]!) {\n storeSearch(searchText: $searchText, filters: $filters) {\n stores {\n ...storeSearchResult\n }\n fuel {\n ...storeSearchResult\n }\n shouldShowFuelMessage\n }\n }\n \n fragment storeSearchResult on Store {\n banner\n vanityName\n divisionNumber\n storeNumber\n phoneNumber\n showWeeklyAd\n showShopThisStoreAndPreferredStoreButtons\n storeType\n distance\n latitude\n longitude\n tz\n ungroupedFormattedHours {\n displayName\n displayHours\n isToday\n }\n address {\n addressLine1\n addressLine2\n city\n countryCode\n stateCode\n zip\n }\n pharmacy {\n phoneNumber\n }\n departments {\n code\n }\n fulfillmentMethods{\n hasPickup\n hasDelivery\n }\n }\n", "variables": { "searchText": postal, "filters": [] }, "operationName": "storeSearch" } response = session.post(url, json=data, headers=headers).json() print(response) x = 0 coords = [] if x % 100 == 0: session = "" session = SgRequests() try: for item in data["data"]["storeSearch"]["storeSearchReducer"][ "searchResults"]["fuel"]: locator_domain = "picknsave.com" page_url = url location_name = item["vanityName"] address = item["address"]["addressLine1"] city = item["address"]["city"]
def fetch_data():
    """Scrape Dream Doors (dreamdoors.co.uk) kitchen-showroom locations.

    Queries the site's AJAX store-finder for every GB postcode produced by
    DynamicZipSearch, then visits each showroom page to extract address,
    phone, hours, and coordinates.

    Returns:
        list[list]: one row per unique showroom, ordered as
        [domain, page_url, location_name, street_address, city, state,
         zip, country_code, store_number, phone, location_type,
         latitude, longitude, hours_of_operation].
    """
    session = SgRequests()
    items = []
    scraped_items = []  # store URLs already emitted, for de-duplication
    DOMAIN = "dreamdoors.co.uk"
    start_url = "https://www.dreamdoors.co.uk/kitchen-showrooms"
    all_codes = DynamicZipSearch(
        country_codes=[SearchableCountries.BRITAIN],
        max_radius_miles=10,
        max_search_results=None,
    )
    for code in all_codes:
        # The store finder is a Joomla AJAX module; it expects a form POST.
        formdata = {
            "option": "com_ajax",
            "module": "dreamdoors_store_finder",
            "postcode": code,
            "format": "raw",
        }
        headers = {
            "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
            "x-requested-with": "XMLHttpRequest",
        }
        response = session.post(start_url, data=formdata, headers=headers)
        if response.status_code != 200:
            continue
        data = json.loads(response.text)
        for poi in data:
            # The endpoint mixes status strings into the result list; skip them.
            if isinstance(poi, str):
                continue
            store_url = poi["url"]
            if store_url in scraped_items:
                continue  # already scraped via an overlapping postcode search
            loc_response = session.get(store_url)
            loc_dom = etree.HTML(loc_response.text)

            location_name = poi["name"]
            location_name = location_name if location_name else "<MISSING>"

            raw_address = loc_dom.xpath('//div[@class="address"]//text()')
            raw_address = [elem.strip() for elem in raw_address if elem.strip()]
            addr = parse_address_intl(
                " ".join(raw_address).replace("Address", "")
            )
            if addr.street_address_2:
                street_address = f"{addr.street_address_2} {addr.street_address_1}"
            else:
                street_address = addr.street_address_1
            street_address = street_address if street_address else "<MISSING>"
            if "Coming Soon" in street_address:
                continue  # not an open showroom yet
            city = addr.city
            city = city if city else "<MISSING>"
            if "Tbc" in city:
                # "Tbc" means the parser mistook the street line for a city.
                street_address = city
                city = "<MISSING>"
            state = "<MISSING>"  # UK addresses carry no state
            zip_code = addr.postcode
            zip_code = zip_code if zip_code else "<MISSING>"
            country_code = addr.country
            country_code = country_code if country_code else "<MISSING>"
            store_number = poi["id"]
            store_number = store_number if store_number else "<MISSING>"

            phone = loc_dom.xpath('//a[@id="showroom-phone"]/text()')
            phone = phone[0] if phone else "<MISSING>"
            location_type = "<MISSING>"

            hoo = loc_dom.xpath('//div[@class="opening_times"]//text()')
            hoo = [elem.strip() for elem in hoo if elem.strip()]
            # Drop the two header cells and the trailing "Call ..." blurb.
            hours_of_operation = (
                " ".join(hoo[2:]).split(" Call ")[0] if hoo else "<MISSING>"
            )

            # Coordinates are usually embedded in an inline map_initialize call.
            geo = re.findall(
                r'.map_initialize\("map_canvas", ".+", (.+?)\)',
                loc_response.text,
            )
            latitude = "<MISSING>"
            longitude = "<MISSING>"
            if geo:
                geo = geo[0].split(", ")
                latitude = geo[0]
                longitude = geo[1]
            else:
                # Fallback: render the page and pull coords from a Google Maps link.
                with SgFirefox() as driver:
                    driver.get(store_url)
                    sleep(10)  # allow the JS map widget to render
                    loc_dom = etree.HTML(driver.page_source)
                    geo = loc_dom.xpath('//a[contains(@href, "maps/@")]/@href')
                    if geo:
                        geo = geo[0].split("maps/@")[-1].split(",")[:2]
                        latitude = geo[0]
                        longitude = geo[1]

            item = [
                DOMAIN,
                store_url,
                location_name,
                street_address,
                city,
                state,
                zip_code,
                country_code,
                store_number,
                phone,
                location_type,
                latitude,
                longitude,
                hours_of_operation,
            ]
            # The guard at the top of the loop guarantees store_url is new here,
            # so record and emit it unconditionally.
            scraped_items.append(store_url)
            items.append(item)

    return items