}
# Store-locator scrape for picknsave.com: every postal code produced by the
# USA zip search is sent as the search text of a GraphQL "storeSearch" query.
# NOTE(review): this fragment is truncated - the handler for the `try:` below
# and the rest of the per-store field extraction are not visible here.
session = SgRequests(retry_behavior=None)
url = 'https://www.picknsave.com/stores/api/graphql'
search = DynamicZipSearch(country_codes=[SearchableCountries.USA])

for postal in search:
    # GraphQL request payload: the query text, its variables (the current
    # postal code and an empty filter list) and the operation name.
    data = {
        "query":
        "\n      query storeSearch($searchText: String!, $filters: [String]!) {\n        storeSearch(searchText: $searchText, filters: $filters) {\n          stores {\n            ...storeSearchResult\n          }\n          fuel {\n            ...storeSearchResult\n          }\n          shouldShowFuelMessage\n        }\n      }\n      \n  fragment storeSearchResult on Store {\n    banner\n    vanityName\n    divisionNumber\n    storeNumber\n    phoneNumber\n    showWeeklyAd\n    showShopThisStoreAndPreferredStoreButtons\n    storeType\n    distance\n    latitude\n    longitude\n    tz\n    ungroupedFormattedHours {\n      displayName\n      displayHours\n      isToday\n    }\n    address {\n      addressLine1\n      addressLine2\n      city\n      countryCode\n      stateCode\n      zip\n    }\n    pharmacy {\n      phoneNumber\n    }\n    departments {\n      code\n    }\n    fulfillmentMethods{\n      hasPickup\n      hasDelivery\n    }\n  }\n",
        "variables": {
            "searchText": postal,
            "filters": []
        },
        "operationName": "storeSearch"
    }
    # `headers` is not defined in the visible fragment; presumably it is the
    # dict literal that closes just above this code - TODO confirm.
    response = session.post(url, json=data, headers=headers).json()
    # Debug output: dumps the decoded reply for every postal code.
    print(response)
    # NOTE(review): `x` is reset to 0 on every pass, so the modulo test below
    # is always true and a new SgRequests session is created on each
    # iteration. The replacement session is also built without the
    # `retry_behavior=None` argument used for the initial one.
    x = 0
    # `coords` is assigned but not used in the visible part of the loop.
    coords = []
    if x % 100 == 0:
        session = ""
        session = SgRequests()
    try:
        # NOTE(review): `data` is the request payload built above; it has no
        # "data" key, so this lookup raises KeyError. The decoded reply is in
        # `response`, which was probably the intended object. The
        # "storeSearchReducer" / "searchResults" path also does not appear in
        # the query text (which selects storeSearch -> stores / fuel) -
        # verify against a real reply.
        for item in data["data"]["storeSearch"]["storeSearchReducer"][
                "searchResults"]["fuel"]:

            locator_domain = "picknsave.com"
            # The page URL recorded for each store is the API endpoint itself.
            page_url = url
            location_name = item["vanityName"]
            address = item["address"]["addressLine1"]
            city = item["address"]["city"]
Exemple #2
0
_MISSING = "<MISSING>"


def _or_missing(value):
    """Return ``value`` when it is truthy, otherwise the "<MISSING>" marker."""
    return value if value else _MISSING


def _first_two_fields(raw, separator):
    """Split ``raw`` on ``separator`` and return its first two fields.

    Returns:
        A ``(first, second)`` tuple, or ``None`` when the split yields fewer
        than two fields (so callers never index past the end).
    """
    fields = raw.split(separator)
    if len(fields) < 2:
        return None
    return fields[0], fields[1]


def _coords_from_html(page_html):
    """Read latitude/longitude from the ``map_initialize(...)`` call in a page.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, or ``None`` when the
        call is absent or its trailing arguments do not hold two
        ", "-separated values.
    """
    found = re.findall(r'.map_initialize\("map_canvas", ".+", (.+?)\)',
                       page_html)
    if not found:
        return None
    return _first_two_fields(found[0], ", ")


def _coords_from_browser(store_url):
    """Render ``store_url`` in Firefox and read coordinates from a maps link.

    Looks for an anchor whose href contains "maps/@" and takes the first two
    comma-separated values following that marker.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, or ``None`` when no such
        link is present or it does not carry two values.
    """
    with SgFirefox() as driver:
        driver.get(store_url)
        # Fixed wait before reading the page source; presumably gives the
        # page's scripts time to insert the maps link - TODO confirm.
        sleep(10)
        rendered_dom = etree.HTML(driver.page_source)
    hrefs = rendered_dom.xpath('//a[contains(@href, "maps/@")]/@href')
    if not hrefs:
        return None
    return _first_two_fields(hrefs[0].split("maps/@")[-1], ",")


def fetch_data():
    """Scrape Dream Doors kitchen showrooms and return one row per showroom.

    Every postcode yielded by ``DynamicZipSearch`` for Britain is posted to
    the site's store-finder endpoint. Each distinct showroom URL found in the
    replies is fetched once and parsed for its address, phone number, opening
    hours and coordinates. Showrooms whose street address contains
    "Coming Soon" are left out.

    Returns:
        list[list]: one 14-value row per showroom, in this order: domain,
        page URL, location name, street address, city, state, zip code,
        country code, store number, phone, location type, latitude,
        longitude, hours of operation. Values that could not be determined
        hold "<MISSING>".
    """
    session = SgRequests()

    items = []
    # URLs already handled (kept or skipped). A set gives O(1) membership
    # tests, and recording skipped URLs too avoids re-downloading the same
    # "Coming Soon" page for every postcode that returns it.
    seen_urls = set()

    DOMAIN = "dreamdoors.co.uk"
    start_url = "https://www.dreamdoors.co.uk/kitchen-showrooms"
    # Identical for every request, so built once outside the loop.
    headers = {
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "user-agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    all_codes = DynamicZipSearch(
        country_codes=[SearchableCountries.BRITAIN],
        max_radius_miles=10,
        max_search_results=None,
    )
    for code in all_codes:
        formdata = {
            "option": "com_ajax",
            "module": "dreamdoors_store_finder",
            "postcode": code,
            "format": "raw",
        }
        response = session.post(start_url, data=formdata, headers=headers)
        if response.status_code != 200:
            continue
        data = json.loads(response.text)

        for poi in data:
            # Plain-string entries carry no showroom record; skip them.
            if isinstance(poi, str):
                continue
            store_url = poi["url"]
            if store_url in seen_urls:
                continue
            seen_urls.add(store_url)

            loc_response = session.get(store_url)
            loc_dom = etree.HTML(loc_response.text)
            location_name = _or_missing(poi["name"])

            raw_address = loc_dom.xpath('//div[@class="address"]//text()')
            raw_address = [
                elem.strip() for elem in raw_address if elem.strip()
            ]
            addr = parse_address_intl(" ".join(raw_address).replace(
                "Address", ""))
            # Second address line goes first; join only the parts that exist
            # so a missing first line never becomes the literal text "None".
            street_address = _or_missing(" ".join(
                part
                for part in (addr.street_address_2, addr.street_address_1)
                if part))
            if "Coming Soon" in street_address:
                continue
            city = _or_missing(addr.city)
            if "Tbc" in city:
                # When the parsed city contains "Tbc", its text is reported
                # as the street address and the city is treated as unknown.
                street_address = city
                city = _MISSING
            state = _MISSING
            zip_code = _or_missing(addr.postcode)
            country_code = _or_missing(addr.country)
            store_number = _or_missing(poi["id"])
            phone = loc_dom.xpath('//a[@id="showroom-phone"]/text()')
            phone = phone[0] if phone else _MISSING
            location_type = _MISSING

            hoo = loc_dom.xpath('//div[@class="opening_times"]//text()')
            hoo = [elem.strip() for elem in hoo if elem.strip()]
            # The first two text nodes are dropped and everything from
            # " Call " onwards is cut; an empty result counts as missing.
            hours_of_operation = _or_missing(
                " ".join(hoo[2:]).split(" Call ")[0])

            # Prefer the coordinates embedded in the static HTML; fall back
            # to a rendered browser page only when they cannot be read.
            coords = (_coords_from_html(loc_response.text)
                      or _coords_from_browser(store_url))
            latitude, longitude = coords if coords else (_MISSING, _MISSING)

            items.append([
                DOMAIN,
                store_url,
                location_name,
                street_address,
                city,
                state,
                zip_code,
                country_code,
                store_number,
                phone,
                location_type,
                latitude,
                longitude,
                hours_of_operation,
            ])

    return items