Ejemplo n.º 1
0
def xfi_scrape_postpaid_smartphone_prices():
    """Scrape Xfinity Mobile postpaid device prices into the database.

    Downloads the device catalogue from the Xfinity products API and, for
    each device, stores one price row per distinct storage capacity (later
    variants with an already-seen capacity are skipped).  After each row is
    stored, the device description is passed to
    ``xfi_scrape_prepaid_promotins`` together with the device URL and
    storage size.

    Raises:
        requests.exceptions.RequestException: if the catalogue request fails,
            times out, or comes back with an HTTP error status.
    """
    # scrape json; the timeout keeps a stalled connection from hanging the run
    device_page = requests.get(
        'https://modesto-prodapi.xfinity.com/ModestoGW/api/v1.5/products?category=device&offset=0&sortAsc=true&sortBy=rank',
        timeout=30)
    device_page.raise_for_status()

    # NOTE(review): the payload is run through the HTML parser before being
    # decoded as JSON; kept as-is because downstream text may rely on it.
    device_soup = BeautifulSoup(device_page.text, 'html.parser')
    device_json = json.loads(device_soup.text)

    for json_obj in device_json:

        # make object
        scraped_postpaid_price = ScrapedPostpaidPrice()

        # hardcoded variables
        scraped_postpaid_price.provider = 'xfinity'
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()

        # scrape data
        scraped_postpaid_price.device = device_parser(json_obj['name'])

        # get description
        description = remove_non_ascii(json_obj['description'])

        # storage sizes already recorded for this device
        seen_sizes = set()
        for variant in json_obj['variants']:
            size_variant = variant['capacity'].replace('gb', '').strip()
            if size_variant in seen_sizes:
                continue                   # ignore duplicates of the same size
            seen_sizes.add(size_variant)

            scraped_postpaid_price.storage = size_variant
            scraped_postpaid_price.retail_price = variant['price']
            scraped_postpaid_price.onetime_price = '0.00'
            scraped_postpaid_price.monthly_price = variant['financePrice']
            scraped_postpaid_price.contract_ufc = '0.00'
            scraped_postpaid_price.url = 'https://www.xfinity.com/mobile/shop/device/' + json_obj['slug']

            print(scraped_postpaid_price.provider, scraped_postpaid_price.device,
                  scraped_postpaid_price.storage, scraped_postpaid_price.monthly_price,
                  scraped_postpaid_price.onetime_price, scraped_postpaid_price.retail_price,
                  scraped_postpaid_price.contract_ufc, scraped_postpaid_price.url,
                  scraped_postpaid_price.date, scraped_postpaid_price.time)

            # add to database (replace any row already stored for this key)
            remove_postpaid_duplicate(scraped_postpaid_price.provider, scraped_postpaid_price.device,
                                      scraped_postpaid_price.storage, scraped_postpaid_price.date)
            add_postpaid_to_database(scraped_postpaid_price.provider, scraped_postpaid_price.device,
                                     scraped_postpaid_price.storage, scraped_postpaid_price.monthly_price,
                                     scraped_postpaid_price.onetime_price, scraped_postpaid_price.retail_price,
                                     scraped_postpaid_price.contract_ufc, scraped_postpaid_price.url,
                                     scraped_postpaid_price.date, scraped_postpaid_price.time)

            # add promotion text to database
            xfi_scrape_prepaid_promotins(scraped_postpaid_price.url, scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage, description)
                pre_price = Pre_price(provider, device, storage, price,
                                      retail_price, url, date, time)
                prices.append(pre_price)
            else:
                empty += 1
    print("X " + provider_name + " file read, " + str(empty) + " empty rows")
    return prices


if payment_type == 'postpaid':
    # Load each postpaid provider's CSV prices and write every row to the
    # database, replacing any row already stored for the same key.
    postpaid_prices = {}
    for provider in postpaid_providers:
        provider_rows = get_postpaid_prices_CSV(provider)
        postpaid_prices[provider] = provider_rows

        entries = 0
        for entries, price in enumerate(provider_rows, start=1):
            remove_postpaid_duplicate(
                price.provider, price.device, price.storage, price.date)
            add_postpaid_to_database(
                price.provider, price.device, price.storage,
                price.monthly_price, price.onetime_price,
                price.retail_price, price.contract_ufc,
                price.url, price.date, price.time)

        summary = str(entries) + " " + provider + " entries added to database"
        print(summary)

if payment_type == 'prepaid':
    prepaid_prices = {}
    for provider in prepaid_providers:
        prepaid_prices[provider] = get_prepaid_prices_CSV(provider)
        entries = 0
        for price in prepaid_prices[provider]:
            remove_prepaid_duplicate(price.provider, price.device,
def tmo_scrape_postpaid_smartphone_prices():
    """Scrape T-Mobile postpaid smartphone prices and promotions.

    Opens the T-Mobile home page in headless Chrome, follows the "PHONES"
    link and, for every in-scope device tile, loads the device page once per
    storage size.  Each size's prices are written with
    ``add_postpaid_to_database`` and its promotions are collected through
    ``add_scraped_promotions_to_database`` and
    ``tmo_scrape_postpaid_promotions``.

    A device whose page raises ``AttributeError`` while being parsed (for
    example when an expected element is missing) is added to a list that is
    printed at the end instead of aborting the run.  The browser is always
    shut down, even when the scrape fails part-way through.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    # device-name fragments that mark a tile as out of scope
    excluded_keywords = ('certified pre-owned', 'linelink', 'sim', 'flip')
    cost_class = 'cost-price font-tele-ult ng-binding'
    errors = []

    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get('https://www.t-mobile.com/')
        time.sleep(5)

        # go to Phones url from homepage (since url could change)
        driver.find_element_by_link_text('PHONES').click()
        time.sleep(20)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object
        scraped_postpaid_price = ScrapedPostpaidPrice()

        # hardcoded variables
        scraped_postpaid_price.provider = 'tmobile'
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()

        # get device name and url from device landing page
        for tariff in soup.findAll(
                'div', class_='tile col-lg-3 col-md-4 col-sm-6 col-xs-12'):

            device_contents = tariff.find(
                'a',
                class_='m-b-5 product-name text-center regular block ng-binding')
            scraped_postpaid_price.device = device_parser(device_contents.text)

            # skip out-of-scope devices
            if any(keyword in scraped_postpaid_price.device
                   for keyword in excluded_keywords):
                continue

            scraped_postpaid_price.url = (
                'https://www.t-mobile.com/' + device_contents['href'])

            promo_text = tariff.find('div', class_='offerTextHeight').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device, '0', 'device landing page',
                    promo_text, scraped_postpaid_price.url,
                    scraped_postpaid_price.date, scraped_postpaid_price.time)

            # go to individual page
            driver.get(scraped_postpaid_price.url)
            time.sleep(6)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # try/except in case of no price (coming soon, etc.)
            try:

                # iterate through storage sizes (the button list is taken
                # from the first page load; device_soup is rebound below)
                for memory_button in device_soup.findAll('a',
                                                         class_='memory-btn'):

                    # record storage size and url
                    scraped_postpaid_price.storage = memory_button.text.replace(
                        'GB', '').strip()
                    scraped_postpaid_price.url = (
                        scraped_postpaid_price.url.split('?memory=')[0]
                        + '?memory=' + scraped_postpaid_price.storage + 'gb')
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    device_soup = BeautifulSoup(driver.page_source,
                                                "html.parser")

                    # The record object is shared by every device, so reset
                    # the prices first; otherwise the single-price branch
                    # below would store the previous device's monthly and
                    # retail prices.
                    scraped_postpaid_price.monthly_price = '0.00'
                    scraped_postpaid_price.retail_price = '0.00'
                    scraped_postpaid_price.onetime_price = '0.00'

                    if len(device_soup.findAll('div',
                                               class_='price-lockup')) > 1:
                        downpayment_and_retail = device_soup.findAll(
                            'span', class_=cost_class)
                        scraped_postpaid_price.onetime_price = \
                            downpayment_and_retail[0].text
                        scraped_postpaid_price.retail_price = \
                            downpayment_and_retail[1].text.replace(',', '')
                        scraped_postpaid_price.monthly_price = monthly_price_parser(
                            device_soup.find(
                                'p',
                                class_='small font-tele-nor m-t-10 ng-binding'
                            ).text)
                    else:
                        scraped_postpaid_price.onetime_price = device_soup.find(
                            'span', class_=cost_class).text

                    # add to database
                    remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                              scraped_postpaid_price.device,
                                              scraped_postpaid_price.storage,
                                              scraped_postpaid_price.date)
                    add_postpaid_to_database(
                        scraped_postpaid_price.provider,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage,
                        scraped_postpaid_price.monthly_price,
                        scraped_postpaid_price.onetime_price,
                        scraped_postpaid_price.retail_price,
                        scraped_postpaid_price.contract_ufc,
                        scraped_postpaid_price.url,
                        scraped_postpaid_price.date,
                        scraped_postpaid_price.time)

                    tmo_scrape_postpaid_promotions(
                        driver, device_soup, scraped_postpaid_price.url,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage)

            except AttributeError:
                errors.append(scraped_postpaid_price.device)

        print("pricing errors: ", errors)
    finally:
        # always release the browser, even if a page failed to load or parse
        driver.quit()
Ejemplo n.º 4
0
def spr_scrape_postpaid_smartphone_prices():
    """Scrape Sprint postpaid smartphone prices and promotions.

    Loads the Sprint phone listing in headless Chrome, then opens every
    in-scope device page in its own browser session.  For each entry of the
    device's storage selector the lease and full prices are read, written
    with ``add_postpaid_to_database``, and the page is handed to
    ``spr_scrape_postpaid_promotions``.  If neither a monthly nor a one-time
    price was found, the page is reloaded once in a fresh browser and read
    again.

    Every browser session started here is shut down again, including when a
    device is skipped because of a 404/blank page or when an error occurs.
    """
    # landing page: load it in a short-lived browser and keep only the HTML
    driver = _spr_phone_new_driver()
    try:
        driver.get('https://www.sprint.com/en/shop/cell-phones.html')
        driver.get(
            "https://www.sprint.com/en/shop/cell-phones.html?INTNAV=TopNav:Shop:AllPhones&credit=A2&sort=FEATURED"
        )
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    # device-name fragments that mark a tile as out of scope
    excluded_keywords = ("linelink", "pre-owned", "flip", "xp5s", "duraxtp",
                         "duratr", "xp strike", "certified")

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class":
            "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if any(keyword in device_name for keyword in excluded_keywords):
            continue

        # device name
        scraped_postpaid_price.device = device_parser(device_name)

        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]

        # promo text for device landing page (empty when the tile has none)
        promo_tag = device_tile.find("span", {
            "class": "color--purple font-size-14"
        })
        promo_text = promo_tag.text.strip() if promo_tag is not None else ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)

        # go to url in a browser session dedicated to this device
        driver = _spr_phone_new_driver()
        try:
            driver.implicitly_wait(5)
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # if 404 error or no title at all, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title) or not site_title:
                print('404 Error or page error: ' + scraped_postpaid_price.device)
                continue

            # record device sizes from the drop down menu
            sizes = driver.find_element_by_id(
                'sprint_storage_selector').text.strip().replace(' GB', '')
            sizes = sizes.split('\n')

            # iterate through sizes
            for size in sizes:

                # Look the selector up on every pass: the reload below may
                # have replaced the browser, which would make an element
                # located earlier unusable.
                driver.find_element_by_id('sprint_storage_selector').click()
                time.sleep(2)
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(6)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # record device size
                scraped_postpaid_price.storage = size

                # record current url
                scraped_postpaid_price.url = driver.current_url

                # initialize price variables
                scraped_postpaid_price.monthly_price = '0.00'
                scraped_postpaid_price.retail_price = '0.00'
                scraped_postpaid_price.onetime_price = '0.00'

                # get prices
                _spr_phone_read_prices(device_soup, scraped_postpaid_price)

                # if page didn't load all the way
                if scraped_postpaid_price.onetime_price == '0.00' and \
                        scraped_postpaid_price.monthly_price == '0.00':

                    # shut the browser down and reload the page in a new one
                    driver.quit()
                    driver = _spr_phone_new_driver()
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    device_soup = BeautifulSoup(driver.page_source,
                                                "html.parser")

                    # get prices again
                    _spr_phone_read_prices(device_soup, scraped_postpaid_price)

                # add to database
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                spr_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
        finally:
            # one browser per device: release it before moving on
            driver.quit()


def _spr_phone_new_driver():
    """Start a headless Chrome session using chromedriver.exe from the cwd."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    return webdriver.Chrome(chrome_options=chrome_options,
                            executable_path=chrome_driver)


def _spr_phone_read_prices(device_soup, scraped_postpaid_price):
    """Copy the lease and full prices found in device_soup onto the record.

    Fields whose label is not present on the page are left untouched.
    """
    for label in device_soup.findAll('label', class_='soar-selection__label'):
        heading = label.find('strong').text
        if heading == ' Sprint Flex 18-mo. lease':
            monthly = label.findAll('span', class_='display-block')
            scraped_postpaid_price.monthly_price = price_parser(
                monthly[0].text.strip())
            scraped_postpaid_price.onetime_price = price_parser(
                monthly[1].text.strip())
        if heading == ' Full price':
            retail = label.findAll('span', class_='display-block')
            scraped_postpaid_price.retail_price = price_parser(
                retail[1].text.strip().replace(',', ''))
def ver_scrape_postpaid_smartphone_prices():
    """Scrape Verizon postpaid smartphone prices and promotions.

    Loads the Verizon smartphone listing in headless Chrome and visits every
    device page that is not marked "pre-owned".  On each page every storage
    size button is clicked in turn; the monthly and retail prices shown are
    written with ``add_postpaid_to_database`` and the page is handed to
    ``ver_scrape_postpaid_promotions``.

    A device whose page raises ``AttributeError`` while being parsed (for
    example when the size selector row is missing) is added to a list that
    is printed at the end.  The browser is always shut down, even when the
    scrape fails part-way through.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    errors = []

    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get("https://www.verizonwireless.com/smartphones/")
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object
        scraped_postpaid_price = ScrapedPostpaidPrice()

        # hardcoded variables
        scraped_postpaid_price.provider = 'verizon'
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()

        for device in soup.findAll(
                'div',
                class_=
                'cursorPointer pad15 onlySidePad tile background_supporting border_CC'
        ):

            device_contents = device.find('a')
            scraped_postpaid_price.device = brandparser(device_contents.text)
            if "pre-owned" in scraped_postpaid_price.device:
                continue
            scraped_postpaid_price.url = (
                'https://www.verizonwireless.com' + device_contents["href"])

            promo_text = device.find('div', class_='offer-text').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device,
                    '0', 'device landing page', promo_text,
                    scraped_postpaid_price.url, scraped_postpaid_price.date,
                    scraped_postpaid_price.time)

            # go to url
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # select each device size
            try:
                size_button_pad = device_soup.find(
                    'div', class_='displayFlex rowNoWrap priceSelectorRow')
                size_buttons = size_button_pad.findAll(
                    'div',
                    class_=
                    'grow1basis0 priceSelectorColumn radioGroup positionRelative')

                # the XPath below addresses the buttons by 1-based position
                for size_button_number, size_button in enumerate(
                        size_buttons, start=1):
                    # record new device size
                    scraped_postpaid_price.storage = size_button.text.replace(
                        'GB', '')

                    size_xpath = (
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p')

                    # click the size; if that fails, close the popup and retry
                    try:
                        driver.find_element_by_xpath(size_xpath).click()
                    except WebDriverException:
                        driver.find_element_by_class_name('fsrCloseBtn').click()
                        driver.find_element_by_xpath(size_xpath).click()

                    # re-read the page to pick up size-specific prices/promos
                    time.sleep(2)
                    device_soup = BeautifulSoup(driver.page_source,
                                                "html.parser")

                    values_list = device_soup.findAll('div', class_='sizePad')
                    scraped_postpaid_price.monthly_price = monthly_price_parser(
                        values_list[-2].text)
                    scraped_postpaid_price.retail_price = retail_price_parser(
                        values_list[-1].text.replace(',', ''))

                    remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                              scraped_postpaid_price.device,
                                              scraped_postpaid_price.storage,
                                              scraped_postpaid_price.date)
                    add_postpaid_to_database(scraped_postpaid_price.provider,
                                             scraped_postpaid_price.device,
                                             scraped_postpaid_price.storage,
                                             scraped_postpaid_price.monthly_price,
                                             scraped_postpaid_price.onetime_price,
                                             scraped_postpaid_price.retail_price,
                                             scraped_postpaid_price.contract_ufc,
                                             scraped_postpaid_price.url,
                                             scraped_postpaid_price.date,
                                             scraped_postpaid_price.time)

                    ver_scrape_postpaid_promotions(device_soup, driver,
                                                   scraped_postpaid_price.url,
                                                   scraped_postpaid_price.device,
                                                   scraped_postpaid_price.storage)

            except AttributeError:
                # page lacked an expected element; report it instead of
                # dropping the device silently
                errors.append(scraped_postpaid_price.device)

        print("pricing errors: ", errors)
    finally:
        # quit() (not close()) so the chromedriver process is ended as well
        driver.quit()
Ejemplo n.º 6
0
def spr_scrape_postpaid_tablet_prices():
    """Scrape Sprint postpaid tablet prices and promotions.

    Loads the Sprint tablet listing in headless Chrome, then opens every
    in-scope device page in its own browser session.  For each entry of the
    device's storage selector the 24-month installment and full prices are
    read, written with ``add_postpaid_to_database``, and the page is handed
    to ``spr_scrape_postpaid_promotions``.

    Every browser session started here is shut down again, including when a
    device is skipped because of a 404 page or when an error occurs.
    """
    # landing page: load it in a short-lived browser and keep only the HTML
    driver = _spr_tablet_new_driver()
    try:
        driver.get(
            'https://www.sprint.com/en/shop/tablets.html?INTNAV=TopNav:Shop:Tablets&credit=A2&sort=FEATURED'
        )
        time.sleep(5)

        # get soup
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    # device-name fragments that mark a tile as out of scope
    excluded_keywords = ("linelink", "pre-owned", "flip", "sim", "duraxtp",
                         "duratr", "xp strike", "certified")

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class":
            "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if any(keyword in device_name for keyword in excluded_keywords):
            continue

        # device name
        scraped_postpaid_price.device = device_parser(device_name)

        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]

        # promo text for device landing page (empty when the tile has none)
        promo_tag = device_tile.find("span", {
            "class": "color--purple font-size-14"
        })
        promo_text = promo_tag.text.strip() if promo_tag is not None else ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)

        # go to url in a browser session dedicated to this device
        driver = _spr_tablet_new_driver()
        try:
            driver.implicitly_wait(5)
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # if 404 error, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title):
                print('404 Error: ' + scraped_postpaid_price.device)
                continue

            # click on drop down menu and record device sizes
            size_selector = driver.find_element_by_id('sprint_storage_selector')
            size_selector.click()
            time.sleep(2)
            sizes = size_selector.text.strip().replace(' GB', '')
            sizes = sizes.split('\n')

            # iterate through sizes
            for size in sizes:

                # click on size and reload page
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(2)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # record device size
                scraped_postpaid_price.storage = size

                # initialize price variables
                scraped_postpaid_price.monthly_price = '0.00'
                scraped_postpaid_price.retail_price = '0.00'
                scraped_postpaid_price.onetime_price = '0.00'

                # get prices
                for label in device_soup.findAll(
                        'label', class_='soar-selection__label'):
                    heading = label.find('strong').text
                    if heading == ' Buy it with 24 monthly installments':
                        monthly = label.findAll('span', class_='display-block')
                        scraped_postpaid_price.monthly_price = price_parser(
                            monthly[0].text.strip())
                        scraped_postpaid_price.onetime_price = price_parser(
                            monthly[1].text.strip())
                    if heading == ' Full price':
                        retail = label.findAll('span', class_='display-block')
                        # strip thousands separators, as the smartphone
                        # scraper does, before parsing the price
                        scraped_postpaid_price.retail_price = price_parser(
                            retail[1].text.strip().replace(',', ''))

                # add to database
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                spr_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
        finally:
            # one browser per device: release it before moving on
            driver.quit()


def _spr_tablet_new_driver():
    """Start a headless Chrome session using chromedriver.exe from the cwd."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    return webdriver.Chrome(chrome_options=chrome_options,
                            executable_path=chrome_driver)
Ejemplo n.º 7
0
def att_scrape_postpaid_smartphone_prices():
    """Scrape AT&T postpaid smartphone prices and promotions into the database.

    Opens the AT&T cellphone listing in Chrome, clicks the 'Show All' link
    when the shown and total device counts differ, and then visits every
    device tile that is not filtered out by name.  For each storage-size
    button on a device page the function records that size's promotions,
    looks up the sku, loads the matching configurator page to read the
    monthly / one-time / retail prices, and stores the row through
    ``remove_postpaid_duplicate`` followed by ``add_postpaid_to_database``.

    Takes no arguments and returns ``None``; every result is written through
    the database helpers.  Selenium and parsing errors are not caught and
    propagate to the caller, but the browser is always shut down first.

    NOTE(review): one ``ScrapedPostpaidPrice`` instance is reused for every
    device and size, and the price fields are only assigned when a matching
    label is found, so a field that is missing on one page keeps the value
    it was last given -- confirm that this carry-over is acceptable.
    """
    # Device tiles whose lower-cased name contains any of these are skipped.
    excluded_keywords = ('pre-owned', 'flip', 'wireless', 'b470', 'xp5s',
                         'duraxe', 'certified')

    # CSS class strings used on the configurator page.
    price_row_class = 'row-fluid-nowrap posRel margin-top-5'
    plan_label_class = ('text-xlarge margin-right-5 adjustLetterSpace '
                        'ng-binding ng-scope')
    gray_price_class = ('attGray text-cramped text-xlarge text-nowrap '
                        'pad-bottom-10')
    orange_price_class = ('attOrange text-cramped text-xlarge text-nowrap '
                          'pad-bottom-10')

    # Chrome setup; the --headless flag is currently commented out.
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    # try/finally so the browser process is released even if a lookup raises
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get('https://www.att.com/shop/wireless/devices/cellphones.html')
        time.sleep(2)

        # click 'Show All' when fewer devices are shown than exist in total
        devices_shown = driver.find_element_by_class_name(
            'deviceCount').text.split(' ')[-1]
        devices_total = driver.find_element_by_class_name('deviceSize').text
        if devices_shown != devices_total:
            driver.find_element_by_id("deviceShowAllLink").click()

        # load page and get soup
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object and set hardcoded variables
        scraped_postpaid_price = ScrapedPostpaidPrice()
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()
        scraped_postpaid_price.provider = 'att'

        # parse through device tiles
        for device in soup.findAll("div", class_="list-item"):

            device_contents = device.find("a", class_="titleURLchng")
            scraped_postpaid_price.device = brandparser(
                parser(device_contents.text)).lower()
            if any(keyword in scraped_postpaid_price.device
                   for keyword in excluded_keywords):
                continue
            scraped_postpaid_price.url = ('https://www.att.com' +
                                          device_contents['href'])

            # a tile with exactly two holiday flags carries a landing-page
            # promotion in the title of the second flag's image
            deal_landing_page_promo = device.findAll("div",
                                                     class_="holidayFlag")
            if len(deal_landing_page_promo) == 2:
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device, '0', 'device landing page',
                    deal_landing_page_promo[1].img['title'],
                    scraped_postpaid_price.url, scraped_postpaid_price.date,
                    scraped_postpaid_price.time)

            # go to url and get storage sizes
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # Size buttons are in the html even if they are not visible.
            # Decide once whether a size has to be clicked: device_soup is
            # replaced by the configurator page further down, so counting the
            # buttons again inside the loop would look at the wrong page.
            size_buttons = device_soup.findAll('button', class_='preSize')
            has_multiple_sizes = len(size_buttons) != 1

            for button_number, button in enumerate(size_buttons):

                # reload the device page for every size after the first; url
                # now carries the sku of the previously scraped size
                if button_number > 0:
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(3)

                device_storage = button.text.replace('GB', '').strip()
                if 'MB' in device_storage:
                    device_storage = device_storage.replace('MB', '')
                    # NOTE(review): the ' ' sign flag yields a leading space
                    # for positive values (e.g. ' 0.51'), which then ends up
                    # in both the stored size and the element id below --
                    # confirm whether that is intended.
                    device_storage = '{: .2f}'.format(
                        int(device_storage) * 0.001)

                # set object's storage size
                scraped_postpaid_price.storage = device_storage
                size_id = 'size_' + scraped_postpaid_price.storage + 'GB'
                size = driver.find_element_by_id(size_id)

                # click on the recorded size if there is more than one
                if has_multiple_sizes:
                    try:
                        size.click()
                    except WebDriverException:
                        # presumably a popup intercepted the click: dismiss
                        # it through its link and try again
                        driver.find_element_by_xpath(
                            '//*[@id="acsMainInvite"]/a').click()
                        size.click()

                    time.sleep(3)
                    device_soup = BeautifulSoup(driver.page_source,
                                                "html.parser")

                # get promotions
                att_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)

                # get sku for correct url and config_url
                try:
                    sku = device_soup.find(id='skuIDToDisplay').text.strip()
                except AttributeError:
                    # no skuIDToDisplay element: fall back to the viewer tag
                    sku = 'sku' + device_soup.find(
                        'att-product-viewer')['skuid']

                # set url and config_url for object
                scraped_postpaid_price.url = (
                    scraped_postpaid_price.url.split('=sku')[0] + '=sku' + sku)
                scraped_postpaid_price.config_url = (
                    'https://www.att.com/shop/wireless/deviceconfigurator.html'
                    '?prefetched=true&sku=' + sku)

                # go to config_url and get prices
                driver.get(scraped_postpaid_price.config_url)
                time.sleep(5)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                price_rows = device_soup.findAll('div', class_=price_row_class)
                # With several pricing rows the instalment plan is read too
                # and prices use the gray class; with at most one row only
                # the retail price is read, from the orange class.
                several_options = len(price_rows) > 1
                retail_class = (gray_price_class
                                if several_options else orange_price_class)
                for row in price_rows:
                    for label in row.findAll('span', class_=plan_label_class):
                        if several_options and \
                                label.text == 'AT&T Next Every Year℠':
                            contract_prices = row.findAll(
                                'div', class_=gray_price_class)
                            scraped_postpaid_price.onetime_price = \
                                contract_prices[0].text.replace(
                                    "$", "").strip()
                            scraped_postpaid_price.monthly_price = \
                                contract_prices[1].text.replace(
                                    "$", "").replace("Monthly", "").strip()
                        if label.text == 'No annual contract':
                            no_contract_prices = row.findAll(
                                'div', class_=retail_class)
                            # thousands separators are removed in both cases
                            # so the stored retail price has one format
                            scraped_postpaid_price.retail_price = \
                                no_contract_prices[0].text.replace(
                                    ',', '').replace("$", "").strip()

                # add to database
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
    finally:
        driver.quit()
Ejemplo n.º 8
0
def ver_scrape_postpaid_tablet_prices():
    """Scrape Verizon postpaid tablet prices and promotions into the database.

    Loads the Verizon tablet listing in headless Chrome, and for every
    device tile (skipping names containing "pre-owned") records the
    landing-page promotion text if any, opens the device page, and clicks
    through each storage-size button.  For each size the monthly price,
    contract UFC and retail price are parsed from the last three 'sizePad'
    elements, the row is stored through ``remove_postpaid_duplicate`` followed
    by ``add_postpaid_to_database``, and ``ver_scrape_postpaid_promotions`` is
    called for the size-specific promotions.

    Takes no arguments and returns ``None``.  Selenium and parsing errors are
    not caught and propagate to the caller, but the browser is always shut
    down first.
    """
    tile_class = ('cursorPointer pad15 onlySidePad tile background_supporting '
                  'border_CC')
    size_button_class = ('grow1basis0 priceSelectorColumn radioGroup '
                         'positionRelative')
    # XPath of the n-th size button is prefix + str(n) + suffix (1-based).
    size_xpath_prefix = (
        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/'
        'div[2]/div[2]/div/div[')
    size_xpath_suffix = ']/div/div/p'

    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    # try/finally so the browser process is released even if a lookup raises
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get("https://www.verizonwireless.com/tablets/")
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object and set hardcoded variables
        scraped_postpaid_price = ScrapedPostpaidPrice()
        scraped_postpaid_price.provider = 'verizon'
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()

        # get device names and links from the tablet landing page
        for device in soup.findAll('div', class_=tile_class):

            device_contents = device.find('a')
            scraped_postpaid_price.device = brandparser(device_contents.text)
            if "pre-owned" in scraped_postpaid_price.device:
                continue
            scraped_postpaid_price.url = ('https://www.verizonwireless.com' +
                                          device_contents["href"])

            promo_text = device.find('div', class_='offer-text').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device, '0', 'device landing page',
                    promo_text, scraped_postpaid_price.url,
                    scraped_postpaid_price.date, scraped_postpaid_price.time)

            # go to url
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # Skip pages without a title or with an empty one.  The tag is
            # rendered to text first: a Tag compared directly against a str
            # is never equal, so the empty-title case would not be detected.
            site_title = device_soup.find_all("title")
            if not site_title or str(site_title[0]) == "<title></title>":
                continue

            # select each device size
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll('div',
                                                   class_=size_button_class)
            for size_button_number, size_button in enumerate(size_buttons,
                                                             start=1):

                # record new device size
                scraped_postpaid_price.storage = size_button.text.replace(
                    'GB', '')

                # click the size button to show its prices and promos; if the
                # click fails, close the popup via its '×' link and retry
                size_xpath = (size_xpath_prefix + str(size_button_number) +
                              size_xpath_suffix)
                try:
                    driver.find_element_by_xpath(size_xpath).click()
                except WebDriverException:
                    driver.find_element_by_link_text('×').click()
                    driver.find_element_by_xpath(size_xpath).click()

                # re-read the page after the size has been selected
                time.sleep(2)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # the last three 'sizePad' elements are read as monthly
                # price, contract UFC and retail price, in that order
                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-3].text)
                scraped_postpaid_price.contract_ufc = contract_ufc_parser(
                    values_list[-2].text.replace(',', ''))
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))

                # remove storage from device name if it is in it
                if scraped_postpaid_price.storage in \
                        scraped_postpaid_price.device:
                    scraped_postpaid_price.device = \
                        scraped_postpaid_price.device.replace(
                            scraped_postpaid_price.storage + 'gb', '')

                # add to database
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)

                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
    finally:
        # quit() ends the whole WebDriver session, not only the open window
        driver.quit()