Esempio n. 1
0
def xfi_scrape_postpaid_smartphone_prices():
    """Scrape the Xfinity Mobile device feed and store postpaid prices.

    Downloads the product feed, and for every product records one row per
    distinct storage capacity (later variants with an already-seen capacity
    are skipped). Each row is printed, de-duplicated and inserted through
    ``remove_postpaid_duplicate`` / ``add_postpaid_to_database``, and the
    product description is handed to ``xfi_scrape_prepaid_promotins``.
    """
    # download the feed; the body is run through BeautifulSoup's text
    # extraction before it is decoded as JSON
    feed_response = requests.get('https://modesto-prodapi.xfinity.com/ModestoGW/api/v1.5/products?category=device&offset=0&sortAsc=true&sortBy=rank')
    feed_text = BeautifulSoup(feed_response.text, 'html.parser').text

    for product in json.loads(feed_text):

        # one record object per product, reused for each of its variants
        record = ScrapedPostpaidPrice()

        # fixed values for this provider / run
        record.provider = 'xfinity'
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()

        # values read from the product entry
        record.device = device_parser(product['name'])
        promo_description = remove_non_ascii(product['description'])

        # capacities already handled for this product
        seen_capacities = set()
        for variant in product['variants']:
            capacity = variant['capacity'].replace('gb', '').strip()
            if capacity in seen_capacities:
                # only the first variant of each capacity is recorded
                continue
            seen_capacities.add(capacity)

            record.storage = capacity
            record.retail_price = variant['price']
            record.onetime_price = '0.00'
            record.monthly_price = variant['financePrice']
            record.contract_ufc = '0.00'
            record.url = 'https://www.xfinity.com/mobile/shop/device/' + product['slug']

            # positional arguments shared by the console dump and the insert
            row = (
                record.provider,
                record.device,
                record.storage,
                record.monthly_price,
                record.onetime_price,
                record.retail_price,
                record.contract_ufc,
                record.url,
                record.date,
                record.time,
            )
            print(*row)

            # replace any existing row for the same provider/device/storage/date
            remove_postpaid_duplicate(record.provider, record.device,
                                      record.storage, record.date)
            add_postpaid_to_database(*row)

            # store the promotion text for this device / capacity
            xfi_scrape_prepaid_promotins(record.url, record.device,
                                         record.storage, promo_description)
Esempio n. 2
0
def att_scrape_postpaid_tablet_prices():
    """Scrape AT&T's tablet listing and store postpaid prices per storage size.

    Drives a headless Chrome session: loads the tablet landing page, expands
    the device list when the shown/total counters differ, then for every
    device tile visits the device page, each storage-size button, and the
    device configurator page to read prices. Every (device, storage) pair is
    written through ``remove_postpaid_duplicate`` and
    ``add_postpaid_to_database``. Landing-page promotions are stored with
    ``add_scraped_promotions_to_database`` and device-page promotions are
    delegated to ``att_scrape_postpaid_promotions``.

    The browser is shut down in a ``finally`` block so a failure part-way
    through does not leave a Chrome process behind.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get('https://www.att.com/shop/wireless/devices/tablets.html')
        time.sleep(5)

        # check if all devices are shown on page
        devices_shown = driver.find_element_by_class_name(
            'deviceCount').text.split(' ')[-1]
        devices_total = driver.find_element_by_class_name('deviceSize').text
        if devices_shown != devices_total:
            # click 'Show All' only if it exists: find_elements_* returns an
            # empty list when nothing matches, whereas find_element_* raises
            show_all_links = driver.find_elements_by_id("deviceShowAllLink")
            if show_all_links:
                show_all_links[0].click()

        # load page and get soup
        time.sleep(3)
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")

        # make object (one instance is reused for every device and size)
        scraped_postpaid_price = ScrapedPostpaidPrice()

        # set hardcoded variables
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()
        scraped_postpaid_price.provider = 'att'

        # parse through device tiles; the tile list is materialised here, so
        # rebinding `soup` inside the loop does not affect the iteration
        for device in soup.findAll("div", class_="list-item"):

            device_contents = device.find("a", class_="titleURLchng")
            scraped_postpaid_price.device = brandparser(
                parser(device_contents.text)).lower()

            # skip listings whose name marks them as pre-owned/certified/wireless
            device_name = scraped_postpaid_price.device
            if any(marker in device_name
                   for marker in ("pre-owned", "certified", "wireless")):
                continue
            scraped_postpaid_price.url = (
                'https://www.att.com' + device_contents['href'])

            # a tile with exactly two flags carries a landing-page promotion
            deal_landing_page_promo = device.findAll(
                "div", class_="holidayFlag")
            if len(deal_landing_page_promo) == 2:
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device,
                    '0', 'device landing page',
                    deal_landing_page_promo[1].img['title'],
                    scraped_postpaid_price.url, scraped_postpaid_price.date,
                    scraped_postpaid_price.time)

            # initialize object's price variables
            scraped_postpaid_price.monthly_price = '0.00'
            scraped_postpaid_price.contract_ufc = '0.00'
            scraped_postpaid_price.onetime_price = '0.00'
            scraped_postpaid_price.retail_price = '0.00'

            # if device size in storage, take it out
            if 'gb' in scraped_postpaid_price.device:
                device_name_words = scraped_postpaid_price.device.split(' ')
                for word in device_name_words:
                    if 'gb' in word:
                        scraped_postpaid_price.device = (
                            scraped_postpaid_price.device.replace(
                                ' - ' + word, ''))

            # go to url and get storage size
            driver.get(scraped_postpaid_price.url)
            time.sleep(2)
            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")

            # read size from size button that is in html even if it is not
            # visible on page; iterate through each size
            button_number = 0
            for button in soup.findAll('button', class_='preSize'):

                # go back to base web page if there is more than one button
                if button_number > 0:
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(3)

                device_storage = button.text.replace('GB', '').strip()
                if 'MB' in device_storage:
                    device_storage = device_storage.replace('MB', '')
                    # NOTE(review): the ' .2f' spec emits a leading space for
                    # positive values, which also ends up in size_id below --
                    # confirm this is intended.
                    device_storage = '{: .2f}'.format(
                        int(device_storage) * 0.001)

                # set object's storage size
                scraped_postpaid_price.storage = device_storage
                size_id = 'size_' + scraped_postpaid_price.storage + 'GB'
                size = driver.find_element_by_id(size_id)

                # click on size that was recorded as storage if there is more
                # than one storage size (`soup` is whatever page was parsed
                # last at this point)
                if len(soup.findAll('button', class_='preSize')) != 1:

                    # if popup is there, click it and make it go away
                    try:
                        size.click()
                    except WebDriverException:
                        driver.find_element_by_xpath(
                            '//*[@id="acsMainInvite"]/a').click()
                        size.click()

                    time.sleep(2)
                    html = driver.page_source
                    soup = BeautifulSoup(html, "html.parser")

                att_scrape_postpaid_promotions(
                    soup, scraped_postpaid_price.url,
                    scraped_postpaid_price.device,
                    scraped_postpaid_price.storage)

                # get sku for correct url and config_url
                try:
                    sku = soup.find(id='skuIDToDisplay').text.strip()
                except AttributeError:
                    # no skuIDToDisplay element: fall back to the viewer tag
                    sku = 'sku' + soup.find('att-product-viewer')['skuid']

                # set url and config_url for object
                url = (scraped_postpaid_price.url.split('=sku')[0]
                       + '=sku' + sku)
                config_url = 'https://www.att.com/shop/wireless/deviceconfigurator.html?prefetched=true&sku=' + sku
                scraped_postpaid_price.config_url = config_url
                scraped_postpaid_price.url = url

                # if there is no device_storage entry (due to differently
                # formatted pages)
                if scraped_postpaid_price.storage == 'N/A':
                    for div in soup.findAll(
                            'div',
                            class_='technicalspecification parbase technicalspecificati additionaldetail'):
                        if div.find(
                                'div',
                                class_='fltL').text == 'Internal memory storage':
                            device_storage = div.find(
                                'div',
                                class_='fltLIco').text.strip().replace(
                                    'GB', '')
                            scraped_postpaid_price.storage = device_storage

                # go to config_url and get prices
                driver.get(scraped_postpaid_price.config_url)
                time.sleep(3)
                html = driver.page_source
                soup = BeautifulSoup(html, "html.parser")

                # one row per purchase option on the configurator page
                price_rows = soup.findAll(
                    'div', class_='row-fluid-nowrap posRel margin-top-5')
                label_class = 'text-xlarge margin-right-5 adjustLetterSpace ng-binding ng-scope'
                gray_price_class = 'attGray text-cramped text-xlarge text-nowrap pad-bottom-10'
                orange_price_class = 'attOrange text-cramped text-xlarge text-nowrap pad-bottom-10'

                if len(price_rows) > 1:
                    for div in price_rows:
                        for span in div.findAll('span', class_=label_class):
                            if span.text == 'AT&T Next Every Year℠':
                                contract_prices = div.findAll(
                                    'div', class_=gray_price_class)
                                # The second price cell is the value kept as
                                # the monthly price. The previous code first
                                # stored cell 0 in this same field and then
                                # overwrote it. NOTE(review): cell 0 may have
                                # been meant for a different field -- confirm
                                # against the page layout.
                                scraped_postpaid_price.monthly_price = (
                                    contract_prices[1].text.replace(
                                        "$", "").strip())
                            if span.text == 'No annual contract':
                                no_contract_prices = div.findAll(
                                    'div', class_=gray_price_class)
                                scraped_postpaid_price.retail_price = (
                                    no_contract_prices[0].text.replace(
                                        ',', '').replace("$", "").strip())
                            if span.text == '2-Year Contract':
                                # the price cell appears under several class
                                # orderings; take the first match found
                                no_contract_prices = (
                                    div.findAll('div', class_=orange_price_class)
                                    + div.findAll('div', class_='text-cramped text-xlarge text-nowrap pad-bottom-10 attOrange')
                                    + div.findAll('div', class_=gray_price_class))
                                scraped_postpaid_price.contract_ufc = (
                                    no_contract_prices[0].text.replace(
                                        "$", "").strip())
                else:
                    # single purchase option: only a no-contract price
                    for div in price_rows:
                        for span in div.findAll('span', class_=label_class):
                            if span.text == 'No annual contract':
                                no_contract_prices = div.findAll(
                                    'div', class_=orange_price_class)
                                scraped_postpaid_price.retail_price = (
                                    no_contract_prices[0].text.replace(
                                        "$", "").strip())
                                scraped_postpaid_price.contract_ufc = '0.00'

                # replace any existing row for the same key, then insert
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)

                button_number += 1
    finally:
        # always end the browser session, including on scraping errors
        driver.quit()
Esempio n. 3
0
def ver_scrape_postpaid_tablet_prices():
    """Scrape Verizon's tablet listing and store postpaid prices per storage size.

    Drives a headless Chrome session: reads device names, links and promo text
    from the tablet landing page, then opens each device page, clicks every
    storage-size option and reads the last three ``sizePad`` values as
    monthly price, contract price and retail price. Each (device, storage)
    pair is written through ``remove_postpaid_duplicate`` and
    ``add_postpaid_to_database``; promotions go through
    ``add_scraped_promotions_to_database`` and
    ``ver_scrape_postpaid_promotions``.

    The browser is shut down with ``driver.quit()`` in a ``finally`` block so
    the session is ended even when scraping raises.

    NOTE(review): ``onetime_price`` is read when inserting but is never
    assigned in this function -- presumably ``ScrapedPostpaidPrice`` supplies
    a default; confirm.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get("https://www.verizonwireless.com/tablets/")
        time.sleep(10)
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")

        # make object (one instance is reused for every device and size)
        scraped_postpaid_price = ScrapedPostpaidPrice()

        # hardcoded variables
        scraped_postpaid_price.provider = 'verizon'
        scraped_postpaid_price.date = datetime.date.today()
        scraped_postpaid_price.time = datetime.datetime.now().time()

        # get device names and links from the tablet landing page
        for device in soup.findAll(
                'div',
                class_='cursorPointer pad15 onlySidePad tile background_supporting border_CC'):

            device_contents = device.find('a')
            scraped_postpaid_price.device = brandparser(device_contents.text)
            if "pre-owned" in scraped_postpaid_price.device:
                continue
            scraped_postpaid_price.url = (
                'https://www.verizonwireless.com' + device_contents["href"])

            # a tile without an offer block is treated like an empty offer
            offer_block = device.find('div', class_='offer-text')
            promo_text = offer_block.text if offer_block is not None else ''
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device,
                    '0', 'device landing page', promo_text,
                    scraped_postpaid_price.url, scraped_postpaid_price.date,
                    scraped_postpaid_price.time)

            # go to url
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")

            # skip pages with a missing or empty <title>; the rendered markup
            # is compared because a bs4 Tag never equals a plain string
            site_title = device_soup.find_all("title")
            if not site_title or str(site_title[0]) == "<title></title>":
                continue

            # select each device size
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_='grow1basis0 priceSelectorColumn radioGroup positionRelative')

            # XPath positions are 1-based, hence start=1
            for size_button_number, size_button in enumerate(size_buttons,
                                                             start=1):

                # record new device size
                scraped_postpaid_price.storage = size_button.text.replace(
                    'GB', '')

                size_xpath = (
                    '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                    + str(size_button_number) + ']/div/div/p')

                # click the size; if the click fails, dismiss the popup via
                # its '×' link and retry once
                try:
                    driver.find_element_by_xpath(size_xpath).click()
                except WebDriverException:
                    driver.find_element_by_link_text('×').click()
                    driver.find_element_by_xpath(size_xpath).click()

                # re-read the page so prices and promos match the selected size
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

                # the last three sizePad entries are read as monthly,
                # contract and retail price, in that order
                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-3].text)
                scraped_postpaid_price.contract_ufc = contract_ufc_parser(
                    values_list[-2].text.replace(',', ''))
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))

                # remove storage from device name if it is in it
                if scraped_postpaid_price.storage in scraped_postpaid_price.device:
                    scraped_postpaid_price.device = (
                        scraped_postpaid_price.device.replace(
                            scraped_postpaid_price.storage + 'gb', ''))

                # replace any existing row for the same key, then insert
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)

                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
    finally:
        # quit() ends the whole browser session; close() only closes the
        # current window
        driver.quit()