def tmo_scrape_postpaid_smartphone_prices():
    """Scrape T-Mobile postpaid smartphone prices and landing-page promotions.

    Opens the T-Mobile home page in headless Chrome, follows the 'PHONES'
    link, and for every in-scope device tile visits the device page once per
    storage size. For each (device, storage) pair it calls
    ``remove_postpaid_duplicate`` followed by ``add_postpaid_to_database`` and
    then ``tmo_scrape_postpaid_promotions``. Non-empty landing-page promo text
    is stored through ``add_scraped_promotions_to_database``.

    An ``AttributeError`` raised while reading a device's price elements is
    caught; the device name is collected and the list is printed at the end.
    The browser is always shut down, even if the scrape raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    # device-name fragments that mark a tile as out of scope
    excluded_terms = ('certified pre-owned', 'linelink', 'sim', 'flip')
    errors = []

    # try/finally so the browser process is released even when a lookup or a
    # database call raises part-way through the scrape
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get('https://www.t-mobile.com/')
        time.sleep(5)

        # go to Phones url from homepage (since url could change)
        driver.find_element_by_link_text('PHONES').click()
        time.sleep(20)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # one record object is reused for every device and size; only the
        # fields assigned below change between database writes
        record = ScrapedPostpaidPrice()
        record.provider = 'tmobile'
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()

        # get device name and url from device landing page
        for tariff in soup.findAll('div', class_='tile col-lg-3 col-md-4 col-sm-6 col-xs-12'):
            device_contents = tariff.find(
                'a', class_='m-b-5 product-name text-center regular block ng-binding')
            record.device = device_parser(device_contents.text)

            # skip out-of-scope devices
            if any(term in record.device for term in excluded_terms):
                continue

            record.url = 'https://www.t-mobile.com/' + device_contents['href']
            promo_text = tariff.find('div', class_='offerTextHeight').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    record.provider, record.device, '0', 'device landing page',
                    promo_text, record.url, record.date, record.time)

            # go to individual page
            driver.get(record.url)
            time.sleep(6)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # try/except in case of no price (coming soon, etc.)
            try:
                # iterate through storage sizes
                for memory_button in device_soup.findAll('a', class_='memory-btn'):
                    # record storage size and url
                    record.storage = memory_button.text.replace('GB', '').strip()
                    record.url = (record.url.split('?memory=')[0]
                                  + '?memory=' + record.storage + 'gb')
                    driver.get(record.url)
                    time.sleep(5)
                    device_soup = BeautifulSoup(driver.page_source, "html.parser")

                    if len(device_soup.findAll('div', class_='price-lockup')) > 1:
                        # first span is the down payment, second the retail price
                        downpayment_and_retail = device_soup.findAll(
                            'span', class_='cost-price font-tele-ult ng-binding')
                        record.onetime_price = downpayment_and_retail[0].text
                        record.retail_price = downpayment_and_retail[1].text.replace(',', '')
                        record.monthly_price = monthly_price_parser(
                            device_soup.find(
                                'p', class_='small font-tele-nor m-t-10 ng-binding').text)
                    else:
                        # NOTE(review): only onetime_price is assigned here, so
                        # monthly_price and retail_price keep whatever the shared
                        # record last held -- confirm that is intended
                        record.onetime_price = device_soup.find(
                            'span', class_='cost-price font-tele-ult ng-binding').text

                    # add to database
                    remove_postpaid_duplicate(record.provider, record.device,
                                              record.storage, record.date)
                    add_postpaid_to_database(
                        record.provider, record.device, record.storage,
                        record.monthly_price, record.onetime_price,
                        record.retail_price, record.contract_ufc,
                        record.url, record.date, record.time)
                    tmo_scrape_postpaid_promotions(
                        driver, device_soup, record.url, record.device, record.storage)
            except AttributeError:
                errors.append(record.device)

        print("pricing errors: ", errors)
    finally:
        driver.quit()
def xfi_scrape_postpaid_smartphone_prices():
    """Scrape Xfinity Mobile postpaid smartphone prices from its product API.

    Fetches the device catalogue as JSON and, for the first variant of each
    distinct storage size of every device, prints the row, calls
    ``remove_postpaid_duplicate`` then ``add_postpaid_to_database``, and hands
    the device description to ``xfi_scrape_prepaid_promotins``.

    Raises:
        requests.RequestException: if the request times out or the API
            answers with an HTTP error status.
    """
    # scrape json; the timeout keeps a stalled connection from hanging the run
    device_page = requests.get(
        'https://modesto-prodapi.xfinity.com/ModestoGW/api/v1.5/products?category=device&offset=0&sortAsc=true&sortBy=rank',
        timeout=30)
    # fail here on an HTTP error instead of feeding an error body to the parser
    device_page.raise_for_status()
    # the response text is passed through BeautifulSoup before JSON decoding
    device_soup = BeautifulSoup(device_page.text, 'html.parser')
    device_json = json.loads(device_soup.text)

    for json_obj in device_json:
        # make object
        record = ScrapedPostpaidPrice()

        # hardcoded variables
        record.provider = 'xfinity'
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()

        # scrape data
        record.device = device_parser(json_obj['name'])

        # get description
        description = remove_non_ascii(json_obj['description'])

        # storage sizes already written for this device
        seen_sizes = set()
        for variant in json_obj['variants']:
            size_variant = variant['capacity'].replace('gb', '').strip()
            if size_variant in seen_sizes:
                continue  # ignore duplicates of the same size
            seen_sizes.add(size_variant)

            record.storage = size_variant
            record.retail_price = variant['price']
            record.onetime_price = '0.00'
            record.monthly_price = variant['financePrice']
            record.contract_ufc = '0.00'
            record.url = 'https://www.xfinity.com/mobile/shop/device/' + json_obj['slug']

            print(record.provider, record.device, record.storage,
                  record.monthly_price, record.onetime_price,
                  record.retail_price, record.contract_ufc,
                  record.url, record.date, record.time)

            # add to database
            remove_postpaid_duplicate(record.provider, record.device,
                                      record.storage, record.date)
            add_postpaid_to_database(
                record.provider, record.device, record.storage,
                record.monthly_price, record.onetime_price,
                record.retail_price, record.contract_ufc,
                record.url, record.date, record.time)

            # add promotion text to database
            xfi_scrape_prepaid_promotins(record.url, record.device,
                                         record.storage, description)
def spr_scrape_postpaid_smartphone_prices():
    """Scrape Sprint postpaid smartphone prices and landing-page promotions.

    Loads the Sprint phone listing in headless Chrome, then opens a fresh
    browser per in-scope device, selects each storage size from the
    'sprint_storage_selector' drop-down and reads the lease and full-price
    labels. If both the one-time and monthly price are still '0.00' after the
    first read, the page is reloaded in a new browser and read again. Each
    size is written with ``remove_postpaid_duplicate`` and
    ``add_postpaid_to_database`` and passed to
    ``spr_scrape_postpaid_promotions``.

    Every browser started here is shut down again, including on the 404 skip
    path and when an exception is raised.
    """

    def _launch_chrome(implicit_wait=None):
        """Start a headless Chrome session; the caller must quit() it."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        browser = webdriver.Chrome(chrome_options=chrome_options,
                                   executable_path=chrome_driver)
        if implicit_wait is not None:
            browser.implicitly_wait(implicit_wait)
        return browser

    def _record_prices(record, page_soup):
        """Copy lease and full-price values from the page labels into record."""
        for label in page_soup.findAll('label', class_='soar-selection__label'):
            heading = label.find('strong').text
            if heading == ' Sprint Flex 18-mo. lease':
                monthly = label.findAll('span', class_='display-block')
                record.monthly_price = price_parser(monthly[0].text.strip())
                record.onetime_price = price_parser(monthly[1].text.strip())
            if heading == ' Full price':
                retail = label.findAll('span', class_='display-block')
                record.retail_price = price_parser(
                    retail[1].text.strip().replace(',', ''))

    # device-name fragments that mark a tile as out of scope
    excluded_terms = ("linelink", "pre-owned", "flip", "xp5s", "duraxtp",
                      "duratr", "xp strike", "certified")

    # go to website and get soup of the device landing page
    driver = _launch_chrome()
    try:
        driver.get('https://www.sprint.com/en/shop/cell-phones.html')
        driver.get(
            "https://www.sprint.com/en/shop/cell-phones.html?INTNAV=TopNav:Shop:AllPhones&credit=A2&sort=FEATURED"
        )
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    # make scraper object (shared by every device and size below)
    record = ScrapedPostpaidPrice()

    # set hardcoded variables
    record.provider = 'sprint'
    record.date = datetime.date.today()
    record.time = datetime.datetime.now().time()

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li', class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class": "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if any(term in device_name for term in excluded_terms):
            continue

        # device name
        record.device = device_parser(device_name)

        # url
        record.url = "https://www.sprint.com" + device_tile.find("a")["href"]

        # promo text for device landing page (empty when the span is absent)
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            promo_text = ''
        add_scraped_promotions_to_database(
            record.provider, record.device, '0', 'device landing page',
            promo_text, record.url, record.date, record.time)

        # go to url in a fresh browser; the finally below guarantees that
        # whichever browser is current gets shut down for this device
        driver = _launch_chrome(implicit_wait=5)
        try:
            driver.get(record.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # if 404 error or no title at all, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title) or not site_title:
                print('404 Error or page error: ' + record.device)
                continue

            # read the available device sizes from the drop-down menu
            size_selector = driver.find_element_by_id('sprint_storage_selector')
            sizes = size_selector.text.strip().replace(' GB', '').split('\n')

            # iterate through sizes
            for size in sizes:
                # Re-locate the selector on every pass: the reload below may
                # have replaced the browser, which would leave a previously
                # fetched element bound to a closed session.
                driver.find_element_by_id('sprint_storage_selector').click()
                time.sleep(2)
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(6)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # record device size
                record.storage = size

                # record current url
                record.url = driver.current_url

                # initialize price variables
                record.monthly_price = '0.00'
                record.retail_price = '0.00'
                record.onetime_price = '0.00'

                # get prices
                _record_prices(record, device_soup)

                # if page didn't load all the way
                if record.onetime_price == '0.00' and record.monthly_price == '0.00':
                    # shut the browser down and reload the page in a new one
                    driver.quit()
                    driver = _launch_chrome()
                    driver.get(record.url)
                    time.sleep(5)
                    device_soup = BeautifulSoup(driver.page_source, "html.parser")

                    # get prices again
                    _record_prices(record, device_soup)

                # add to database
                remove_postpaid_duplicate(record.provider, record.device,
                                          record.storage, record.date)
                add_postpaid_to_database(
                    record.provider, record.device, record.storage,
                    record.monthly_price, record.onetime_price,
                    record.retail_price, record.contract_ufc,
                    record.url, record.date, record.time)
                spr_scrape_postpaid_promotions(device_soup, record.url,
                                               record.device, record.storage)
        finally:
            driver.quit()
def ver_scrape_postpaid_smartphone_prices():
    """Scrape Verizon postpaid smartphone prices and landing-page promotions.

    Loads the Verizon smartphone listing in headless Chrome and, for every
    device tile whose name does not contain "pre-owned", opens the device
    page and clicks each storage-size button in turn. After each click the
    monthly and retail prices are read from the last two 'sizePad' divs, the
    row is written with ``remove_postpaid_duplicate`` and
    ``add_postpaid_to_database``, and ``ver_scrape_postpaid_promotions`` is
    called.

    An ``AttributeError`` raised while processing a device's sizes is caught;
    the device name is collected and the list is printed at the end. The
    browser is always shut down, even if the scrape raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    errors = []

    # try/finally + quit(): close() only closes the window, quit() also ends
    # the driver session
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get("https://www.verizonwireless.com/smartphones/")
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object (shared by every device and size below)
        record = ScrapedPostpaidPrice()

        # hardcoded variables
        record.provider = 'verizon'
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()

        for device in soup.findAll(
                'div',
                class_='cursorPointer pad15 onlySidePad tile background_supporting border_CC'):
            device_contents = device.find('a')
            record.device = brandparser(device_contents.text)
            if "pre-owned" in record.device:
                continue

            record.url = 'https://www.verizonwireless.com' + device_contents["href"]
            promo_text = device.find('div', class_='offer-text').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    record.provider, record.device, '0', 'device landing page',
                    promo_text, record.url, record.date, record.time)

            # go to url
            driver.get(record.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # select each device size
            try:
                size_button_pad = device_soup.find(
                    'div', class_='displayFlex rowNoWrap priceSelectorRow')
                size_buttons = size_button_pad.findAll(
                    'div',
                    class_='grow1basis0 priceSelectorColumn radioGroup positionRelative')

                # XPath positions are 1-based, hence start=1
                for size_button_number, size_button in enumerate(size_buttons, start=1):
                    # record new device size
                    record.storage = size_button.text.replace('GB', '')

                    size_xpath = (
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p')

                    # remove popup before clicking
                    try:
                        driver.find_element_by_xpath(size_xpath).click()
                    except WebDriverException:
                        driver.find_element_by_class_name('fsrCloseBtn').click()
                        driver.find_element_by_xpath(size_xpath).click()

                    # re-read the page after the click to show device
                    # size-specific promos
                    time.sleep(2)
                    device_soup = BeautifulSoup(driver.page_source, "html.parser")

                    values_list = device_soup.findAll('div', class_='sizePad')
                    record.monthly_price = monthly_price_parser(values_list[-2].text)
                    record.retail_price = retail_price_parser(
                        values_list[-1].text.replace(',', ''))

                    remove_postpaid_duplicate(record.provider, record.device,
                                              record.storage, record.date)
                    add_postpaid_to_database(
                        record.provider, record.device, record.storage,
                        record.monthly_price, record.onetime_price,
                        record.retail_price, record.contract_ufc,
                        record.url, record.date, record.time)
                    ver_scrape_postpaid_promotions(device_soup, driver, record.url,
                                                   record.device, record.storage)
            except AttributeError:
                # e.g. the size selector row is missing from the page; remember
                # the device instead of dropping it silently
                errors.append(record.device)

        print("pricing errors: ", errors)
    finally:
        driver.quit()
def spr_scrape_postpaid_tablet_prices():
    """Scrape Sprint postpaid tablet prices and landing-page promotions.

    Loads the Sprint tablet listing in headless Chrome, then opens a fresh
    browser per in-scope device, selects each storage size from the
    'sprint_storage_selector' drop-down and reads the 24-month-installment
    and full-price labels. Each size is written with
    ``remove_postpaid_duplicate`` and ``add_postpaid_to_database`` and passed
    to ``spr_scrape_postpaid_promotions``.

    Every browser started here is shut down again, including on the 404 skip
    path and when an exception is raised.
    """

    def _launch_chrome(implicit_wait=None):
        """Start a headless Chrome session; the caller must quit() it."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        browser = webdriver.Chrome(chrome_options=chrome_options,
                                   executable_path=chrome_driver)
        if implicit_wait is not None:
            browser.implicitly_wait(implicit_wait)
        return browser

    # device-name fragments that mark a tile as out of scope
    excluded_terms = ("linelink", "pre-owned", "flip", "sim", "duraxtp",
                      "duratr", "xp strike", "certified")

    # go to website and get soup of the tablet landing page
    driver = _launch_chrome()
    try:
        driver.get(
            'https://www.sprint.com/en/shop/tablets.html?INTNAV=TopNav:Shop:Tablets&credit=A2&sort=FEATURED'
        )
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()

    # make scraper object (shared by every device and size below)
    record = ScrapedPostpaidPrice()

    # set hardcoded variables
    record.provider = 'sprint'
    record.date = datetime.date.today()
    record.time = datetime.datetime.now().time()

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li', class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class": "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if any(term in device_name for term in excluded_terms):
            continue

        # device name
        record.device = device_parser(device_name)

        # url
        record.url = "https://www.sprint.com" + device_tile.find("a")["href"]

        # promo text for device landing page & add to database
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            promo_text = ''
        add_scraped_promotions_to_database(
            record.provider, record.device, '0', 'device landing page',
            promo_text, record.url, record.date, record.time)

        # go to url in a fresh browser; the finally below guarantees it is
        # shut down for this device whatever happens
        driver = _launch_chrome(implicit_wait=5)
        try:
            driver.get(record.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # if 404 error, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title):
                print('404 Error: ' + record.device)
                continue

            # click on drop down menu and record device sizes
            size_selector = driver.find_element_by_id('sprint_storage_selector')
            size_selector.click()
            time.sleep(2)
            sizes = size_selector.text.strip().replace(' GB', '').split('\n')

            # iterate through sizes
            for size in sizes:
                # click on size and reload page
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(2)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # record device size
                record.storage = size

                # initialize price variables
                record.monthly_price = '0.00'
                record.retail_price = '0.00'
                record.onetime_price = '0.00'

                # get prices
                for label in device_soup.findAll('label', class_='soar-selection__label'):
                    heading = label.find('strong').text
                    if heading == ' Buy it with 24 monthly installments':
                        monthly = label.findAll('span', class_='display-block')
                        record.monthly_price = price_parser(monthly[0].text.strip())
                        record.onetime_price = price_parser(monthly[1].text.strip())
                    if heading == ' Full price':
                        retail = label.findAll('span', class_='display-block')
                        record.retail_price = price_parser(retail[1].text.strip())

                # add to database
                remove_postpaid_duplicate(record.provider, record.device,
                                          record.storage, record.date)
                add_postpaid_to_database(
                    record.provider, record.device, record.storage,
                    record.monthly_price, record.onetime_price,
                    record.retail_price, record.contract_ufc,
                    record.url, record.date, record.time)
                spr_scrape_postpaid_promotions(device_soup, record.url,
                                               record.device, record.storage)
        finally:
            driver.quit()
def att_scrape_postpaid_smartphone_prices():
    """Scrape AT&T postpaid smartphone prices and landing-page promotions.

    Loads the AT&T phone listing in Chrome (clicking 'Show All' when the
    shown and total device counts differ) and, for every in-scope device,
    visits the device page once per 'preSize' storage button. For each size
    it calls ``att_scrape_postpaid_promotions``, resolves the SKU, opens the
    device configurator page for that SKU to read the prices, and writes the
    row with ``remove_postpaid_duplicate`` and ``add_postpaid_to_database``.

    The browser is always shut down, even if the scrape raises.
    """
    # Chrome setup; headless mode is not enabled for this scraper (the flag
    # below is commented out)
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    # device-name fragments that mark a tile as out of scope
    excluded_terms = ("pre-owned", "flip", "wireless", "b470", "xp5s",
                      "duraxe", "certified")
    price_row_class = 'row-fluid-nowrap posRel margin-top-5'
    plan_label_class = 'text-xlarge margin-right-5 adjustLetterSpace ng-binding ng-scope'

    # try/finally so the browser process is released even when a lookup or a
    # database call raises part-way through the scrape
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get('https://www.att.com/shop/wireless/devices/cellphones.html')
        time.sleep(2)

        # check if all devices are shown on page
        devices_shown = driver.find_element_by_class_name(
            'deviceCount').text.split(' ')[-1]
        devices_total = driver.find_element_by_class_name('deviceSize').text
        if devices_shown != devices_total:
            # click 'Show All' button
            driver.find_element_by_id("deviceShowAllLink").click()

        # load page and get soup
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object (shared by every device and size below)
        record = ScrapedPostpaidPrice()

        # set hardcoded variables
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()
        record.provider = 'att'

        # parse through device tiles
        for device in soup.findAll("div", class_="list-item"):
            device_contents = device.find("a", class_="titleURLchng")
            record.device = brandparser(parser(device_contents.text)).lower()
            if any(term in record.device for term in excluded_terms):
                continue

            record.url = 'https://www.att.com' + device_contents['href']

            # a promo is recorded only when the tile has exactly two flags;
            # the second flag's image title is used as the promo text
            deal_landing_page_promo = device.findAll("div", class_="holidayFlag")
            if len(deal_landing_page_promo) == 2:
                add_scraped_promotions_to_database(
                    record.provider, record.device, '0', 'device landing page',
                    deal_landing_page_promo[1].img['title'],
                    record.url, record.date, record.time)

            # go to url and get storage size
            driver.get(record.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # read sizes from the size buttons that are in the html even if
            # they are not visible on the page; the list is captured once so
            # the multi-size check below always refers to the device page,
            # not to whatever page device_soup was later rebound to
            size_buttons = device_soup.findAll('button', class_='preSize')
            has_multiple_sizes = len(size_buttons) != 1

            # iterate through each size
            for button_number, button in enumerate(size_buttons):
                # go back to the device page if this is not the first button
                if button_number > 0:
                    driver.get(record.url)
                    time.sleep(3)

                device_storage = button.text.replace('GB', '').strip()
                if 'MB' in device_storage:
                    device_storage = device_storage.replace('MB', '')
                    # NOTE(review): '{: .2f}' emits a leading space for
                    # non-negative values, which ends up in size_id below --
                    # confirm this matches the element ids on the page
                    device_storage = '{: .2f}'.format(int(device_storage) * 0.001)

                # set object's storage size
                record.storage = device_storage
                size_id = 'size_' + record.storage + 'GB'
                size = driver.find_element_by_id(size_id)

                # click on size that was recorded as storage if there is more
                # than one storage size
                if has_multiple_sizes:
                    # if popup is there, click it and make it go away
                    try:
                        size.click()
                    except WebDriverException:
                        driver.find_element_by_xpath(
                            '//*[@id="acsMainInvite"]/a').click()
                        size.click()
                    time.sleep(3)
                    device_soup = BeautifulSoup(driver.page_source, "html.parser")

                # get promotions
                att_scrape_postpaid_promotions(device_soup, record.url,
                                               record.device, record.storage)

                # get sku for correct url and config_url
                try:
                    sku = device_soup.find(id='skuIDToDisplay').text.strip()
                except AttributeError:
                    sku = 'sku' + device_soup.find('att-product-viewer')['skuid']

                # set url and config_url for object
                # NOTE(review): url is rebuilt as '...=sku' + sku; if sku
                # already begins with 'sku' this doubles the prefix -- verify
                url = record.url.split('=sku')[0] + '=sku' + sku
                config_url = 'https://www.att.com/shop/wireless/deviceconfigurator.html?prefetched=true&sku=' + sku
                record.config_url = config_url
                record.url = url

                # go to config_url and get prices
                driver.get(record.config_url)
                time.sleep(5)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                price_rows = device_soup.findAll('div', class_=price_row_class)
                if len(price_rows) > 1:
                    for div in price_rows:
                        for span in div.findAll('span', class_=plan_label_class):
                            if span.text == 'AT&T Next Every Year℠':
                                contract_prices = div.findAll(
                                    'div',
                                    class_='attGray text-cramped text-xlarge text-nowrap pad-bottom-10')
                                record.onetime_price = contract_prices[
                                    0].text.replace("$", "").strip()
                                record.monthly_price = contract_prices[
                                    1].text.replace("$", "").replace("Monthly", "").strip()
                            if span.text == 'No annual contract':
                                no_contract_prices = div.findAll(
                                    'div',
                                    class_='attGray text-cramped text-xlarge text-nowrap pad-bottom-10')
                                record.retail_price = no_contract_prices[
                                    0].text.replace(',', '').replace("$", "").strip()
                else:
                    # a single price row: only the no-contract price is read,
                    # and it is looked up under the attOrange class
                    for div in price_rows:
                        for span in div.findAll('span', class_=plan_label_class):
                            if span.text == 'No annual contract':
                                no_contract_prices = div.findAll(
                                    'div',
                                    class_='attOrange text-cramped text-xlarge text-nowrap pad-bottom-10')
                                record.retail_price = no_contract_prices[
                                    0].text.replace("$", "").strip()

                remove_postpaid_duplicate(record.provider, record.device,
                                          record.storage, record.date)
                add_postpaid_to_database(
                    record.provider, record.device, record.storage,
                    record.monthly_price, record.onetime_price,
                    record.retail_price, record.contract_ufc,
                    record.url, record.date, record.time)
    finally:
        driver.quit()
def ver_scrape_postpaid_tablet_prices():
    """Scrape Verizon postpaid tablet prices and landing-page promotions.

    Loads the Verizon tablet listing in headless Chrome and, for every device
    tile whose name does not contain "pre-owned", opens the device page and
    clicks each storage-size button in turn. After each click the monthly
    price, contract value and retail price are read from the last three
    'sizePad' divs, the row is written with ``remove_postpaid_duplicate`` and
    ``add_postpaid_to_database``, and ``ver_scrape_postpaid_promotions`` is
    called.

    Device pages with no ``<title>`` element or an empty one are skipped. The
    browser is always shut down, even if the scrape raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    # try/finally + quit(): close() only closes the window, quit() also ends
    # the driver session
    try:
        driver.implicitly_wait(5)

        # go to website
        driver.get("https://www.verizonwireless.com/tablets/")
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object (shared by every device and size below)
        record = ScrapedPostpaidPrice()

        # hardcoded variables
        record.provider = 'verizon'
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()

        # get device names and links from the tablet landing page
        for device in soup.findAll(
                'div',
                class_='cursorPointer pad15 onlySidePad tile background_supporting border_CC'):
            device_contents = device.find('a')
            record.device = brandparser(device_contents.text)
            if "pre-owned" in record.device:
                continue

            record.url = 'https://www.verizonwireless.com' + device_contents["href"]
            promo_text = device.find('div', class_='offer-text').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    record.provider, record.device, '0', 'device landing page',
                    promo_text, record.url, record.date, record.time)

            # go to url
            driver.get(record.url)
            time.sleep(5)
            device_soup = BeautifulSoup(driver.page_source, "html.parser")

            # Skip pages without a usable title. The tag is rendered with
            # str() first: comparing a bs4 Tag directly to a string is never
            # equal, so the empty-title case would otherwise go undetected.
            site_title = device_soup.find_all("title")
            if not site_title or str(site_title[0]) == "<title></title>":
                continue

            # select each device size
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_='grow1basis0 priceSelectorColumn radioGroup positionRelative')

            # XPath positions are 1-based, hence start=1
            for size_button_number, size_button in enumerate(size_buttons, start=1):
                # record new device size
                record.storage = size_button.text.replace('GB', '')

                size_xpath = (
                    '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                    + str(size_button_number) + ']/div/div/p')

                # remove popup before clicking
                try:
                    driver.find_element_by_xpath(size_xpath).click()
                except WebDriverException:
                    driver.find_element_by_link_text('×').click()
                    driver.find_element_by_xpath(size_xpath).click()

                # re-read the page after the click to show device
                # size-specific promos
                time.sleep(2)
                device_soup = BeautifulSoup(driver.page_source, "html.parser")

                values_list = device_soup.findAll('div', class_='sizePad')
                record.monthly_price = monthly_price_parser(values_list[-3].text)
                record.contract_ufc = contract_ufc_parser(
                    values_list[-2].text.replace(',', ''))
                record.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))

                # remove storage from device name if it is in it
                if record.storage in record.device:
                    record.device = record.device.replace(record.storage + 'gb', '')

                remove_postpaid_duplicate(record.provider, record.device,
                                          record.storage, record.date)
                add_postpaid_to_database(
                    record.provider, record.device, record.storage,
                    record.monthly_price, record.onetime_price,
                    record.retail_price, record.contract_ufc,
                    record.url, record.date, record.time)
                ver_scrape_postpaid_promotions(device_soup, driver, record.url,
                                               record.device, record.storage)
    finally:
        driver.quit()
import requests from bs4 import BeautifulSoup import json import time import datetime import os from selenium import webdriver from selenium.webdriver.chrome.options import Options from data.model.Scraped_Postpaid_Price import ScrapedPostpaidPrice # make object scraped_postpaid_price = ScrapedPostpaidPrice() # set hardcoded variables scraped_postpaid_price.date = datetime.date.today() scraped_postpaid_price.time = datetime.datetime.now().time() scraped_postpaid_price.provider = 'att' # get skus from device landing page # headless Chrome chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--window-size=1920x1080") chrome_driver = os.getcwd() + "\\chromedriver.exe" driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver) driver.implicitly_wait(5) # go to website driver.get('https://www.att.com/shop/wireless/devices/cellphones.html') time.sleep(2)