def ver_scrape_homepage():
    """Scrape promotional copy from the Verizon Wireless homepage.

    Collects text from the main hero sections and the small swiper promo
    tiles, prints each hit, and stores it via
    add_scraped_promotions_to_database().
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    # go to website
    driver.get("https://www.verizonwireless.com/")
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'verizon'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    # get main containers
    for section in soup.findAll('section', class_='container-fluid'):
        # BUGFIX: use .get() — not every <section> carries an id attribute,
        # and section['id'] raises KeyError when it is missing.
        if section.get('id') != 'fourBoxPromo2':
            deals_page_promo = section.text.strip().replace('\n', '')
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

    # get deals in small tiles
    for div in soup.findAll('div', class_='swiper-promo-pannel'):
        if div.text.strip().replace('\n', '') != '':
            deals_page_promo = div.text.strip().replace('\n', '')
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

    # BUGFIX: quit() (not close()) so the chromedriver process terminates,
    # matching the other scrapers in this module.
    driver.quit()
def tmo_scrape_homepage():
    """Scrape promotional copy from the T-Mobile homepage.

    Stores each hero banner's text, and for banners linking into /offers
    also stores the first FAQ from the linked offer page.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'
    scraped_promotion.promo_location = 'homepage'

    for div in soup.findAll('div', class_='heroContent ng-scope'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
        # follow a "see more" link into the offer page, if present
        try:
            see_more_link = div.find("div", {"class": "cta"}).a["href"]
            if see_more_link[:7] == "/offers":
                driver.get("https://www.t-mobile.com" + see_more_link)
                time.sleep(2)
                html = driver.page_source
                offer_page_soup = BeautifulSoup(html, "html.parser")
                first_faq = offer_page_soup.find(
                    "div", {"class": "panel-body"}).text.strip()
                scraped_promotion.promo_text = "FIRST FAQ: " + first_faq
                add_scraped_promotions_to_database(
                    scraped_promotion.provider,
                    scraped_promotion.device_name,
                    scraped_promotion.device_storage,
                    scraped_promotion.promo_location,
                    scraped_promotion.promo_text, scraped_promotion.url,
                    scraped_promotion.date, scraped_promotion.time)
        # BUGFIX: div.find(..."cta") returning None raises AttributeError
        # (None.a), not TypeError; catch both so a banner without a CTA
        # does not abort the whole scrape.
        except (TypeError, AttributeError):
            pass

    driver.quit()
def xfi_scrape_homepage():
    """Scrape Xfinity Mobile landing-page promotions from their CDN JSON.

    Reads the (dated) landing.json feed, then stores carousel popover
    bodies and content-item descriptions as promotions.
    """
    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'xfinity'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = "https://www.xfinity.com/mobile/"

    # scrape json
    # NOTE(review): the feed URL embeds a date stamp (20180720-2015) and
    # may go stale — confirm it is still served.
    device_page = requests.get(
        'https://cdn.comcast.com/mobile-static/content/20180720-2015/variants/default/pages/landing.json'
    )
    # FIX: parse the JSON body directly — routing it through BeautifulSoup
    # first was pointless and could mangle payloads containing '<'.
    device_json = json.loads(device_page.text)

    # carousel
    for actions in device_json["carousel"]["default"]:
        scraped_promotion.promo_text = actions["actions"]["popovers"][0][
            "data"]["body"]
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)

    # plan info
    for item in device_json["contentItems"]:
        # not every content item carries a description
        try:
            scraped_promotion.promo_text = item["content"]["description"]
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)
        except KeyError:
            pass
def tmo_scrape_deals_page():
    """Scrape promo copy from the T-Mobile deals page into the database."""
    # spin up a headless Chrome session
    opts = Options()
    opts.add_argument("--headless")
    opts.add_argument("--window-size=1920x1080")
    driver_path = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=opts,
                              executable_path=driver_path)
    driver.implicitly_wait(5)

    # land on the homepage first, then follow the DEALS link
    # (the deals URL itself could change)
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)
    driver.find_element_by_link_text('DEALS').click()
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # promotion record with the fields common to every hit
    scraped_promotion = ScrapedPromotion()
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'
    scraped_promotion.promo_location = 'deals page'

    # one record per content section
    for content_div in soup.findAll('div', class_='section-content'):
        scraped_promotion.promo_text = content_div.text.strip().replace('\n', '')
        record = (scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
        print(*record)
        add_scraped_promotions_to_database(*record)

    driver.quit()
def met_scrape_homepage():
    """Scrape promotional copy from the MetroPCS homepage rows."""
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.metropcs.com')
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.provider = 'metropcs'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.device_storage = '0'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.url = driver.current_url
    scraped_promotion.promo_location = 'homepage'

    # get first banner
    for div in soup.findAll('div', class_='row'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)

    # BUGFIX: quit() (not close()) so the chromedriver process terminates,
    # matching the other scrapers in this module.
    driver.quit()
def tmo_scrape_postpaid_promotions(driver, soup, url, device_name, device_storage):
    """Record T-Mobile postpaid promotions visible on one device page.

    Args:
        driver: live selenium webdriver currently on the device page.
        soup: BeautifulSoup of that same page.
        url: device page URL stored with each promotion.
        device_name, device_storage: identifiers stored with each promotion.
    """
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # list of [location, text] pairs found on the page
    promotions = []

    # upper banner text
    try:
        upper_banner_text = driver.find_element_by_id('promo-banner')
        promotions.append(['upper banner', upper_banner_text.text])
    except NoSuchElementException:
        # no banner on this page (the original dead assignment is dropped)
        pass

    # banner under device name text
    for div2 in soup.findAll("div", class_="text-magenta ng-scope"):
        promotions.append(['banner under device name', div2.text])

    # crossed out text (if savings is anything other than $0.00)
    # BUGFIX: guard the [0] index — findAll can return an empty list, and
    # the original raised IndexError when no strike-out price was present.
    strike_out_price = soup.findAll('span', class_='text-magenta ng-binding')
    if strike_out_price and strike_out_price[0].text != '($0.00 Savings)':
        promotions.append(['discount', strike_out_price[0].text])

    # store each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]
        # time variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # add to database
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
def cri_scrape_homepage():
    """Scrape promotional copy from the Cricket Wireless homepage slideshow."""
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.cricketwireless.com/')
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.provider = 'cricket'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    # get slideshow
    # BUGFIX: soup.find returns None when the layout changes; the original
    # then crashed on None.findAll. Skip gracefully instead.
    main = soup.find('div', class_='main')
    if main is not None:
        for div1 in main.findAll('div', class_='constrain-width-1024'):
            deals_page_promo = format_promo_text(div1.text)
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

    # BUGFIX: quit() (not close()) so the chromedriver process terminates.
    driver.quit()
def spr_scrape_postpaid_promotions(soup, url, device_name, device_storage):
    """Record Sprint postpaid promotions visible on one device page.

    Args:
        soup: BeautifulSoup of the device page.
        url: device page URL stored with each promotion.
        device_name, device_storage: identifiers stored with each promotion.
    """
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'sprint'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # list of [location, text] pairs found on the page
    promotions = []

    # promotion text under price box
    price_boxes = soup.findAll(
        'div', class_='col-xs-24 col-lg-24 col-xl-24 mb-20 active')
    for box in price_boxes:
        # BUGFIX: box.find can return None (AttributeError on .text in the
        # original); skip boxes without the price label instead of crashing.
        label = box.find(
            'strong',
            class_='display-block font-size-16 font-size-md-18 color--blue')
        if label is not None and label.text != ' Full price':
            for li in box.findAll('li', class_='promo-item'):
                promotions.append([
                    'banner under Sprint Flex box',
                    li.text.replace('Detail', '').strip()
                ])

    # crossed out price
    # BUGFIX: guard the [0] index — findAll may return an empty list.
    strike_out_price = soup.findAll('del', class_='sprint-price-del')
    if strike_out_price and strike_out_price[0].text != '':
        promotions.append(['crossed out price', strike_out_price[0].text])

    # store each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]
        # time variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # add to database
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
def met_scrape_prepaid_promotins(soup, url, device_name, device_storage):
    """Record the MetroPCS crossed-out price (when shown) as a promotion.

    NOTE: the function name typo ("promotins") is kept — callers use it.
    """
    # promotion record with caller-supplied identifiers
    promo = ScrapedPromotion()
    promo.provider = 'metropcs'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # (location, text) pairs gathered from the page
    found = []

    # crossed-out price element; absent when the device is not discounted
    try:
        struck = soup.find('span', class_='normal-price')
        cleaned = struck.text.strip().replace('\n', '').replace(' ', '.')
        found.append(['crossed out price', cleaned])
    except AttributeError:
        # soup.find returned None — nothing to record
        pass

    # persist each hit with a fresh timestamp
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()
        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
def xfi_scrape_prepaid_promotins(url, device_name, device_storage, description):
    """Store an Xfinity device description as a 'description' promotion.

    NOTE: the function name typo ("promotins") is kept — callers use it.
    """
    promo = ScrapedPromotion()
    # caller-supplied fields
    promo.provider = 'xfinity'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url
    promo.promo_text = description
    promo.promo_location = 'description'
    # timestamp the record
    promo.date = datetime.date.today()
    promo.time = datetime.datetime.now().time()
    # persist
    add_scraped_promotions_to_database(
        promo.provider, promo.device_name, promo.device_storage,
        promo.promo_location, promo.promo_text, promo.url,
        promo.date, promo.time)
def att_scrape_postpaid_promotions(soup, url, device_name, device_storage):
    """Record AT&T postpaid promo banners found on one device page."""
    # promotion record with caller-supplied identifiers
    promo = ScrapedPromotion()
    promo.provider = 'att'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # (location, text) pairs gathered from the page
    found = []

    # upper banner text (skip empty offers)
    for span in soup.findAll("span", class_="offerTxt"):
        banner = span.text.strip()
        if banner != '':
            found.append(['upper banner', banner])

    # lower banner text
    for div in soup.findAll("div", class_="ds2MarketingMessageTextStyle"):
        found.append(['lower banner', div.text])

    # persist each hit with a fresh timestamp
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()
        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
def cri_scrape_prepaid_promotions(driver, url, device_name, device_storage):
    """Record the Cricket sale price (when shown) as a promotion."""
    # promotion record with caller-supplied identifiers
    promo = ScrapedPromotion()
    promo.provider = 'cricket'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # (location, text) pairs gathered from the page
    found = []

    # sale-price element; absent when the device is not on sale
    try:
        sale_el = driver.find_element_by_xpath(
            '//*[@id="pricingWrapper"]/div[1]/div[1]')
    except NoSuchElementException:
        pass
    else:
        cleaned = sale_el.text.strip().replace('\n', '').replace(' ', '')
        found.append(['sale price', cleaned])

    # persist each hit with a fresh timestamp
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()
        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
def att_scrape_homepage():
    """Scrape promotional copy from the AT&T homepage.

    Stores slideshow text, item-tile text (following each tile's legal
    link and storing the legal terms too), and 'row no-flex' sections.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(6)

    # go to website
    driver.get('https://www.att.com/')
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.provider = 'att'
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'

    for slideshow in soup.findAll('div', class_='content-wrapper'):
        deals_page_promo = slideshow.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)

    for div in soup.findAll('div', class_='item-wrapper')[1:]:
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
        # follow the tile's legal link and store the legal terms
        # BUGFIX: guard the lookup chain — a tile without a legal <div>/<a>
        # previously crashed the whole scrape (AttributeError/TypeError).
        try:
            item_details = div.find("div", {"class": "legal"})
            legal_link = ("https://www.att.com/" +
                          item_details.a["data-ajaxdata"])
        except (AttributeError, TypeError, KeyError):
            continue
        driver.get(legal_link)
        time.sleep(2)
        html = driver.page_source
        legal_soup = BeautifulSoup(html, "html.parser")
        for legal_terms in legal_soup.body.findAll("div")[1:]:
            # BUGFIX: rejoined the promo-text prefix that had been split
            # across a line break in the source.
            scraped_promotion.promo_text = (
                "LEGAL TERMS: " + legal_terms.text.strip())
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

    for row in soup.findAll('div', class_='row no-flex'):
        deals_page_promo = row.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)

    driver.quit()
def tmo_scrape_postpaid_smartphone_prices():
    """Scrape T-Mobile postpaid smartphone prices (and related promos).

    Walks the phone listing, visits each in-scope device page, iterates
    its storage variants, and records prices via add_postpaid_to_database
    plus promotions via tmo_scrape_postpaid_promotions.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)
    # go to Phones url from homepage (since url could change)
    driver.find_element_by_link_text('PHONES').click()
    time.sleep(20)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # hardcoded variables
    scraped_postpaid_price.provider = 'tmobile'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    errors = []

    # get device name and url from device landing page
    for tariff in soup.findAll(
            'div', class_='tile col-lg-3 col-md-4 col-sm-6 col-xs-12'):
        device_contents = tariff.find(
            'a',
            class_='m-b-5 product-name text-center regular block ng-binding')
        scraped_postpaid_price.device = device_parser(device_contents.text)
        # skip out-of-scope devices
        if scraped_postpaid_price.device.find('certified pre-owned') == -1 and \
                scraped_postpaid_price.device.find('linelink') == -1 and \
                scraped_postpaid_price.device.find('sim') == -1 and \
                scraped_postpaid_price.device.find('flip') == -1:
            scraped_postpaid_price.url = ('https://www.t-mobile.com/' +
                                          device_contents['href'])
            # BUGFIX: not every tile has an offerTextHeight div; the
            # original crashed (AttributeError outside the try) when it
            # was missing.
            promo_div = tariff.find('div', class_='offerTextHeight')
            promo_text = promo_div.text if promo_div is not None else ''
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device, '0',
                    'device landing page', promo_text,
                    scraped_postpaid_price.url, scraped_postpaid_price.date,
                    scraped_postpaid_price.time)

            # go to individual page
            driver.get(scraped_postpaid_price.url)
            time.sleep(6)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")

            # try/except in case of no price (coming soon, etc.)
            try:
                # iterate through storage sizes
                for memory_button in device_soup.findAll('a',
                                                         class_='memory-btn'):
                    # record storage size and url
                    scraped_postpaid_price.storage = memory_button.text.replace(
                        'GB', '').strip()
                    scraped_postpaid_price.url = scraped_postpaid_price.url.split(
                        '?memory='
                    )[0] + '?memory=' + scraped_postpaid_price.storage + 'gb'
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    html = driver.page_source
                    device_soup = BeautifulSoup(html, "html.parser")

                    if len(device_soup.findAll('div',
                                               class_='price-lockup')) > 1:
                        # financed device: down payment + retail + monthly
                        downpayment_and_retail = device_soup.findAll(
                            'span',
                            class_='cost-price font-tele-ult ng-binding')
                        scraped_postpaid_price.onetime_price = \
                            downpayment_and_retail[0].text
                        scraped_postpaid_price.retail_price = \
                            downpayment_and_retail[1].text.replace(',', '')
                        scraped_postpaid_price.monthly_price = \
                            monthly_price_parser(device_soup.find(
                                'p',
                                class_='small font-tele-nor m-t-10 ng-binding'
                            ).text)
                    else:
                        # single price shown
                        scraped_postpaid_price.onetime_price = \
                            device_soup.find(
                                'span',
                                class_='cost-price font-tele-ult ng-binding'
                            ).text

                    # add to database
                    remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                              scraped_postpaid_price.device,
                                              scraped_postpaid_price.storage,
                                              scraped_postpaid_price.date)
                    add_postpaid_to_database(
                        scraped_postpaid_price.provider,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage,
                        scraped_postpaid_price.monthly_price,
                        scraped_postpaid_price.onetime_price,
                        scraped_postpaid_price.retail_price,
                        scraped_postpaid_price.contract_ufc,
                        scraped_postpaid_price.url,
                        scraped_postpaid_price.date,
                        scraped_postpaid_price.time)
                    tmo_scrape_postpaid_promotions(
                        driver, device_soup, scraped_postpaid_price.url,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage)
            except AttributeError:
                # price element missing — remember the device, keep going
                # (redundant trailing `pass` removed)
                errors.append(scraped_postpaid_price.device)

    print("pricing errors: ", errors)
    driver.quit()
def att_scrape_postpaid_smartphone_prices():
    """Scrape AT&T postpaid smartphone prices (and related promos).

    Walks the phone listing, visits each in-scope device page, clicks
    through its storage sizes, then loads the device-configurator page to
    read contract / no-contract prices into the database.
    """
    # Chrome (headless flag left disabled as in the original run config)
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.att.com/shop/wireless/devices/cellphones.html')
    time.sleep(2)

    # check if all devices are shown on page
    devices_shown = driver.find_element_by_class_name(
        'deviceCount').text.split(' ')[-1]
    devices_total = driver.find_element_by_class_name('deviceSize').text
    if devices_shown != devices_total:
        # click 'Show All' button if it exists
        driver.find_element_by_id("deviceShowAllLink").click()

    # load page and get soup
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    scraped_postpaid_price.provider = 'att'

    # parse through device tiles
    for device in soup.findAll("div", class_="list-item"):
        device_contents = device.find("a", class_="titleURLchng")
        scraped_postpaid_price.device = brandparser(
            parser(device_contents.text)).lower()
        # skip out-of-scope devices
        if scraped_postpaid_price.device.find("pre-owned") != -1 or \
                scraped_postpaid_price.device.find("flip") != -1 or \
                scraped_postpaid_price.device.find("wireless") != -1 or \
                scraped_postpaid_price.device.find("b470") != -1 or \
                scraped_postpaid_price.device.find("xp5s") != -1 or \
                scraped_postpaid_price.device.find("duraxe") != -1 or \
                scraped_postpaid_price.device.find("certified") != -1:
            continue
        scraped_postpaid_price.url = ('https://www.att.com' +
                                      device_contents['href'])

        # landing-page promo flag (second holidayFlag img carries the text)
        deal_landing_page_promo = device.findAll("div", class_="holidayFlag")
        if len(deal_landing_page_promo) == 2:
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider,
                scraped_postpaid_price.device, '0', 'device landing page',
                deal_landing_page_promo[1].img['title'],
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)

        # go to url and get storage size
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")

        # read size from size button that is in html even if it is not
        # visible on page; iterate through each size
        button_number = 0
        for button in device_soup.findAll('button', class_='preSize'):
            # go back to base web page if there is more than one button
            if button_number > 0:
                driver.get(scraped_postpaid_price.url)
                time.sleep(3)
            device_storage = button.text.replace('GB', '').strip()
            if 'MB' in device_storage:
                device_storage = device_storage.replace('MB', '')
                # BUGFIX: the original format spec '{: .2f}' inserts a
                # leading space, yielding ids like 'size_ 0.51GB' that
                # find_element_by_id can never match.
                device_storage = '{:.2f}'.format(int(device_storage) * 0.001)
            # set object's storage size
            scraped_postpaid_price.storage = device_storage
            size_id = 'size_' + scraped_postpaid_price.storage + 'GB'
            size = driver.find_element_by_id(size_id)

            # click on size that was recorded as storage if there is more
            # than one storage size
            if len(device_soup.findAll('button', class_='preSize')) != 1:
                # if popup is there, click it and make it go away
                try:
                    size.click()
                except WebDriverException:
                    driver.find_element_by_xpath(
                        '//*[@id="acsMainInvite"]/a').click()
                    size.click()
                time.sleep(3)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

            # get promotions
            att_scrape_postpaid_promotions(device_soup,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.device,
                                           scraped_postpaid_price.storage)

            # (dead commented-out iPhone-shipment block removed)

            # get sku for correct url and config_url
            try:
                sku = device_soup.find(id='skuIDToDisplay').text.strip()
            except AttributeError:
                sku = 'sku' + device_soup.find('att-product-viewer')['skuid']

            # set url and config_url for object
            url = scraped_postpaid_price.url.split('=sku')[0] + '=sku' + sku
            config_url = ('https://www.att.com/shop/wireless/'
                          'deviceconfigurator.html?prefetched=true&sku=' + sku)
            scraped_postpaid_price.config_url = config_url
            scraped_postpaid_price.url = url

            # go to config_url and get prices
            driver.get(scraped_postpaid_price.config_url)
            time.sleep(5)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")

            price_rows = device_soup.findAll(
                'div', class_='row-fluid-nowrap posRel margin-top-5')
            if len(price_rows) > 1:
                # installment plan rows present
                for div in price_rows:
                    for span in div.findAll(
                            'span',
                            class_='text-xlarge margin-right-5 '
                                   'adjustLetterSpace ng-binding ng-scope'):
                        if span.text == 'AT&T Next Every Year℠':
                            contract_prices = div.findAll(
                                'div',
                                class_='attGray text-cramped text-xlarge '
                                       'text-nowrap pad-bottom-10')
                            scraped_postpaid_price.onetime_price = \
                                contract_prices[0].text.replace("$",
                                                                "").strip()
                            scraped_postpaid_price.monthly_price = \
                                contract_prices[1].text.replace(
                                    "$", "").replace("Monthly", "").strip()
                        if span.text == 'No annual contract':
                            no_contract_prices = div.findAll(
                                'div',
                                class_='attGray text-cramped text-xlarge '
                                       'text-nowrap pad-bottom-10')
                            scraped_postpaid_price.retail_price = \
                                no_contract_prices[0].text.replace(
                                    ',', '').replace("$", "").strip()
            else:
                # retail-only layout
                for div in price_rows:
                    for span in div.findAll(
                            'span',
                            class_='text-xlarge margin-right-5 '
                                   'adjustLetterSpace ng-binding ng-scope'):
                        if span.text == 'No annual contract':
                            no_contract_prices = div.findAll(
                                'div',
                                class_='attOrange text-cramped text-xlarge '
                                       'text-nowrap pad-bottom-10')
                            scraped_postpaid_price.retail_price = \
                                no_contract_prices[0].text.replace(
                                    "$", "").strip()

            remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                      scraped_postpaid_price.device,
                                      scraped_postpaid_price.storage,
                                      scraped_postpaid_price.date)
            add_postpaid_to_database(scraped_postpaid_price.provider,
                                     scraped_postpaid_price.device,
                                     scraped_postpaid_price.storage,
                                     scraped_postpaid_price.monthly_price,
                                     scraped_postpaid_price.onetime_price,
                                     scraped_postpaid_price.retail_price,
                                     scraped_postpaid_price.contract_ufc,
                                     scraped_postpaid_price.url,
                                     scraped_postpaid_price.date,
                                     scraped_postpaid_price.time)
            button_number += 1

    driver.quit()
def _spr_read_smartphone_prices(device_soup, scraped_postpaid_price):
    """Parse lease/full prices from a Sprint smartphone page soup.

    Mutates monthly_price, onetime_price and retail_price on
    *scraped_postpaid_price* when the matching payment labels are found.
    """
    for label in device_soup.findAll('label', class_='soar-selection__label'):
        # NOTE: the leading space in these label strings matches the site's
        # markup exactly — do not "clean it up".
        if label.find('strong').text == ' Sprint Flex 18-mo. lease':
            monthly = label.findAll('span', class_='display-block')
            scraped_postpaid_price.monthly_price = price_parser(
                monthly[0].text.strip())
            scraped_postpaid_price.onetime_price = price_parser(
                monthly[1].text.strip())
        if label.find('strong').text == ' Full price':
            retail = label.findAll('span', class_='display-block')
            scraped_postpaid_price.retail_price = price_parser(
                retail[1].text.strip().replace(',', ''))


def spr_scrape_postpaid_smartphone_prices():
    """Scrape postpaid smartphone prices and landing-page promos from sprint.com.

    Walks the phone landing page, then opens each in-scope device page in a
    fresh headless Chrome, iterates over the storage sizes in the drop-down,
    and records prices via add_postpaid_to_database() and promos via
    add_scraped_promotions_to_database() / spr_scrape_postpaid_promotions().
    """
    # headless Chrome for the landing page
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    # NOTE: this first load looks redundant (immediately replaced below) but is
    # kept in case the site sets cookies on the plain landing page.
    driver.get('https://www.sprint.com/en/shop/cell-phones.html')
    # get soup for the full, credit-filtered, featured-sorted device list
    driver.get(
        "https://www.sprint.com/en/shop/cell-phones.html?INTNAV=TopNav:Shop:AllPhones&credit=A2&sort=FEATURED"
    )
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # BUGFIX: quit() (not close()) so the chromedriver process exits too
    driver.quit()
    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()
    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):
        # get device name text
        device_name = device_tile.find("h3", {
            "class": "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()
        # eliminate out of scope devices
        if device_name.find("linelink") != -1 or device_name.find("pre-owned") != -1 or device_name.find("flip") != -1 \
                or device_name.find("xp5s") != -1 or device_name.find("duraxtp") != -1 or device_name.find("duratr") != -1 \
                or device_name.find("xp strike") != -1 or device_name.find("certified") != -1:
            continue
        # device name
        scraped_postpaid_price.device = device_parser(device_name)
        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]
        # promo text for device landing page
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            # no promo span on this tile
            promo_text = ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)
        # go to device page in a fresh headless browser
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=chrome_driver)
        # BUGFIX: previously only the LAST per-device driver was quit, leaking
        # one Chrome/chromedriver pair per device (and one per 404 `continue`).
        try:
            driver.implicitly_wait(5)
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")
            # if 404 error, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title) or not site_title:
                print('404 Error or page error: ' +
                      scraped_postpaid_price.device)
                continue
            # read the storage drop-down to learn the available sizes
            size_selector = driver.find_element_by_id(
                'sprint_storage_selector')
            sizes = size_selector.text.strip().replace(' GB', '')
            sizes = sizes.split('\n')
            # iterate through sizes
            for size in sizes:
                # click on size and reload page
                size_selector.click()
                time.sleep(2)
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(6)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")
                # record device size
                scraped_postpaid_price.storage = size
                # record current url
                scraped_postpaid_price.url = driver.current_url
                # initialize price variables
                scraped_postpaid_price.monthly_price = '0.00'
                scraped_postpaid_price.retail_price = '0.00'
                scraped_postpaid_price.onetime_price = '0.00'
                # get prices
                _spr_read_smartphone_prices(device_soup,
                                            scraped_postpaid_price)
                # if page didn't load all the way, reload once and re-parse
                if scraped_postpaid_price.onetime_price == '0.00' and scraped_postpaid_price.monthly_price == '0.00':
                    # BUGFIX: quit() (not close()) the stalled driver
                    driver.quit()
                    chrome_options = Options()
                    chrome_options.add_argument("--headless")
                    chrome_options.add_argument("--window-size=1920x1080")
                    chrome_driver = os.getcwd() + "\\chromedriver.exe"
                    driver = webdriver.Chrome(chrome_options=chrome_options,
                                              executable_path=chrome_driver)
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    html = driver.page_source
                    device_soup = BeautifulSoup(html, "html.parser")
                    # BUGFIX: re-find the selector in the NEW driver; the old
                    # element would raise a stale/closed-session error on the
                    # next size iteration.
                    size_selector = driver.find_element_by_id(
                        'sprint_storage_selector')
                    # get prices again
                    _spr_read_smartphone_prices(device_soup,
                                                scraped_postpaid_price)
                # add to database (replacing today's row for this device/size)
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                spr_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
        finally:
            # always release this device's browser before moving on
            driver.quit()
def ver_scrape_postpaid_promotions(soup, driver, url, device_name, device_storage):
    """Scrape promotion texts from a Verizon device detail page and store them.

    Collects promo copy from several page locations — the upper banner, any
    crossed-out prices, and per-payment-option banners revealed by clicking
    each payment tile — then writes one database row per promo found.

    Args:
        soup: BeautifulSoup of the already-loaded device page (re-parsed after
            each payment-option click).
        driver: live Selenium driver currently on that page; used for clicks.
        url: device page URL recorded with each promo row.
        device_name: parsed device name recorded with each promo row.
        device_storage: storage size string recorded with each promo row.
    """
    # make object
    scraped_promotion = ScrapedPromotion()
    # set variables already determined
    scraped_promotion.provider = 'verizon'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url
    # make empty list of promotions; each entry is [location_label, promo_text]
    promotions = []
    # alternate way to get banner text
    upper_banner_text_2 = driver.find_element_by_class_name('clearfix')
    if upper_banner_text_2.text.strip() != '':
        promotions.append(['upper banner', upper_banner_text_2.text.strip()])
    # crossed out price — the replace() calls re-insert the spaces that
    # BeautifulSoup's .text concatenation dropped between price fragments
    pricing_options = soup.findAll('div', class_='pad8 noRightPad')
    for div in pricing_options:
        if 'was' in div.text:
            promotions.append(['crossed out price',
                               div.text.replace('2-Year Contract', ' 2-Year Contract').replace(
                                   '24 Monthly Payments', ' 24 Monthly Payments').replace('was ', ' was')])
    # each payment option has its own banners
    for option in range(1, len(pricing_options) + 1):
        # xpath is 1-indexed, hence range(1, len+1)
        option_button = driver.find_element_by_xpath(
            '//*[@id="tile_container"]/div[1]/div[3]/div[1]/div/div[2]/div/div/div[1]/div/div['
            + str(option) + ']/div/div/div')
        # PAYMENT LEVEL
        # click on different payment options to show different promos
        # if popup is there, remove it before clicking
        try:
            option_button.click()
        except WebDriverException:
            # survey popup intercepted the click; dismiss it and retry once
            driver.find_element_by_class_name('fsrCloseBtn').click()
            option_button.click()
        time.sleep(2)
        # re-parse the page after the click so option-specific banners appear
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        # promotion text above device icon
        try:
            banner_above_icon = soup.find('div', class_='offersPad fontSize_12 lineHeight8')
            promotions.append(['banner above device icon',
                               banner_above_icon.text.replace('Special Offer', '').replace(
                                   'See the details', '').replace('\n', '')])
        except AttributeError:
            # no promo banner above the icon for this payment option
            banner_above_icon = ''
        # banner under price — skip the two non-promotional boilerplate labels
        below_price_banner = soup.find('div', class_='row padTop6 noSideMargin priceLabel').text
        if below_price_banner != 'Retail Price' and below_price_banner != 'Early Termination Fee: $175 (2-Year Contracts)':
            promotions.append(['banner below price', below_price_banner])
    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]
        # hardcoded variables (timestamped per row, at insert time)
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider,
                                           scraped_promotion.device_name,
                                           scraped_promotion.device_storage,
                                           scraped_promotion.promo_location,
                                           scraped_promotion.promo_text,
                                           scraped_promotion.url,
                                           scraped_promotion.date,
                                           scraped_promotion.time)
def cri_scrape_deals_page():
    """Scrape promotion text from the Cricket Wireless deals page.

    Loads the current-deals page in headless Chrome, then records three kinds
    of promo elements (hero banner, main promo tiles, bottom-of-page tiles)
    via add_scraped_promotions_to_database(). Promos here are page-level, so
    device_name is 'N/A' and device_storage is '0'.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    # go to website
    driver.get('https://www.cricketwireless.com/current-phone-and-plan-deals')
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_promotion = ScrapedPromotion()
    # set hardcoded variables
    scraped_promotion.provider = 'cricket'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'deals page'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    def _record(promo_element):
        # Format, echo, and persist one promo element (shared by all three
        # promo sections below — previously three copy-pasted stanzas).
        scraped_promotion.promo_text = format_promo_text(promo_element.text)
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    # get big green promo banner
    for div in soup.findAll('div', class_='hero-promo hover-item'):
        _record(div)
    # get other main promotions
    for div in soup.findAll('div', class_='promo-content-wrapper'):
        _record(div)
    # get promos at the bottom of the page
    for div in soup.findAll('div', class_='col-xs-9'):
        _record(div)
    driver.close()
def ver_scrape_postpaid_tablet_prices():
    """Scrape postpaid tablet prices and promos from verizonwireless.com.

    Walks the tablet landing page, opens each non-pre-owned device page in the
    same headless Chrome session, clicks through every storage-size button,
    and records prices via add_postpaid_to_database() plus promos via
    add_scraped_promotions_to_database() / ver_scrape_postpaid_promotions().

    NOTE(review): scraped_postpaid_price.onetime_price is never assigned in
    this function but is passed to add_postpaid_to_database — it relies on
    whatever default ScrapedPostpaidPrice provides; confirm intended.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    # go to website
    driver.get("https://www.verizonwireless.com/tablets/")
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()
    # hardcoded variables
    scraped_postpaid_price.provider = 'verizon'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    # get device names and links from the tablet landing page
    for device in soup.findAll(
            'div',
            class_=
            'cursorPointer pad15 onlySidePad tile background_supporting border_CC'
    ):
        device_contents = device.find('a')
        scraped_postpaid_price.device = brandparser(device_contents.text)
        # pre-owned devices are out of scope
        if scraped_postpaid_price.device.find("pre-owned") != -1:
            continue
        scraped_postpaid_price.url = 'https://www.verizonwireless.com' + device_contents[
            "href"]
        # landing-page promo tile text (recorded only when non-empty)
        promo_text = device.find('div', class_='offer-text').text
        if promo_text != '':
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider, scraped_postpaid_price.device,
                '0', 'device landing page', promo_text,
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)
        # go to url
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")
        # skip devices whose page failed to render a title
        site_title = device_soup.find_all("title")
        if not site_title or site_title[0] == "<title></title>":
            continue
        else:
            # select each device size
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_=
                'grow1basis0 priceSelectorColumn radioGroup positionRelative')
            # xpath indices are 1-based, hence range(1, len+1)
            for size_button_number in range(1, len(size_buttons) + 1):
                # record new device size
                scraped_postpaid_price.storage = size_buttons[
                    size_button_number - 1].text.replace('GB', '')
                # remove popup before clicking
                try:
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                except WebDriverException:
                    # dismiss the popup ('×' link) and retry the click once
                    driver.find_element_by_link_text('×').click()
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                # click on different storage size to show device size-specific promos
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")
                # last three price cells: monthly, 2-yr contract, retail
                # (presumed order from the negative indexing — TODO confirm)
                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-3].text)
                scraped_postpaid_price.contract_ufc = contract_ufc_parser(
                    values_list[-2].text.replace(',', ''))
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))
                # remove storage from device name if it is in it
                if scraped_postpaid_price.storage in scraped_postpaid_price.device:
                    scraped_postpaid_price.device = scraped_postpaid_price.device.replace(
                        scraped_postpaid_price.storage + 'gb', '')
                # replace today's row for this device/size, then insert
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
    driver.close()
def ver_scrape_postpaid_smartphone_prices():
    """Scrape postpaid smartphone prices and promos from verizonwireless.com.

    Same flow as the tablet scraper: walk the smartphone landing page, open
    each non-pre-owned device page, click through every storage-size button,
    and record prices/promos. The whole size-selection section is wrapped in
    a try/except AttributeError so devices without a size selector (or with a
    changed page layout) are skipped silently.

    NOTE(review): onetime_price and contract_ufc are never assigned in this
    function but are passed to add_postpaid_to_database — they rely on
    ScrapedPostpaidPrice defaults; confirm intended.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    # go to website
    driver.get("https://www.verizonwireless.com/smartphones/")
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()
    # hardcoded variables
    scraped_postpaid_price.provider = 'verizon'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    for device in soup.findAll(
            'div',
            class_=
            'cursorPointer pad15 onlySidePad tile background_supporting border_CC'
    ):
        device_contents = device.find('a')
        scraped_postpaid_price.device = brandparser(device_contents.text)
        # pre-owned devices are out of scope
        if scraped_postpaid_price.device.find("pre-owned") != -1:
            continue
        scraped_postpaid_price.url = 'https://www.verizonwireless.com' + device_contents[
            "href"]
        # landing-page promo tile text (recorded only when non-empty)
        promo_text = device.find('div', class_='offer-text').text
        if promo_text != '':
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider, scraped_postpaid_price.device,
                '0', 'device landing page', promo_text,
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)
        # go to url
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")
        # select each device size
        try:
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_=
                'grow1basis0 priceSelectorColumn radioGroup positionRelative')
            # xpath indices are 1-based, hence range(1, len+1)
            for size_button_number in range(1, len(size_buttons) + 1):
                # record new device size
                scraped_postpaid_price.storage = size_buttons[
                    size_button_number - 1].text.replace('GB', '')
                # remove popup before clicking
                try:
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                except WebDriverException:
                    # survey popup intercepted the click; dismiss and retry once
                    driver.find_element_by_class_name('fsrCloseBtn').click()
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                # click on different storage size to show device size-specific promos
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")
                # last two price cells: monthly, retail
                # (presumed order from the negative indexing — TODO confirm)
                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-2].text)
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))
                # replace today's row for this device/size, then insert
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
            # # iphone shipment
            # if scraped_postpaid_price.device == "iphone xr" or scraped_postpaid_price.device == "iphone xs" or\
            #         scraped_postpaid_price.device == "iphone xs max":
            #
            #     color_button_pad = device_soup.find('div', class_='flex width100p pad15 onlyBottomPad')
            #     color_buttons = color_button_pad.findAll('div', class_='col-xs-3 textAlignCenter noSidePad radioGroup positionRelative')
            #     for color_button_number in range(1, len(color_buttons) + 1):
            #
            #         color = color_buttons[color_button_number - 1].text
            #         driver.find_element_by_xpath('//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[1]/div/div[' + str(
            #             color_button_number) + ']/div').click()
            #
            #         time.sleep(1)
            #         html = driver.page_source
            #         device_soup = BeautifulSoup(html, "html.parser")
            #
            #         shipment_text = device_soup.find("div", {"class": "col-xs-6 noSidePad inStoreAvail"}).span.text
            #
            #         print(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(), scraped_postpaid_price.date, scraped_postpaid_price.time)
            #         add_iphone_shipment_to_database(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(),scraped_postpaid_price.date, scraped_postpaid_price.time)
        except AttributeError:
            # page layout didn't match (no size selector found) — skip device
            pass
    driver.close()
def spr_scrape_postpaid_tablet_prices():
    """Scrape postpaid tablet prices and landing-page promos from sprint.com.

    Walks the tablet landing page, opens each in-scope device page in a fresh
    headless Chrome, iterates over the storage sizes in the drop-down, and
    records prices via add_postpaid_to_database() plus promos via
    add_scraped_promotions_to_database() / spr_scrape_postpaid_promotions().
    """
    # headless Chrome for the landing page
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.get(
        'https://www.sprint.com/en/shop/tablets.html?INTNAV=TopNav:Shop:Tablets&credit=A2&sort=FEATURED'
    )
    time.sleep(5)
    # get soup
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # BUGFIX: quit() (not close()) so the chromedriver process exits too
    driver.quit()
    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()
    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):
        # get device name text
        device_name = device_tile.find("h3", {
            "class": "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()
        # eliminate out of scope devices
        if device_name.find("linelink") != -1 or device_name.find("pre-owned") != -1 or device_name.find("flip") != -1 \
                or device_name.find("sim") != -1 or device_name.find("duraxtp") != -1 or device_name.find("duratr") != -1 \
                or device_name.find("xp strike") != -1 or device_name.find("certified") != -1:
            continue
        # device name
        scraped_postpaid_price.device = device_parser(device_name)
        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]
        # promo text for device landing page & add to database
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            # no promo span on this tile
            promo_text = ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)
        # go to device page in a fresh headless browser
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=chrome_driver)
        # BUGFIX: previously only the LAST per-device driver was quit, leaking
        # one Chrome/chromedriver pair per device (and one per 404 `continue`).
        try:
            driver.implicitly_wait(5)
            driver.get(scraped_postpaid_price.url)
            time.sleep(5)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")
            # if 404 error, skip this device
            site_title = device_soup.find_all("title")
            if '404' in str(site_title):
                print('404 Error: ' + scraped_postpaid_price.device)
                continue
            # click on drop down menu and record device sizes
            size_selector = driver.find_element_by_id(
                'sprint_storage_selector')
            size_selector.click()
            time.sleep(2)
            sizes = size_selector.text.strip().replace(' GB', '')
            sizes = sizes.split('\n')
            # iterate through sizes
            for size in sizes:
                # click on size and reload page
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")
                # record device size
                scraped_postpaid_price.storage = size
                # initialize price variables
                scraped_postpaid_price.monthly_price = '0.00'
                scraped_postpaid_price.retail_price = '0.00'
                scraped_postpaid_price.onetime_price = '0.00'
                # get prices — the leading space in the label strings matches
                # the site's markup exactly
                for label in device_soup.findAll(
                        'label', class_='soar-selection__label'):
                    if label.find(
                            'strong'
                    ).text == ' Buy it with 24 monthly installments':
                        monthly = label.findAll('span', class_='display-block')
                        scraped_postpaid_price.monthly_price = price_parser(
                            monthly[0].text.strip())
                        scraped_postpaid_price.onetime_price = price_parser(
                            monthly[1].text.strip())
                    if label.find('strong').text == ' Full price':
                        retail = label.findAll('span', class_='display-block')
                        scraped_postpaid_price.retail_price = price_parser(
                            retail[1].text.strip())
                # add to database (replacing today's row for this device/size)
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                spr_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)
        finally:
            # always release this device's browser before moving on
            driver.quit()