def spr_scrape_deals_page():
    """Scrape promotion text from Sprint's deals page and store each hit.

    Launches headless Chrome, loads the Sprint offers page, and records one
    ScrapedPromotion row per promo bucket found on the page.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    # chromedriver.exe is expected in the current working directory
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    try:
        # go to website
        driver.get('https://www.sprint.com/en/shop/offers.html')
        time.sleep(5)  # let the dynamically-rendered promos finish loading
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        # make promotions object
        scraped_promotion = ScrapedPromotion()
        # hard coded variables
        scraped_promotion.provider = 'sprint'
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # deals page is not device-specific; 'N/A'/'0' match the other
        # deals-page scrapers in this module (was previously left unset,
        # which raised AttributeError at print/store time)
        scraped_promotion.device_name = 'N/A'
        scraped_promotion.device_storage = '0'
        scraped_promotion.promo_location = 'deals page'
        scraped_promotion.url = driver.current_url
        for div in soup.findAll('div', class_='ui-bucket__content'):
            deals_page_promo = div.text.strip().replace('\n', '')
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)
    finally:
        # always shut the browser (and chromedriver) down, even on error
        driver.quit()
def met_scrape_prepaid_promotins(soup, url, device_name, device_storage):
    """Record MetroPCS prepaid promotions found on one device page.

    Args:
        soup: BeautifulSoup of an already-fetched device page.
        url: page URL, stored alongside each promotion.
        device_name: device identifier determined by the caller.
        device_storage: storage-size string determined by the caller.
    """
    # make object
    scraped_promotion = ScrapedPromotion()
    # set variables already determined
    scraped_promotion.provider = 'metropcs'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url
    # list of [location, text] pairs found on the page
    promotions = []
    # crossed out price; soup.find() returns None when absent, so the
    # .text access raises AttributeError -- that simply means no promo price
    try:
        crossed_out_price = soup.find('span', class_='normal-price')
        promotions.append([
            'crossed out price',
            crossed_out_price.text.strip().replace('\n', '').replace(
                ' ', '.')
        ])
    except AttributeError:
        pass  # no crossed-out price on this page (was a dead '' assignment)
    # make object for each promo text instance
    for promo_location, promo_text in promotions:
        scraped_promotion.promo_location = promo_location
        scraped_promotion.promo_text = promo_text
        # time variables: timestamp each row at store time
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # add to database
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
def xfi_scrape_prepaid_promotins(url, device_name, device_storage, description):
    """Store one Xfinity prepaid promotion taken from a device description.

    All promotion data is supplied by the caller; this function only fills
    the record, timestamps it, and persists it.
    """
    # build the record directly -- everything is already known
    promo = ScrapedPromotion()
    promo.provider = 'xfinity'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url
    promo.promo_text = description
    promo.promo_location = 'description'
    # timestamp the observation
    promo.date = datetime.date.today()
    promo.time = datetime.datetime.now().time()
    # persist the single row
    add_scraped_promotions_to_database(
        promo.provider, promo.device_name, promo.device_storage,
        promo.promo_location, promo.promo_text, promo.url,
        promo.date, promo.time)
def att_scrape_postpaid_promotions(soup, url, device_name, device_storage):
    """Record AT&T postpaid promotions visible on one device page.

    Collects upper-banner and lower-banner promo texts from the parsed page
    and stores one row per promotion found.
    """
    record = ScrapedPromotion()
    record.provider = 'att'
    record.device_name = device_name
    record.device_storage = device_storage
    record.url = url
    # gather (location, text) pairs before writing anything
    found = []
    # upper banner text -- skip empty spans
    for span in soup.findAll("span", class_="offerTxt"):
        banner = span.text.strip()
        if banner != '':
            found.append(['upper banner', banner])
    # lower banner text
    for div in soup.findAll("div", class_="ds2MarketingMessageTextStyle"):
        found.append(['lower banner', div.text])
    # write one database row per promotion, timestamped at store time
    for location, text in found:
        record.promo_location = location
        record.promo_text = text
        record.date = datetime.date.today()
        record.time = datetime.datetime.now().time()
        add_scraped_promotions_to_database(
            record.provider, record.device_name, record.device_storage,
            record.promo_location, record.promo_text, record.url,
            record.date, record.time)
def cri_scrape_prepaid_promotions(driver, url, device_name, device_storage):
    """Record Cricket prepaid promotions from an already-loaded device page.

    Args:
        driver: Selenium WebDriver currently showing the device page.
        url: page URL, stored alongside each promotion.
        device_name: device identifier determined by the caller.
        device_storage: storage-size string determined by the caller.
    """
    # make object
    scraped_promotion = ScrapedPromotion()
    # set variables already determined
    scraped_promotion.provider = 'cricket'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url
    # list of [location, text] pairs found on the page
    promotions = []
    # sale price; absent element means the device is not on sale
    try:
        sale_price = driver.find_element_by_xpath(
            '//*[@id="pricingWrapper"]/div[1]/div[1]')
        promotions.append([
            'sale price',
            sale_price.text.strip().replace('\n', '').replace(' ', '')
        ])
    except NoSuchElementException:
        pass  # no sale-price element (was a dead '' assignment)
    # make object for each promo text instance
    for promo_location, promo_text in promotions:
        scraped_promotion.promo_location = promo_location
        scraped_promotion.promo_text = promo_text
        # timestamp each row at store time
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
def att_scrape_homepage():
    """Scrape promotion text from the AT&T homepage and store each hit.

    Records slideshow banners, item tiles (plus each tile's legal terms,
    fetched from its ajax link), and the bottom row promos.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(6)
    # go to website
    driver.get('https://www.att.com/')
    time.sleep(10)  # homepage is heavy; give the carousel time to render
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_promotion = ScrapedPromotion()
    # set hardcoded variables
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.provider = 'att'
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.url = driver.current_url
    # homepage promos are not device-specific; 'N/A'/'0' match the other
    # deals-page scrapers (device_name was previously left unset, which
    # raised AttributeError when storing)
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    # slideshow / carousel banners
    for slideshow in soup.findAll('div', class_='content-wrapper'):
        deals_page_promo = slideshow.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
    # item tiles; the first wrapper is not a promotion, so skip it
    for div in soup.findAll('div', class_='item-wrapper')[1:]:
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
        # each tile links to its legal terms via an ajax data attribute
        item_details = div.find("div", {"class": "legal"})
        legal_link = "https://www.att.com/" + item_details.a["data-ajaxdata"]
        driver.get(legal_link)
        time.sleep(2)
        html = driver.page_source
        legal_soup = BeautifulSoup(html, "html.parser")
        # skip the first div (page chrome); store each legal-terms block
        for legal_terms in legal_soup.body.findAll("div")[1:]:
            scraped_promotion.promo_text = ("LEGAL TERMS: "
                                            + legal_terms.text.strip())
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)
    # bottom-of-page row promos
    for row in soup.findAll('div', class_='row no-flex'):
        deals_page_promo = row.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
    driver.quit()
def met_scrape_deals_page():
    """Scrape promotion text from the MetroPCS deals page and store each hit.

    Records the top banner plus the alt-text of every promo tile image.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    # go to website
    driver.get('https://www.metropcs.com/shop/deals')
    time.sleep(5)  # allow the dynamic deal tiles to render
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_promotion = ScrapedPromotion()
    # set hardcoded variables
    scraped_promotion.provider = 'metropcs'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.device_storage = '0'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.url = driver.current_url
    scraped_promotion.promo_location = 'deals page'
    # get first banner
    for div in soup.findAll(
            'div', class_='col-md-12 col-xs-12 p-t-30-lg p-t-10-md text-left'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location,
              scraped_promotion.promo_text, scraped_promotion.url,
              scraped_promotion.date, scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)
    # get promotions: each tile's promo text lives in the image alt text
    for div in soup.findAll('div', class_=' col-xs-12 col-sm-6'):
        for div1 in div.findAll('div', class_='m-b-10'):
            deals_page_promo = div1.a.img['alt']
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)
    # quit (not close) so the chromedriver process is shut down too,
    # matching the other scrapers in this module
    driver.quit()
def ver_scrape_postpaid_promotions(soup, driver, url, device_name, device_storage):
    """Record Verizon postpaid promotions from an already-loaded device page.

    Clicks through each payment option on the live page (via *driver*) to
    expose per-option banners, collecting [location, text] pairs, then
    stores one row per promotion found.
    """
    # make object
    scraped_promotion = ScrapedPromotion()
    # set variables already determined
    scraped_promotion.provider = 'verizon'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url
    # make empty list of promotions ([location, text] pairs)
    promotions = []
    # alternate way to get banner text
    upper_banner_text_2 = driver.find_element_by_class_name('clearfix')
    if upper_banner_text_2.text.strip() != '':
        promotions.append(['upper banner', upper_banner_text_2.text.strip()])
    # crossed out price: the replaces re-insert spacing lost when the
    # page text nodes are concatenated
    pricing_options = soup.findAll('div', class_='pad8 noRightPad')
    for div in pricing_options:
        if 'was' in div.text:
            promotions.append(['crossed out price',
                               div.text.replace('2-Year Contract',
                                                ' 2-Year Contract')
                               .replace('24 Monthly Payments',
                                        ' 24 Monthly Payments')
                               .replace('was ', ' was')])
    # each payment option has its own banners; option buttons are 1-indexed
    # in the xpath below
    for option in range(1, len(pricing_options) + 1):
        option_button = driver.find_element_by_xpath(
            '//*[@id="tile_container"]/div[1]/div[3]/div[1]/div/div[2]/div/div/div[1]/div/div['
            + str(option) + ']/div/div/div')
        # PAYMENT LEVEL
        # click on different payment options to show different promos
        # if popup is there, remove it before clicking (the feedback popup
        # intercepts the click and raises WebDriverException)
        try:
            option_button.click()
        except WebDriverException:
            driver.find_element_by_class_name('fsrCloseBtn').click()
            option_button.click()
        time.sleep(2)
        # re-parse the page after the click changed its contents
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        # promotion text above device icon; AttributeError means
        # soup.find() returned None (no such banner for this option)
        try:
            banner_above_icon = soup.find(
                'div', class_='offersPad fontSize_12 lineHeight8')
            promotions.append(['banner above device icon',
                               banner_above_icon.text
                               .replace('Special Offer', '')
                               .replace('See the details', '')
                               .replace('\n', '')])
        except AttributeError:
            banner_above_icon = ''
        # banner under price: only keep it when it is an actual promo,
        # not the standard price/fee labels
        below_price_banner = soup.find(
            'div', class_='row padTop6 noSideMargin priceLabel').text
        if below_price_banner != 'Retail Price' and below_price_banner != 'Early Termination Fee: $175 (2-Year Contracts)':
            promotions.append(['banner below price', below_price_banner])
    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]
        # hardcoded variables: timestamp each row at store time
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider,
                                           scraped_promotion.device_name,
                                           scraped_promotion.device_storage,
                                           scraped_promotion.promo_location,
                                           scraped_promotion.promo_text,
                                           scraped_promotion.url,
                                           scraped_promotion.date,
                                           scraped_promotion.time)
def cri_scrape_deals_page():
    """Scrape the Cricket deals page and store every promotion found.

    Records the hero banner, the main promo tiles, and the bottom-of-page
    promos, one ScrapedPromotion row each.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)
    # go to website
    driver.get('https://www.cricketwireless.com/current-phone-and-plan-deals')
    time.sleep(3)  # let the promo sections render
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    # make object
    scraped_promotion = ScrapedPromotion()
    # set hardcoded variables
    scraped_promotion.provider = 'cricket'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'deals page'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    def _record(raw_text):
        # print for run visibility, then persist one row
        scraped_promotion.promo_text = format_promo_text(raw_text)
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location,
              scraped_promotion.promo_text, scraped_promotion.url,
              scraped_promotion.date, scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage,
            scraped_promotion.promo_location, scraped_promotion.promo_text,
            scraped_promotion.url, scraped_promotion.date,
            scraped_promotion.time)

    # get big green promo banner
    for div in soup.findAll('div', class_='hero-promo hover-item'):
        _record(div.text)
    # get other main promotions
    for div1 in soup.findAll('div', class_='promo-content-wrapper'):
        _record(div1.text)
    # get promos at the bottom of the page
    for div2 in soup.findAll('div', class_='col-xs-9'):
        _record(div2.text)
    # quit (not close) so the chromedriver process is shut down too,
    # matching the other scrapers in this module
    driver.quit()