Code example #1
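All of the examples in this section share one set of imports and lean on project-specific helpers (ScrapedPromotion, ScrapedPostpaidPrice, add_scraped_promotions_to_database, and friends) that the listings never define. A minimal sketch of the imports the functions below assume, following the Selenium 3 / BeautifulSoup 4 conventions used throughout:

# shared imports assumed by every scraper below (sketch)
import datetime
import json
import os
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select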
def ver_scrape_homepage():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    # go to website
    driver.get("https://www.verizonwireless.com/")
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'verizon'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    # get main containers
    for section in soup.findAll('section', class_='container-fluid'):
        if section.get('id') != 'fourBoxPromo2':  # .get avoids KeyError on sections without an id
            deals_page_promo = section.text.strip().replace('\n', '')
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location, scraped_promotion.promo_text,
                scraped_promotion.url, scraped_promotion.date,
                scraped_promotion.time)

    # get deals in small tiles
    for div in soup.findAll('div', class_='swiper-promo-pannel'):
        if div.text.strip().replace('\n', '') != '':
            deals_page_promo = div.text.strip().replace('\n', '')
            scraped_promotion.promo_text = deals_page_promo
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location, scraped_promotion.promo_text,
                scraped_promotion.url, scraped_promotion.date,
                scraped_promotion.time)
    driver.quit()  # quit (not close) so the chromedriver process is released
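ScrapedPromotion is never defined in these listings, and several scrapers (this one included) print scraped_promotion.device_name without assigning it first, so the class presumably supplies defaults. A hypothetical definition consistent with how the attributes are used:

class ScrapedPromotion:
    # hypothetical sketch: a plain holder for one scraped promotion
    def __init__(self):
        self.provider = ''
        self.device_name = 'N/A'  # default, since some scrapers never set it
        self.device_storage = '0'
        self.promo_location = ''
        self.promo_text = ''
        self.url = ''
        self.date = None
        self.time = None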
Code example #2
def tmo_scrape_homepage():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'
    scraped_promotion.promo_location = 'homepage'

    for div in soup.findAll('div', class_='heroContent ng-scope'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage, scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url, scraped_promotion.date,
                                           scraped_promotion.time)
        try:
            see_more_link = div.find("div", {"class": "cta"}).a["href"]
            if see_more_link.startswith("/offers"):
                driver.get("https://www.t-mobile.com" + see_more_link)
                time.sleep(2)
                html = driver.page_source
                offer_page_soup = BeautifulSoup(html, "html.parser")
                first_faq = offer_page_soup.find("div", {"class": "panel-body"}).text.strip()
                scraped_promotion.promo_text = "FIRST FAQ: " + first_faq
                add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                                   scraped_promotion.device_storage, scraped_promotion.promo_location,
                                                   scraped_promotion.promo_text, scraped_promotion.url, scraped_promotion.date,
                                                   scraped_promotion.time)

        except (AttributeError, TypeError):  # missing "cta" div or missing link
            pass

    driver.quit()
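add_scraped_promotions_to_database takes the same eight positional fields at every call site. Its real implementation is not shown anywhere in this section; a hypothetical SQLite-backed version, just to make the call sites concrete (table and file names are assumptions):

import sqlite3

def add_scraped_promotions_to_database(provider, device_name, device_storage,
                                       promo_location, promo_text, url,
                                       date, time):
    # hypothetical sketch; the real storage layer is not shown
    conn = sqlite3.connect('promotions.db')
    conn.execute(
        'INSERT INTO scraped_promotions (provider, device_name, '
        'device_storage, promo_location, promo_text, url, date, time) '
        'VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
        (provider, device_name, device_storage, promo_location,
         promo_text, url, str(date), str(time)))
    conn.commit()
    conn.close()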
Code example #3
def xfi_scrape_homepage():
    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'xfinity'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = "https://www.xfinity.com/mobile/"

    # scrape json
    device_page = requests.get(
        'https://cdn.comcast.com/mobile-static/content/20180720-2015/variants/default/pages/landing.json'
    )
    device_soup = BeautifulSoup(device_page.text, 'html.parser')
    device_json = json.loads(device_soup.text)

    # carousel
    for actions in device_json["carousel"]["default"]:
        scraped_promotion.promo_text = actions["actions"]["popovers"][0][
            "data"]["body"]
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    # plan info
    for item in device_json["contentItems"]:
        try:
            scraped_promotion.promo_text = item["content"]["description"]
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location, scraped_promotion.promo_text,
                scraped_promotion.url, scraped_promotion.date,
                scraped_promotion.time)
        except KeyError:
            pass
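Routing the JSON response through BeautifulSoup and json.loads works, but requests can decode JSON directly; an equivalent, simpler fetch (same URL, assuming the endpoint still serves JSON):

device_page = requests.get(
    'https://cdn.comcast.com/mobile-static/content/20180720-2015/variants/default/pages/landing.json'
)
device_json = device_page.json()  # replaces the BeautifulSoup + json.loads round-trip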
Code example #4
def tmo_scrape_deals_page():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)

    # go to Deals url from homepage (since url could change)
    driver.find_element_by_link_text('DEALS').click()
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # hardcoded variables
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'
    scraped_promotion.promo_location = 'deals page'

    for div in soup.findAll('div', class_='section-content'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)
    driver.quit()
Code example #5
def met_scrape_homepage():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.metropcs.com')
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.provider = 'metropcs'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.device_storage = '0'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.url = driver.current_url
    scraped_promotion.promo_location = 'homepage'

    # get first banner
    for div in soup.findAll('div', class_='row'):
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    driver.quit()
Code example #6
def tmo_scrape_postpaid_promotions(driver, soup, url, device_name, device_storage):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'tmobile'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # upper banner text
    try:
        upper_banner_text = driver.find_element_by_id('promo-banner')
        promotions.append(['upper banner', upper_banner_text.text])
    except NoSuchElementException:
        upper_banner_text = ''

    # banner under device name text
    for div2 in soup.findAll("div", class_="text-magenta ng-scope"):
        promotions.append(['banner under device name', div2.text])

    # crossed out text (if savings is anything other than $0.00)
    strike_out_price = soup.findAll('span', class_='text-magenta ng-binding')
    if strike_out_price and strike_out_price[0].text != '($0.00 Savings)':
        promotions.append(['discount', strike_out_price[0].text])

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]

        # time variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage, scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url,
                                           scraped_promotion.date, scraped_promotion.time)
Code example #7
def cri_scrape_homepage():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.cricketwireless.com/')
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.provider = 'cricket'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    # get slideshow
    main = soup.find('div', class_='main')
    for div1 in main.findAll('div', class_='constrain-width-1024'):
        deals_page_promo = format_promo_text(div1.text)
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name, scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text, scraped_promotion.url,
              scraped_promotion.date, scraped_promotion.time)
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage, scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url, scraped_promotion.date,
                                           scraped_promotion.time)


    driver.quit()
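format_promo_text is another helper the listings never define. Judging from the inline .strip().replace('\n', '') pattern the other scrapers use for the same job, it is probably simple whitespace normalization; a hypothetical equivalent:

def format_promo_text(text):
    # hypothetical: collapse newlines and runs of whitespace into single spaces
    return ' '.join(text.split())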
Code example #8
def spr_scrape_postpaid_promotions(soup, url, device_name, device_storage):

    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'sprint'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # promotion text under price box
    price_boxes = soup.findAll('div', class_='col-xs-24 col-lg-24 col-xl-24 mb-20 active')
    for box in price_boxes:
        if box.find('strong', class_='display-block font-size-16 font-size-md-18 color--blue').text != ' Full price':
            for li in box.findAll('li', class_='promo-item'):
                promotions.append(['banner under Sprint Flex box', li.text.replace('Detail', '').strip()])

    # crossed out price
    strike_out_price = soup.findAll('del', class_='sprint-price-del')
    if strike_out_price and strike_out_price[0].text != '':
        promotions.append(['crossed out price', strike_out_price[0].text])

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]

        # time variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage,
                                           scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url,
                                           scraped_promotion.date, scraped_promotion.time)
Code example #9
def met_scrape_prepaid_promotions(soup, url, device_name, device_storage):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'metropcs'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # crossed out price
    try:
        crossed_out_price = soup.find('span', class_='normal-price')
        promotions.append([
            'crossed out price',
            crossed_out_price.text.strip().replace('\n', '').replace(
                '                                ', '.')
        ])
    except AttributeError:
        crossed_out_price = ''

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]

        # time variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        # add to database
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)
Code example #10
def xfi_scrape_prepaid_promotions(url, device_name, device_storage,
                                  description):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'xfinity'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url
    scraped_promotion.promo_text = description
    scraped_promotion.promo_location = 'description'

    # time variables
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()

    # add to database
    add_scraped_promotions_to_database(
        scraped_promotion.provider, scraped_promotion.device_name,
        scraped_promotion.device_storage, scraped_promotion.promo_location,
        scraped_promotion.promo_text, scraped_promotion.url,
        scraped_promotion.date, scraped_promotion.time)
Code example #11
def att_scrape_postpaid_promotions(soup, url, device_name, device_storage):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'att'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # upper banner text
    for span in soup.findAll("span", class_="offerTxt"):
        if span.text.strip() != '':
            upper_banner_text = span.text.strip()
            promotions.append(['upper banner', upper_banner_text])

    # lower banner text
    for div in soup.findAll("div", class_="ds2MarketingMessageTextStyle"):
        promotions.append(['lower banner', div.text])

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]

        # hardcoded variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)
Code example #12
def cri_scrape_prepaid_promotions(driver, url, device_name, device_storage):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'cricket'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # sale price
    try:
        sale_price = driver.find_element_by_xpath(
            '//*[@id="pricingWrapper"]/div[1]/div[1]')
        promotions.append([
            'sale price',
            sale_price.text.strip().replace('\n', '').replace(
                '                           ', '')
        ])
    except NoSuchElementException:
        sale_price = ''

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)
Code example #13
def att_scrape_homepage():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(6)

    # go to website
    driver.get('https://www.att.com/')
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.provider = 'att'
    scraped_promotion.promo_location = 'homepage'
    scraped_promotion.url = driver.current_url
    scraped_promotion.device_storage = '0'

    for slideshow in soup.findAll('div', class_='content-wrapper'):
        deals_page_promo = slideshow.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    for div in soup.findAll('div', class_='item-wrapper')[1:]:
        deals_page_promo = div.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)
        item_details = div.find("div", {"class": "legal"})
        legal_link = "https://www.att.com/" + item_details.a["data-ajaxdata"]
        driver.get(legal_link)
        time.sleep(2)
        html = driver.page_source
        legal_soup = BeautifulSoup(html, "html.parser")
        for legal_terms in legal_soup.body.findAll("div")[1:]:
            scraped_promotion.promo_text = "LEGAL TERMS: " + legal_terms.text.strip(
            )
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location, scraped_promotion.promo_text,
                scraped_promotion.url, scraped_promotion.date,
                scraped_promotion.time)

    for row in soup.findAll('div', class_='row no-flex'):
        deals_page_promo = row.text.strip().replace('\n', '')
        scraped_promotion.promo_text = deals_page_promo
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    driver.quit()
Code example #14
def tmo_scrape_postpaid_smartphone_prices():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.t-mobile.com/')
    time.sleep(5)

    # go to Phones url from homepage (since url could change)
    driver.find_element_by_link_text('PHONES').click()
    time.sleep(20)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # hardcoded variables
    scraped_postpaid_price.provider = 'tmobile'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    errors = []

    # get device name and url from device landing page
    for tariff in soup.findAll(
            'div', class_='tile col-lg-3 col-md-4 col-sm-6 col-xs-12'):

        device_contents = tariff.find(
            'a',
            class_='m-b-5 product-name text-center regular block ng-binding')
        scraped_postpaid_price.device = device_parser(device_contents.text)

        if scraped_postpaid_price.device.find('certified pre-owned') == -1 and \
                scraped_postpaid_price.device.find('linelink') == -1 and \
                scraped_postpaid_price.device.find('sim') == -1 and \
                scraped_postpaid_price.device.find('flip') == -1:

            scraped_postpaid_price.url = 'https://www.t-mobile.com/' + device_contents[
                'href']

            promo_text = tariff.find('div', class_='offerTextHeight').text
            if promo_text != '':
                add_scraped_promotions_to_database(
                    scraped_postpaid_price.provider,
                    scraped_postpaid_price.device, '0', 'device landing page',
                    promo_text, scraped_postpaid_price.url,
                    scraped_postpaid_price.date, scraped_postpaid_price.time)

            # go to individual page
            driver.get(scraped_postpaid_price.url)
            time.sleep(6)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")

            # try/except in case of no price (coming soon, etc.)
            try:

                # iterate through storage sizes
                for memory_button in device_soup.findAll('a',
                                                         class_='memory-btn'):

                    # record storage size and url
                    scraped_postpaid_price.storage = memory_button.text.replace(
                        'GB', '').strip()
                    scraped_postpaid_price.url = scraped_postpaid_price.url.split(
                        '?memory='
                    )[0] + '?memory=' + scraped_postpaid_price.storage + 'gb'
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    html = driver.page_source
                    device_soup = BeautifulSoup(html, "html.parser")

                    if len(device_soup.findAll('div',
                                               class_='price-lockup')) > 1:
                        downpayment_and_retail = device_soup.findAll(
                            'span',
                            class_='cost-price font-tele-ult ng-binding')
                        scraped_postpaid_price.onetime_price = downpayment_and_retail[
                            0].text
                        scraped_postpaid_price.retail_price = downpayment_and_retail[
                            1].text.replace(',', '')
                        scraped_postpaid_price.monthly_price = monthly_price_parser(
                            device_soup.find(
                                'p',
                                class_='small font-tele-nor m-t-10 ng-binding'
                            ).text)
                    else:
                        scraped_postpaid_price.onetime_price = device_soup.find(
                            'span',
                            class_='cost-price font-tele-ult ng-binding').text

                    # add to database
                    remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                              scraped_postpaid_price.device,
                                              scraped_postpaid_price.storage,
                                              scraped_postpaid_price.date)
                    add_postpaid_to_database(
                        scraped_postpaid_price.provider,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage,
                        scraped_postpaid_price.monthly_price,
                        scraped_postpaid_price.onetime_price,
                        scraped_postpaid_price.retail_price,
                        scraped_postpaid_price.contract_ufc,
                        scraped_postpaid_price.url,
                        scraped_postpaid_price.date,
                        scraped_postpaid_price.time)

                    tmo_scrape_postpaid_promotions(
                        driver, device_soup, scraped_postpaid_price.url,
                        scraped_postpaid_price.device,
                        scraped_postpaid_price.storage)

            except AttributeError:
                errors.append(scraped_postpaid_price.device)

    print("pricing errors: ", errors)
    driver.quit()
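ScrapedPostpaidPrice is the pricing counterpart of ScrapedPromotion and is likewise undefined here; contract_ufc is passed to add_postpaid_to_database without ever being assigned, so it too needs a default. A hypothetical sketch matching the attribute usage:

class ScrapedPostpaidPrice:
    # hypothetical sketch: a plain holder for one postpaid price observation
    def __init__(self):
        self.provider = ''
        self.device = ''
        self.storage = '0'
        self.monthly_price = '0.00'
        self.onetime_price = '0.00'
        self.retail_price = '0.00'
        self.contract_ufc = ''  # read but never set in these examples
        self.url = ''
        self.config_url = ''
        self.date = None
        self.time = None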
Code example #15
def att_scrape_postpaid_smartphone_prices():
    # Chrome setup (headless left disabled for this scraper)
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.att.com/shop/wireless/devices/cellphones.html')
    time.sleep(2)

    # check if all devices are shown on page
    devices_shown = driver.find_element_by_class_name(
        'deviceCount').text.split(' ')[-1]
    devices_total = driver.find_element_by_class_name('deviceSize').text
    if devices_shown != devices_total:
        # click 'Show All' button if it exists
        driver.find_element_by_id("deviceShowAllLink").click()

    # load page and get soup
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()
    scraped_postpaid_price.provider = 'att'

    # parse through device tiles
    for device in soup.findAll("div", class_="list-item"):

        device_contents = device.find("a", class_="titleURLchng")
        scraped_postpaid_price.device = brandparser(
            parser(device_contents.text)).lower()
        if scraped_postpaid_price.device.find("pre-owned") != -1 or scraped_postpaid_price.device.find("flip") != -1 or \
                scraped_postpaid_price.device.find("wireless") != -1 or scraped_postpaid_price.device.find("b470") != -1 or \
                scraped_postpaid_price.device.find("xp5s") != -1 or scraped_postpaid_price.device.find("duraxe") != -1 or \
                scraped_postpaid_price.device.find("certified") != -1:
            continue
        scraped_postpaid_price.url = 'https://www.att.com' + device_contents[
            'href']

        deal_landing_page_promo = device.findAll("div", class_="holidayFlag")
        if len(deal_landing_page_promo) == 2:
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider, scraped_postpaid_price.device,
                '0', 'device landing page',
                deal_landing_page_promo[1].img['title'],
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)

        # go to url and get storage size
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")

        # read size from size button that is in html even if it is not visible on page
        # iterate through each size
        button_number = 0
        for button in device_soup.findAll('button', class_='preSize'):

            # go back to base web page if there is more than one button
            if button_number > 0:
                driver.get(scraped_postpaid_price.url)
                time.sleep(3)

            device_storage = button.text.replace('GB', '').strip()
            if 'MB' in device_storage:
                device_storage = device_storage.replace('MB', '')
                device_storage = '{:.2f}'.format(int(device_storage) * 0.001)  # no space flag, or size_id below gets a stray space

            # set object's storage size
            scraped_postpaid_price.storage = device_storage
            size_id = 'size_' + scraped_postpaid_price.storage + 'GB'
            size = driver.find_element_by_id(size_id)

            # click on size that was recorded as storage if there is more than one storage size
            if len(device_soup.findAll('button', class_='preSize')) != 1:

                # if popup is there, click it and make it go away
                try:
                    size.click()
                except WebDriverException:
                    driver.find_element_by_xpath(
                        '//*[@id="acsMainInvite"]/a').click()
                    size.click()

                time.sleep(3)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

            # get promotions
            att_scrape_postpaid_promotions(device_soup,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.device,
                                           scraped_postpaid_price.storage)

            # # iphone shipment
            # if scraped_postpaid_price.device == "iphone xr" or scraped_postpaid_price.device == "iphone xs" or\
            #         scraped_postpaid_price.device == "iphone xs max":
            #     color_buttons = device_soup.findAll("button", {"ddh-color-and-data-capacity-item": "color"})
            #
            #     for color_button in color_buttons:
            #
            #         # get object's color
            #         color_id = 'color_' + color_button.text.strip()
            #         color = driver.find_element_by_id(color_id)
            #
            #         # if popup is there, click it and make it go away
            #         try:
            #             color.click()
            #         except WebDriverException:
            #             driver.find_element_by_xpath('//*[@id="acsMainInvite"]/a').click()
            #             color.click()
            #
            #         time.sleep(2)
            #         html = driver.page_source
            #         device_soup = BeautifulSoup(html, "html.parser")
            #
            #         shipment_text_outer = device_soup.find("div", {"class": "checkInstoreDeliveryIconSuccess ng-scope"})
            #         shipment_text = shipment_text_outer.find("span", {"class": "ng-binding"}).text
            #
            #         print(color_button.text.strip(), scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(), scraped_postpaid_price.date, scraped_postpaid_price.time)
            #         add_iphone_shipment_to_database(color_button.text.strip(), scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(),scraped_postpaid_price.date, scraped_postpaid_price.time)

            # get sku for correct url and config_url
            try:
                sku = device_soup.find(id='skuIDToDisplay').text.strip()
            except AttributeError:
                sku = 'sku' + device_soup.find('att-product-viewer')['skuid']

            # set url and config_url for object
            url = scraped_postpaid_price.url.split('=sku')[0] + '=sku' + sku
            config_url = 'https://www.att.com/shop/wireless/deviceconfigurator.html?prefetched=true&sku=' + sku
            scraped_postpaid_price.config_url = config_url
            scraped_postpaid_price.url = url

            # go to config_url and get prices
            driver.get(scraped_postpaid_price.config_url)
            time.sleep(5)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")
            if len(
                    device_soup.findAll(
                        'div',
                        class_='row-fluid-nowrap posRel margin-top-5')) > 1:
                for div in device_soup.findAll(
                        'div', class_='row-fluid-nowrap posRel margin-top-5'):
                    for span in div.findAll(
                            'span',
                            class_=
                            'text-xlarge margin-right-5 adjustLetterSpace ng-binding ng-scope'
                    ):
                        if span.text == 'AT&T Next Every Year℠':
                            contract_prices = div.findAll(
                                'div',
                                class_=
                                'attGray text-cramped text-xlarge text-nowrap pad-bottom-10'
                            )
                            scraped_postpaid_price.onetime_price = contract_prices[
                                0].text.replace("$", "").strip()
                            scraped_postpaid_price.monthly_price = contract_prices[
                                1].text.replace("$",
                                                "").replace("Monthly",
                                                            "").strip()
                        if span.text == 'No annual contract':
                            no_contract_prices = div.findAll(
                                'div',
                                class_=
                                'attGray text-cramped text-xlarge text-nowrap pad-bottom-10'
                            )
                            scraped_postpaid_price.retail_price = no_contract_prices[
                                0].text.replace(',', '').replace("$",
                                                                 "").strip()
            else:
                for div in device_soup.findAll(
                        'div', class_='row-fluid-nowrap posRel margin-top-5'):
                    for span in div.findAll(
                            'span',
                            class_=
                            'text-xlarge margin-right-5 adjustLetterSpace ng-binding ng-scope'
                    ):
                        if span.text == 'No annual contract':
                            no_contract_prices = div.findAll(
                                'div',
                                class_=
                                'attOrange text-cramped text-xlarge text-nowrap pad-bottom-10'
                            )
                            scraped_postpaid_price.retail_price = no_contract_prices[
                                0].text.replace("$", "").strip()

            remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                      scraped_postpaid_price.device,
                                      scraped_postpaid_price.storage,
                                      scraped_postpaid_price.date)
            add_postpaid_to_database(scraped_postpaid_price.provider,
                                     scraped_postpaid_price.device,
                                     scraped_postpaid_price.storage,
                                     scraped_postpaid_price.monthly_price,
                                     scraped_postpaid_price.onetime_price,
                                     scraped_postpaid_price.retail_price,
                                     scraped_postpaid_price.contract_ufc,
                                     scraped_postpaid_price.url,
                                     scraped_postpaid_price.date,
                                     scraped_postpaid_price.time)

            button_number += 1

    driver.quit()
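remove_postpaid_duplicate and add_postpaid_to_database pair up in every pricing scraper: the first clears any row already recorded for the same provider/device/storage/date so reruns do not duplicate, the second inserts the fresh observation. Hypothetical SQLite versions, mirroring the promotions helper sketched earlier:

import sqlite3

def remove_postpaid_duplicate(provider, device, storage, date):
    # hypothetical sketch: drop today's earlier row for this device/size
    conn = sqlite3.connect('promotions.db')
    conn.execute(
        'DELETE FROM postpaid_prices WHERE provider=? AND device=? '
        'AND storage=? AND date=?', (provider, device, storage, str(date)))
    conn.commit()
    conn.close()

def add_postpaid_to_database(provider, device, storage, monthly_price,
                             onetime_price, retail_price, contract_ufc,
                             url, date, time):
    # hypothetical sketch of the matching insert
    conn = sqlite3.connect('promotions.db')
    conn.execute(
        'INSERT INTO postpaid_prices (provider, device, storage, '
        'monthly_price, onetime_price, retail_price, contract_ufc, url, '
        'date, time) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
        (provider, device, storage, monthly_price, onetime_price,
         retail_price, contract_ufc, url, str(date), str(time)))
    conn.commit()
    conn.close()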
Code example #16
def spr_scrape_postpaid_smartphone_prices():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.get('https://www.sprint.com/en/shop/cell-phones.html')

    # get soup
    driver.get(
        "https://www.sprint.com/en/shop/cell-phones.html?INTNAV=TopNav:Shop:AllPhones&credit=A2&sort=FEATURED"
    )
    time.sleep(5)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    driver.quit()

    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class":
            "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if device_name.find("linelink") != -1 or device_name.find("pre-owned") != -1 or device_name.find("flip") != -1 \
                or device_name.find("xp5s") != -1 or device_name.find("duraxtp") != -1 or device_name.find("duratr") != -1 \
                or device_name.find("xp strike") != -1 or device_name.find("certified") != -1:
            continue

        # device name
        scraped_postpaid_price.device = device_parser(device_name)

        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]

        # promo text for device landing page
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            promo_text = ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)

        # go to url
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=chrome_driver)
        driver.implicitly_wait(5)
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")

        # if 404 or page error, skip this device
        site_title = device_soup.find_all("title")
        if '404' in str(site_title) or not site_title:
            print('404 Error or page error: ' + scraped_postpaid_price.device)
            continue

        else:

            # click on drop down menu and record device sizes
            size_selector = driver.find_element_by_id(
                'sprint_storage_selector')
            sizes = size_selector.text.strip().replace(' GB', '')
            sizes = sizes.split('\n')

            # iterate through sizes
            for size in sizes:

                # click on size and reload page
                size_selector.click()
                time.sleep(2)
                select = Select(
                    driver.find_element_by_id('sprint_storage_selector'))
                select.select_by_value(size)
                time.sleep(6)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

                # record device size
                scraped_postpaid_price.storage = size

                # # iphone shipment
                # if scraped_postpaid_price.device == "iphone xr" or scraped_postpaid_price.device == "iphone xs" or\
                #         scraped_postpaid_price.device == "iphone xs max":
                #
                #     # click on drop down menu and record device colors
                #     div_number = device_soup.find("div", {"data-color-set-storage-key": size})["data-color-set"]
                #     color_selector = driver.find_element_by_xpath(
                #         '/html/body/div[1]/article/div[3]/div[1]/div[1]/div[1]/div/div/div[2]/div[4]/div[1]/div/div['+str(div_number)+']/div/div/select')
                #
                #     color_selector.click()
                #     time.sleep(2)
                #     colors = color_selector.text.strip()
                #     colors = colors.split('\n')
                #
                #     # iterate through colors
                #     for color in colors:
                #         # click on size and reload page
                #         select = Select(driver.find_element_by_xpath(
                #             '/html/body/div[1]/article/div[3]/div[1]/div[1]/div[1]/div/div/div[2]/div[4]/div[1]/div/div['+str(div_number)+']/div/div/select'))
                #         select.select_by_visible_text(color)
                #         time.sleep(6)
                #         html = driver.page_source
                #         device_soup = BeautifulSoup(html, "html.parser")
                #
                #         shipment_text = device_soup.find("div", {"class": "col-xs-24 col-lg-auto mb-20 mb-lg-0"}).find("span", {"class": "font-medium"}).text.strip()
                #
                #         print(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(), scraped_postpaid_price.date, scraped_postpaid_price.time)
                #         add_iphone_shipment_to_database(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(),scraped_postpaid_price.date, scraped_postpaid_price.time)

                # record current url
                scraped_postpaid_price.url = driver.current_url

                # initialize price variables
                scraped_postpaid_price.monthly_price = '0.00'
                scraped_postpaid_price.retail_price = '0.00'
                scraped_postpaid_price.onetime_price = '0.00'

                # get prices
                for label in device_soup.findAll(
                        'label', class_='soar-selection__label'):
                    if label.find(
                            'strong').text == ' Sprint Flex 18-mo. lease':
                        monthly = label.findAll('span', class_='display-block')
                        scraped_postpaid_price.monthly_price = price_parser(
                            monthly[0].text.strip())
                        scraped_postpaid_price.onetime_price = price_parser(
                            monthly[1].text.strip())
                    if label.find('strong').text == ' Full price':
                        retail = label.findAll('span', class_='display-block')
                        scraped_postpaid_price.retail_price = price_parser(
                            retail[1].text.strip().replace(',', ''))

                # if page didn't load all the way
                if scraped_postpaid_price.onetime_price == '0.00' and scraped_postpaid_price.monthly_price == '0.00':

                    # close and reload page
                    driver.close()
                    chrome_options = Options()
                    chrome_options.add_argument("--headless")
                    chrome_options.add_argument("--window-size=1920x1080")
                    chrome_driver = os.getcwd() + "\\chromedriver.exe"
                    driver = webdriver.Chrome(chrome_options=chrome_options,
                                              executable_path=chrome_driver)
                    driver.get(scraped_postpaid_price.url)
                    time.sleep(5)
                    html = driver.page_source
                    device_soup = BeautifulSoup(html, "html.parser")

                    # get prices again
                    for label in device_soup.findAll(
                            'label', class_='soar-selection__label'):
                        if label.find(
                                'strong').text == ' Sprint Flex 18-mo. lease':
                            monthly = label.findAll('span',
                                                    class_='display-block')
                            scraped_postpaid_price.monthly_price = price_parser(
                                monthly[0].text.strip())
                            scraped_postpaid_price.onetime_price = price_parser(
                                monthly[1].text.strip())
                        if label.find('strong').text == ' Full price':
                            retail = label.findAll('span',
                                                   class_='display-block')
                            scraped_postpaid_price.retail_price = price_parser(
                                retail[1].text.strip().replace(',', ''))

                # add to database
                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)
                spr_scrape_postpaid_promotions(device_soup,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)

    driver.quit()
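price_parser and device_parser are also assumed rather than shown: price_parser evidently reduces a price string to a bare amount, and device_parser normalizes a scraped device name. Hypothetical minimal versions:

import re

def price_parser(text):
    # hypothetical: keep the first number ('$27.09/mo.' -> '27.09')
    match = re.search(r'\d+(?:\.\d+)?', text.replace(',', ''))
    return match.group(0) if match else '0.00'

def device_parser(text):
    # hypothetical: lowercase and collapse whitespace in the device name
    return ' '.join(text.split()).lower()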
Code example #17
def ver_scrape_postpaid_promotions(soup, driver, url, device_name, device_storage):
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'verizon'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # alternate way to get banner text
    upper_banner_text_2 = driver.find_element_by_class_name('clearfix')
    if upper_banner_text_2.text.strip() != '':
        promotions.append(['upper banner', upper_banner_text_2.text.strip()])

    # crossed out price
    pricing_options = soup.findAll('div', class_='pad8 noRightPad')
    for div in pricing_options:
        if 'was' in div.text:
            promotions.append(['crossed out price', div.text.replace('2-Year Contract', ' 2-Year Contract').replace('24 Monthly Payments',' 24 Monthly Payments').replace('was ', ' was')])

    # each payment option has its own banners
    for option in range(1, len(pricing_options) + 1):
        option_button = driver.find_element_by_xpath('//*[@id="tile_container"]/div[1]/div[3]/div[1]/div/div[2]/div/div/div[1]/div/div[' + str(option) + ']/div/div/div')

        # PAYMENT LEVEL
        # click on different payment options to show different promos
        # if popup is there, remove it before clicking
        try:
            option_button.click()
        except WebDriverException:
            driver.find_element_by_class_name('fsrCloseBtn').click()
            option_button.click()
        time.sleep(2)
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")

        # promotion text above device icon
        try:
            banner_above_icon = soup.find('div', class_='offersPad fontSize_12 lineHeight8')
            promotions.append(['banner above device icon', banner_above_icon.text.replace('Special Offer', '').replace('See the details', '').replace('\n', '')])
        except AttributeError:
            banner_above_icon = ''

        # banner under price (guard against the div being absent)
        below_price_div = soup.find('div', class_='row padTop6 noSideMargin priceLabel')
        if below_price_div is not None:
            below_price_banner = below_price_div.text
            if below_price_banner != 'Retail Price' and below_price_banner != 'Early Termination Fee: $175 (2-Year Contracts)':
                promotions.append(['banner below price', below_price_banner])

    # make object for each promo text instance
    for promo_instance in promotions:
        scraped_promotion.promo_location = promo_instance[0]
        scraped_promotion.promo_text = promo_instance[1]

        # hardcoded variables
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage, scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url,
                                           scraped_promotion.date, scraped_promotion.time)
Code example #18
def cri_scrape_deals_page():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get('https://www.cricketwireless.com/current-phone-and-plan-deals')
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_promotion = ScrapedPromotion()

    # set hardcoded variables
    scraped_promotion.provider = 'cricket'
    scraped_promotion.date = datetime.date.today()
    scraped_promotion.time = datetime.datetime.now().time()
    scraped_promotion.promo_location = 'deals page'
    scraped_promotion.device_name = 'N/A'
    scraped_promotion.device_storage = '0'
    scraped_promotion.url = driver.current_url

    # get big green promo banner
    for div in soup.findAll('div', class_='hero-promo hover-item'):
        deals_page_promo = format_promo_text(div.text)
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    # get other main promotions
    for div1 in soup.findAll('div', class_='promo-content-wrapper'):
        deals_page_promo = format_promo_text(div1.text)
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    # get promos at the bottom of the page
    for div2 in soup.findAll('div', class_='col-xs-9'):
        deals_page_promo = format_promo_text(div2.text)
        scraped_promotion.promo_text = deals_page_promo
        print(scraped_promotion.provider, scraped_promotion.device_name,
              scraped_promotion.device_storage,
              scraped_promotion.promo_location, scraped_promotion.promo_text,
              scraped_promotion.url, scraped_promotion.date,
              scraped_promotion.time)
        add_scraped_promotions_to_database(
            scraped_promotion.provider, scraped_promotion.device_name,
            scraped_promotion.device_storage, scraped_promotion.promo_location,
            scraped_promotion.promo_text, scraped_promotion.url,
            scraped_promotion.date, scraped_promotion.time)

    driver.quit()  # quit (not just close) so the chromedriver process also exits
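
cri_scrape_deals_page cleans each tile's text with a format_promo_text helper that is not shown. Judging by the inline .strip().replace('\n', '') chains the other scrapers use, a plausible sketch (the exact cleanup rules are an assumption):

def format_promo_text(raw_text):
    """Collapse newlines and runs of whitespace in scraped promo copy."""
    # mirrors the .strip().replace('\n', '') pattern used inline elsewhere,
    # and also collapses double spaces left behind by nested tags
    return ' '.join(raw_text.split())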
Code example #19
def ver_scrape_postpaid_tablet_prices():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get("https://www.verizonwireless.com/tablets/")
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # hardcoded variables
    scraped_postpaid_price.provider = 'verizon'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    # get device names and links from the tablet landing page
    for device in soup.findAll(
            'div',
            class_=
            'cursorPointer pad15 onlySidePad tile background_supporting border_CC'
    ):

        device_contents = device.find('a')
        scraped_postpaid_price.device = brandparser(device_contents.text)
        if scraped_postpaid_price.device.find("pre-owned") != -1:
            continue
        scraped_postpaid_price.url = 'https://www.verizonwireless.com' + device_contents[
            "href"]

        promo_text = device.find('div', class_='offer-text').text
        if promo_text != '':
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider, scraped_postpaid_price.device,
                '0', 'device landing page', promo_text,
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)

        # go to url
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")
        site_title = device_soup.find_all("title")
        # str() is needed here: a bs4 Tag never compares equal to a plain string
        if not site_title or str(site_title[0]) == "<title></title>":
            continue

        else:
            # select each device size
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_=
                'grow1basis0 priceSelectorColumn radioGroup positionRelative')
            for size_button_number in range(1, len(size_buttons) + 1):

                # record new device size
                scraped_postpaid_price.storage = size_buttons[
                    size_button_number - 1].text.replace('GB', '')

                # remove popup before clicking
                try:
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                except WebDriverException:
                    driver.find_element_by_link_text('×').click()
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()

                # click on different storage size to show device size-specific promos
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-3].text)
                scraped_postpaid_price.contract_ufc = contract_ufc_parser(
                    values_list[-2].text.replace(',', ''))
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))

                # remove the storage size from the device name if present
                if scraped_postpaid_price.storage + 'gb' in scraped_postpaid_price.device:
                    scraped_postpaid_price.device = scraped_postpaid_price.device.replace(
                        scraped_postpaid_price.storage + 'gb', '').strip()

                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)

                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)

    driver.quit()
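
The tablet scraper relies on brandparser to normalize device names and on three parser helpers to pull dollar amounts out of the 'sizePad' strings. None of them are shown; the sketch below assumes each input string contains at most one price, which matches how the call sites pre-clean the text.

import re

_PRICE_RE = re.compile(r'\$?(\d[\d,]*(?:\.\d{2})?)')


def _first_price(text):
    """Return the first dollar amount in `text`, or '0.00' if none is found."""
    match = _PRICE_RE.search(text)
    return match.group(1).replace(',', '') if match else '0.00'


def monthly_price_parser(text):
    # e.g. '$31.24/mo for 24 months' -> '31.24' (example format assumed)
    return _first_price(text)


def contract_ufc_parser(text):
    # e.g. '$199.99 2-Year Contract' -> '199.99'
    return _first_price(text)


def retail_price_parser(text):
    # e.g. '$749.99 Retail Price' -> '749.99'
    return _first_price(text)


def brandparser(raw_name):
    """Normalize a tile heading to a lowercase device name (assumed behavior)."""
    return ' '.join(raw_name.split()).lower()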
Code example #20
def ver_scrape_postpaid_smartphone_prices():
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.implicitly_wait(5)

    # go to website
    driver.get("https://www.verizonwireless.com/smartphones/")
    time.sleep(10)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # make object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # hardcoded variables
    scraped_postpaid_price.provider = 'verizon'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    for device in soup.findAll(
            'div',
            class_=
            'cursorPointer pad15 onlySidePad tile background_supporting border_CC'
    ):

        device_contents = device.find('a')
        scraped_postpaid_price.device = brandparser(device_contents.text)
        if scraped_postpaid_price.device.find("pre-owned") != -1:
            continue
        scraped_postpaid_price.url = 'https://www.verizonwireless.com' + device_contents[
            "href"]

        promo_text = device.find('div', class_='offer-text').text
        if promo_text != '':
            add_scraped_promotions_to_database(
                scraped_postpaid_price.provider, scraped_postpaid_price.device,
                '0', 'device landing page', promo_text,
                scraped_postpaid_price.url, scraped_postpaid_price.date,
                scraped_postpaid_price.time)

        # go to url
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")

        # select each device size
        try:
            size_button_pad = device_soup.find(
                'div', class_='displayFlex rowNoWrap priceSelectorRow')
            size_buttons = size_button_pad.findAll(
                'div',
                class_=
                'grow1basis0 priceSelectorColumn radioGroup positionRelative')
            for size_button_number in range(1, len(size_buttons) + 1):
                # record new device size
                scraped_postpaid_price.storage = size_buttons[
                    size_button_number - 1].text.replace('GB', '')

                # remove popup before clicking
                try:
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()
                except WebDriverException:
                    driver.find_element_by_class_name('fsrCloseBtn').click()
                    driver.find_element_by_xpath(
                        '//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div[2]/div/div['
                        + str(size_button_number) + ']/div/div/p').click()

                # click on different storage size to show device size-specific promos
                time.sleep(2)
                html = driver.page_source
                device_soup = BeautifulSoup(html, "html.parser")

                values_list = device_soup.findAll('div', class_='sizePad')
                scraped_postpaid_price.monthly_price = monthly_price_parser(
                    values_list[-2].text)
                scraped_postpaid_price.retail_price = retail_price_parser(
                    values_list[-1].text.replace(',', ''))

                remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                          scraped_postpaid_price.device,
                                          scraped_postpaid_price.storage,
                                          scraped_postpaid_price.date)
                add_postpaid_to_database(scraped_postpaid_price.provider,
                                         scraped_postpaid_price.device,
                                         scraped_postpaid_price.storage,
                                         scraped_postpaid_price.monthly_price,
                                         scraped_postpaid_price.onetime_price,
                                         scraped_postpaid_price.retail_price,
                                         scraped_postpaid_price.contract_ufc,
                                         scraped_postpaid_price.url,
                                         scraped_postpaid_price.date,
                                         scraped_postpaid_price.time)

                ver_scrape_postpaid_promotions(device_soup, driver,
                                               scraped_postpaid_price.url,
                                               scraped_postpaid_price.device,
                                               scraped_postpaid_price.storage)

                # # iphone shipment
                # if scraped_postpaid_price.device == "iphone xr" or scraped_postpaid_price.device == "iphone xs" or\
                #         scraped_postpaid_price.device == "iphone xs max":
                #
                #     color_button_pad = device_soup.find('div', class_='flex width100p pad15 onlyBottomPad')
                #     color_buttons = color_button_pad.findAll('div', class_='col-xs-3 textAlignCenter noSidePad radioGroup positionRelative')
                #     for color_button_number in range(1, len(color_buttons) + 1):
                #
                #         color = color_buttons[color_button_number - 1].text
                #         driver.find_element_by_xpath('//*[@id="tile_container"]/div[1]/div[2]/div/div/div[2]/div/div/div[1]/div/div[' + str(
                #                     color_button_number) + ']/div').click()
                #
                #         time.sleep(1)
                #         html = driver.page_source
                #         device_soup = BeautifulSoup(html, "html.parser")
                #
                #         shipment_text = device_soup.find("div", {"class": "col-xs-6 noSidePad inStoreAvail"}).span.text
                #
                #         print(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(), scraped_postpaid_price.date, scraped_postpaid_price.time)
                #         add_iphone_shipment_to_database(color, scraped_postpaid_price.device, scraped_postpaid_price.storage, scraped_postpaid_price.provider, shipment_text.strip(),scraped_postpaid_price.date, scraped_postpaid_price.time)

        except AttributeError:
            pass

    driver.quit()
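
Both Verizon price scrapers also depend on a ScrapedPostpaidPrice container plus remove_postpaid_duplicate and add_postpaid_to_database. Below is a sketch consistent with how the fields are used; the '0.00' defaults matter because the smartphone scraper never sets onetime_price or contract_ufc before passing them to the database helper. As before, the sqlite3 schema is an assumption.

import sqlite3

_POSTPAID_SCHEMA = ('CREATE TABLE IF NOT EXISTS postpaid_prices '
                    '(provider TEXT, device TEXT, storage TEXT, monthly TEXT, '
                    'onetime TEXT, retail TEXT, contract_ufc TEXT, url TEXT, '
                    'date TEXT, time TEXT)')


class ScrapedPostpaidPrice:
    """Container for one device/storage price row (fields inferred from usage)."""

    def __init__(self):
        self.provider = ''
        self.device = ''
        self.storage = '0'
        self.monthly_price = '0.00'
        self.onetime_price = '0.00'
        self.retail_price = '0.00'
        self.contract_ufc = '0.00'
        self.url = ''
        self.date = None
        self.time = None


def remove_postpaid_duplicate(provider, device, storage, date):
    # hypothetical: drop any row already scraped today for this device/storage
    conn = sqlite3.connect('promotions.db')
    conn.execute(_POSTPAID_SCHEMA)
    conn.execute(
        'DELETE FROM postpaid_prices '
        'WHERE provider=? AND device=? AND storage=? AND date=?',
        (provider, device, storage, str(date)))
    conn.commit()
    conn.close()


def add_postpaid_to_database(provider, device, storage, monthly_price,
                             onetime_price, retail_price, contract_ufc, url,
                             date, time):
    conn = sqlite3.connect('promotions.db')
    conn.execute(_POSTPAID_SCHEMA)
    conn.execute(
        'INSERT INTO postpaid_prices VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
        (provider, device, storage, monthly_price, onetime_price,
         retail_price, contract_ufc, url, str(date), str(time)))
    conn.commit()
    conn.close()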
Code example #21
def spr_scrape_postpaid_tablet_prices():
    # go to website
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.getcwd() + "\\chromedriver.exe"
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)
    driver.get(
        'https://www.sprint.com/en/shop/tablets.html?INTNAV=TopNav:Shop:Tablets&credit=A2&sort=FEATURED'
    )
    time.sleep(5)

    # get soup
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    driver.quit()

    # make scraper object
    scraped_postpaid_price = ScrapedPostpaidPrice()

    # set hardcoded variables
    scraped_postpaid_price.provider = 'sprint'
    scraped_postpaid_price.date = datetime.date.today()
    scraped_postpaid_price.time = datetime.datetime.now().time()

    # iterate through devices on landing page
    for device_tile in soup.findAll(
            'li',
            class_='col-xs-24 col-sm-12 col-lg-8 text-center device-tile'):

        # get device name text
        device_name = device_tile.find("h3", {
            "class":
            "font-size-18 line-height-24 font-normal my-0 align-left"
        }).text.strip().lower()

        # eliminate out of scope devices
        if device_name.find("linelink") != -1 or device_name.find("pre-owned") != -1 or device_name.find("flip") != -1 \
                or device_name.find("sim") != -1 or device_name.find("duraxtp") != -1 or device_name.find("duratr") != -1 \
                or device_name.find("xp strike") != -1 or device_name.find("certified") != -1:
            continue

        # device name
        scraped_postpaid_price.device = device_parser(device_name)

        # url
        scraped_postpaid_price.url = "https://www.sprint.com" + device_tile.find(
            "a")["href"]

        # promo text for device landing page & add to database
        try:
            promo_text = device_tile.find("span", {
                "class": "color--purple font-size-14"
            }).text.strip()
        except AttributeError:
            promo_text = ''
        add_scraped_promotions_to_database(scraped_postpaid_price.provider,
                                           scraped_postpaid_price.device, '0',
                                           'device landing page', promo_text,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.date,
                                           scraped_postpaid_price.time)

        # go to url
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_driver = os.getcwd() + "\\chromedriver.exe"
        driver = webdriver.Chrome(chrome_options=chrome_options,
                                  executable_path=chrome_driver)
        driver.implicitly_wait(5)
        driver.get(scraped_postpaid_price.url)
        time.sleep(5)
        html = driver.page_source
        device_soup = BeautifulSoup(html, "html.parser")

        # if a 404 page comes back, skip this device
        site_title = device_soup.find_all("title")
        if '404' in str(site_title):
            print('404 Error: ' + scraped_postpaid_price.device)
            continue

        # click on drop down menu and record device sizes
        size_selector = driver.find_element_by_id('sprint_storage_selector')
        size_selector.click()
        time.sleep(2)
        sizes = size_selector.text.strip().replace(' GB', '')
        sizes = sizes.split('\n')

        # iterate through sizes
        for size in sizes:

            # click on size and reload page
            select = Select(
                driver.find_element_by_id('sprint_storage_selector'))
            select.select_by_value(size)
            time.sleep(2)
            html = driver.page_source
            device_soup = BeautifulSoup(html, "html.parser")

            # record device size
            scraped_postpaid_price.storage = size

            # initialize price variables
            scraped_postpaid_price.monthly_price = '0.00'
            scraped_postpaid_price.retail_price = '0.00'
            scraped_postpaid_price.onetime_price = '0.00'

            # get prices
            for label in device_soup.findAll('label',
                                             class_='soar-selection__label'):
                if label.find('strong').text == ' Buy it with 24 monthly installments':
                    monthly = label.findAll('span', class_='display-block')
                    scraped_postpaid_price.monthly_price = price_parser(
                        monthly[0].text.strip())
                    scraped_postpaid_price.onetime_price = price_parser(
                        monthly[1].text.strip())
                if label.find('strong').text == ' Full price':
                    retail = label.findAll('span', class_='display-block')
                    scraped_postpaid_price.retail_price = price_parser(
                        retail[1].text.strip())

            # add to database
            remove_postpaid_duplicate(scraped_postpaid_price.provider,
                                      scraped_postpaid_price.device,
                                      scraped_postpaid_price.storage,
                                      scraped_postpaid_price.date)
            add_postpaid_to_database(scraped_postpaid_price.provider,
                                     scraped_postpaid_price.device,
                                     scraped_postpaid_price.storage,
                                     scraped_postpaid_price.monthly_price,
                                     scraped_postpaid_price.onetime_price,
                                     scraped_postpaid_price.retail_price,
                                     scraped_postpaid_price.contract_ufc,
                                     scraped_postpaid_price.url,
                                     scraped_postpaid_price.date,
                                     scraped_postpaid_price.time)
            spr_scrape_postpaid_promotions(device_soup,
                                           scraped_postpaid_price.url,
                                           scraped_postpaid_price.device,
                                           scraped_postpaid_price.storage)

        # quit this device's driver before moving to the next tile; the loop
        # opens a new headless Chrome per device, and quitting only once at
        # the end of the function would leak one process per device
        driver.quit()
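
The Sprint scraper uses Select (imported from selenium.webdriver.support.ui) to drive the storage dropdown, and two more undefined helpers, device_parser and price_parser. A hedged sketch consistent with the call sites, where price_parser receives strings like '$31.25/mo.' (the exact formats are assumptions):

import re

from selenium.webdriver.support.ui import Select  # as used in the example above


def device_parser(heading_text):
    """Trim a Sprint tile heading to a bare device name (assumed behavior)."""
    # the call site already lowercases and strips, so just collapse whitespace
    return ' '.join(heading_text.split())


def price_parser(text):
    """Pull the first dollar amount out of strings like '$31.25/mo.'."""
    match = re.search(r'\$?(\d[\d,]*(?:\.\d{2})?)', text)
    return match.group(1).replace(',', '') if match else '0.00'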