Exemple #1
0
def run_process(browser, page_number=1):
    """Scrape one page through an already-created browser.

    Args:
        browser: Driver object handed to ``connect_to_base``; its
            ``page_source`` attribute is read after a successful connect.
        page_number: Page index forwarded to ``connect_to_base``
            (defaults to 1).

    Returns:
        Whatever ``parse_html`` returns for the page source, or ``False``
        when ``connect_to_base`` reports failure with a falsy result.
    """
    # Guard clause: nothing to scrape if the connection did not succeed.
    if not connect_to_base(browser, page_number):
        return False

    print(f'Scraping page {page_number}...')
    # Fixed two-second pause before reading the page source
    # (presumably to let the page finish rendering -- TODO confirm).
    sleep(2)
    return parse_html(browser.page_source)
def run_process(page_number, filename, browser):
    """Scrape one page with the given browser and write the result out.

    Args:
        page_number: Page index forwarded to ``connect_to_base``.
        filename: Destination passed as the second argument to
            ``write_to_file``.
        browser: Driver object handed to ``connect_to_base``; its
            ``page_source`` attribute is read after a successful connect.

    Returns:
        None. On a failed connection an error message is printed instead
        of writing anything.
    """
    # Guard clause: report the failure and bail out early.
    if not connect_to_base(browser, page_number):
        print("Error connecting to hacker news")
        return

    # Fixed two-second pause before reading the page source
    # (presumably to let the page finish rendering -- TODO confirm).
    sleep(2)
    write_to_file(parse_html(browser.page_source), filename)
Exemple #3
0
def run_process(browser):
    """Scrape one page through an already-created browser.

    The parameter was previously misspelled ``rowser`` while the body read
    ``browser``, so the argument was ignored and the call depended on a
    global of that name (raising ``NameError`` when none existed).
    Positional callers are unaffected by the rename.

    Args:
        browser: Driver object handed to ``connect_to_base``; its
            ``page_source`` attribute is read after a successful connect.

    Returns:
        Whatever ``parse_html`` returns for the page source, or ``False``
        when ``connect_to_base`` reports failure with a falsy result.
    """
    if connect_to_base(browser):
        print('Scraping random Wikipedia page...')
        # Fixed two-second pause before reading the page source
        # (presumably to let the page finish rendering -- TODO confirm).
        sleep(2)
        html = browser.page_source
        return parse_html(html)

    print("Error connecting to Wikipedia")
    return False
Exemple #4
0
def run_process(page_number, filename):
    """Create a browser, scrape one page, write the result, and quit.

    Args:
        page_number: Page index forwarded to ``connect_to_base``.
        filename: Destination passed as the second argument to
            ``write_to_file``.

    Returns:
        None. On a failed connection an error message is printed instead
        of writing anything.
    """
    browser = get_driver()
    # try/finally guarantees the browser is quit exactly once, including
    # when connecting, parsing or writing raises; previously an exception
    # skipped both quit() calls and left the driver running.
    try:
        if connect_to_base(browser, page_number):
            # Fixed two-second pause before reading the page source
            # (presumably to let the page finish rendering -- TODO confirm).
            sleep(2)
            html = browser.page_source
            output_list = parse_html(html)
            write_to_file(output_list, filename)
        else:
            print('Error connecting to hackernews')
    finally:
        browser.quit()
def run_process(page_number, filename, headless):
    """Create a browser, scrape one page, write the result, and quit.

    Args:
        page_number: Page index forwarded to ``connect_to_base``.
        filename: Destination passed as the second argument to
            ``write_to_file``.
        headless: Forwarded unchanged to ``get_driver``.

    Returns:
        None. On a failed connection an error message is printed instead
        of writing anything.
    """
    # init browser
    browser = get_driver(headless)

    # try/finally guarantees the browser is quit exactly once, including
    # when connecting, parsing or writing raises; previously an exception
    # skipped both quit() calls and left the driver running.
    try:
        if connect_to_base(browser, page_number):
            # Fixed two-second pause before reading the page source
            # (presumably to let the page finish rendering -- TODO confirm).
            sleep(2)
            html = browser.page_source
            output_list = parse_html(html)
            write_to_file(output_list, filename)
        else:
            print("Error connecting to hackernews")
    finally:
        # exit
        browser.quit()
Exemple #6
0
# options = webdriver.ChromeOptions()
# # pass in headless argument to options
# options.add_argument("--headless")
# # initialize driver
# browser = webdriver.Chrome(options=options)
# browser.get("https://www.reddit.com/r/AskReddit/")

# search_form = browser.find_element_by_id("search_form_input_homepage")
# search_form.send_keys("real_python")

# search_form.submit()

# results = browser.find_elements_by_class_name("result")
# print(results[0].text)
# browser.close()
# quit()

from scrapers.scraper import get_driver, connect_to_base

if __name__ == "__main__":
    # Script entry point: obtain a driver from get_driver() and pass it to
    # connect_to_base() with no further arguments.
    # NOTE(review): the driver is never quit in this block -- confirm
    # whether leaving the session open is intentional.
    browser = get_driver()
    connect_to_base(browser)