def run_process(browser, page_number=1):
    """Scrape a single page with an already-open *browser*.

    Returns the parsed results from ``parse_html`` on success, or ``False``
    when the initial connection fails.
    """
    # bail out early if we can't reach the target page
    if not connect_to_base(browser, page_number):
        return False
    print(f'Scraping page {page_number}...')
    sleep(2)  # give dynamically-loaded content time to render
    return parse_html(browser.page_source)
def run_process(page_number, filename, browser):
    """Scrape one Hacker News page and persist the parsed rows to *filename*.

    Prints an error message instead of raising when the connection fails.
    """
    connected = connect_to_base(browser, page_number)
    if not connected:
        print("Error connecting to hacker news")
        return
    sleep(2)  # allow the page to finish rendering before grabbing its source
    parsed_rows = parse_html(browser.page_source)
    write_to_file(parsed_rows, filename)
def run_process(browser):
    """Scrape a random Wikipedia page with *browser*.

    Returns the parsed results from ``parse_html`` on success, or ``False``
    (after printing an error) when the connection fails.

    Fix: the parameter was typo'd ``rowser`` while the body referenced
    ``browser``, so every call raised a NameError (or silently picked up an
    unrelated global). The parameter is renamed to match the body.
    """
    if connect_to_base(browser):
        print('Scraping random Wikipedia page...')
        sleep(2)  # give the page time to render before reading its source
        html = browser.page_source
        return parse_html(html)
    else:
        print("Error connecting to Wikipedia")
        return False
def run_process(page_number, filename):
    """Launch a webdriver, scrape one Hacker News page into *filename*.

    Fix: ``browser.quit()`` was duplicated in both branches and was skipped
    entirely if ``connect_to_base``/``parse_html``/``write_to_file`` raised,
    leaking the webdriver process. A ``try/finally`` now guarantees cleanup.
    """
    browser = get_driver()
    try:
        if connect_to_base(browser, page_number):
            sleep(2)  # allow dynamic content to load before reading the source
            html = browser.page_source
            output_list = parse_html(html)
            write_to_file(output_list, filename)
        else:
            print('Error connecting to hackernews')
    finally:
        # always release the browser, even if scraping raises
        browser.quit()
def run_process(page_number, filename, headless):
    """Launch a webdriver (optionally *headless*), scrape one Hacker News
    page, and write the parsed rows to *filename*.

    Fix: ``browser.quit()`` was duplicated in both branches and was skipped
    if any step between ``get_driver`` and ``write_to_file`` raised, leaking
    the webdriver process. A ``try/finally`` now guarantees cleanup.
    """
    # init browser
    browser = get_driver(headless)
    try:
        if connect_to_base(browser, page_number):
            sleep(2)  # allow dynamic content to load before reading the source
            html = browser.page_source
            output_list = parse_html(html)
            write_to_file(output_list, filename)
        else:
            print("Error connecting to hackernews")
    finally:
        # exit — always release the browser, even on error
        browser.quit()
from scrapers.scraper import get_driver, connect_to_base

# NOTE(review): a large block of commented-out Selenium/Reddit experiment code
# (ChromeOptions setup, search-form interaction) was removed here — dead
# commented-out code should live in version control history, not the source.

if __name__ == "__main__":
    # launch a webdriver and open the target site; connect_to_base is called
    # here without a page number — presumably it has a default, verify against
    # scrapers.scraper
    browser = get_driver()
    connect_to_base(browser)