def setUp(self, mock_get_load_time):
    """Parse the local HTML fixture with ``get_load_time`` stubbed out.

    ``mock_get_load_time`` is the patched load-time helper (injected by a
    ``@mock.patch`` decorator on the test); its return value is stubbed so
    ``parse_html`` never performs a real request.

    Stores the parsed output and the Windows paths used by the scraper.
    """
    mock_get_load_time.return_value = 'mocked!'
    with open('test.html', encoding='utf-8') as f:
        html = f.read()
    self.output = parse_html(html)
    # Both paths as raw strings: the original driver_url was a non-raw
    # string, so "\P" was an invalid escape sequence (a warning on
    # Python 3.12+, and a silent bug waiting for a path containing
    # "\t", "\n", etc.).
    self.driver_url = r"D:\Projects\PythonScraper\chromedriver.exe"
    self.db_url = r"D:\Projects\PythonScraper\scrapped.db"
def run_process(p_url, d_url, db_url, refresh_t):
    """Scrape location names from a page, fetch current temperatures,
    and persist the results to a SQLite database.

    Args:
        p_url: page URL to scrape.
        d_url: webdriver (chromedriver) location forwarded to ``get_html``.
        db_url: path to the SQLite database file.
        refresh_t: refresh/wait interval forwarded to ``get_html``.
    """
    html = get_html(d_url, p_url, refresh_t)
    if html is None:
        return
    locations = parse_html(html)
    owm = pyowm.OWM(weather_app_id)  # You MUST provide a valid API key
    # create a database connection
    conn = create_connection(db_url)
    if conn is None:
        print("Error! cannot create the database connection.")
        return
    data_delete(conn, sql_table_name)
    create_table(conn, sql_create_scrapped_table)
    for location in locations:
        temp = get_temp(owm, location)
        # Values at or below -255 appear to be the "lookup failed"
        # sentinel from get_temp — skip those rows.
        if temp > -255:
            city = location.split(',')[0]
            timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            note = evaluate_rules(city, temp, note_rules)
            data_entry(conn, sql_insert_scrapped_table,
                       [city, timestamp, temp, note])
    data_print(conn, sql_table_name)
    conn.close()
def run_process(browser, page_number=1):
    """Load the given page in *browser* and return its parsed contents.

    Returns the result of ``parse_html`` on success, or ``False`` when the
    browser cannot connect to the page.
    """
    if not connect_to_base(browser, page_number):
        return False
    print(f'Scraping page {page_number}...')
    sleep(2)
    return parse_html(browser.page_source)
def run_scraper(db, url, browser):
    """Scrape *url* with *browser* and save the parsed rows to *db*.

    Prints an error message (and does nothing else) when the site
    connection fails.
    """
    if not connect_to_site(browser, url):
        print('Error connecting to github')
        return
    sleep(2)
    records = parse_html(browser.page_source)
    save_to_db(db, records)
def run_process(page_number, filename, browser):
    """Scrape one Hacker News page and write the parsed rows to *filename*.

    Prints an error message when the page connection fails.
    """
    if not connect_to_base(browser, page_number):
        print("Error connecting to hacker news")
        return
    sleep(2)
    rows = parse_html(browser.page_source)
    write_to_file(rows, filename)
def html_output(monkeypatch):
    """Pytest fixture: yield the parsed contents of the bundled test.html.

    ``scraper.get_load_time`` is monkeypatched to a stub so parsing never
    performs a real network request.
    """
    monkeypatch.setattr(scraper, "get_load_time", lambda url: "mocked!")
    fixture_path = Path(BASE_DIR).joinpath("test.html")
    with open(fixture_path, encoding="utf-8") as f:
        contents = f.read()
    yield scraper.parse_html(contents)
def run_process(browser):
    """Scrape a random Wikipedia page and return its parsed contents.

    Returns ``parse_html(...)`` on success, or ``False`` when the browser
    cannot connect.

    Fix: the original signature misspelled the parameter as ``rowser``
    while the body referenced ``browser`` — so the argument was ignored
    and the body resolved ``browser`` as a global (NameError if none
    exists). Positional callers are unaffected by the rename.
    """
    if connect_to_base(browser):
        # plain string: the original f-string had no placeholders
        print('Scraping random Wikipedia page...')
        sleep(2)
        return parse_html(browser.page_source)
    print("Error connecting to Wikipedia")
    return False
def run_process(page_number, filename):
    """Scrape one Hacker News page and write the parsed rows to *filename*.

    Fix: the original called ``browser.quit()`` separately in both the
    success and failure branches, and skipped it entirely if
    ``parse_html``/``write_to_file`` raised — leaking the webdriver
    process. A ``try/finally`` guarantees cleanup on every path.
    """
    browser = get_driver()
    try:
        if connect_to_base(browser, page_number):
            sleep(2)
            output_list = parse_html(browser.page_source)
            write_to_file(output_list, filename)
        else:
            print('Error connecting to hackernews')
    finally:
        browser.quit()
def run_process(page_number, filename, headless):
    """Scrape one Hacker News page and write the parsed rows to *filename*.

    Args:
        page_number: page index passed to ``connect_to_base``.
        filename: output file passed to ``write_to_file``.
        headless: forwarded to ``get_driver`` (headless browser mode).

    Fix: the original duplicated ``browser.quit()`` in both branches and
    never quit the browser if ``parse_html``/``write_to_file`` raised —
    leaking the webdriver process. ``try/finally`` guarantees cleanup.
    """
    # init browser
    browser = get_driver(headless)
    try:
        if connect_to_base(browser, page_number):
            sleep(2)
            output_list = parse_html(browser.page_source)
            write_to_file(output_list, filename)
        else:
            print("Error connecting to hackernews")
    finally:
        # exit
        browser.quit()
def setUp(self, mock_get_load_time):
    """Read the HTML fixture and parse it with ``get_load_time`` stubbed.

    ``mock_get_load_time`` is the patched helper injected by a
    ``@mock.patch`` decorator; stubbing it keeps ``parse_html`` offline.
    """
    mock_get_load_time.return_value = 'mocked!'
    with open('test/test.html', encoding='utf-8') as f:
        self.output = parse_html(f.read())
def html_output():
    """Pytest fixture: yield the parsed contents of the bundled test.html."""
    fixture_path = Path(BASE_DIR).joinpath("test.html")
    with open(fixture_path, encoding="utf-8") as f:
        contents = f.read()
    yield scraper.parse_html(contents)