Example #1
def setUp(self, mock_get_load_time):
    mock_get_load_time.return_value = 'mocked!'
    with open('test.html', encoding='utf-8') as f:
        html = f.read()
        self.output = parse_html(html)
    # use raw strings for Windows paths so backslashes are not treated as escapes
    self.driver_url = r"D:\Projects\PythonScraper\chromedriver.exe"
    self.db_url = r"D:\Projects\PythonScraper\scrapped.db"
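Note that setUp only receives a mock_get_load_time argument when it is decorated with mock.patch. A minimal sketch of the wiring, assuming get_load_time and parse_html live in a scraper module (the module name is an assumption):

import unittest
from unittest import mock

from scraper import parse_html  # assumed location of parse_html


class ParseHtmlTestCase(unittest.TestCase):
    # patch() used as a method decorator is active only while setUp runs,
    # so parse_html sees the mocked get_load_time during the call below
    @mock.patch('scraper.get_load_time')
    def setUp(self, mock_get_load_time):
        mock_get_load_time.return_value = 'mocked!'
        ...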
Example #2
def run_process(p_url, d_url, db_url, refresh_t):
    html = get_html(d_url, p_url, refresh_t)
    if html is not None:
        locations = parse_html(html)
        owm = pyowm.OWM(weather_app_id)  # You MUST provide a valid API key

        # create a database connection
        conn = create_connection(db_url)
        if conn is not None:
            data_delete(conn, sql_table_name)
            create_table(conn, sql_create_scrapped_table)
            for location in locations:
                temp = get_temp(owm, location)
                if temp > -255:  # get_temp presumably returns -255 when no reading is available
                    city = location.split(',')[0]
                    data_entry(conn, sql_insert_scrapped_table, [
                        city,
                        datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
                        temp,
                        evaluate_rules(city, temp, note_rules)
                    ])
            data_print(conn, sql_table_name)
            conn.close()
        else:
            print("Error! Cannot create the database connection.")
Example #3
def run_process(browser, page_number=1):
    if connect_to_base(browser, page_number):
        print(f'Scraping page {page_number}...')
        sleep(2)
        html = browser.page_source
        return parse_html(html)
    else:
        return False
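connect_to_base is not shown either; a hedged sketch using Selenium's explicit waits, where the URL pattern and element id are assumptions modeled on the Hacker News examples further down:

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def connect_to_base(browser, page_number):
    base_url = f'https://news.ycombinator.com/news?p={page_number}'  # assumed target
    try:
        browser.get(base_url)
        # wait until the main content table is present before scraping
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.ID, 'hnmain'))
        )
        return True
    except Exception:
        return False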
Example #4
def run_scraper(db, url, browser):
    if connect_to_site(browser, url):
        sleep(2)
        html = browser.page_source
        output_list = parse_html(html)
        save_to_db(db, output_list)
    else:
        print('Error connecting to GitHub')
Example #5
def run_process(page_number, filename, browser):
    if connect_to_base(browser, page_number):
        sleep(2)
        html = browser.page_source
        output_list = parse_html(html)
        write_to_file(output_list, filename)
    else:
        print("Error connecting to hacker news")
Example #6
@pytest.fixture
def html_output(monkeypatch):
    def mock_get_load_time(url):
        return "mocked!"

    monkeypatch.setattr(scraper, "get_load_time", mock_get_load_time)
    with open(Path(BASE_DIR).joinpath("test.html"), encoding="utf-8") as f:
        html = f.read()
        yield scraper.parse_html(html)
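BASE_DIR is assumed to point at the directory containing test.html; a common definition would be:

from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent  # assumed: test.html lives next to the test module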
Example #7
def run_process(browser):
    if connect_to_base(browser):
        print('Scraping random Wikipedia page...')
        sleep(2)
        html = browser.page_source
        return parse_html(html)
    else:
        print("Error connecting to Wikipedia")
        return False
Example #8
def run_process(page_number, filename):
    browser = get_driver()
    if connect_to_base(browser, page_number):
        sleep(2)
        html = browser.page_source
        output_list = parse_html(html)
        write_to_file(output_list, filename)
        browser.quit()
    else:
        print('Error connecting to hackernews')
        browser.quit()
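Both branches above end with browser.quit(); a try/finally gives the same behavior with a single cleanup path (a common refactor, not the author's code):

def run_process(page_number, filename):
    browser = get_driver()
    try:
        if connect_to_base(browser, page_number):
            sleep(2)
            output_list = parse_html(browser.page_source)
            write_to_file(output_list, filename)
        else:
            print('Error connecting to hackernews')
    finally:
        browser.quit()  # runs whether or not the scrape succeeded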
Example #9
def run_process(page_number, filename, headless):

    # init browser
    browser = get_driver(headless)

    if connect_to_base(browser, page_number):
        sleep(2)
        html = browser.page_source
        output_list = parse_html(html)
        write_to_file(output_list, filename)

        # exit
        browser.quit()
    else:
        print("Error connecting to hackernews")
        browser.quit()
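get_driver(headless) is not shown; a plausible Selenium implementation, as a sketch only:

from selenium import webdriver


def get_driver(headless):
    # configure Chrome to run without a visible window when requested
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    return webdriver.Chrome(options=options)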
Example #10
def setUp(self, mock_get_load_time):
    mock_get_load_time.return_value = 'mocked!'
    with open('test/test.html', encoding='utf-8') as f:
        html = f.read()
        self.output = parse_html(html)
Example #11
@pytest.fixture
def html_output():
    with open(Path(BASE_DIR).joinpath("test.html"), encoding="utf-8") as f:
        html = f.read()
        yield scraper.parse_html(html)
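Either fixture is consumed by naming it as a test argument; a minimal usage sketch (the assertion is an assumption about what parse_html returns):

def test_parse_html(html_output):
    # pytest injects the value yielded by the fixture above
    assert len(html_output) > 0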