from unittest.mock import call

# NOTE: import path assumed; adjust to match the actual package layout
from court_scraper.platforms.odyssey.site import OdysseySite


def test_search_nologin_no_captcha(test_input, headless, court_scraper_dir):
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id, url=url, download_dir=court_scraper_dir, headless=headless)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1

def test_maximize_displayed_results(test_input, headless, court_scraper_dir):
    """should automatically maximize the number of results displayed on results page"""
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id, url=url, download_dir=court_scraper_dir, headless=headless)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 91

def test_login(webdriver_mock, login_page_mock):
    site = OdysseySite('http://somesite.com', '/tmp/some_path/')
    # Web driver instantiated during Site class initialization
    assert webdriver_mock.Chrome.called
    # Login requires password
    site.login('user', 'pass')
    # login method goes to login page and submits user creds
    expected_calls = [call().go_to(), call().login()]
    actual_calls = login_page_mock.mock_calls
    for expected in expected_calls:
        assert expected in actual_calls

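# The webdriver_mock and login_page_mock arguments above are assumed to be
# supplied by the test harness (e.g., pytest fixtures or @patch-injected
# mocks) standing in for the Selenium webdriver module and the Odyssey
# login page object; the exact patch targets depend on the package layout
# and are not shown here.
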
def test_search(test_input, headless, live_configs, court_scraper_dir):
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id, url=url, download_dir=court_scraper_dir, headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1
    # Does *not* scrape Case Detail page (HTML) by default
    assert 'page_source' not in results[0].data.keys()

def test_malformed_result_listing(test_input, headless, live_configs, court_scraper_dir):
    """should handle result listings that have an extra leading blank cell"""
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id, url=url, download_dir=court_scraper_dir, headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1
    assert results[0].data['File Date'] == '10/02/2019'

def test_scrape_case_details(test_input, headless, live_configs, court_scraper_dir):
    """should support optional scraping of case details"""
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id, url=url, download_dir=court_scraper_dir, headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids, case_details=True)
    # Should have case detail HTML stored on return object
    assert len(results) == 1
    assert 'page_source' in results[0].data.keys()
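

# The fixtures used throughout this module (test_input, headless,
# live_configs, court_scraper_dir) are assumed to come from a conftest.py.
# Below is a minimal illustrative sketch of what such fixtures might look
# like; the place ID, URL, case number, env var names, and config layout
# are all hypothetical placeholders, not the project's real values. Remove
# these if the real conftest.py already provides them, since definitions
# here would shadow it.

import os

import pytest


@pytest.fixture
def test_input():
    # (place_id, url, case_ids) for a live Odyssey portal (placeholders)
    return ('ga_dekalb', 'https://example.com/portal', ['SOME-CASE-ID'])


@pytest.fixture
def headless():
    # Run the browser without a visible window by default
    return True


@pytest.fixture
def court_scraper_dir(tmp_path):
    # Scratch directory for files downloaded during a test run
    return str(tmp_path)


@pytest.fixture
def live_configs():
    # Credentials pulled from the environment; structure assumed
    return {
        'ga_dekalb': {
            'username': os.environ.get('GA_DEKALB_USERNAME'),
            'password': os.environ.get('GA_DEKALB_PASSWORD'),
        }
    }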