Code Example #1
def test_search_nologin_no_captcha(test_input, headless, court_scraper_dir):
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id,
                       url=url,
                       download_dir=court_scraper_dir,
                       headless=headless)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1
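
Each of these tests consumes pytest fixtures (test_input, headless, court_scraper_dir) that the listing does not show, and assumes OdysseySite has already been imported from the court-scraper package. A minimal conftest.py sketch, where the parametrized values are hypothetical placeholders rather than real court data:

# conftest.py -- a minimal sketch; sample values are placeholders
import pytest

@pytest.fixture(params=[
    # (place_id, url, case_ids) -- hypothetical example tuple
    ('ga_dekalb', 'https://example-odyssey-portal.test', ['CASE-0001']),
])
def test_input(request):
    return request.param

@pytest.fixture
def headless():
    # Run the browser headlessly by default
    return True

@pytest.fixture
def court_scraper_dir(tmp_path):
    # Use pytest's tmp_path so downloads land in an isolated directory
    return str(tmp_path)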
Code Example #2
def test_maximize_displayed_results(test_input, headless, court_scraper_dir):
    "should automatically maximize the number of results displayed on results page"
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id,
                       url=url,
                       download_dir=court_scraper_dir,
                       headless=headless)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 91
Code Example #3
from unittest.mock import call

def test_login(webdriver_mock, login_page_mock):
    site = OdysseySite('http://somesite.com', '/tmp/some_path/')
    # The web driver is instantiated during Site class initialization
    assert webdriver_mock.Chrome.called
    # Log in with a username and password
    site.login('user', 'pass')
    # The login method should go to the login page and submit the user's creds
    expected_calls = [call().go_to(), call().login()]
    actual_calls = login_page_mock.mock_calls
    for expected in expected_calls:
        assert expected in actual_calls
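
The webdriver_mock and login_page_mock arguments imply that Selenium's webdriver and the login page object are patched out, e.g. with unittest.mock. A sketch of fixture-based equivalents, where both patch targets are assumptions that should point at the module where the site class actually imports them:

import pytest
from unittest.mock import patch

@pytest.fixture
def webdriver_mock():
    # Assumed patch target for the Selenium webdriver import
    with patch('court_scraper.platforms.odyssey.site.webdriver') as mock:
        yield mock

@pytest.fixture
def login_page_mock():
    # Likewise an assumed path for the login page object
    with patch('court_scraper.platforms.odyssey.site.LoginPage') as mock:
        yield mock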
Code Example #4
def test_search(test_input, headless, live_configs, court_scraper_dir):
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id,
                       url=url,
                       download_dir=court_scraper_dir,
                       headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1
    # Does *not* scrape Case Detail page (HTML) by default
    assert 'page_source' not in results[0].data.keys()
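
The live_configs fixture maps a place id such as 'ga_dekalb' to login credentials. One way to supply it, sketched here under the assumption that credentials come from environment variables with hypothetical names (a config file would work equally well):

import os
import pytest

@pytest.fixture
def live_configs():
    # A sketch only: the env var names are placeholders, not the real
    # suite's config source
    return {
        'ga_dekalb': {
            'username': os.environ['GA_DEKALB_USERNAME'],
            'password': os.environ['GA_DEKALB_PASSWORD'],
        }
    }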
Code Example #5
def test_malformed_result_listing(test_input, headless, live_configs,
                                  court_scraper_dir):
    "should handle result listings that have an extra leading blank cell"
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id,
                       url=url,
                       download_dir=court_scraper_dir,
                       headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids)
    assert len(results) == 1
    assert results[0].data['File Date'] == '10/02/2019'
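
The docstring describes result rows that arrive with an extra leading blank cell, which would shift every value one column to the right. A hypothetical helper (not the library's actual parser) illustrating the normalization this test exercises:

def normalize_row(cells):
    # Drop a spurious leading blank cell so values line up with headers
    if cells and not cells[0].strip():
        cells = cells[1:]
    return cells

# A malformed listing parses the same as a well-formed one
assert normalize_row(['', '10/02/2019'])[0] == '10/02/2019'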
Code Example #6
def test_scrape_case_details(test_input, headless, live_configs,
                             court_scraper_dir):
    "should support optional scraping of case details"
    auth = live_configs['ga_dekalb']
    username = auth['username']
    password = auth['password']
    place_id, url, case_ids = test_input
    site = OdysseySite(place_id,
                       url=url,
                       download_dir=court_scraper_dir,
                       headless=headless)
    site.login(username, password)
    results = site.search(case_numbers=case_ids, case_details=True)
    # Should have case detail HTML stored on return object
    assert len(results) == 1
    assert 'page_source' in results[0].data.keys()
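
Because case_details=True stores the Case Detail page HTML under the page_source key, a caller can persist it after searching. A hypothetical follow-on snippet:

from pathlib import Path

# Write each scraped Case Detail page to the download directory
for idx, result in enumerate(results):
    out_file = Path(court_scraper_dir) / f'case_{idx}.html'
    out_file.write_text(result.data['page_source'])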