コード例 #1
0
def search_portal(case_number):
    """
    Search the portal for a case number and report which cases were found.

    Each attempt loads the portal search page, enters the case number, solves the captcha if one is shown and
    presses Search. The whole attempt is repeated when the page title stays 'Search' (for example because the
    captcha answer was rejected) or when the result page does not show an expected title in time.

    :param case_number: Case to search
    :return: A set of case number(s): ``{case_number}`` when the title contains the case number, the result of
        ``ScraperUtils.get_associated_cases`` when the results page lists more than one case, otherwise an empty set.
    :raises Exception: If ``settings['solve-captchas']`` is falsy.
    :raises RuntimeError: If the result page timed out or showed an unexpected title ``settings['connect-thresh']``
        times.
    """
    # Counts timeouts/unexpected titles across ALL attempts, so the 'connect-thresh' limit is actually reachable.
    failed_loads = 0

    while True:
        # Load portal search page
        load_page(f"{settings['portal-base']}/Home.aspx/Search", 'Search', settings['verbose'])
        # Give some time for the captcha to load, as it does not load instantly.
        time.sleep(0.8)

        # Select Case Number textbox and enter case number
        select_case_input()
        case_input = driver.find_element_by_id('caseNumber')
        case_input.click()
        case_input.send_keys(case_number)

        if not settings['solve-captchas']:
            raise Exception("Automated captcha solving is disabled by default. Please seek advice before using this feature.")

        # Solve captcha if it is required
        try:
            captcha_image_elem = driver.find_element_by_xpath(
                '//*/img[@alt="Captcha"]')
            captcha_buffer = captcha_image_elem.screenshot_as_png
            captcha_answer = captcha_solver.solve_captcha(captcha_buffer)
            captcha_textbox = driver.find_element_by_xpath(
                '//*/input[@name="captcha"]')
            captcha_textbox.click()
            captcha_textbox.send_keys(captcha_answer)
        except NoSuchElementException:
            # No captcha on the page, continue.
            pass

        # Do search
        driver.find_element_by_id('searchButton').click()

        # If the title stays as 'Search': Captcha solving failed
        # If the title contains the case number or 'Search Results': Captcha solving succeeded
        # NOTE(review): a title of exactly 'Search' already satisfies this condition, so the wait may
        # return before the browser has navigated away from the search page - confirm this is acceptable.
        try:
            WebDriverWait(driver, 5).until(
                lambda x: 'Search' in driver.title or case_number in driver.title or 'Search Results:' in driver.title)
        except TimeoutException:
            # Counted as a failed load below.
            pass
        else:
            title = driver.title
            if title == 'Search':
                # Clicking search did not change the page. This could be because of a failed captcha attempt.
                try:
                    # Check if 'Invalid Captcha' dialog is showing
                    driver.find_element_by_xpath(
                        '//div[@class="alert alert-error"]')
                    print("Captcha was solved incorrectly")
                    captcha_solver.notify_last_captcha_fail()
                except NoSuchElementException:
                    pass
                # Clear cookies so a new captcha is presented upon refresh
                driver.delete_all_cookies()
                # Try again from the top. As before, these retries are not limited by 'connect-thresh'.
                continue
            if 'Search Results: CaseNumber:' in title:
                # Captcha solved correctly
                captcha_solver.notify_last_captcha_success()
                # Figure out the number of cases returned
                case_detail_tbl = driver.find_element_by_tag_name('table').text.split('\n')
                case_count_idx = case_detail_tbl.index('CASES FOUND') + 1
                case_count = int(case_detail_tbl[case_count_idx])
                # Case number search found multiple cases.
                if case_count > 1:
                    return ScraperUtils.get_associated_cases(driver)
                # Case number search found no cases
                return set()
            if case_number in title:
                # Captcha solved correctly
                captcha_solver.notify_last_captcha_success()
                # Case number search did find a single court case.
                return {case_number}

        # Reaching this point means the wait timed out or the title matched none of the expected forms.
        failed_loads += 1
        if failed_loads >= settings['connect-thresh']:
            raise RuntimeError('Case page could not be loaded after {} attempts, or unexpected page title: {}'.format(settings['connect-thresh'], driver.title))
コード例 #2
0
def search_portal(case_number):
    """
    Search the portal for a case number and report which cases were found.

    Each attempt loads the portal search page, enters the case number, deals with the captcha if one is shown and
    submits the search. When ``settings['solve-captchas']`` is truthy the captcha is solved automatically; otherwise
    a prompt is printed and the function blocks until the case number appears in the page title. The whole attempt
    is repeated when the page title stays 'Search' (for example because the captcha answer was rejected) or when the
    result page does not show an expected title in time.

    :param case_number: Case to search
    :return: A set of case number(s): ``{case_number}`` when the title contains the case number, the result of
        ``ScraperUtils.get_associated_cases`` when the results page lists more than one case, otherwise an empty set.
    :raises RuntimeError: If the result page timed out or showed an unexpected title ``settings['connect-thresh']``
        times.
    """
    # Counts timeouts/unexpected titles across ALL attempts, so the 'connect-thresh' limit is actually reachable.
    failed_loads = 0

    while True:
        # Load portal search page
        load_page(f"{settings['portal-base']}/Home.aspx/Search", 'Search',
                  settings['verbose'])
        # Give some time for the captcha to load, as it does not load instantly.
        time.sleep(0.8)

        # Select Case Number textbox and enter case number
        select_case_input()
        case_input = driver.find_element_by_id('caseNumber')
        case_input.click()
        case_input.send_keys(case_number)

        # Always bound, so the checks further down never depend on which captcha path ran.
        solved_captcha = None

        # Solve captcha if it is required
        try:
            # Get Captcha. If there's no Captcha, this lookup throws and the
            # except branch below submits the search without one.
            captcha_image_elem = driver.find_element_by_xpath(
                '//*/img[@alt="Captcha"]')
            captcha_buffer = captcha_image_elem.screenshot_as_png
            if settings['solve-captchas']:
                solved_captcha = captcha_solver.solve_captcha(captcha_buffer)
                captcha_textbox = driver.find_element_by_xpath(
                    '//*/input[@name="captcha"]')
                captcha_textbox.click()
                captcha_textbox.send_keys(solved_captcha.answer)

                # Do search
                search_button = driver.find_element_by_id('searchButton')
                search_button.click()
            else:
                print(f"Captcha encountered trying to view case ID {case_number}.")
                print(
                    "Please solve the captcha and click the search button to proceed."
                )
                # Block until the user has submitted the search themselves;
                # each wait lasts up to six hours before re-prompting.
                while True:
                    try:
                        WebDriverWait(
                            driver, 6 * 60 *
                            60).until(lambda x: case_number in driver.title)
                        print("continuing...")
                        break
                    except TimeoutException:
                        print("still waiting for user to solve the captcha...")

        except NoSuchElementException:
            # No captcha on the page, continue.
            solved_captcha = None
            # Do search
            search_button = driver.find_element_by_id('searchButton')
            search_button.click()

        # If the title stays as 'Search': Captcha solving failed
        # If the title contains the case number or 'Search Results': Captcha solving succeeded
        # NOTE(review): a title of exactly 'Search' already satisfies this condition, so the wait may
        # return before the browser has navigated away from the search page - confirm this is acceptable.
        try:
            WebDriverWait(
                driver,
                5).until(lambda x: 'Search' in driver.title or case_number in
                         driver.title or 'Search Results:' in driver.title)
        except TimeoutException:
            # Counted as a failed load below.
            pass
        else:
            title = driver.title
            if title == 'Search':
                # Clicking search did not change the page. This could be because of a failed captcha attempt.
                try:
                    # Check if 'Invalid Captcha' dialog is showing
                    driver.find_element_by_xpath(
                        '//div[@class="alert alert-error"]')
                    print("Captcha was solved incorrectly")
                    if settings['solve-captchas'] and solved_captcha:
                        solved_captcha.notify_incorrect()
                except NoSuchElementException:
                    pass
                # Clear cookies so a new captcha is presented upon refresh
                driver.delete_all_cookies()
                # Try again from the top. As before, these retries are not limited by 'connect-thresh'.
                continue
            if 'Search Results: CaseNumber:' in title:
                # Captcha solved correctly
                if settings['solve-captchas'] and solved_captcha:
                    solved_captcha.notify_correct()
                case_count = ScraperUtils.get_search_case_count(
                    driver, settings['county'])
                # Case number search found multiple cases.
                if case_count > 1:
                    return ScraperUtils.get_associated_cases(driver)
                # Case number search found no cases
                return set()
            if case_number in title:
                # Captcha solved correctly
                if settings['solve-captchas'] and solved_captcha:
                    solved_captcha.notify_correct()
                # Case number search did find a single court case.
                return {case_number}

        # Reaching this point means the wait timed out or the title matched none of the expected forms.
        failed_loads += 1
        if failed_loads >= settings['connect-thresh']:
            raise RuntimeError(
                'Case page could not be loaded after {} attempts, or unexpected page title: {}'
                .format(settings['connect-thresh'], driver.title))