def search_portal(case_number):
    """
    Performs a search of the portal from its search page, including selecting
    the case number input, solving the captcha and pressing Search. Also
    handles the captcha being solved incorrectly.

    Every attempt reloads the search page and submits the search again. The
    outcome of an attempt is read from the page title:

    * The title is exactly 'Search': the search did not go through (this
      could be because of a failed captcha attempt). Cookies are cleared and
      the search is attempted again. These retries are not limited.
    * The title contains 'Search Results: CaseNumber:': a result list is
      showing and the number of cases is read from the results table.
    * The title contains the case number: a single case page is showing.
    * Anything else, or no matching title within 5 seconds: the attempt counts
      as failed and is retried until settings['connect-thresh'] attempts have
      failed.

    Retries are done in a loop rather than by recursion so that the result of
    a retry is returned directly (the captcha solver is notified once per
    attempt) and so that the attempt limit is actually enforced.

    :param case_number: Case to search
    :return: A set of case number(s): the value of
        ScraperUtils.get_associated_cases(driver) when the result list
        reports more than one case, an empty set when it does not, or
        {case_number} when a single case page was opened.
    :raises Exception: If settings['solve-captchas'] is falsy.
    :raises RuntimeError: If settings['connect-thresh'] attempts timed out or
        ended on an unexpected page title.
    """
    max_failures = settings['connect-thresh']
    failures = 0

    while True:
        # Load portal search page
        load_page(f"{settings['portal-base']}/Home.aspx/Search", 'Search',
                  settings['verbose'])

        # Give some time for the captcha to load, as it does not load instantly.
        time.sleep(0.8)

        # Select Case Number textbox and enter case number
        select_case_input()
        case_input = driver.find_element_by_id('caseNumber')
        case_input.click()
        case_input.send_keys(case_number)

        if not settings['solve-captchas']:
            raise Exception(
                "Automated captcha solving is disabled by default. "
                "Please seek advice before using this feature.")

        # Solve captcha if it is required
        try:
            captcha_image_elem = driver.find_element_by_xpath(
                '//*/img[@alt="Captcha"]')
            captcha_buffer = captcha_image_elem.screenshot_as_png
            captcha_answer = captcha_solver.solve_captcha(captcha_buffer)
            captcha_textbox = driver.find_element_by_xpath(
                '//*/input[@name="captcha"]')
            captcha_textbox.click()
            captcha_textbox.send_keys(captcha_answer)
        except NoSuchElementException:
            # No captcha on the page, continue.
            pass

        # Do search
        driver.find_element_by_id('searchButton').click()

        # Wait for a recognisable page. 'Search' also matches the
        # 'Search Results:' titles, so no separate check is needed for them.
        try:
            WebDriverWait(driver, 5).until(
                lambda _: 'Search' in driver.title
                or case_number in driver.title)
        except TimeoutException:
            title = None
        else:
            title = driver.title

        if title == 'Search':
            # Clicking search did not change the page. This could be because
            # of a failed captcha attempt.
            try:
                # Check if 'Invalid Captcha' dialog is showing
                driver.find_element_by_xpath(
                    '//div[@class="alert alert-error"]')
            except NoSuchElementException:
                pass
            else:
                print("Captcha was solved incorrectly")
                captcha_solver.notify_last_captcha_fail()

            # Clear cookies so a new captcha is presented upon refresh,
            # then try the whole search again.
            driver.delete_all_cookies()
            continue

        if title is not None:
            if 'Search Results: CaseNumber:' in title:
                # Captcha solved correctly
                captcha_solver.notify_last_captcha_success()

                # Figure out the number of cases returned: the count is the
                # line following 'CASES FOUND' in the table text.
                case_detail_tbl = driver.find_element_by_tag_name(
                    'table').text.split('\n')
                case_count_idx = case_detail_tbl.index('CASES FOUND') + 1
                case_count = int(case_detail_tbl[case_count_idx])

                # Case number search found multiple cases.
                if case_count > 1:
                    return ScraperUtils.get_associated_cases(driver)
                # Case number search found no cases
                return set()

            if case_number in title:
                # Captcha solved correctly
                captcha_solver.notify_last_captcha_success()
                # Case number search did find a single court case.
                return {case_number}

        # Timed out, or landed on a page whose title is not recognised.
        failures += 1
        if failures >= max_failures:
            raise RuntimeError(
                'Case page could not be loaded after {} attempts, or unexpected page title: {}'
                .format(settings['connect-thresh'], driver.title))
def search_portal(case_number):
    """
    Performs a search of the portal from its search page, including selecting
    the case number input, solving the captcha and pressing Search. Also
    handles the captcha being solved incorrectly.

    If a captcha is shown and settings['solve-captchas'] is falsy, the user is
    asked to solve the captcha and press Search by hand; the function then
    waits until the case number shows up in the page title.

    Every attempt reloads the search page and submits the search again. The
    outcome of an attempt is read from the page title:

    * The title is exactly 'Search': the search did not go through (this
      could be because of a failed captcha attempt). Cookies are cleared and
      the search is attempted again. These retries are not limited.
    * The title contains 'Search Results: CaseNumber:': a result list is
      showing and the number of cases is obtained from
      ScraperUtils.get_search_case_count.
    * The title contains the case number: a single case page is showing.
    * Anything else, or no matching title within 5 seconds: the attempt counts
      as failed and is retried until settings['connect-thresh'] attempts have
      failed.

    Retries are done in a loop rather than by recursion so that each solved
    captcha is only ever notified about its own attempt, the result of a
    retry is returned directly, and the attempt limit is actually enforced.

    :param case_number: Case to search
    :return: A set of case number(s): the value of
        ScraperUtils.get_associated_cases(driver) when the result list
        reports more than one case, an empty set when it does not, or
        {case_number} when a single case page was opened.
    :raises RuntimeError: If settings['connect-thresh'] attempts timed out or
        ended on an unexpected page title.
    """
    max_failures = settings['connect-thresh']
    failures = 0

    while True:
        # Load portal search page
        load_page(f"{settings['portal-base']}/Home.aspx/Search", 'Search',
                  settings['verbose'])

        # Give some time for the captcha to load, as it does not load instantly.
        time.sleep(0.8)

        # Select Case Number textbox and enter case number
        select_case_input()
        case_input = driver.find_element_by_id('caseNumber')
        case_input.click()
        case_input.send_keys(case_number)

        # Only set when the automated solver produced an answer for THIS
        # attempt; stays None for manual solving and captcha-free pages.
        solved_captcha = None

        # Solve captcha if it is required
        try:
            # Get Captcha. This is kinda nasty, but if there's no Captcha,
            # then this will throw (which is a good thing in this case) and
            # we can move on with processing.
            captcha_image_elem = driver.find_element_by_xpath(
                '//*/img[@alt="Captcha"]')
            captcha_buffer = captcha_image_elem.screenshot_as_png

            if settings['solve-captchas']:
                solved_captcha = captcha_solver.solve_captcha(captcha_buffer)
                captcha_textbox = driver.find_element_by_xpath(
                    '//*/input[@name="captcha"]')
                captcha_textbox.click()
                captcha_textbox.send_keys(solved_captcha.answer)

                # Do search
                driver.find_element_by_id('searchButton').click()
            else:
                print(
                    f"Captcha encountered trying to view case ID {case_number}."
                )
                print(
                    "Please solve the captcha and click the search button to proceed."
                )
                # Block until the user got through; each wait lasts up to
                # six hours and is simply restarted when it expires.
                while True:
                    try:
                        WebDriverWait(driver, 6 * 60 * 60).until(
                            lambda _: case_number in driver.title)
                        print("continuing...")
                        break
                    except TimeoutException:
                        print("still waiting for user to solve the captcha...")
        except NoSuchElementException:
            # No captcha on the page, continue.
            solved_captcha = None

            # Do search
            driver.find_element_by_id('searchButton').click()

        # Wait for a recognisable page. 'Search' also matches the
        # 'Search Results:' titles, so no separate check is needed for them.
        try:
            WebDriverWait(driver, 5).until(
                lambda _: 'Search' in driver.title
                or case_number in driver.title)
        except TimeoutException:
            title = None
        else:
            title = driver.title

        if title == 'Search':
            # Clicking search did not change the page. This could be because
            # of a failed captcha attempt.
            try:
                # Check if 'Invalid Captcha' dialog is showing
                driver.find_element_by_xpath(
                    '//div[@class="alert alert-error"]')
            except NoSuchElementException:
                pass
            else:
                print("Captcha was solved incorrectly")
                if settings['solve-captchas'] and solved_captcha:
                    solved_captcha.notify_incorrect()

            # Clear cookies so a new captcha is presented upon refresh,
            # then try the whole search again.
            driver.delete_all_cookies()
            continue

        if title is not None:
            if 'Search Results: CaseNumber:' in title:
                # Captcha solved correctly
                if settings['solve-captchas'] and solved_captcha:
                    solved_captcha.notify_correct()

                case_count = ScraperUtils.get_search_case_count(
                    driver, settings['county'])

                # Case number search found multiple cases.
                if case_count > 1:
                    return ScraperUtils.get_associated_cases(driver)
                # Case number search found no cases
                return set()

            if case_number in title:
                # Captcha solved correctly
                if settings['solve-captchas'] and solved_captcha:
                    solved_captcha.notify_correct()
                # Case number search did find a single court case.
                return {case_number}

        # Timed out, or landed on a page whose title is not recognised.
        failures += 1
        if failures >= max_failures:
            raise RuntimeError(
                'Case page could not be loaded after {} attempts, or unexpected page title: {}'
                .format(settings['connect-thresh'], driver.title))