def open_welcome_page(self):
    """Open the case-search welcome page and return it as parsed soup.

    The page is fetched through ``self.opener``.  If the response contains
    the text ``'By clicking Accept'`` the captcha at
    ``captchaVerification.do`` is solved via ``captcha.solve`` and the
    welcome page is requested a second time.

    Returns:
        A ``BeautifulSoup`` object built from the last response body.
    """
    url = self.url('caseSearch.do?welcomePage=welcomePage')
    page_content = self.opener.open(url).read()

    # See if we need to solve a captcha
    if 'By clicking Accept' in page_content:
        captcha_url = self.url('captchaVerification.do')
        captcha.solve(self.opener, captcha_url)
        page_content = self.opener.open(url).read()

    # Parse only the content that is actually returned; the first response
    # is not worth parsing when it is replaced after the captcha step.
    return BeautifulSoup(page_content)
def test_captcha_solver(samples=None, **params):
    """Run the solver over every file in ``samples`` and check the answer.

    The expected answer for a sample is its file name without extension.
    Nothing is checked when ``samples`` is empty or ``None``.  Remaining
    keyword arguments are forwarded to ``captcha.image_filter`` and
    ``captcha.solve``.
    """
    if not samples:
        return

    for filename in os.listdir(samples):
        sample_path = os.path.join(samples, filename)
        filtered = captcha.image_filter(sample_path, **params)
        solved = captcha.solve(filtered, **params)

        actual = ''.join(str(symbol) for symbol in solved)
        expected, _extension = os.path.splitext(filename)
        assert actual == expected, samples
def test_get_image_by_url(params):
    """Fetch and solve the captcha at ``params['url']`` three times.

    Each round asserts that the solver returned ``params['length']``
    symbols (4 when the key is absent) and that none of them is ``None``.
    ``params`` is passed unchanged to ``captcha.image_filter`` and
    ``captcha.solve``.
    """
    rounds = 3
    for _ in range(rounds):
        raw_image = captcha.fetch(params['url'])
        filtered = captcha.image_filter(raw_image, params)
        symbols = captcha.solve(filtered, params)

        assert len(symbols) == params.get('length', 4)
        assert not any(symbol is None for symbol in symbols)
def test_captcha_solver(params):
    """Run the solver over every file in ``params['samples']``.

    The expected answer for a sample is the file name up to its last dot.
    Nothing is checked when ``params`` has no ``'samples'`` entry.
    ``params`` is passed unchanged to ``captcha.image_filter`` and
    ``captcha.solve``.
    """
    path = params.get('samples')
    if path is None:
        return

    for filename in os.listdir(path):
        # os.path.join instead of plain concatenation, so a samples
        # directory given without a trailing separator still resolves.
        sample_path = os.path.join(path, filename)
        image = captcha.image_filter(sample_path, params)
        result = captcha.solve(image, params)
        assert ''.join(map(str, result)) == filename.rsplit('.', 1)[0]
def test_get_image_by_url(url=None, length=4, **params):
    """Fetch and solve the captcha at ``url`` three times.

    Each round asserts that the solver returned ``length`` symbols and
    that none of them is ``None``; ``url`` is used as the assertion
    message.  Nothing is checked when ``url`` is empty or ``None``.
    Remaining keyword arguments are forwarded to ``captcha.image_filter``
    and ``captcha.solve``.
    """
    if not url:
        return

    rounds = 3
    for _ in range(rounds):
        raw_image = captcha.fetch(url)
        filtered = captcha.image_filter(raw_image, **params)
        symbols = captcha.solve(filtered, **params)

        assert len(symbols) == length, url
        assert not any(symbol is None for symbol in symbols), url
def parse_first_webpage(thread_index, ind, lastname, firstname, checkmiss):
    """Submit a name search and solve the captcha on the result page.

    Creates a ``Scraper`` for this thread, loads ``first_url``, posts the
    search form to ``second_url``, downloads the captcha image found on
    the response, solves it and hands over to ``parse_second_webpage``.

    Args:
        thread_index: Used in log prefixes, the log file name and the
            captcha image file name.
        ind: Name index; only written to the log.
        lastname: Value for the ``last_name`` form field.
        firstname: Value for the ``first_name`` form field.
        checkmiss: Passed through unchanged to ``parse_second_webpage``.

    Returns:
        ``config.ERROR_NO_PROXY`` when ``check_proxy_status`` rejects a
        response, ``None`` when the search response carries no captcha
        image, otherwise whatever ``parse_second_webpage`` returns.
    """
    print("+++++++++++++++++++++++++++++++++++++++++++++++++START+++++++++++++++++++++++++++++++++++++++++++++++++")

    tz = pytz.timezone('America/Los_Angeles')
    s = Scraper(use_cache=False, retries=3, timeout=30,
                proxy_file=proxy_file_name, one_proxy=True,
                log_file='logs/log{}.txt'.format(thread_index))
    logger = s.logger

    logger.info("{} Loading First URL -> {}".format(
        prefix_letter(thread_index), first_url))
    doc = s.load(first_url)
    if check_proxy_status(doc) == False:
        logger.info("{} Proxy Error".format(prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    # Read the clock once so the date and the time always describe the
    # same instant (two separate reads could straddle a minute/midnight).
    now = datetime.now(tz)
    currentdate = now.strftime('%Y-%m-%d')
    currenttime = now.strftime('%H:%M')
    logger.info("{} Current Date & Time: {} , {}".format(
        prefix_letter(thread_index), currentdate, currenttime))

    formdata = {
        'method': 'post',
        'currentdate': currentdate,
        'currenttime': currenttime,
        'startdate': '03/05/2014',
        'starttime': '08 : 00',
        'enddate': '03/05/2014',
        'endtime': '12 : 00',
        'last_name': str(lastname),
        'first_name': str(firstname),
        'middle_name': '',
        'dob': '',
        'search': 'Search'
    }
    logger.info("{} Name Index: {}, Last Name: {}, First Name: {}".format(
        prefix_letter(thread_index), ind, lastname, firstname))
    doc = s.load(second_url, post=formdata)
    if check_proxy_status(doc) is False:
        logger.info("{} Proxy Error".format(prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    img_url = doc.x('//img[@alt="Captcha image"]/@src')
    if img_url == "":
        # NOTE(review): without a captcha image there is no code to pass
        # on to the next step, so stop here explicitly -- confirm callers
        # treat None as "nothing to do".
        return None

    imagefilepath = s.join_path(
        'images/captcha_{}.jpg'.format(thread_index))
    if os.path.exists(imagefilepath):
        os.remove(imagefilepath)

    u = s.client.opener.open(img_url)
    block_sz = 8192
    # The context manager closes the file even if the download fails midway.
    with open(imagefilepath, 'wb') as f:
        while True:
            buf = u.read(block_sz)
            if not buf:
                break
            f.write(buf)

    first_captcha_code = captcha.solve(imagefilepath)
    if str(first_captcha_code) == "1":
        # Only logged: the early return is disabled in this step, so the
        # flow continues with the code the service returned.
        logger.info("Captcha Service Error")

    logger.info('{} 1st_captcha2 -> {}'.format(prefix_letter(thread_index),
                                                first_captcha_code))
    wait()
    return parse_second_webpage(s, thread_index, first_captcha_code,
                                currentdate, currenttime, lastname,
                                firstname, checkmiss)
def _labelled_value(label, html, value_pattern=r'[^\<.*]*'):
    """Return the stripped value that follows ``label`` in ``html``.

    Builds ``'<label>: (<strong>)?(<value>)<[/]strong[/]>'`` and returns
    group 2.  ``label`` is inserted into the pattern unescaped.  Raises
    ``AttributeError`` when the pattern does not match.

    NOTE(review): the default value class excludes '<', '.' and '*', so a
    value containing a dot will not match -- confirm that is intended.
    """
    pattern = (label + r': (\<strong\>)?(' + value_pattern +
               r')\<[\/]?strong[\/]?\>')
    return re.search(pattern, html, re.M | re.I | re.S).group(2).strip()


def parse_detail_page(sc_obj, thread_index, form_action_url, formdata,
                      old_captcha_code, currentdate, currenttime, lastname,
                      first_letter):
    """Load a booking detail page, pass its captcha and store the record.

    Steps: post ``formdata`` to ``form_action_url``; if the response shows
    a captcha image, download and solve it, submit the answer and let
    ``captcha_retry`` handle further captcha pages; then extract the
    labelled fields from the page with regular expressions.  A booking
    arrested today or yesterday (America/Los_Angeles) is stored as
    ``DashboardLasd``; any other date is written to the out-of-date list.
    A ``DashboardJailHistory`` row is added and the matching
    ``DashboardVineName`` rows are deleted in both cases.

    Args:
        sc_obj: Scraper object providing ``logger``, ``load``,
            ``join_path`` and ``client.opener``.
        thread_index: Used in log prefixes and the captcha file name.
        form_action_url: URL the forms are posted to.
        formdata: Form fields for the initial request.
        old_captcha_code: Sent as ``ckey`` together with the new answer.
        currentdate: Stored as the captured date.
        currenttime: Stored as the captured time.
        lastname: Together with ``first_letter`` forms the key into
            ``list_to_be_scraped`` and is passed to ``save_names``.
        first_letter: See ``lastname``.

    Returns:
        ``config.ERROR_NO_PROXY`` or ``config.ERROR_NO_CAPTCHA`` on the
        corresponding failure, otherwise ``config.ERROR_NO_NONE`` (also
        when parsing or saving failed; those errors are only logged).
    """
    logger = sc_obj.logger
    doc = sc_obj.load(form_action_url, post=formdata)
    if check_proxy_status(doc) == False:
        logger.info("{} Proxy Error of function parse_detail_page".format(
            prefix_letter(thread_index)))
        return config.ERROR_NO_PROXY

    # Stays None when the page is served without a captcha, so the log
    # line below never touches an unbound name.
    second_captcha_code = None
    img_url = doc.x('//img[@alt="Captcha image"]/@src')
    if img_url != "":
        imagefilepath = sc_obj.join_path(
            'images/captcha_{}.jpg'.format(thread_index))
        if os.path.exists(imagefilepath):
            os.remove(imagefilepath)

        u = sc_obj.client.opener.open(img_url)
        block_sz = 8192
        # The context manager closes the file even if the download fails.
        with open(imagefilepath, 'wb') as f:
            while True:
                buf = u.read(block_sz)
                if not buf:
                    break
                f.write(buf)

        second_captcha_code = captcha.solve(imagefilepath)
        if str(second_captcha_code) == "1":
            logger.info("Captcha Service Error")
            return config.ERROR_NO_CAPTCHA
        logger.info('{} 2nd_captcha2_code -> {}'.format(
            prefix_letter(thread_index), second_captcha_code))

        captcha_formdata = {
            "ckey": old_captcha_code,
            "key": second_captcha_code,
            "submit": "Submit"
        }
        headers = {
            "Host": "app4.lasd.org",
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "Referer": form_action_url,
            "Upgrade-Insecure-Requests": "1"
        }
        doc = sc_obj.load(form_action_url, post=captcha_formdata,
                          headers=headers, merge_headers=True)
        doc, captcha_error = captcha_retry(
            doc, sc_obj, thread_index, currentdate, currenttime, lastname,
            first_letter, form_action_url, True, old_captcha_code)
        if captcha_error == config.ERROR_NO_CAPTCHA:
            print("Captcha Error")
            return config.ERROR_NO_CAPTCHA
        if check_proxy_status(doc) == False:
            logger.info("{} Proxy Error of function parse_detail_page".format(
                prefix_letter(thread_index)))
            return config.ERROR_NO_PROXY

    logger.info('{} Captcha Code: Old -> {}, New -> {}'.format(
        prefix_letter(thread_index), old_captcha_code, second_captcha_code))
    logger.info('{} form_action_url -> {}'.format(
        prefix_letter(thread_index), form_action_url))

    try:
        booking_cells = doc.q("//tr[@class='Row2']/td")
        booking_html = str(''.join([item.html() for item in booking_cells]))

        BookingNo = _labelled_value('Booking No.', booking_html)
        LastName = _labelled_value('Last Name', booking_html)
        FirstName = _labelled_value('First Name', booking_html)
        MiddleName = _labelled_value('Middle Name', booking_html)
        Birthday = _labelled_value('Date Of Birth', booking_html)
        Age = _labelled_value('Age', booking_html)
        Sex = _labelled_value('Sex', booking_html)
        Race = _labelled_value('Race', booking_html)
        Hair = _labelled_value('Hair', booking_html)
        Eyes = _labelled_value('Eyes', booking_html)
        Height = _labelled_value('Height', booking_html)
        Weight = _labelled_value('Weight', booking_html)

        contents = doc.q("//tr[@class='Caption2']/td[@align='center']")
        contents = ''.join([item.html() for item in contents])
        try:
            ArrestDateStr = _labelled_value('Arrest Date', contents)
        except Exception as e:
            # Dump what was actually received, then let the outer handler
            # deal with the failure (repeating the search cannot succeed).
            print('*******************************')
            print(e)
            print(contents)
            print(BookingNo)
            print("LEN = %s" % (len(doc.q("//tr[@class='Caption2']")),))
            print('*******************************')
            raise

        # The last three '/'-separated parts are read as month, day, year.
        m, d, y = ArrestDateStr.split('/')[-3:]
        ArrestDate = y + "-" + m + "-" + d

        details_html = str(contents)
        ArrestTimeStr = _labelled_value('Arrest Time', details_html)
        ArrestTime = ArrestTimeStr[:2] + ":" + ArrestTimeStr[2:]
        CapturedDate = currentdate
        CapturedTime = currenttime

        amount = r'[.\,\w\s]*'  # value class used for the three bail totals
        ArrestAgency = _labelled_value('Arrest Agency', details_html)
        AgencyDescription = _labelled_value('Agency Description', details_html)
        DateBooked = _labelled_value('Date Booked', details_html)
        TimeBooked = _labelled_value('Time Booked', details_html)
        BookingLocation = _labelled_value('Booking Location', details_html)
        LocationDescription = _labelled_value('Location Description',
                                              details_html)
        TotalBailAmount = _labelled_value('Total Bail Amount', details_html,
                                          amount)
        TotalHoldBailAmount = _labelled_value('Total Hold Bail Amount',
                                              details_html, amount)
        GrandTotal = _labelled_value('Grand Total', details_html, amount)
        HousingLocation = _labelled_value('Housing Location', details_html)
        PermanentHousingAssignedDate = _labelled_value(
            'Permanent Housing Assigned Date', details_html)
        AssignedTime = _labelled_value('Assigned Time', details_html)
        VisitorStatus = _labelled_value('Visitor Status', details_html)
        Facility = _labelled_value('Facility', details_html)
        Address = _labelled_value('Address', details_html)
        City = _labelled_value('City', details_html)

        # One clock read, so "today" and "yesterday" are always one day apart.
        la_tz = pytz.timezone('America/Los_Angeles')
        now_la = datetime.now(la_tz)
        today = now_la.strftime('%Y-%m-%d')
        yesterday = (now_la + timedelta(days=-1)).strftime('%Y-%m-%d')
        print("************************************************************")
        print("%s %s" % (today, yesterday))
        print("Arrest =  %s" % (ArrestDate,))
        print("************************************************************")

        obj_key = lastname + "," + first_letter
        obj_booking_no = list_to_be_scraped[obj_key]
        booking_no = BookingNo
        if obj_booking_no != "":
            booking_no += config.PREFIX_VINE

        if (today == ArrestDate) or (yesterday == ArrestDate):
            booking = DashboardLasd(
                s_bookingno=booking_no, s_lastname=LastName,
                s_firstname=FirstName, s_middlename=MiddleName,
                s_birthday=Birthday, s_age=Age, s_sex=Sex, s_race=Race,
                s_hair=Hair, s_eyes=Eyes, s_height=Height, s_weight=Weight,
                s_arrestdate=ArrestDate, s_arresttime=ArrestTime,
                s_captureddate=CapturedDate, s_capturedtime=CapturedTime,
                s_arrestagency=ArrestAgency,
                s_agencydescription=AgencyDescription,
                s_datebooked=DateBooked, s_timebooked=TimeBooked,
                s_bookinglocation=BookingLocation,
                s_locationdescription=LocationDescription,
                s_totalbailamount=TotalBailAmount,
                s_totalholdbailamount=TotalHoldBailAmount,
                s_grandtotal=GrandTotal, s_housinglocation=HousingLocation,
                s_permanenthousingassigneddate=PermanentHousingAssignedDate,
                s_assignedtime=AssignedTime, s_visitorstatus=VisitorStatus,
                s_facility=Facility, s_address=Address, s_city=City,
                s_jail=1)
            try:
                db.session.add(booking)
                db.session.commit()
                if float(GrandTotal.replace(",", "")) >= 20000:
                    send_notify_email(booking)
            except Exception as e:
                logger.info(e)
                # A failed commit leaves the session unusable until it is
                # rolled back; without this the inserts below fail too.
                db.session.rollback()
            logger.info('+++++++{} Data was saved {}, {}, {}, {}'.format(
                prefix_letter(thread_index), BookingNo, ArrestDate,
                LastName, FirstName))
            save_names(lastname, first_letter, success_filename, "",
                       booking_no, ArrestDate)
        else:
            logger.info('{} Arrest Date is {}, {}'.format(
                prefix_letter(thread_index), ArrestDate, booking_no))
            save_names(lastname, first_letter, outofdate_filename, "",
                       booking_no, ArrestDate)
            save_names(lastname, first_letter, success_filename,
                       "Out Of Date", booking_no, ArrestDate)

        # NOTE(review): placed after the if/else so every scraped booking
        # gets a history row -- confirm against the original indentation.
        booking_history = DashboardJailHistory(
            s_bookingno=BookingNo, s_lastname=LastName,
            s_firstname=FirstName, s_captureddate=CapturedDate,
            s_capturedtime=CapturedTime, s_duplication=0,
            s_arrestdate=ArrestDate)
        try:
            db.session.add(booking_history)
            db.session.commit()
        except Exception as e:
            logger.info(e)
            db.session.rollback()

        try:
            print("Delete VINE Booking No for  %s" % (obj_booking_no,))
            db.session.query(DashboardVineName).filter(
                DashboardVineName.BookingNo == obj_booking_no).delete()
            db.session.commit()
        except Exception as e:
            print(e)
            db.session.rollback()
    except Exception as e:
        # Best effort: a page that cannot be parsed is logged, not fatal.
        logger.info(e)
        show_exception_detail(e)

    return config.ERROR_NO_NONE
def captcha_retry(doc, s, thread_index, currentdate, currenttime, lastname,
                  firstname, url, merge_headers, old_captcha_code):
    """Keep solving captchas until the response no longer shows one.

    While ``doc`` contains a captcha image, the image is downloaded,
    solved and the answer is posted; the response becomes the new ``doc``.
    With ``merge_headers == True`` (2nd step) the answer is posted to
    ``url`` with browser-like headers and the previous code as ``ckey``;
    otherwise (1st step) it is posted to ``second_url`` with the answer
    used for both ``key`` and ``ckey``.

    Args:
        doc: Response document to inspect first.
        s: Scraper object providing ``logger``, ``load``, ``join_path``
            and ``client.opener``.
        thread_index: Used in log prefixes and the captcha file name.
        currentdate, currenttime, lastname, firstname: Not used here.
        url: Post target and ``Referer`` for the 2nd step.
        merge_headers: Selects the 2nd-step behaviour when equal to True.
        old_captcha_code: ``ckey`` value for the first 2nd-step attempt.

    Returns:
        ``(doc, config.ERROR_NO_CAPTCHA)`` once the attempt counter has
        exceeded ``config.captcha_max_tries``, otherwise
        ``(doc, config.ERROR_NO_NONE)`` with the captcha-free document.
    """
    logger = s.logger
    second_step = (merge_headers == True)
    old_code = old_captcha_code  # only read in the 2nd-step branch
    attempts = 0

    img_url = doc.x('//img[@alt="Captcha image"]/@src')
    while img_url != "":
        if attempts > config.captcha_max_tries:
            print("--------------------Captcha tries reached as max value---------------")
            return doc, config.ERROR_NO_CAPTCHA
        attempts += 1

        print("*****************************************")
        if second_step:
            print("Try to solve captcha in 2nd step: %s" % (img_url,))
        else:
            print("Try to solve captcha in 1st step: %s" % (img_url,))

        imagefilepath = s.join_path(
            'images/captcha_{}.jpg'.format(thread_index))
        if os.path.exists(imagefilepath):
            os.remove(imagefilepath)

        u = s.client.opener.open(img_url)
        block_sz = 8192
        # The context manager closes the file even if the download fails.
        with open(imagefilepath, 'wb') as f:
            while True:
                buf = u.read(block_sz)
                if not buf:
                    break
                f.write(buf)

        first_captcha_code = captcha.solve(imagefilepath)
        if str(first_captcha_code) == "1":
            # Only logged: the early return is disabled here, the code is
            # submitted anyway and the loop bound limits the retries.
            logger.info("Captcha Service Error")

        if second_step:
            logger.info('{} 2nd_captcha2_code -> {}'.format(
                prefix_letter(thread_index), first_captcha_code))
        else:
            logger.info('{} 1st_captcha2_code -> {}'.format(
                prefix_letter(thread_index), first_captcha_code))
        wait()

        if second_step:
            formdata = {
                "ckey": old_code,
                "key": first_captcha_code,
                "submit": "Submit"
            }
            headers = {
                "Host": "app4.lasd.org",
                "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
                "Accept-Encoding": "gzip, deflate, br",
                "Referer": url,
                "Upgrade-Insecure-Requests": "1"
            }
            doc = s.load(url, post=formdata, headers=headers,
                         merge_headers=True)
        else:
            formdata = {
                "key": first_captcha_code,
                "ckey": first_captcha_code,
                "submit": "Submit"
            }
            doc = s.load(second_url, post=formdata)

        # Re-check the new response; the answer just sent becomes the
        # "old" code for the next 2nd-step attempt.
        img_url = doc.x('//img[@alt="Captcha image"]/@src')
        old_code = first_captcha_code

    if second_step:
        print("********2nd Step : Solved Captcha Successfully**********")
    else:
        print("********1st Step : Solved Captcha Successfully**********")
    return doc, config.ERROR_NO_NONE
def apiCall(self, method, params, retry=False):
    """Call a VK API method and return the ``response`` part of the reply.

    The request is throttled so that calls are at least ``CALL_INTERVAL``
    seconds apart.  Error replies are handled by code:

    * 14 (captcha needed): the captcha is stored via ``captcha.receive``;
      a key found in ``captcha.txt`` or returned by ``captcha.solve`` is
      added to ``params`` and the call is repeated.
    * 5 (auth error): ``self.login()`` is called and the call repeated.
    * codes listed in ``self.ignored_errors`` for this method: logged,
      optionally retried once.
    * anything else: logged, ``None`` is returned.

    Args:
        method: API method name, appended to the base URL.
        params: Query parameters; this dict is modified in place (``v``
            and, when needed, ``captcha_sid`` / ``captcha_key``).
        retry: True when this call is already a retry, so a second
            failure gives up instead of retrying again.

    Returns:
        The value under ``'response'`` in the decoded reply, or ``None``
        when the reply was not valid JSON or the error was not recovered.
    """
    # NOTE(review): the whole body, including the recursive retries, is
    # assumed to run under api_lock, which requires a re-entrant lock --
    # confirm against the class definition.
    with self.api_lock:
        params['v'] = self.api_version
        url = ('https://api.vk.com/method/' + method + '?' +
               urllib.parse.urlencode(params) +
               '&access_token=' + self.getToken())

        now = time.time()
        if now - self.last_call < CALL_INTERVAL:
            time.sleep(CALL_INTERVAL - now + self.last_call)
        # Record the moment the request really goes out.  Storing the
        # pre-sleep time would let the next call start too early.
        self.last_call = time.time()

        try:
            json_string = urllib.request.urlopen(
                url, timeout=self.timeout).read()
        except OSError as e:
            log.warning(method + ' failed ({})'.format(e))
            time.sleep(1)
            return self.apiCall(method, params)
        except Exception as e:
            if retry:
                log.error('({}) {}: {}'.format(
                    method, e.__class__.__name__, str(e)), True)
                return None
            time.sleep(1)
            log.warning('({}) {}: {}, retrying'.format(
                method, e.__class__.__name__, str(e)))
            return self.apiCall(method, params, True)

        try:
            data_array = json.loads(json_string.decode('utf-8'))
        except json.decoder.JSONDecodeError:
            log.error('Invalid JSON')
            data_array = None

        if self.logging:
            with open('inf.log', 'a') as f:
                print('[{}]\nmethod: {}, params: {}\nresponse: {}\n'.format(
                    time.strftime(log.datetime_format, time.localtime()),
                    method, json.dumps(params), json.dumps(data_array)),
                    file=f)

        # Measured from before the throttle sleep, as before.
        duration = time.time() - now
        if duration > self.timeout:
            log.warning('{} timeout'.format(method))

        if data_array is None:
            return None

        if 'response' in data_array:
            if self.captcha_delayed or self.externalCaptcha:
                self.captcha_delayed = 0
                self.externalCaptcha = False
                log.info('Captcha no longer needed')
                self.captcha_sid = ''
                captcha.delete()
            return data_array['response']

        if 'error' not in data_array:
            # Neither a response nor an error: just try again.
            return self.apiCall(method, params)

        error = data_array['error']
        code = error['error_code']

        if code == 14:  # Captcha needed
            self.externalCaptcha = False
            if self.captcha_delayed == 0:
                log.warning('Captcha needed')
                self.captcha_sid = error['captcha_sid']
                with open(accounts.getFile('captcha.txt'), 'w') as f:
                    f.write('sid ' + self.captcha_sid)
                captcha.receive(error['captcha_img'])
            elif self.captcha_sid:
                # Closed deterministically instead of leaking the handle.
                with open(accounts.getFile('captcha.txt')) as f:
                    key = f.read()
                if key.startswith('key'):
                    log.info('Trying a key from captcha.txt')
                    params['captcha_sid'] = self.captcha_sid
                    params['captcha_key'] = key.split()[1]
                    self.captcha_sid = ''
                    captcha.delete()
                    self.captcha_delayed = 0
                    self.externalCaptcha = True
                    return self.apiCall(method, params)

            if self.captcha_delayed == self.checks_before_antigate:
                log.info('Using antigate')
                ans = captcha.solve()
                if ans is None:
                    self.captchaError = True
                    time.sleep(5)
                elif not ans:
                    captcha.receive(error['captcha_img'])
                    self.captcha_sid = error['captcha_sid']
                    return self.apiCall(method, params)
                else:
                    params['captcha_sid'] = self.captcha_sid
                    params['captcha_key'] = ans
                    self.captcha_delayed = 0
            else:
                time.sleep(self.captcha_check_interval)
                self.captcha_delayed += 1
            return self.apiCall(method, params)

        if code == 5:  # Auth error
            self.login()
            return self.apiCall(method, params)

        if (code, method) in self.ignored_errors:
            handler = self.ignored_errors[(code, method)]
            if not handler:
                return None
            if retry or not handler[1]:
                log.warning(handler[0])
                return None
            log.warning(handler[0] + ', retrying')
            time.sleep(3)
            return self.apiCall(method, params, True)

        log.error('{}, params {}\ncode {}: {}'.format(
            method, json.dumps(params), code, error.get('error_msg')))
        return None
print "found the success page, do the jump" jump_link = browser.find_by_xpath('/html/body/table/tbody/tr[2]/td/div/a') jump_link.click() elif entrance_no == 5: try_time = 0 try_time_max = 50 while True: captcha.do_delete() os.mkdir(captcha_folder) print "try to recognize the captcha.." captcha_img = browser.find_by_xpath('//*[@id="regimg"]') captcha_img = captcha_img[0] get_captcha(browser.driver, captcha_img, captcha_folder) captcha.preprocess('captcha.bmp', 'captcha_output.bmp') captcha_word = captcha.solve('captcha_output.bmp') if captcha_word == '': if try_time < try_time_max: print "download_from_niuniu::captcha.solve() failed, try_time = " + str(try_time) try_time += 1 print "refresh the captcha" # refresh_captcha_link = browser.find_by_xpath('//*[@id="loginHtml"]/a[1]') # '//*[@id="loginHtml"]/a[1]' # refresh_captcha_link.click() browser.reload() continue else: print "download_from_niuniu::captcha.solve() failed, too many attempts, abort." browser.quit() return ''
# Drive Firefox through the reddit sign-up form: open the page behind the
# 'login-required' link, fill in the credentials, solve the captcha image
# and press the 'create account' button.
browser = webdriver.Firefox()
browser.get('http://reddit.com')
signup_href = browser.find_element_by_class_name(
    'login-required').get_attribute('href')
browser.get(signup_href)

# Each field is clicked first and then typed into.
for field_id, text in (('user_reg', username),
                       ('passwd_reg', password),
                       ('passwd2_reg', password)):
    browser.find_element_by_id(field_id).click()
    browser.find_element_by_id(field_id).send_keys(text)

captcha_image = browser.find_element_by_class_name('capimage')
captcha_url = captcha_image.get_attribute('src')
print("solving captcha: " + captcha_url)
print("be patient ...")
solved_captcha = captcha.solve(captcha_url)
print("done.")

browser.find_element_by_id('captcha_').click()
browser.find_element_by_id('captcha_').send_keys(solved_captcha)

# Only the first button labelled 'create account' is pressed.
for btn in browser.find_elements_by_class_name('button'):
    if btn.text != 'create account':
        continue
    btn.click()
    break