def test_content_type_error(self):
    """Check that ``AmazonCaptcha.fromlink`` rejects an unsupported link.

    The call must raise ``ContentTypeError`` and the exception text must
    mention that the Content-Type is not supported.
    """
    link = 'https://ibb.co/kh13H5P'
    with self.assertRaises(ContentTypeError) as context:
        AmazonCaptcha.fromlink(link)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn('is not supported as a Content-Type',
                  str(context.exception))
def solve_captcha(session, form_element, pdp_url: str):
    """Try to solve the captcha in ``form_element`` and submit the answer.

    Args:
        session: object whose ``get(url)`` returns a response exposing
            ``text`` and ``status_code``.
        form_element: parsed captcha ``<form>`` element; its ``action``
            attribute and ``<input>`` children are used to build the
            validation request.
        pdp_url: original page URL, used only for its scheme and host.

    Returns:
        ``(response.text, response.status_code)`` after submitting a solved
        captcha, or ``("", 404)`` when no captcha image is found or the
        captcha could not be solved.
    """
    log.warning("Encountered CAPTCHA. Attempting to solve.")

    # Starting from the form, get the inputs and image.
    # Example image URL:
    # https://images-na.ssl-images-amazon.com/captcha/usvmgloq/Captcha_kwrrnqwkph.jpg
    captcha_images = form_element.xpath(
        '//img[contains(@src, "amazon.com/captcha/")]')
    if not captcha_images:
        # The previous failure path built an element from an empty string,
        # which the HTML parser rejects; return an empty body instead.
        return "", 404

    link = captcha_images[0].attrib["src"]
    captcha = AmazonCaptcha.fromlink(link)
    solution = captcha.solve()
    # solve() reports failure with the truthy sentinel "Not solved"; do not
    # submit that text as if it were an answer.
    if not solution or solution == "Not solved":
        return "", 404

    # The text input carries the answer; every other input keeps the value
    # the page supplied.
    input_dict = {}
    for form_input in form_element.xpath(".//input"):
        if form_input.type == "text":
            input_dict[form_input.name] = solution
        else:
            input_dict[form_input.name] = form_input.value

    f = furl(pdp_url)  # Use the original URL to get the schema and host
    f = f.set(path=form_element.attrib["action"])
    f.add(args=input_dict)
    # furl exposes the assembled address as ``.url``.
    response = session.get(f.url)
    log.debug(f"Captcha response was {response.status_code}")
    return response.text, response.status_code
def get_captcha_help(self):
    """Solve the captcha shown in the browser, retrying on a failed solve.

    Returns immediately when ``on_captcha_page()`` is false. When the solver
    answers ``"Not solved"`` the page is refreshed and this method calls
    itself again. Any exception is logged, followed by a refresh and a
    five-second pause.
    """
    if not self.on_captcha_page():
        log.info("Not on captcha page.")
        return

    try:
        log.info("Stuck on a captcha... Lets try to solve it.")
        solver = AmazonCaptcha.fromdriver(self.driver)
        answer = solver.solve()
        log.info(f"The solution is: {answer}")

        if answer != "Not solved":
            # Solved: record a screenshot, then type the answer and submit.
            self.save_screenshot("captcha")
            answer_box = self.driver.find_element_by_xpath(
                '//*[@id="captchacharacters"]')
            answer_box.send_keys(answer + Keys.RETURN)
            return

        # Unsolved: reload for a fresh captcha and try again.
        log.info(
            f"Failed to solve {solver.image_link}, lets reload and get a new captcha."
        )
        self.driver.refresh()
        time.sleep(5)
        self.get_captcha_help()
    except Exception as err:
        log.debug(err)
        log.info("Error trying to solve captcha. Refresh and retry.")
        self.driver.refresh()
        time.sleep(5)
def handle_captcha(self):
    """Wait for the captcha page, then try to solve and submit the captcha.

    If the captcha form is missing, or solving fails, the page is refreshed.
    """
    # wait for captcha to load
    time.sleep(DEFAULT_MAX_WEIRD_PAGE_DELAY)

    try:
        captcha_form = self.driver.find_element_by_xpath(
            '//form[@action="/errors/validateCaptcha"]')
        if not captcha_form:
            return

        try:
            log.info("Stuck on a captcha... Lets try to solve it.")
            solver = AmazonCaptcha.fromdriver(self.driver)
            answer = solver.solve()
            log.info(f"The solution is: {answer}")

            if answer == "Not solved":
                log.info(
                    f"Failed to solve {solver.image_link}, lets reload and get a new captcha."
                )
                self.driver.refresh()
                return

            # Either notify or take a screenshot before submitting.
            if self.no_screenshots:
                self.notification_handler.send_notification("Solving captcha")
            else:
                self.save_screenshot("captcha")

            answer_box = self.driver.find_element_by_xpath(
                '//*[@id="captchacharacters"]')
            answer_box.send_keys(answer + Keys.RETURN)
        except Exception as err:
            log.debug(err)
            log.info("Error trying to solve captcha. Refresh and retry.")
            self.driver.refresh()
    except exceptions.NoSuchElementException:
        log.error("captcha page does not contain captcha element")
        log.error("refreshing")
        self.driver.refresh()
def validate_captcha(chromeDriver):
    """Open the captcha validation page, solve the captcha and submit it.

    Sleeps one second before and after the interaction.
    """
    time.sleep(1)
    l.info("Solving CAPTCHA")
    chromeDriver.get('https://www.amazon.com/errors/validateCaptcha')

    answer = AmazonCaptcha.fromdriver(chromeDriver).solve()

    answer_box = chromeDriver.find_element_by_id('captchacharacters')
    answer_box.send_keys(answer)

    submit_button = chromeDriver.find_element_by_class_name('a-button-text')
    submit_button.click()
    time.sleep(1)
def solveCaptcha(d: webdriver.Chrome):
    """Solve and submit the captcha when the page title is ``'Amazon.com'``.

    Does nothing for any other title. Failures are reported through ``l``
    and are not re-raised.
    """
    if d.title != 'Amazon.com':
        return
    try:
        # Short random pause before interacting with the page.
        time.sleep(random.randint(1, 3))
        captcha = AmazonCaptcha.fromdriver(d).solve()
        print(captcha + 'doc: ' + d.page_source)
        d.find_element_by_id('captchacharacters').send_keys(captcha)
        d.find_element_by_tag_name('button').click()
    except Exception:
        # Exception, not a bare except, so Ctrl-C and SystemExit still
        # propagate.
        l('Unable to pass captcha!!!')
def test_fromdriver(self):
    """Solve captchas through a headless Chrome driver.

    Loads the captcha page five times, records the length of every solution
    and passes when at least one of them is six characters long.
    """
    capabilities = webdriver.ChromeCapabilities()
    capabilities.add_argument('--headless')
    capabilities.add_argument('--no-sandbox')
    driver = webdriver.ChromeDriver(ChromeDriverManager().install(),
                                    desired_capabilities=capabilities.desired)

    solutions = []
    try:
        for _ in range(5):
            driver.get('https://www.amazon.com/errors/validateCaptcha')
            captcha = AmazonCaptcha.fromdriver(driver)
            solutions.append(len(captcha.solve()))
    finally:
        # Always release the browser, even when a page load or solve raises.
        driver.quit()

    self.assertIn(6, solutions)
def createTask():
    """Store the submitted captcha image, solve it and persist the answer.

    Reads the base64 image from ``task.body`` of the JSON request, writes it
    to ``<taskId>.jpeg``, writes the solution to ``<taskId>_result.txt`` and
    returns a dict carrying ``errorId`` 0 and the generated ``taskId``.
    """
    taskId = random.randint(10000, 100000)
    payload = request.get_json()

    image_name = f"{taskId}.jpeg"
    with open(image_name, 'wb') as image_file:
        encoded_body = payload['task']['body']
        image_file.write(base64.b64decode(encoded_body))

    answer = AmazonCaptcha(image_name).solve()

    result_name = f"{taskId}_result.txt"
    with open(result_name, 'w') as result_file:
        result_file.write(answer)

    return {"errorId": 0, "taskId": taskId}
def get_captcha_help(self):
    """Try to solve the captcha on the current page; notify the user on error.

    On a ``"Not solved"`` answer the page is reloaded through JavaScript and
    the method calls itself again after five seconds.
    """
    try:
        log.info("Stuck on a captcha... Lets try to solve it.")
        solver = AmazonCaptcha.from_webdriver(self.driver)
        answer = solver.solve()
        log.info(f"The solution is: {answer}")

        if answer != "Not solved":
            answer_box = self.driver.find_element_by_xpath(
                '//*[@id="captchacharacters"]')
            answer_box.send_keys(answer + Keys.RETURN)
            return

        self.driver.execute_script("window.location.reload()")
        time.sleep(5)
        self.get_captcha_help()
    except Exception as err:
        log.debug(err)
        log.info(
            "We were unable to solve the captcha, need help from the user."
        )
        self.notification_handler.send_notification(
            "Amazon bot is stuck on a captcha!")
def test_corrupted_image_with_both_overlap_and_separated_letter(self):
    """``corrupted_2.png`` from the captcha folder must solve to ``KMGMXE``."""
    image_path = os.path.join(captchas_folder, 'corrupted_2.png')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'KMGMXE')
def test_corrupted_image_with_letters_overlapping(self):
    """``corrupted_1.png`` from the captcha folder must solve to ``BPXHGH``."""
    image_path = os.path.join(captchas_folder, 'corrupted_1.png')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'BPXHGH')
def test_corrupted_image_with_last_letter_ending_at_the_beginning(self):
    """``corrupted.png`` from the captcha folder must solve to ``UGXGMM``."""
    image_path = os.path.join(captchas_folder, 'corrupted.png')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'UGXGMM')
def test_image_link_property_warning(self):
    """A captcha built from a local file must report ``image_link`` as None."""
    captcha = AmazonCaptcha(
        os.path.join(captchas_folder, 'notcorrupted.jpg'))
    # assertIsNone is the dedicated unittest check for None.
    self.assertIsNone(captcha.image_link)
def test_not_corrupted_image(self):
    """``notcorrupted.jpg`` from the captcha folder must solve to ``KRJNBY``."""
    image_path = os.path.join(captchas_folder, 'notcorrupted.jpg')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'KRJNBY')
import requests
import lxml  # not referenced directly; BeautifulSoup is asked for the "lxml" parser by name
from bs4 import BeautifulSoup
from amazoncaptcha import AmazonCaptcha

# Page fetched by this script; the code below expects the response to be a
# captcha challenge page.
amazon_url = "https://www.amazon.com/Instant-Pot-Duo-Evo-Plus/dp/B07W55DDFB/ref=sr_1_1?qid=1597662463"
amz_headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Accept-Language": "en-US,en;q=0.5",
}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(amazon_url, headers=amz_headers, timeout=30)
captcha_webpage = response.text
soup = BeautifulSoup(captcha_webpage, "lxml")

# Markup the selector below is written for:
#   <div class="a-row a-text-center">
#     <img src="https://images-na.ssl-images-amazon.com/captcha/twhhswbk/Captcha_fwgzoazcal.jpg">
#   </div>
captcha_images = soup.select(".a-text-center>img")
if not captcha_images:
    # Exit with a readable message instead of an IndexError when the page
    # holds no matching image.
    raise SystemExit("No captcha image found in the response.")

captcha_link = captcha_images[0].get("src")
print(captcha_link)

captcha = AmazonCaptcha.fromlink(captcha_link)
solution = captcha.solve()
print(solution)
def test_fromlink_with_predefined_undolvable_captcha(self):
    """The linked image must yield the ``'Not solved'`` answer."""
    image_url = 'https://i.ibb.co/Cn2J1mS/notsolved.jpg'
    answer = AmazonCaptcha.fromlink(image_url).solve()
    self.assertEqual(answer, 'Not solved')
def test_corrupted_1(self):
    """``tests/captchas/corrupted_1.png`` must solve to ``BPXHGH``."""
    captcha = AmazonCaptcha('tests/captchas/corrupted_1.png')
    answer = captcha.solve()
    self.assertEqual(answer, 'BPXHGH')
def test_corrupted_2(self):
    """``tests/captchas/corrupted_2.png`` must solve to ``KMGMXE``."""
    captcha = AmazonCaptcha('tests/captchas/corrupted_2.png')
    answer = captcha.solve()
    self.assertEqual(answer, 'KMGMXE')
def test_notsolved_1(self):
    """``tests/captchas/notsolved_1.jpg`` must yield ``'Not solved'``."""
    captcha = AmazonCaptcha('tests/captchas/notsolved_1.jpg')
    answer = captcha.solve()
    self.assertEqual(answer, 'Not solved')
def test_notcorrupted(self):
    """``tests/captchas/notcorrupted.jpg`` must solve to ``KRJNBY``."""
    captcha = AmazonCaptcha('tests/captchas/notcorrupted.jpg')
    answer = captcha.solve()
    self.assertEqual(answer, 'KRJNBY')
def test_image_with_6_unrecognizable_letters(self):
    """``notsolved.jpg`` from the captcha folder must yield ``'Not solved'``."""
    image_path = os.path.join(captchas_folder, 'notsolved.jpg')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'Not solved')
def test_totally_broken_image(self):
    """``notsolved_1.jpg`` from the captcha folder must yield ``'Not solved'``."""
    image_path = os.path.join(captchas_folder, 'notsolved_1.jpg')
    captcha = AmazonCaptcha(image_path)
    answer = captcha.solve()
    self.assertEqual(answer, 'Not solved')
def run(self):
    """Monitor the configured item forever and try to buy matching offers.

    Each outer iteration requests the offer listing for ``self.item['asin']``
    and inspects every offer's whole-number price. For an offer whose price
    lies between ``self.item['min_price']`` and ``self.item['max_price']``
    (inclusive) an inner loop repeatedly posts a "turbo initiate" checkout
    request, solving a captcha when one is returned, and hands the response
    to ``self.check_out``. The inner loop ends only once ``self.timeout_buy``
    seconds have elapsed.

    Exceptions raised while monitoring or buying are logged and swallowed,
    so the method never returns.
    """
    while True:
        # calc next time
        next_time_monitor = time.time() + self.delay_monitor
        self.logger.info('checking stock')
        try:
            # randomize user agent
            user_agent = self.get_random_user_agent()
            # randomize proxy
            proxies = self.get_random_proxy()
            # send ajax request
            start_time = time.time()
            r = requests.get(
                f"{AMAZON_SMILE_BASE_URL}/gp/aod/ajax?asin={self.item['asin']}",
                cookies={'session-id': ''},
                headers={'user-agent': user_agent},
                proxies=proxies)
            self.logger.debug(
                f'ajax request took {int(1000 * (time.time() - start_time))} ms'
            )
            self.logger.debug(
                f'ajax request returned status code {r.status_code}')
            if r.status_code == 200:
                # Collect the pinned offer and the regular offers.
                offer_divs = html.fromstring(r.text).xpath(
                    "//div[@id='aod-sticky-pinned-offer'] | //div[@id='aod-offer']"
                )
                for offer_div in offer_divs:
                    price_spans = offer_div.xpath(
                        ".//span[@class='a-price-whole']")
                    if price_spans:
                        # Only the whole-number part of the price is parsed;
                        # thousands separators are stripped first.
                        price = int(price_spans[0].text.replace(',', ''))
                        self.logger.info(f'offer for ${price}')
                        # check price
                        if self.item['min_price'] <= price <= self.item[
                                'max_price']:
                            self.logger.success('price in range')
                            # get the offering id
                            offering_id = offer_div.xpath(
                                ".//input[@name='offeringID.1']")[0].value
                            self.logger.debug(
                                f'offering_id = {offering_id}')
                            # build data
                            data = {
                                'offerListing.1': offering_id,
                                'quantity.1': '1',
                            }
                            # create session
                            s = requests.Session()
                            # The session-id cookie value doubles as the
                            # checkout CSRF token header.
                            s.headers = {
                                'content-type':
                                'application/x-www-form-urlencoded',
                                'x-amz-checkout-csrf-token':
                                self.cookies['session-id'],
                            }
                            for n, v in self.cookies.items():
                                s.cookies.set(n, v)
                            # calc timeout time
                            timeout_time = time.time() + self.timeout_buy
                            while True:
                                # calc next time
                                next_time_buy = time.time(
                                ) + self.delay_buy
                                self.logger.info('trying to cart')
                                # randomize user agent
                                # NOTE(review): called without ``self.`` here
                                # but as a method above; confirm a
                                # module-level function of this name exists.
                                s.headers.update({
                                    'user-agent':
                                    get_random_user_agent()
                                })
                                # randomize proxy
                                # NOTE(review): same ``self.`` question as
                                # for the user agent above.
                                s.proxies = get_random_proxy()
                                # send turbo init request
                                start_time = time.time()
                                r = s.post(
                                    f'{AMAZON_SMILE_BASE_URL}/checkout/turbo-initiate?pipelineType=turbo',
                                    data)
                                self.logger.debug(
                                    f'turbo init request took {calc_time_delta(start_time)} ms'
                                )
                                self.logger.debug(
                                    f'turbo init request returned status code {r.status_code}'
                                )
                                if r.status_code == 200:
                                    # A body of a single space is treated as
                                    # "could not cart".
                                    if r.text != ' ':
                                        self.logger.success('carted')
                                        # check for captcha
                                        captcha_forms = html.fromstring(
                                            r.text
                                        ).xpath(
                                            '//form[contains(@action, "validateCaptcha")]'
                                        )
                                        if captcha_forms:
                                            self.logger.info('got captcha')
                                            # try to solve captcha
                                            captcha_form = captcha_forms[0]
                                            # The leading '//' makes this
                                            # search the whole document, not
                                            # only the form's subtree.
                                            captcha_img_link = captcha_form.xpath(
                                                '//img[contains(@src, "amazon.com/captcha/")]'
                                            )[0].attrib['src']
                                            captcha_solution = AmazonCaptcha.fromlink(
                                                captcha_img_link).solve()
                                            # check for captcha solution
                                            # NOTE(review): this is a plain
                                            # truthiness check; confirm what
                                            # solve() returns on failure.
                                            if captcha_solution:
                                                self.logger.success(
                                                    'solved captcha')
                                                self.logger.debug(
                                                    f'captcha_solution = {captcha_solution}'
                                                )
                                                # send validate captcha request
                                                captcha_inputs = captcha_form.xpath(
                                                    './/input')
                                                # The text input carries the
                                                # answer; other inputs keep
                                                # their own values.
                                                args = {
                                                    captcha_input.name:
                                                    captcha_solution
                                                    if captcha_input.type
                                                    == 'text' else
                                                    captcha_input.value
                                                    for captcha_input in
                                                    captcha_inputs
                                                }
                                                f = furl(
                                                    AMAZON_SMILE_BASE_URL)
                                                f.set(path=captcha_form.
                                                      attrib['action'])
                                                f.add(args=args)
                                                start_time = time.time()
                                                r = s.get(f.url)
                                                self.logger.debug(
                                                    f'validate captcha request took {calc_time_delta(start_time)} ms'
                                                )
                                                self.logger.debug(
                                                    f'validate captcha request returned status code {r.status_code}'
                                                )
                                                self.check_out(r.text, s)
                                            # no captcha solution
                                            else:
                                                self.logger.warning(
                                                    'could not solve captcha'
                                                )
                                        # no captcha
                                        else:
                                            self.check_out(r.text, s)
                                    # no stock
                                    else:
                                        self.logger.warning(
                                            'could not cart')
                                # check for timeout
                                if timeout_time - time.time() < 0:
                                    self.logger.info(
                                        'timed out trying to buy')
                                    break
                                sleep_time_left(next_time_buy)
        except Exception as e:
            # Log and keep monitoring; one failed cycle must not stop the loop.
            self.logger.error(e)
        sleep_time_left(next_time_monitor)
def test_fromlink_with_predefined_undolvable_captcha_and_keep_logs(self):
    """Solving an unsolvable captcha with ``keep_logs=True`` writes a log.

    The log is expected as ``not-solved-captcha.log`` in the current
    working directory.
    """
    log_name = 'not-solved-captcha.log'
    # A log left behind by an earlier run would make the assertion pass even
    # if this call wrote nothing, so remove it first.
    try:
        os.remove(log_name)
    except FileNotFoundError:
        pass

    link = 'https://i.ibb.co/Cn2J1mS/notsolved.jpg'
    captcha = AmazonCaptcha.fromlink(link)
    captcha.solve(keep_logs=True)

    self.assertIn(log_name, os.listdir())
def login(self):
    """Drive the browser through the sign-in flow.

    Steps, in order: enter the e-mail if that field appears, abort if the
    auth error box is present, tick "remember me", enter the password, try
    to solve a captcha if the captcha form is present, then wait while the
    page title is one of ``TWOFA_TITLES``.
    """
    log.info("Email")
    email_field = None
    password_field = None
    timeout = self.get_timeout()
    # Poll until either the e-mail field or the password field shows up, or
    # the timeout passes. Finding the password field first means the e-mail
    # step is not needed.
    while True:
        try:
            email_field = self.driver.find_element_by_xpath('//*[@id="ap_email"]')
            break
        except exceptions.NoSuchElementException:
            try:
                password_field = self.driver.find_element_by_xpath(
                    '//*[@id="ap_password"]'
                )
                break
            except exceptions.NoSuchElementException:
                pass
        if time.time() > timeout:
            break

    if email_field:
        try:
            email_field.send_keys(self.username + Keys.RETURN)
        except exceptions.ElementNotInteractableException:
            log.info("Email not needed.")
    else:
        log.info("Email not needed.")

    # find_elements (plural) returns a list, so a missing box is falsy here
    # rather than an exception.
    if self.driver.find_elements_by_xpath('//*[@id="auth-error-message-box"]'):
        log.error("Login failed, delete your credentials file")
        # Pause for four minutes before terminating the process.
        time.sleep(240)
        # NOTE(review): ``exit`` is the site-provided builtin; sys.exit is
        # the usual choice in non-interactive code.
        exit(1)

    time.sleep(self.page_wait_delay())

    log.info("Remember me checkbox")
    try:
        self.driver.find_element_by_xpath('//*[@name="rememberMe"]').click()
    except exceptions.NoSuchElementException:
        log.error("Remember me checkbox did not exist")

    log.info("Password")
    # Any password field found during the e-mail step is discarded and
    # looked up again here.
    password_field = None
    timeout = self.get_timeout()
    current_page = self.driver.title
    while True:
        try:
            password_field = self.driver.find_element_by_xpath(
                '//*[@id="ap_password"]'
            )
            break
        except exceptions.NoSuchElementException:
            pass
        if time.time() > timeout:
            break
    if password_field:
        password_field.send_keys(self.password + Keys.RETURN)
        self.wait_for_page_change(current_page)
    else:
        log.error("Password entry box did not exist")

    # check for captcha
    try:
        if self.driver.find_element_by_xpath(
            '//form[@action="/errors/validateCaptcha"]'
        ):
            try:
                log.info("Stuck on a captcha... Lets try to solve it.")
                captcha = AmazonCaptcha.fromdriver(self.driver)
                solution = captcha.solve()
                log.info(f"The solution is: {solution}")
                if solution == "Not solved":
                    log.info(
                        f"Failed to solve {captcha.image_link}, lets reload and get a new captcha."
                    )
                    # Only a single attempt is made; after the refresh the
                    # flow simply continues below.
                    self.driver.refresh()
                else:
                    self.send_notification(
                        "Solving catpcha", "captcha", self.take_screenshots
                    )
                    self.driver.find_element_by_xpath(
                        '//*[@id="captchacharacters"]'
                    ).send_keys(solution + Keys.RETURN)
            except Exception as e:
                log.debug(e)
                log.info("Error trying to solve captcha. Refresh and retry.")
                self.driver.refresh()
    except exceptions.NoSuchElementException:
        # No captcha form on the page: the normal case.
        log.debug("login page did not have captcha element")

    # time.sleep(self.page_wait_delay())
    if self.driver.title in TWOFA_TITLES:
        log.info("enter in your two-step verification code in browser")
        # Poll until the title leaves the two-factor set; there is no timeout.
        while self.driver.title in TWOFA_TITLES:
            time.sleep(0.2)
    log.info(f"Logged in as {self.username}")
def do_GET(self):
    """Answer a GET by solving the captcha whose link follows the leading '/'.

    The response is set up first; the request path minus its first
    character is then passed to ``AmazonCaptcha.fromlink`` and the solution
    is written back UTF-8 encoded.
    """
    self._set_response()
    image_link = self.path[1:]
    answer = AmazonCaptcha.fromlink(image_link).solve()
    self.wfile.write(answer.encode('utf-8'))
def test_corrupted(self):
    """``tests/captchas/corrupted.png`` must solve to ``UGXGMM``."""
    captcha = AmazonCaptcha('tests/captchas/corrupted.png')
    answer = captcha.solve()
    self.assertEqual(answer, 'UGXGMM')