Ejemplo n.º 1
0
    def test_content_type_error(self):
        """Check that ``fromlink`` rejects a link with an unsupported Content-Type.

        ``AmazonCaptcha.fromlink`` is expected to raise ``ContentTypeError``
        for the link below, and the exception text must mention that the
        Content-Type is not supported.
        """
        link = 'https://ibb.co/kh13H5P'

        with self.assertRaises(ContentTypeError) as context:
            AmazonCaptcha.fromlink(link)

        # assertIn reports both the expected fragment and the actual message
        # on failure, whereas assertTrue(a in b) only says "False is not true".
        self.assertIn('is not supported as a Content-Type',
                      str(context.exception))
Ejemplo n.º 2
0
def solve_captcha(session, form_element, pdp_url: str):
    """Try to solve the CAPTCHA belonging to ``form_element`` and submit it.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``text`` and ``status_code`` (presumably a ``requests.Session``;
            confirm against the caller).
        form_element: Parsed CAPTCHA ``<form>`` element. It must support
            ``xpath`` and carry an ``action`` attribute.
        pdp_url: URL of the page that produced the CAPTCHA. Only its scheme
            and host are reused; the path is replaced by the form's action.

    Returns:
        ``(response_text, status_code)`` of the validation request when a
        solution was found and submitted, otherwise ``("", 404)``.
    """
    log.warning("Encountered CAPTCHA. Attempting to solve.")
    # NOTE: the leading "//" makes this query search the whole document that
    # contains the form, not only the form's own descendants.
    captcha_images = form_element.xpath(
        '//img[contains(@src, "amazon.com/captcha/")]')
    if not captcha_images:
        # The previous fallback called html.fromstring(""), which raises
        # "Document is empty" instead of returning; an empty string keeps the
        # first tuple item the same type as on the success path.
        return "", 404

    link = captcha_images[0].attrib["src"]
    solution = AmazonCaptcha.fromlink(link).solve()

    # solve() signals failure with the truthy string 'Not solved', so a plain
    # truthiness check would submit that text as the CAPTCHA answer.
    if not solution or solution == 'Not solved':
        return "", 404

    # Rebuild the form submission: the text input receives the answer, every
    # other input is echoed back with the value it carried in the page.
    input_dict = {}
    for form_input in form_element.xpath(".//input"):
        if form_input.type == "text":
            input_dict[form_input.name] = solution
        else:
            input_dict[form_input.name] = form_input.value

    f = furl(pdp_url)  # Use the original URL to get the scheme and host
    f.set(path=form_element.attrib["action"])
    f.add(args=input_dict)
    # furl exposes the assembled address as ``.url``.
    response = session.get(f.url)
    log.debug(f"Captcha response was {response.status_code}")
    return response.text, response.status_code
Ejemplo n.º 3
0
 def test_fromlink_with_predefined_undolvable_captcha_and_keep_logs(self):
     """Check that ``solve(keep_logs=True)`` leaves a log file behind.

     After solving the captcha at the link below with ``keep_logs=True``,
     a file named ``not-solved-captcha.log`` must exist in the current
     working directory.
     """
     link = 'https://i.ibb.co/Cn2J1mS/notsolved.jpg'
     captcha = AmazonCaptcha.fromlink(link)
     # Only the logging side effect matters here, so the returned solution
     # is deliberately not stored.
     captcha.solve(keep_logs=True)
     self.assertIn('not-solved-captcha.log', os.listdir())
Ejemplo n.º 4
0
 def test_fromlink_with_predefined_undolvable_captcha(self):
     """Check that ``solve()`` returns ``'Not solved'`` for the captcha below."""
     unsolvable_url = 'https://i.ibb.co/Cn2J1mS/notsolved.jpg'
     outcome = AmazonCaptcha.fromlink(unsolvable_url).solve()
     self.assertEqual(outcome, 'Not solved')
Ejemplo n.º 5
0
    def run(self):
        """Poll the offer listing for ``self.item`` and try to buy in-range offers.

        Each pass of the outer loop requests the ``/gp/aod/ajax`` listing for
        ``self.item['asin']`` with a random user agent and proxy, then walks
        the pinned and regular offer divs. For every offer whose whole-number
        price lies between ``self.item['min_price']`` and
        ``self.item['max_price']`` (inclusive) it opens a session carrying
        ``self.cookies`` and repeatedly posts a turbo-initiate request until
        ``self.timeout_buy`` seconds have passed. Each 200 response whose body
        is not a single space is handed to ``self.check_out``; when the
        response contains a captcha form, the captcha answer is submitted
        first and that response is handed over instead.

        Any ``Exception`` raised inside a pass is logged and the outer loop
        carries on. The outer loop has no ``break`` or ``return``.
        """
        while True:
            # calc next time
            # (passed to sleep_time_left at the end of the pass; presumably
            # that helper sleeps until this timestamp -- TODO confirm)
            next_time_monitor = time.time() + self.delay_monitor

            self.logger.info('checking stock')

            try:
                # randomize user agent
                user_agent = self.get_random_user_agent()

                # randomize proxy
                proxies = self.get_random_proxy()

                # send ajax request
                start_time = time.time()
                r = requests.get(
                    f"{AMAZON_SMILE_BASE_URL}/gp/aod/ajax?asin={self.item['asin']}",
                    cookies={'session-id': ''},
                    headers={'user-agent': user_agent},
                    proxies=proxies)
                self.logger.debug(
                    f'ajax request took {int(1000 * (time.time() - start_time))} ms'
                )
                self.logger.debug(
                    f'ajax request returned status code {r.status_code}')

                if r.status_code == 200:
                    # both the pinned offer and the regular offers are examined
                    offer_divs = html.fromstring(r.text).xpath(
                        "//div[@id='aod-sticky-pinned-offer'] | //div[@id='aod-offer']"
                    )

                    for offer_div in offer_divs:
                        price_spans = offer_div.xpath(
                            ".//span[@class='a-price-whole']")

                        if price_spans:
                            # only the whole-number part of the price is used;
                            # thousands separators are stripped before int()
                            price = int(price_spans[0].text.replace(',', ''))
                            self.logger.info(f'offer for ${price}')

                            # check price
                            if self.item['min_price'] <= price <= self.item[
                                    'max_price']:
                                self.logger.success('price in range')

                                # get the offering id
                                offering_id = offer_div.xpath(
                                    ".//input[@name='offeringID.1']")[0].value
                                self.logger.debug(
                                    f'offering_id = {offering_id}')

                                # build data
                                data = {
                                    'offerListing.1': offering_id,
                                    'quantity.1': '1',
                                }

                                # create session
                                # (the session-id cookie value doubles as the
                                # checkout CSRF token header)
                                s = requests.Session()
                                s.headers = {
                                    'content-type':
                                    'application/x-www-form-urlencoded',
                                    'x-amz-checkout-csrf-token':
                                    self.cookies['session-id'],
                                }
                                for n, v in self.cookies.items():
                                    s.cookies.set(n, v)

                                # calc timeout time
                                timeout_time = time.time() + self.timeout_buy

                                # This loop is left only through the timeout
                                # check below (or an exception); a call to
                                # check_out does not end it.
                                while True:
                                    # calc next time
                                    next_time_buy = time.time(
                                    ) + self.delay_buy

                                    self.logger.info('trying to cart')

                                    # randomize user agent
                                    # NOTE(review): called as a bare name here
                                    # but as self.get_random_user_agent()
                                    # above -- confirm both exist.
                                    s.headers.update({
                                        'user-agent':
                                        get_random_user_agent()
                                    })

                                    # randomize proxy
                                    # NOTE(review): same bare-name vs. self.
                                    # mismatch as for the user agent.
                                    s.proxies = get_random_proxy()

                                    # send turbo init request
                                    start_time = time.time()
                                    r = s.post(
                                        f'{AMAZON_SMILE_BASE_URL}/checkout/turbo-initiate?pipelineType=turbo',
                                        data)
                                    self.logger.debug(
                                        f'turbo init request took {calc_time_delta(start_time)} ms'
                                    )
                                    self.logger.debug(
                                        f'turbo init request returned status code {r.status_code}'
                                    )

                                    if r.status_code == 200:
                                        # a body of exactly one space is
                                        # treated as "could not cart"
                                        if r.text != ' ':
                                            self.logger.success('carted')

                                            # check for captcha
                                            captcha_forms = html.fromstring(
                                                r.text
                                            ).xpath(
                                                '//form[contains(@action, "validateCaptcha")]'
                                            )
                                            if captcha_forms:
                                                self.logger.info('got captcha')

                                                # try to solve captcha
                                                # NOTE(review): the leading
                                                # "//" searches the whole
                                                # document, not only the form;
                                                # [0] raises IndexError when no
                                                # image matches (handled by the
                                                # outer except).
                                                captcha_form = captcha_forms[0]
                                                captcha_img_link = captcha_form.xpath(
                                                    '//img[contains(@src, "amazon.com/captcha/")]'
                                                )[0].attrib['src']
                                                captcha_solution = AmazonCaptcha.fromlink(
                                                    captcha_img_link).solve()

                                                # check for captcha solution
                                                # NOTE(review): solve() appears
                                                # to return the truthy string
                                                # 'Not solved' on failure --
                                                # confirm the else branch is
                                                # reachable.
                                                if captcha_solution:
                                                    self.logger.success(
                                                        'solved captcha')
                                                    self.logger.debug(
                                                        f'captcha_solution = {captcha_solution}'
                                                    )

                                                    # send validate captcha request
                                                    # (text inputs carry the
                                                    # solution, all other inputs
                                                    # keep their page value)
                                                    captcha_inputs = captcha_form.xpath(
                                                        './/input')
                                                    args = {
                                                        captcha_input.name:
                                                        captcha_solution
                                                        if captcha_input.type
                                                        == 'text' else
                                                        captcha_input.value
                                                        for captcha_input in
                                                        captcha_inputs
                                                    }
                                                    f = furl(
                                                        AMAZON_SMILE_BASE_URL)
                                                    f.set(path=captcha_form.
                                                          attrib['action'])
                                                    f.add(args=args)
                                                    start_time = time.time()
                                                    r = s.get(f.url)
                                                    self.logger.debug(
                                                        f'validate captcha request took {calc_time_delta(start_time)} ms'
                                                    )
                                                    self.logger.debug(
                                                        f'validate captcha request returned status code {r.status_code}'
                                                    )

                                                    self.check_out(r.text, s)
                                                # no captcha solution
                                                else:
                                                    self.logger.warning(
                                                        'could not solve captcha'
                                                    )
                                            # no captcha
                                            else:
                                                self.check_out(r.text, s)
                                        # no stock
                                        else:
                                            self.logger.warning(
                                                'could not cart')

                                    # check for timeout
                                    if timeout_time - time.time() < 0:
                                        self.logger.info(
                                            'timed out trying to buy')
                                        break

                                    sleep_time_left(next_time_buy)
            except Exception as e:
                # any failure in this pass is logged; monitoring continues
                self.logger.error(e)

            sleep_time_left(next_time_monitor)
Ejemplo n.º 6
0
 def do_GET(self):
     """Solve the captcha whose link is the request path and write the answer.

     The request path, minus its first character, is passed to
     ``AmazonCaptcha.fromlink``; the solution is written to the response
     body encoded as UTF-8.
     """
     self._set_response()
     image_link = self.path[1:]
     answer = AmazonCaptcha.fromlink(image_link).solve()
     self.wfile.write(answer.encode('utf-8'))
Ejemplo n.º 7
0
import requests
import lxml
from bs4 import BeautifulSoup
from amazoncaptcha import AmazonCaptcha

# Request an Amazon product page and, if the response contains a captcha
# image, solve that captcha and print the link and the solution.
amazon_url = "https://www.amazon.com/Instant-Pot-Duo-Evo-Plus/dp/B07W55DDFB/ref=sr_1_1?qid=1597662463"

# Headers sent with the request (Firefox-on-Linux user agent).
amz_headers = {
    "Accept":
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "User-Agent":
    "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Accept-Language": "en-US,en;q=0.5"
}

response = requests.get(amazon_url, headers=amz_headers)
captcha_webpage = response.text
soup = BeautifulSoup(captcha_webpage, "lxml")

# The captcha image is expected to be embedded like this:
# <div class="a-row a-text-center">
# <img src="https://images-na.ssl-images-amazon.com/captcha/twhhswbk/Captcha_fwgzoazcal.jpg">
# </div>
# select_one returns None when nothing matches, whereas select(...)[0]
# raised IndexError whenever the response carried no such image.
captcha_image = soup.select_one(".a-text-center>img")
if captcha_image is None:
    print("No captcha image found in the response.")
else:
    captcha_link = captcha_image.get("src")
    print(captcha_link)
    captcha = AmazonCaptcha.fromlink(captcha_link)
    solution = captcha.solve()
    print(solution)