def test_get_review_identifier_from_url(self):
        """Check the review identifier extracted from Amazon review URLs.

        Covers one ``amazon.com`` URL carrying a fragment and one
        ``amazon.co.uk`` URL carrying a query string.
        """
        # Each entry pairs a review URL with the identifier expected back.
        cases = (
            ("http://www.amazon.com/review/RRSU6FI3Y6D5I/ref=cm_cr_pr_viewpnt#RRSU6FI3Y6D5I",
             'RRSU6FI3Y6D5I'),
            ("http://www.amazon.co.uk/review/R3E56QODZNNIW4/ref=cm_cr_pr_perm?ie=UTF8&ASIN=0735619670",
             'R3E56QODZNNIW4'),
        )
        for review_url, expected_id in cases:
            extracted = AmazonUrlCreator.get_review_identifier_from_url(
                review_url)
            self.assertEqual(extracted, expected_id)
    def test_domain_name_from_url(self):
        """Check the domain returned for Amazon product URLs.

        Covers one ``www.amazon.com`` and one ``www.amazon.co.uk`` product
        URL, both with long tracking query strings.
        """
        # Each entry pairs a product URL with the domain expected back.
        cases = (
            ("http://www.amazon.com/gp/product/B001OW7JT8/ref=s9_psimh_gw_p201_d1_i2?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0QB7BBXPS3J4660YQN0X&pf_rd_t=101&pf_rd_p=1688200382&pf_rd_i=507846",
             'www.amazon.com'),
            ("http://www.amazon.co.uk/gp/product/B001UHOQ98/ref=s9_simh_gw_p200_d17_i3?pf_rd_m=A3P5ROKL5A1OLE&pf_rd_s=center-2&pf_rd_r=0XD4CJ7P35531QB9PRQM&pf_rd_t=101&pf_rd_p=455345507&pf_rd_i=468294",
             'www.amazon.co.uk'),
        )
        for product_url, expected_domain in cases:
            found_domain = AmazonUrlCreator.get_domain_from_url(product_url)
            self.assertEqual(found_domain, expected_domain)
Example #3
0
    def test_process_next_step_redirects_to_next_search(self):
        """Check that ``process_next_step`` schedules the next ASIN search.

        A spider exposing two ASINs is started, then ``process_next_step``
        is invoked. The call must raise ``DontCloseSpider`` and hand the
        engine exactly one request, aimed at the product URL of the second
        ASIN and calling back into ``spider.parse_product``.
        """
        class TestSpider(BaseAmazonSpider):
            name = "Test Spider"
            type = 'asins'

            def get_asins_generator(self):
                # Yields (asin, sku) pairs; the sku is the lower-cased asin.
                for asin in ("BA123", "BA456"):
                    yield asin, asin.lower()

        mock_engine = MockEngine()
        mock_crawler = MockCrawler(mock_engine)

        spider = TestSpider('amazon.com')
        spider.crawler = mock_crawler

        spider.start_requests()
        spider.collected_items = []

        with self.assertRaises(DontCloseSpider):
            spider.process_next_step(spider)
        scheduled = mock_engine.last_request

        # Exactly one crawl must have been handed to the engine.
        self.assertEqual(mock_engine.crawl_called, 1)

        # The request goes to the product page of the second ASIN
        # (presumably the first one was consumed by start_requests -- confirm).
        next_asin = "BA456"
        self.assertIs(spider, mock_engine.last_spider)
        self.assertEqual(
            scheduled.url,
            AmazonUrlCreator.build_url_from_asin('amazon.com', next_asin))
        self.assertTrue(scheduled.dont_filter)

        search_item = scheduled.meta['search_item']
        self.assertEqual(search_item['asin'], next_asin)
        self.assertEqual(
            scheduled.meta['search_item']['sku'], next_asin.lower())

        # The response must be routed to the product parser.
        self.assertEqual(scheduled.callback, spider.parse_product)
Example #4
0
    def build_url_amazon_direct(domain, url):
        """Return ``url`` with its ``m`` query parameter set for ``domain``.

        The domain is first normalised with ``AmazonUrlCreator._fix_domain``
        and then matched against a fixed list of marketplace markers. The
        id attached to the first marker found in the domain is written into
        the ``m`` parameter (presumably Amazon's own seller id for that
        marketplace -- confirm).

        :param domain: Amazon domain the URL belongs to.
        :param url: URL to add the ``m`` parameter to (or replace it in).
        :returns: the URL with the ``m`` parameter set.
        :raises AmazonUrlCreatorException: if no known marker occurs in the
            normalised domain.
        """
        domain = AmazonUrlCreator._fix_domain(domain)

        # Order matters: the first marker contained in the domain wins.
        marketplace_ids = (
            ('.com', 'ATVPDKIKX0DER'),
            ('.co.uk', 'A3P5ROKL5A1OLE'),
            ('.fr', 'A1X6FK5RDHNB96'),
            ('.it', 'A11IL2PNWYJU7H'),
            ('.de', 'A3JWKAKR8XB7XF'),
            ('.ca', 'A3DWYIK6Y9EEQB'),
            ('.es', 'A1AT7YVPFBWXBL'),
        )
        for marker, amazon_id in marketplace_ids:
            if marker in domain:
                return add_or_replace_parameter(url, 'm', amazon_id)

        raise AmazonUrlCreatorException('Domain %s not found!' % domain)
 def test_build_url_from_asin(self):
     """Check the product URL built from a domain and an ASIN."""
     built_url = AmazonUrlCreator.build_url_from_asin('amazon.com', '123456')
     self.assertEqual(
         built_url,
         'http://www.amazon.com/gp/product/123456/?ref=twister_dp_update&ie=UTF8&psc=1')