Beispiel #1
0
 def test_amazon_scraper(self):
     '''Check the item fields produced by get_item_info for Amazon.

     The scraper under test is whatever Scraper.get_scraper returns for
     the 'www.amazon.com' domain; it is queried with one fixed product
     URL and one fixed image URL, and selected attributes of the result
     are compared against hard-coded expectations.
     '''
     # NOTE(review): get_item_info presumably fetches the live page, so
     # these expectations may drift over time -- confirm.
     product_url = 'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846'
     image_url = 'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg'

     amazon = Scraper().get_scraper('www.amazon.com')
     result = amazon.get_item_info(product_url, image_url)

     # Top-level attributes.
     self.assertEqual(result.price, 75.99)
     self.assertEqual(result.currency_code, '$')
     # Nested attributes.
     self.assertEqual(result.user_interaction.likes, 42)
     self.assertEqual(result.quantity.new, 5)
     self.assertEqual(result.details.discount.value, 43.96)
Beispiel #2
0
 def test_amazon_scraper(self):
     '''Verify get_item_info output for the 'www.amazon.com' scraper.

     Obtains a scraper through Scraper.get_scraper, asks it for the item
     information of a fixed product/image URL pair and asserts on the
     price, currency code, likes, new quantity and discount value.
     '''
     domain_scraper = Scraper().get_scraper('www.amazon.com')

     # Both URLs are passed positionally: page URL first, image URL second.
     page = 'http://www.amazon.com/gp/product/B002P8T0L0/ref=s9_simh_gw_p23_d0_g23_i1?pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0WQ1VFHRSY7ZTB93FGYG&pf_rd_t=101&pf_rd_p=470938631&pf_rd_i=507846'
     picture = 'http://ecx.images-amazon.com/images/I/31hak2cSIOL.jpg'
     info = domain_scraper.get_item_info(page, picture)

     self.assertEqual(info.price, 75.99)
     self.assertEqual(info.currency_code, '$')
     self.assertEqual(info.user_interaction.likes, 42)
     self.assertEqual(info.quantity.new, 5)
     self.assertEqual(info.details.discount.value, 43.96)
Beispiel #3
0
def _pinscraperow(row, row_num):
    '''Process one input row: download its image and scrape its page.

    row      -- indexable record; row[0] is the page URL and row[1] the
                image URL (both are stripped of surrounding whitespace).
    row_num  -- accepted for the caller's benefit; not used here.

    A directory named after the URL-quoted page URL is created via mkdir
    and the image is downloaded into it, regardless of whether a scraper
    exists for the page's domain.

    Returns True when a scraper was available for the domain (whether or
    not it produced any content); otherwise returns the domain itself.
    '''
    factory = Scraper()
    page_url = row[0].strip()
    image_url = row[1].strip()

    # quote_plus escapes '/' and other reserved characters, giving a
    # single path component to use as the output directory name.
    target_dir = urllib.parse.quote_plus(page_url)
    mkdir(target_dir)
    download_image(image_url, target_dir)

    site = get_domain(page_url)
    site_scraper = factory.get_scraper(site)
    if not site_scraper:
        # No scraper for this domain: hand the domain back to the caller.
        return site

    print("Getting information from {0}... ".format(site))
    info = site_scraper.get_item_info(page_url, image_url)
    if not info:
        # Leave a marker file so the empty result is visible on disk.
        write_to_file('{0}/not_found.txt'.format(target_dir), 'w',
                      'The url at {0} was not found'.format(page_url))
    else:
        json_dump_to_file('{0}/info.json'.format(target_dir), info)
    return True