예제 #1
0
    def parse(self, response):
        """Store every not-yet-seen model image URL found in *response*.

        Image ``src`` attributes are pulled from the response with a CSS
        selector. For each URL:

        * URLs ending in the ``no-image-170x113.jpg`` placeholder are skipped.
        * URLs for which ``Image.objects.get(pk=url)`` succeeds are skipped.
        * Otherwise a new ``Image`` is saved with ``is_car=True``, this
          spider's ``brand`` and a randomly drawn ``test_set`` flag, after
          which the method sleeps for five seconds.

        Progress is reported through ``self.log`` and, in two places, ``print``.
        Nothing is returned.
        """
        self.log('Scrapping: {}({})'.format(self.brand, response.url))

        src_selector = '.models > div.col-4 > a > img::attr(src)'
        found_urls = response.css(src_selector).extract()
        print('parsing')

        for src in found_urls:
            # Placeholder thumbnail: nothing worth storing.
            if src.endswith('no-image-170x113.jpg'):
                self.log('Skipping empty image: {}'.format(src))
                continue

            # The URL is used as the primary key, so a successful lookup
            # means this image has been stored before.
            try:
                Image.objects.get(pk=src)
            except Image.DoesNotExist:
                already_stored = False
            else:
                already_stored = True

            if already_stored:
                self.log('Image already exists: {}'.format(src))
                continue

            # True for draws 1..9, i.e. 9 of the 100 possible values.
            # NOTE(review): if a 10% split was intended, confirm the bound.
            in_test_set = random.randint(1, 100) < 10

            record = Image(
                url=src,
                is_car=True,
                test_set=in_test_set,
                brand=self.brand,
            )
            record.save()
            print('saved: ', src)

            self.log('New image saved: {}'.format(src))

            # NOTE(review): blocking pause after each new record; presumably
            # meant as throttling -- confirm it does not stall the crawler.
            time.sleep(5)