Beispiel #1
0
 def __init__(self, store_uri=config.STORE_URI, connect=True):
     """Initialise the base ``ImagesPipeline`` and optionally run the init helpers.

     :param store_uri: value forwarded to ``ImagesPipeline.__init__``;
         defaults to ``config.STORE_URI``.
     :param connect: when falsy, ``initES``, ``initKafka`` and ``initRedis``
         are not called and only ``self.kafka = None`` is set.
     """
     ImagesPipeline.__init__(self, store_uri)
     self.kafka = None
     if not connect:
         return
     # Call order is preserved: ES first, then Kafka, then Redis.
     self.initES()
     self.initKafka()
     self.initRedis()
Beispiel #2
0
 def test_different_settings_for_different_instances(self):
     """
     Two ImagesPipeline instances built from different settings must each
     expose their own values: defaults on one, the custom values on the other.
     """
     overrides = self._generate_fake_settings()
     pipe_with_defaults = ImagesPipeline(self.tempdir, settings=Settings())
     pipe_with_overrides = ImagesPipeline.from_settings(Settings(overrides))
     for pipe_attr, settings_attr in self.img_cls_attribute_names:
         default_value = self.default_pipeline_settings.get(pipe_attr)
         override_value = overrides.get(settings_attr)
         # The check is only meaningful if the two values differ.
         self.assertNotEqual(default_value, override_value)
         attr_name = pipe_attr.lower()
         self.assertEqual(getattr(pipe_with_defaults, attr_name), default_value)
         self.assertEqual(getattr(pipe_with_overrides, attr_name), override_value)
Beispiel #3
0
 def test_images_result_field(self):
     """IMAGES_RESULT_FIELD: ``self.pipeline`` keeps the default, a configured pipeline the override."""
     overrides = {
         'IMAGES_STORE': self.tempdir,
         'IMAGES_RESULT_FIELD': 'funny_field',
     }
     configured = ImagesPipeline.from_settings(Settings(overrides))
     expected_default = self.default_settings.get('IMAGES_RESULT_FIELD')
     self.assertEqual(self.pipeline.images_result_field, expected_default)
     self.assertEqual(configured.images_result_field, 'funny_field')
Beispiel #4
0
 def test_min_width(self):
     """IMAGES_MIN_WIDTH: ``self.pipeline`` keeps the default, a configured pipeline gets 42."""
     custom_settings = Settings({
         'IMAGES_STORE': self.tempdir,
         'IMAGES_MIN_WIDTH': 42,
     })
     configured = ImagesPipeline.from_settings(custom_settings)
     default_min_width = self.default_settings.getint('IMAGES_MIN_WIDTH')
     self.assertEqual(self.pipeline.min_width, default_min_width)
     self.assertEqual(configured.min_width, 42)
Beispiel #5
0
 def process_item(self, item, spider):
     """Drop ``.gif`` URLs from ``item['image_urls']`` and hand the item to ``ImagesPipeline``.

     :raises DropItem: with message ``"Invalid Item"`` when
         ``item.get('image_urls')`` is falsy.
     """
     if not item.get('image_urls'):
         raise DropItem("Invalid Item")

     kept_urls = []
     for candidate in item['image_urls']:
         if candidate.endswith(".gif"):
             continue
         kept_urls.append(candidate)
     item['image_urls'] = kept_urls
     return ImagesPipeline.process_item(self, item, spider)
Beispiel #6
0
 def test_expires(self):
     """IMAGES_EXPIRES: ``self.pipeline`` keeps the default, a configured pipeline gets 42."""
     custom_settings = Settings({
         'IMAGES_STORE': self.tempdir,
         'IMAGES_EXPIRES': 42,
     })
     configured = ImagesPipeline.from_settings(custom_settings)
     default_expires = self.default_settings.getint('IMAGES_EXPIRES')
     self.assertEqual(self.pipeline.expires, default_expires)
     self.assertEqual(configured.expires, 42)
Beispiel #7
0
 def test_min_height(self):
     """IMAGES_MIN_HEIGHT: ``self.pipeline`` keeps the default, a configured pipeline gets 42."""
     custom_settings = Settings({
         'IMAGES_STORE': self.tempdir,
         'IMAGES_MIN_HEIGHT': 42,
     })
     configured = ImagesPipeline.from_settings(custom_settings)
     default_min_height = self.default_settings.getint('IMAGES_MIN_HEIGHT')
     self.assertEqual(self.pipeline.min_height, default_min_height)
     self.assertEqual(configured.min_height, 42)
Beispiel #8
0
 def test_thumbs(self):
     """IMAGES_THUMBS: ``self.pipeline`` keeps the default, a configured pipeline the custom mapping."""
     thumb_sizes = {'small': (50, 50), 'big': (270, 270)}
     configured = ImagesPipeline.from_settings(
         Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_THUMBS': thumb_sizes}))
     default_thumbs = self.default_settings.get('IMAGES_THUMBS')
     self.assertEqual(self.pipeline.thumbs, default_thumbs)
     self.assertEqual(configured.thumbs, thumb_sizes)
 def test_item_fields_default(self):
     """With default field names, requests come from ``image_urls`` and results land in ``images``."""
     image_url = 'http://www.example.com/images/1.jpg'
     source_item = self.item_class(name='item1', image_urls=[image_url])
     pipeline = ImagesPipeline.from_settings(
         Settings({'IMAGES_STORE': 's3://example/images/'}))

     media_requests = list(pipeline.get_media_requests(source_item, None))
     self.assertEqual(media_requests[0].url, image_url)

     results = [(True, {'url': image_url})]
     processed = pipeline.item_completed(results, source_item, None)
     stored_images = ItemAdapter(processed).get("images")
     self.assertEqual(stored_images, [results[0][1]])
     # item_completed must hand back an item of the same class it was given.
     self.assertIsInstance(processed, self.item_class)
Beispiel #10
0
 def file_path(self, request, response=None, info=None):
     """Return the storage path for a downloaded image.

     When ``request.meta`` contains the key returned by
     ``self.get_article_meta(info)``, the path is
     ``<meta value>/<last '/'-separated segment of request.url>``.
     Otherwise the decision is delegated to ``ImagesPipeline.file_path``.

     :param request: object exposing ``meta`` (mapping) and ``url`` (str).
     :param response: forwarded unchanged to ``ImagesPipeline.file_path``.
     :param info: passed to ``get_article_meta`` and forwarded to the base class.
     :returns: the target file path.
     """
     # Resolve the meta key once and reuse it for both the membership test
     # and the lookup (previously get_article_meta() was called twice).
     article_key = self.get_article_meta(info)
     if article_key in request.meta:
         # This meta field was added on purpose to carry the article number
         # with the request. On Leroy the image path itself seems to contain
         # the article number, but that may change at any time, so a more
         # universal scheme is used here (not ideal, but nothing better was
         # found so far): images are stored as
         # <productArticle>/<originalImageName>.
         image_name = request.url.split('/')[-1]
         targetfile = request.meta[article_key] + '/' + image_name
     else:
         targetfile = ImagesPipeline.file_path(self, request, response,
                                               info)
     return targetfile
Beispiel #11
0
    def test_item_fields_default(self):
        """Default field names work for both an ``Item`` subclass and a plain ``dict``."""
        class TestItem(Item):
            name = Field()
            image_urls = Field()
            images = Field()

        image_url = 'http://www.example.com/images/1.jpg'
        for item_type in (TestItem, dict):
            item = item_type({'name': 'item1', 'image_urls': [image_url]})
            # A fresh pipeline is built for every item type, as before.
            pipeline = ImagesPipeline.from_settings(
                Settings({'IMAGES_STORE': 's3://example/images/'}))
            media_requests = list(pipeline.get_media_requests(item, None))
            self.assertEqual(media_requests[0].url, image_url)
            results = [(True, {'url': image_url})]
            pipeline.item_completed(results, item, None)
            self.assertEqual(item['images'], [results[0][1]])
Beispiel #12
0
 def test_expires(self):
     """Check ``expires`` on the default pipeline and on one built with IMAGES_EXPIRES=42."""
     store_settings = {'IMAGES_STORE': self.tempdir, 'IMAGES_EXPIRES': 42}
     configured = ImagesPipeline.from_settings(Settings(store_settings))
     self.assertEqual(
         self.pipeline.expires,
         self.default_settings.getint('IMAGES_EXPIRES'),
     )
     self.assertEqual(configured.expires, 42)
Beispiel #13
0
class ImagesPipelineTestCase(unittest.TestCase):
    """Tests for ImagesPipeline path naming, download handling and image conversion."""

    skip = skip_pillow

    def setUp(self):
        """Create a temporary store and a pipeline that uses the mocked download function."""
        self.tempdir = mkdtemp()
        self.pipeline = ImagesPipeline(self.tempdir,
                                       download_func=_mocked_download_func)

    def tearDown(self):
        """Remove the temporary store created in setUp."""
        rmtree(self.tempdir)

    def test_file_path(self):
        """file_path() maps each request URL to the expected 'full/<hash>.jpg' path."""
        file_path = self.pipeline.file_path
        self.assertEqual(
            file_path(Request("https://dev.mydeco.com/mydeco.gif")),
            'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
        self.assertEqual(
            file_path(
                Request(
                    "http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg"
                )), 'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg')
        self.assertEqual(
            file_path(
                Request(
                    "https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif"
                )), 'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg')
        self.assertEqual(
            file_path(
                Request(
                    "http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg"
                )), 'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg')
        self.assertEqual(
            file_path(
                Request(
                    "http://www.dorma.co.uk/images/product_details/2532/")),
            'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg')
        self.assertEqual(
            file_path(
                Request("http://www.dorma.co.uk/images/product_details/2532")),
            'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')
        # Passing response/info must not change the resulting path.
        self.assertEqual(
            file_path(
                Request("http://www.dorma.co.uk/images/product_details/2532"),
                response=Response(
                    "http://www.dorma.co.uk/images/product_details/2532"),
                info=object()),
            'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')

    def test_thumbnail_name(self):
        """thumb_path() maps each request URL to 'thumbs/<name>/<hash>.jpg'."""
        thumb_path = self.pipeline.thumb_path
        name = '50'
        self.assertEqual(
            thumb_path(Request("file:///tmp/foo.jpg"), name),
            'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
        self.assertEqual(
            thumb_path(Request("file://foo.png"), name),
            'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg')
        self.assertEqual(
            thumb_path(Request("file:///tmp/foo"), name),
            'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg')
        self.assertEqual(
            thumb_path(Request("file:///tmp/some.name/foo"), name),
            'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')
        # Passing response/info must not change the resulting path.
        self.assertEqual(
            thumb_path(Request("file:///tmp/some.name/foo"),
                       name,
                       response=Response("file:///tmp/some.name/foo"),
                       info=object()),
            'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')

    def test_invalid_image_downloaded(self):
        """image_downloaded() returns the expected value for each URL ('None' for the second one)."""
        image_downloaded = self.pipeline.image_downloaded
        self.assertEqual(
            image_downloaded(
                response=Response('https://scrapy.org/img/scrapylogo.png'),
                request=Request('https://scrapy.org/img/scrapylogo.png'),
                info=object()), 'f4c913a7bbd3b8edf0a1433a1655ab16')
        self.assertEqual(
            image_downloaded(
                response=Response(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                request=Request(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                info=object()), 'None')
        self.assertEqual(
            image_downloaded(
                response=Response(
                    'http://racedata.gr/wp-content/uploads/2017/01/man3-252x300.jpg'
                ),
                request=Request(
                    'http://racedata.gr/wp-content/uploads/2017/01/man3-252x300.jpg'
                ),
                info=object()), '95dd6a450ecfefbd5916635c5dc11a03')

    def test_logger_image_downloaded(self):
        """image_downloaded() logs 'Could not process image' for the unprocessable URL."""
        image_downloaded = self.pipeline.image_downloaded
        with self.assertLogs() as cm:
            image_downloaded(
                response=Response(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                request=Request(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                info=object())
        # assertEqual() only accepts (first, second, msg); the previous call
        # passed four positional values and raised TypeError before checking
        # anything. cm.output holds 'LEVEL:logger:message' strings, so look
        # for the message text in any captured line.
        # NOTE(review): level and logger name are not asserted because the
        # pipeline's logger is not visible here -- tighten once confirmed.
        self.assertTrue(
            any('Could not process image' in line for line in cm.output),
            cm.output)

        # NOTE(review): the identical call above is expected to return
        # without raising, while this one expects OSError -- confirm which
        # behavior image_downloaded() is meant to have.
        with self.assertRaises(OSError):
            image_downloaded(
                response=Response(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                request=Request(
                    'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A'
                ),
                info=object())

    def test_convert_image(self):
        """convert_image() yields RGB output, keeps thumbnail ratio and flattens transparency."""
        SIZE = (100, 100)
        # straightforward case: RGB and JPEG
        COLOUR = (0, 127, 255)
        im = _create_image('JPEG', 'RGB', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, COLOUR)])

        # check that the thumbnail keeps the image ratio
        thumbnail, _ = self.pipeline.convert_image(converted, size=(10, 25))
        self.assertEqual(thumbnail.mode, 'RGB')
        self.assertEqual(thumbnail.size, (10, 10))

        # transparency case: RGBA and PNG
        COLOUR = (0, 127, 255, 50)
        im = _create_image('PNG', 'RGBA', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))])

        # transparency case with palette: P and PNG
        COLOUR = (0, 127, 255, 50)
        im = _create_image('PNG', 'RGBA', SIZE, COLOUR)
        im = im.convert('P')
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))])
Beispiel #14
0
class ImagesPipelineTestCase(unittest.TestCase):
    """Tests for ImagesPipeline path naming and image conversion."""

    skip = skip

    def setUp(self):
        """Create a temporary store and a pipeline that uses the mocked download function."""
        self.tempdir = mkdtemp()
        self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func)

    def tearDown(self):
        """Remove the temporary store created in setUp."""
        rmtree(self.tempdir)

    def test_file_path(self):
        """file_path() maps each request URL to the expected 'full/<hash>.jpg' path."""
        file_path = self.pipeline.file_path
        self.assertEqual(file_path(Request("https://dev.mydeco.com/mydeco.gif")),
                         'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg')
        self.assertEqual(file_path(Request("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg")),
                         'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg')
        self.assertEqual(file_path(Request("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif")),
                         'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg')
        self.assertEqual(file_path(Request("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg")),
                         'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg')
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532/")),
                         'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg')
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532")),
                         'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')
        # Passing response/info must not change the resulting path.
        self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532"),
                                   response=Response("http://www.dorma.co.uk/images/product_details/2532"),
                                   info=object()),
                         'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg')

    def test_thumbnail_name(self):
        """thumb_path() maps each request URL to 'thumbs/<name>/<hash>.jpg'."""
        thumb_path = self.pipeline.thumb_path
        name = '50'
        self.assertEqual(thumb_path(Request("file:///tmp/foo.jpg"), name),
                         'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg')
        self.assertEqual(thumb_path(Request("file://foo.png"), name),
                         'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg')
        self.assertEqual(thumb_path(Request("file:///tmp/foo"), name),
                         'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg')
        self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name),
                         'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')
        # Passing response/info must not change the resulting path.
        self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name,
                                    response=Response("file:///tmp/some.name/foo"),
                                    info=object()),
                         'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg')

    def test_convert_image(self):
        """convert_image() yields RGB output, keeps thumbnail ratio and flattens transparency."""
        # assertEqual is used throughout: the assertEquals alias is deprecated
        # and no longer exists in the standard library's unittest on 3.12+.
        SIZE = (100, 100)
        # straightforward case: RGB and JPEG
        COLOUR = (0, 127, 255)
        im = _create_image('JPEG', 'RGB', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, COLOUR)])

        # check that the thumbnail keeps the image ratio
        thumbnail, _ = self.pipeline.convert_image(converted, size=(10, 25))
        self.assertEqual(thumbnail.mode, 'RGB')
        self.assertEqual(thumbnail.size, (10, 10))

        # transparency case: RGBA and PNG
        COLOUR = (0, 127, 255, 50)
        im = _create_image('PNG', 'RGBA', SIZE, COLOUR)
        converted, _ = self.pipeline.convert_image(im)
        self.assertEqual(converted.mode, 'RGB')
        self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))])
Beispiel #15
0
 def setUp(self):
     """Create a temp dir, a pipeline constructed with it, and a default ``Settings`` object."""
     store_dir = mkdtemp()
     self.tempdir = store_dir
     self.pipeline = ImagesPipeline(store_dir)
     self.default_settings = Settings()
Beispiel #16
0
 def test_thumbs(self):
     """Check ``thumbs`` on the default pipeline and on one built with custom IMAGES_THUMBS."""
     thumb_sizes = {'small': (50, 50), 'big': (270, 270)}
     store_settings = {'IMAGES_STORE': self.tempdir, 'IMAGES_THUMBS': thumb_sizes}
     configured = ImagesPipeline.from_settings(Settings(store_settings))
     self.assertEqual(
         self.pipeline.thumbs,
         self.default_settings.get('IMAGES_THUMBS'),
     )
     self.assertEqual(configured.thumbs, thumb_sizes)
Beispiel #17
0
 def test_min_height(self):
     """Check ``min_height`` on the default pipeline and on one built with IMAGES_MIN_HEIGHT=42."""
     store_settings = {'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_HEIGHT': 42}
     configured = ImagesPipeline.from_settings(Settings(store_settings))
     self.assertEqual(
         self.pipeline.min_height,
         self.default_settings.getint('IMAGES_MIN_HEIGHT'),
     )
     self.assertEqual(configured.min_height, 42)
Beispiel #18
0
 def test_min_width(self):
     """Check ``min_width`` on the default pipeline and on one built with IMAGES_MIN_WIDTH=42."""
     store_settings = {'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_WIDTH': 42}
     configured = ImagesPipeline.from_settings(Settings(store_settings))
     self.assertEqual(
         self.pipeline.min_width,
         self.default_settings.getint('IMAGES_MIN_WIDTH'),
     )
     self.assertEqual(configured.min_width, 42)
Beispiel #19
0
 def test_images_result_field(self):
     """Check ``images_result_field`` on the default pipeline and on one with a custom field name."""
     store_settings = {
         'IMAGES_STORE': self.tempdir,
         'IMAGES_RESULT_FIELD': 'funny_field',
     }
     configured = ImagesPipeline.from_settings(Settings(store_settings))
     self.assertEqual(
         self.pipeline.images_result_field,
         self.default_settings.get('IMAGES_RESULT_FIELD'),
     )
     self.assertEqual(configured.images_result_field, 'funny_field')
Beispiel #20
0
 def setUp(self):
     """Create a temp dir and a pipeline built with it and ``_mocked_download_func``."""
     scratch_dir = mkdtemp()
     self.tempdir = scratch_dir
     self.pipeline = ImagesPipeline(
         scratch_dir,
         download_func=_mocked_download_func,
     )
Beispiel #21
0
 def process_item(self, item, spider):
     """Store the spider on the instance, then defer to ``ImagesPipeline.process_item``."""
     self.spider = spider
     processed = ImagesPipeline.process_item(self, item, spider)
     return processed