def __init__(self, store_uri=config.STORE_URI, connect=True): ImagesPipeline.__init__(self, store_uri) self.kafka = None if connect: self.initES() self.initKafka() self.initRedis()
def test_different_settings_for_different_instances(self): """ If there are two instances of ImagesPipeline class with different settings, they should have different settings. """ custom_settings = self._generate_fake_settings() default_settings = Settings() default_sts_pipe = ImagesPipeline(self.tempdir, settings=default_settings) user_sts_pipe = ImagesPipeline.from_settings(Settings(custom_settings)) for pipe_attr, settings_attr in self.img_cls_attribute_names: expected_default_value = self.default_pipeline_settings.get(pipe_attr) custom_value = custom_settings.get(settings_attr) self.assertNotEqual(expected_default_value, custom_value) self.assertEqual(getattr(default_sts_pipe, pipe_attr.lower()), expected_default_value) self.assertEqual(getattr(user_sts_pipe, pipe_attr.lower()), custom_value)
def test_images_result_field(self): another_pipeline = ImagesPipeline.from_settings( Settings({ 'IMAGES_STORE': self.tempdir, 'IMAGES_RESULT_FIELD': 'funny_field' })) self.assertEqual(self.pipeline.images_result_field, self.default_settings.get('IMAGES_RESULT_FIELD')) self.assertEqual(another_pipeline.images_result_field, 'funny_field')
def test_min_width(self): another_pipeline = ImagesPipeline.from_settings( Settings({ 'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_WIDTH': 42 })) self.assertEqual(self.pipeline.min_width, self.default_settings.getint('IMAGES_MIN_WIDTH')) self.assertEqual(another_pipeline.min_width, 42)
def process_item(self, item, spider): if item.get('image_urls'): urls = item['image_urls'] item['image_urls'] = [ url for url in urls if not url.endswith(".gif") ] return ImagesPipeline.process_item(self, item, spider) else: raise DropItem("Invalid Item")
def test_expires(self): another_pipeline = ImagesPipeline.from_settings( Settings({ 'IMAGES_STORE': self.tempdir, 'IMAGES_EXPIRES': 42 })) self.assertEqual(self.pipeline.expires, self.default_settings.getint('IMAGES_EXPIRES')) self.assertEqual(another_pipeline.expires, 42)
def test_min_height(self): another_pipeline = ImagesPipeline.from_settings( Settings({ 'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_HEIGHT': 42 })) self.assertEqual(self.pipeline.min_height, self.default_settings.getint('IMAGES_MIN_HEIGHT')) self.assertEqual(another_pipeline.min_height, 42)
def test_thumbs(self): custom_thumbs = {'small': (50, 50), 'big': (270, 270)} another_pipeline = ImagesPipeline.from_settings( Settings({ 'IMAGES_STORE': self.tempdir, 'IMAGES_THUMBS': custom_thumbs })) self.assertEqual(self.pipeline.thumbs, self.default_settings.get('IMAGES_THUMBS')) self.assertEqual(another_pipeline.thumbs, custom_thumbs)
def test_item_fields_default(self): url = 'http://www.example.com/images/1.jpg' item = self.item_class(name='item1', image_urls=[url]) pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': 's3://example/images/'})) requests = list(pipeline.get_media_requests(item, None)) self.assertEqual(requests[0].url, url) results = [(True, {'url': url})] item = pipeline.item_completed(results, item, None) images = ItemAdapter(item).get("images") self.assertEqual(images, [results[0][1]]) self.assertIsInstance(item, self.item_class)
def file_path(self, request, response=None, info=None): # проверяем поле , встравленно специльно чтобы хранить артикул в meta запроса if self.get_article_meta(info) in request.meta: # в лерой сам путь к картинке вроде хранит артикул - но вдруг они что то поменяют и тогда облом # создаем более универсальный вариант. Хотя мне он не очень нравится. но пока лучше не нашел..... # сохраняем картики в папке <артикулТовара>/<исходноеИмяКартинки> targetfile = request.meta[self.get_article_meta( info)] + '/' + request.url.split('/')[-1] else: targetfile = ImagesPipeline.file_path(self, request, response, info) return targetfile
def test_item_fields_default(self): class TestItem(Item): name = Field() image_urls = Field() images = Field() for cls in TestItem, dict: url = 'http://www.example.com/images/1.jpg' item = cls({'name': 'item1', 'image_urls': [url]}) pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': 's3://example/images/'})) requests = list(pipeline.get_media_requests(item, None)) self.assertEqual(requests[0].url, url) results = [(True, {'url': url})] pipeline.item_completed(results, item, None) self.assertEqual(item['images'], [results[0][1]])
def test_expires(self): another_pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_EXPIRES': 42})) self.assertEqual(self.pipeline.expires, self.default_settings.getint('IMAGES_EXPIRES')) self.assertEqual(another_pipeline.expires, 42)
class ImagesPipelineTestCase(unittest.TestCase): skip = skip_pillow def setUp(self): self.tempdir = mkdtemp() self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func) def tearDown(self): rmtree(self.tempdir) def test_file_path(self): file_path = self.pipeline.file_path self.assertEqual( file_path(Request("https://dev.mydeco.com/mydeco.gif")), 'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg') self.assertEqual( file_path( Request( "http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg" )), 'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg') self.assertEqual( file_path( Request( "https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif" )), 'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg') self.assertEqual( file_path( Request( "http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg" )), 'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg') self.assertEqual( file_path( Request( "http://www.dorma.co.uk/images/product_details/2532/")), 'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg') self.assertEqual( file_path( Request("http://www.dorma.co.uk/images/product_details/2532")), 'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg') self.assertEqual( file_path( Request("http://www.dorma.co.uk/images/product_details/2532"), response=Response( "http://www.dorma.co.uk/images/product_details/2532"), info=object()), 'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg') def test_thumbnail_name(self): thumb_path = self.pipeline.thumb_path name = '50' self.assertEqual( thumb_path(Request("file:///tmp/foo.jpg"), name), 'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg') self.assertEqual( thumb_path(Request("file://foo.png"), name), 'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg') self.assertEqual( thumb_path(Request("file:///tmp/foo"), name), 'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg') self.assertEqual( thumb_path(Request("file:///tmp/some.name/foo"), name), 'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg') self.assertEqual( thumb_path(Request("file:///tmp/some.name/foo"), name, response=Response("file:///tmp/some.name/foo"), info=object()), 'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg') def test_invalid_image_downloaded(self): image_downloaded = self.pipeline.image_downloaded self.assertEqual( image_downloaded( response=Response('https://scrapy.org/img/scrapylogo.png'), request=Request('https://scrapy.org/img/scrapylogo.png'), info=object()), 'f4c913a7bbd3b8edf0a1433a1655ab16') self.assertEqual( image_downloaded( response=Response( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), request=Request( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), info=object()), 'None') self.assertEqual( image_downloaded( response=Response( 'http://racedata.gr/wp-content/uploads/2017/01/man3-252x300.jpg' ), request=Request( 'http://racedata.gr/wp-content/uploads/2017/01/man3-252x300.jpg' ), info=object()), '95dd6a450ecfefbd5916635c5dc11a03') def test_logger_image_downloaded(self): image_downloaded = self.pipeline.image_downloaded with self.assertLogs() as cm: image_downloaded( response=Response( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), request=Request( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), info=object()) self.assertEqual('INFO:', __name__, 'Could not process image', cm.output) with self.assertRaises(OSError): image_downloaded( response=Response( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), request=Request( 'https://img01.mgo-images.com/image/thumbnail?id=MSRE18FBB98AF3BD2EF21EF2283708829B8A' ), info=object()) def test_convert_image(self): SIZE = (100, 100) # straigh forward case: RGB and JPEG COLOUR = (0, 127, 255) im = _create_image('JPEG', 'RGB', SIZE, COLOUR) converted, _ = self.pipeline.convert_image(im) self.assertEqual(converted.mode, 'RGB') self.assertEqual(converted.getcolors(), [(10000, COLOUR)]) # check that thumbnail keep image ratio thumbnail, _ = self.pipeline.convert_image(converted, size=(10, 25)) self.assertEqual(thumbnail.mode, 'RGB') self.assertEqual(thumbnail.size, (10, 10)) # transparency case: RGBA and PNG COLOUR = (0, 127, 255, 50) im = _create_image('PNG', 'RGBA', SIZE, COLOUR) converted, _ = self.pipeline.convert_image(im) self.assertEqual(converted.mode, 'RGB') self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))]) # transparency case with palette: P and PNG COLOUR = (0, 127, 255, 50) im = _create_image('PNG', 'RGBA', SIZE, COLOUR) im = im.convert('P') converted, _ = self.pipeline.convert_image(im) self.assertEqual(converted.mode, 'RGB') self.assertEqual(converted.getcolors(), [(10000, (205, 230, 255))])
class ImagesPipelineTestCase(unittest.TestCase): skip = skip def setUp(self): self.tempdir = mkdtemp() self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func) def tearDown(self): rmtree(self.tempdir) def test_file_path(self): file_path = self.pipeline.file_path self.assertEqual(file_path(Request("https://dev.mydeco.com/mydeco.gif")), 'full/3fd165099d8e71b8a48b2683946e64dbfad8b52d.jpg') self.assertEqual(file_path(Request("http://www.maddiebrown.co.uk///catalogue-items//image_54642_12175_95307.jpg")), 'full/0ffcd85d563bca45e2f90becd0ca737bc58a00b2.jpg') self.assertEqual(file_path(Request("https://dev.mydeco.com/two/dirs/with%20spaces%2Bsigns.gif")), 'full/b250e3a74fff2e4703e310048a5b13eba79379d2.jpg') self.assertEqual(file_path(Request("http://www.dfsonline.co.uk/get_prod_image.php?img=status_0907_mdm.jpg")), 'full/4507be485f38b0da8a0be9eb2e1dfab8a19223f2.jpg') self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532/")), 'full/97ee6f8a46cbbb418ea91502fd24176865cf39b2.jpg') self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532")), 'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg') self.assertEqual(file_path(Request("http://www.dorma.co.uk/images/product_details/2532"), response=Response("http://www.dorma.co.uk/images/product_details/2532"), info=object()), 'full/244e0dd7d96a3b7b01f54eded250c9e272577aa1.jpg') def test_thumbnail_name(self): thumb_path = self.pipeline.thumb_path name = '50' self.assertEqual(thumb_path(Request("file:///tmp/foo.jpg"), name), 'thumbs/50/38a86208c36e59d4404db9e37ce04be863ef0335.jpg') self.assertEqual(thumb_path(Request("file://foo.png"), name), 'thumbs/50/e55b765eba0ec7348e50a1df496040449071b96a.jpg') self.assertEqual(thumb_path(Request("file:///tmp/foo"), name), 'thumbs/50/0329ad83ebb8e93ea7c7906d46e9ed55f7349a50.jpg') self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name), 'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg') self.assertEqual(thumb_path(Request("file:///tmp/some.name/foo"), name, response=Response("file:///tmp/some.name/foo"), info=object()), 'thumbs/50/850233df65a5b83361798f532f1fc549cd13cbe9.jpg') def test_convert_image(self): SIZE = (100, 100) # straigh forward case: RGB and JPEG COLOUR = (0, 127, 255) im = _create_image('JPEG', 'RGB', SIZE, COLOUR) converted, _ = self.pipeline.convert_image(im) self.assertEquals(converted.mode, 'RGB') self.assertEquals(converted.getcolors(), [(10000, COLOUR)]) # check that thumbnail keep image ratio thumbnail, _ = self.pipeline.convert_image(converted, size=(10, 25)) self.assertEquals(thumbnail.mode, 'RGB') self.assertEquals(thumbnail.size, (10, 10)) # transparency case: RGBA and PNG COLOUR = (0, 127, 255, 50) im = _create_image('PNG', 'RGBA', SIZE, COLOUR) converted, _ = self.pipeline.convert_image(im) self.assertEquals(converted.mode, 'RGB') self.assertEquals(converted.getcolors(), [(10000, (205, 230, 255))])
def setUp(self): self.tempdir = mkdtemp() self.pipeline = ImagesPipeline(self.tempdir) self.default_settings = Settings()
def test_thumbs(self): custom_thumbs = {'small': (50, 50), 'big': (270, 270)} another_pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_THUMBS': custom_thumbs})) self.assertEqual(self.pipeline.thumbs, self.default_settings.get('IMAGES_THUMBS')) self.assertEqual(another_pipeline.thumbs, custom_thumbs)
def test_min_height(self): another_pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_HEIGHT': 42})) self.assertEqual(self.pipeline.min_height, self.default_settings.getint('IMAGES_MIN_HEIGHT')) self.assertEqual(another_pipeline.min_height, 42)
def test_min_width(self): another_pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_MIN_WIDTH': 42})) self.assertEqual(self.pipeline.min_width, self.default_settings.getint('IMAGES_MIN_WIDTH')) self.assertEqual(another_pipeline.min_width, 42)
def test_images_result_field(self): another_pipeline = ImagesPipeline.from_settings(Settings({'IMAGES_STORE': self.tempdir, 'IMAGES_RESULT_FIELD': 'funny_field'})) self.assertEqual(self.pipeline.images_result_field, self.default_settings.get('IMAGES_RESULT_FIELD')) self.assertEqual(another_pipeline.images_result_field, 'funny_field')
def setUp(self): self.tempdir = mkdtemp() self.pipeline = ImagesPipeline(self.tempdir, download_func=_mocked_download_func)
def process_item(self, item, spider): self.spider = spider return ImagesPipeline.process_item(self, item, spider)