def test_parse_credentials(self):
    """Credential resolution order: URI > explicit args/settings > boto config."""
    try:
        import boto  # noqa: F401
    except ImportError:
        raise unittest.SkipTest("S3FeedStorage requires boto")
    key_id, secret = 'settings_key', 'settings_secret'
    aws_credentials = {'AWS_ACCESS_KEY_ID': key_id,
                       'AWS_SECRET_ACCESS_KEY': secret}
    crawler = get_crawler(settings_dict=aws_credentials)
    # Built through the crawler: keys are read from the settings.
    storage = S3FeedStorage.from_crawler(crawler, 's3://mybucket/export.csv')
    self.assertEqual(storage.access_key, key_id)
    self.assertEqual(storage.secret_key, secret)
    # Built directly: keys come from the positional arguments.
    storage = S3FeedStorage('s3://mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, key_id)
    self.assertEqual(storage.secret_key, secret)
    # Keys embedded in the URI win over keys passed as arguments.
    storage = S3FeedStorage('s3://uri_key:uri_secret@mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, 'uri_key')
    self.assertEqual(storage.secret_key, 'uri_secret')
    # No keys at all: fall back to the boto configuration and warn.
    with warnings.catch_warnings(record=True) as caught:
        storage = S3FeedStorage('s3://mybucket/export.csv')
        self.assertEqual(storage.access_key, 'conf_key')
        self.assertEqual(storage.secret_key, 'conf_secret')
        self.assertIn('without AWS keys', str(caught[-1].message))
def test_parse_credentials(self):
    """Check that AWS credentials are resolved in priority order:
    keys embedded in the URI, then explicitly supplied keys/settings,
    then the boto configuration (which also emits a warning)."""
    try:
        import boto  # noqa: F401 -- imported only to check availability
    except ImportError:
        raise unittest.SkipTest("S3FeedStorage requires boto")
    aws_credentials = {'AWS_ACCESS_KEY_ID': 'settings_key',
                       'AWS_SECRET_ACCESS_KEY': 'settings_secret'}
    crawler = get_crawler(settings_dict=aws_credentials)
    # Instantiate with crawler: keys are read from the crawler settings.
    storage = S3FeedStorage.from_crawler(crawler, 's3://mybucket/export.csv')
    self.assertEqual(storage.access_key, 'settings_key')
    self.assertEqual(storage.secret_key, 'settings_secret')
    # Instantiate directly: keys come from the positional arguments.
    storage = S3FeedStorage('s3://mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, 'settings_key')
    self.assertEqual(storage.secret_key, 'settings_secret')
    # URI priority > settings priority: keys in the URI win.
    storage = S3FeedStorage('s3://uri_key:uri_secret@mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, 'uri_key')
    self.assertEqual(storage.secret_key, 'uri_secret')
    # Backwards compatibility for initialising without settings: the
    # boto config supplies the keys and a deprecation-style warning fires.
    with warnings.catch_warnings(record=True) as w:
        storage = S3FeedStorage('s3://mybucket/export.csv')
        self.assertEqual(storage.access_key, 'conf_key')
        self.assertEqual(storage.secret_key, 'conf_secret')
        self.assertTrue('without AWS keys' in str(w[-1].message))
def test_init_without_acl(self):
    """Constructing storage without an ACL argument leaves ``acl`` unset."""
    storage = S3FeedStorage(
        's3://mybucket/export.csv',
        'access_key',
        'secret_key',
    )
    self.assertEqual(storage.access_key, 'access_key')
    self.assertEqual(storage.secret_key, 'secret_key')
    self.assertIsNone(storage.acl)
def test_from_crawler_without_acl(self):
    """from_crawler() without an ACL setting leaves ``acl`` unset."""
    crawler = get_crawler(settings_dict={
        'AWS_ACCESS_KEY_ID': 'access_key',
        'AWS_SECRET_ACCESS_KEY': 'secret_key',
    })
    storage = S3FeedStorage.from_crawler(crawler, 's3://mybucket/export.csv')
    self.assertEqual(storage.access_key, 'access_key')
    self.assertEqual(storage.secret_key, 'secret_key')
    self.assertIsNone(storage.acl)
def test_from_crawler_without_acl(self):
    """from_crawler() without an ACL setting leaves ``acl`` unset."""
    settings = {
        'AWS_ACCESS_KEY_ID': 'access_key',
        'AWS_SECRET_ACCESS_KEY': 'secret_key',
    }
    storage = S3FeedStorage.from_crawler(
        get_crawler(settings_dict=settings),
        's3://mybucket/export.csv',
    )
    self.assertEqual(storage.access_key, 'access_key')
    self.assertEqual(storage.secret_key, 'secret_key')
    self.assertIsNone(storage.acl)
def spider_opened(self, spider): self.stats.set_value(self._namespace('backend'), 'rawS3') # spider finish time only available when `spider_closed` # uri used here only as a filler to fulfil feed storage contract self.storage = S3FeedStorage( uri=f's3://{ITEMS_BUCKET}', access_key=spider.settings['AWS_ACCESS_KEY_ID'], secret_key=spider.settings['AWS_SECRET_ACCESS_KEY'], ) self.raw_content = self.storage.open(spider) self.exporter = JsonLinesItemExporter(self.raw_content) self.exporter.start_exporting()
def test_store(self):
    """Round-trip a small binary payload through real S3 storage.

    Skipped unless AWS credentials and the S3_TEST_FILE_URI environment
    variable are available; the uploaded object is fetched and deleted
    by get_s3_content_and_delete() before comparison.
    """
    assert_aws_environ()
    uri = os.environ.get('S3_TEST_FILE_URI')
    if not uri:
        raise unittest.SkipTest("No S3 URI available for testing")
    storage = S3FeedStorage(uri)
    # The storage must satisfy the IFeedStorage interface contract.
    verifyObject(IFeedStorage, storage)
    file = storage.open(scrapy.Spider("default"))
    # Includes a UTF-8 snowman to exercise non-ASCII bytes.
    expected_content = b"content: \xe2\x98\x83"
    file.write(expected_content)
    yield storage.store(file)
    u = urlparse(uri)
    # Key name is the URI path without its leading slash.
    content = get_s3_content_and_delete(u.hostname, u.path[1:])
    self.assertEqual(content, expected_content)
def test_store(self):
    """Round-trip the string "content" through real S3 via the boto backend.

    Skipped unless AWS credentials and the FEEDTEST_S3_URI environment
    variable are available.
    """
    assert_aws_environ()
    uri = os.environ.get('FEEDTEST_S3_URI')
    if not uri:
        raise unittest.SkipTest("No S3 URI available for testing")
    from boto import connect_s3
    storage = S3FeedStorage(uri)
    # The storage must satisfy the IFeedStorage interface contract.
    verifyObject(IFeedStorage, storage)
    file = storage.open(scrapy.Spider("default"))
    # NOTE(review): writes str, not bytes -- boto2/py2-era code; confirm
    # the buffer accepts text under py3.
    file.write("content")
    yield storage.store(file)
    u = urlparse(uri)
    # NOTE(review): the sibling test looks the key up as u.path[1:], but
    # here the leading slash is kept -- verify which form matches the
    # key name the storage actually uploads under.
    key = connect_s3().get_bucket(u.hostname, validate=False).get_key(u.path)
    self.assertEqual(key.get_contents_as_string(), "content")
def test_store_botocore_with_acl(self):
    """A custom ACL is forwarded to put_object() by the botocore backend."""
    try:
        import botocore  # noqa: F401
    except ImportError:
        raise unittest.SkipTest('botocore is required')
    storage = S3FeedStorage('s3://mybucket/export.csv',
                            'access_key',
                            'secret_key',
                            'custom-acl')
    self.assertEqual(storage.access_key, 'access_key')
    self.assertEqual(storage.secret_key, 'secret_key')
    self.assertEqual(storage.acl, 'custom-acl')
    # Stub the client so store() records the upload call instead of
    # talking to S3.
    storage.s3_client = mock.MagicMock()
    yield storage.store(BytesIO(b'test file'))
    put_kwargs = storage.s3_client.put_object.call_args[1]
    self.assertEqual(put_kwargs.get('ACL'), 'custom-acl')
def test_store_not_botocore_with_acl(self):
    """A custom ACL is passed as the ``policy`` keyword when the legacy
    boto (non-botocore) upload path is used."""
    storage = S3FeedStorage('s3://mybucket/export.csv',
                            'access_key',
                            'secret_key',
                            'custom-acl')
    self.assertEqual(storage.access_key, 'access_key')
    self.assertEqual(storage.secret_key, 'secret_key')
    self.assertEqual(storage.acl, 'custom-acl')
    # Force the legacy boto code path and stub the connection factory so
    # the whole connect_s3 -> get_bucket -> new_key chain is mocked.
    storage.is_botocore = False
    storage.connect_s3 = mock.MagicMock()
    self.assertFalse(storage.is_botocore)
    yield storage.store(BytesIO(b'test file'))
    # Replay the recorded call chain to reach the mocked key object.
    conn = storage.connect_s3(*storage.connect_s3.call_args)
    bucket = conn.get_bucket(*conn.get_bucket.call_args)
    key = bucket.new_key(*bucket.new_key.call_args)
    # call_args is an (args, kwargs) pair; membership of the dict checks
    # that set_contents_from_file() received exactly policy='custom-acl'
    # as its keyword arguments.
    self.assertIn(dict(policy='custom-acl'), key.set_contents_from_file.call_args)