Example #1
0
 def test_parse_credentials(self):
     """AWS credential resolution: URI credentials > explicit arguments
     (which here come from settings) > boto's own configuration."""
     try:
         import boto  # noqa: F401
     except ImportError:
         raise unittest.SkipTest("S3FeedStorage requires boto")
     aws_credentials = {'AWS_ACCESS_KEY_ID': 'settings_key',
                        'AWS_SECRET_ACCESS_KEY': 'settings_secret'}
     crawler = get_crawler(settings_dict=aws_credentials)
     # Instantiate with crawler
     storage = S3FeedStorage.from_crawler(crawler,
                                          's3://mybucket/export.csv')
     self.assertEqual(storage.access_key, 'settings_key')
     self.assertEqual(storage.secret_key, 'settings_secret')
     # Instantiate directly
     storage = S3FeedStorage('s3://mybucket/export.csv',
                             aws_credentials['AWS_ACCESS_KEY_ID'],
                             aws_credentials['AWS_SECRET_ACCESS_KEY'])
     self.assertEqual(storage.access_key, 'settings_key')
     self.assertEqual(storage.secret_key, 'settings_secret')
     # URI priority > settings priority
     storage = S3FeedStorage('s3://uri_key:uri_secret@mybucket/export.csv',
                             aws_credentials['AWS_ACCESS_KEY_ID'],
                             aws_credentials['AWS_SECRET_ACCESS_KEY'])
     self.assertEqual(storage.access_key, 'uri_key')
     self.assertEqual(storage.secret_key, 'uri_secret')
     # Backward compatibility for initialising without settings
     with warnings.catch_warnings(record=True) as w:
         storage = S3FeedStorage('s3://mybucket/export.csv')
         self.assertEqual(storage.access_key, 'conf_key')
         self.assertEqual(storage.secret_key, 'conf_secret')
         # assertIn gives a better failure message than
         # assertTrue('x' in y)
         self.assertIn('without AWS keys', str(w[-1].message))
Example #2
0
 def test_parse_credentials(self):
     """AWS credential resolution: URI credentials > explicit arguments
     (which here come from settings) > boto's own configuration."""
     try:
         import boto  # noqa: F401  (import is only an availability probe)
     except ImportError:
         raise unittest.SkipTest("S3FeedStorage requires boto")
     aws_credentials = {'AWS_ACCESS_KEY_ID': 'settings_key',
                        'AWS_SECRET_ACCESS_KEY': 'settings_secret'}
     crawler = get_crawler(settings_dict=aws_credentials)
     # Instantiate with crawler
     storage = S3FeedStorage.from_crawler(crawler,
                                          's3://mybucket/export.csv')
     self.assertEqual(storage.access_key, 'settings_key')
     self.assertEqual(storage.secret_key, 'settings_secret')
     # Instantiate directly
     storage = S3FeedStorage('s3://mybucket/export.csv',
                             aws_credentials['AWS_ACCESS_KEY_ID'],
                             aws_credentials['AWS_SECRET_ACCESS_KEY'])
     self.assertEqual(storage.access_key, 'settings_key')
     self.assertEqual(storage.secret_key, 'settings_secret')
     # URI priority > settings priority
     storage = S3FeedStorage('s3://uri_key:uri_secret@mybucket/export.csv',
                             aws_credentials['AWS_ACCESS_KEY_ID'],
                             aws_credentials['AWS_SECRET_ACCESS_KEY'])
     self.assertEqual(storage.access_key, 'uri_key')
     self.assertEqual(storage.secret_key, 'uri_secret')
     # Backwards compatibility for initialising without settings
     with warnings.catch_warnings(record=True) as w:
         storage = S3FeedStorage('s3://mybucket/export.csv')
         self.assertEqual(storage.access_key, 'conf_key')
         self.assertEqual(storage.secret_key, 'conf_secret')
         # assertIn gives a better failure message than
         # assertTrue('x' in y)
         self.assertIn('without AWS keys', str(w[-1].message))
Example #3
0
 def test_init_without_acl(self):
     """Constructing without an ACL argument leaves storage.acl as None."""
     storage = S3FeedStorage(
         's3://mybucket/export.csv',
         'access_key',
         'secret_key'
     )
     self.assertEqual(storage.access_key, 'access_key')
     self.assertEqual(storage.secret_key, 'secret_key')
     # assertIsNone is the idiomatic check and reports failures
     # more clearly than assertEqual(..., None)
     self.assertIsNone(storage.acl)
Example #4
0
 def test_from_crawler_without_acl(self):
     """from_crawler() with no FEED_STORAGE_S3_ACL setting leaves acl None."""
     settings = {
         'AWS_ACCESS_KEY_ID': 'access_key',
         'AWS_SECRET_ACCESS_KEY': 'secret_key',
     }
     crawler = get_crawler(settings_dict=settings)
     storage = S3FeedStorage.from_crawler(crawler,
                                          's3://mybucket/export.csv')
     self.assertEqual(storage.access_key, 'access_key')
     self.assertEqual(storage.secret_key, 'secret_key')
     # assertIsNone is the idiomatic check and reports failures
     # more clearly than assertEqual(..., None)
     self.assertIsNone(storage.acl)
Example #5
0
 def test_from_crawler_without_acl(self):
     """from_crawler() with no FEED_STORAGE_S3_ACL setting leaves acl None."""
     settings = {
         'AWS_ACCESS_KEY_ID': 'access_key',
         'AWS_SECRET_ACCESS_KEY': 'secret_key',
     }
     crawler = get_crawler(settings_dict=settings)
     storage = S3FeedStorage.from_crawler(
         crawler,
         's3://mybucket/export.csv'
     )
     self.assertEqual(storage.access_key, 'access_key')
     self.assertEqual(storage.secret_key, 'secret_key')
     # assertIsNone is the idiomatic check and reports failures
     # more clearly than assertEqual(..., None)
     self.assertIsNone(storage.acl)
Example #6
0
    def spider_opened(self, spider):
        """Open a raw-items S3 feed and a JSON-lines exporter for *spider*.

        Sets self.storage, self.raw_content and self.exporter as instance
        state; presumably a matching spider_closed handler finishes the
        export and calls storage.store() — confirm against the rest of
        the class.
        """
        # Record which backend produced this feed in the crawl stats.
        self.stats.set_value(self._namespace('backend'), 'rawS3')
        # spider finish time only available when `spider_closed`
        # uri used here only as a filler to fulfil feed storage contract
        self.storage = S3FeedStorage(
            uri=f's3://{ITEMS_BUCKET}',
            access_key=spider.settings['AWS_ACCESS_KEY_ID'],
            secret_key=spider.settings['AWS_SECRET_ACCESS_KEY'],
        )

        # File-like object returned by the storage; exported items are
        # written here until the feed is stored.
        self.raw_content = self.storage.open(spider)
        self.exporter = JsonLinesItemExporter(self.raw_content)
        self.exporter.start_exporting()
Example #7
0
 def test_store(self):
     """Round-trip a small binary payload through S3FeedStorage."""
     assert_aws_environ()
     uri = os.environ.get('S3_TEST_FILE_URI')
     if not uri:
         raise unittest.SkipTest("No S3 URI available for testing")
     storage = S3FeedStorage(uri)
     verifyObject(IFeedStorage, storage)
     # `feed_file` avoids shadowing the (py2) builtin name `file`.
     feed_file = storage.open(scrapy.Spider("default"))
     expected_content = b"content: \xe2\x98\x83"
     feed_file.write(expected_content)
     yield storage.store(feed_file)
     parsed = urlparse(uri)
     # S3 keys carry no leading slash, hence path[1:].
     content = get_s3_content_and_delete(parsed.hostname, parsed.path[1:])
     self.assertEqual(content, expected_content)
Example #8
0
 def test_store(self):
     """Store a feed to the S3 bucket named by FEEDTEST_S3_URI and read
     it back with boto; skipped when the environment is not configured."""
     assert_aws_environ()
     uri = os.environ.get('FEEDTEST_S3_URI')
     if not uri:
         raise unittest.SkipTest("No S3 URI available for testing")
     # Skip (not error) when boto is missing, matching the other
     # boto-dependent tests in this suite.
     try:
         from boto import connect_s3
     except ImportError:
         raise unittest.SkipTest("S3FeedStorage requires boto")
     storage = S3FeedStorage(uri)
     verifyObject(IFeedStorage, storage)
     file = storage.open(scrapy.Spider("default"))
     file.write("content")
     yield storage.store(file)
     u = urlparse(uri)
     # S3 keys have no leading slash; strip it from the URL path or
     # get_key() looks up '/export...' instead of 'export...'.
     key = connect_s3().get_bucket(u.hostname,
                                   validate=False).get_key(u.path[1:])
     self.assertEqual(key.get_contents_as_string(), "content")
Example #9
0
    def test_store_botocore_with_acl(self):
        """An ACL given at construction must be forwarded to put_object()."""
        try:
            import botocore  # noqa: F401
        except ImportError:
            raise unittest.SkipTest('botocore is required')

        storage = S3FeedStorage(
            's3://mybucket/export.csv',
            'access_key',
            'secret_key',
            'custom-acl',
        )
        self.assertEqual(storage.access_key, 'access_key')
        self.assertEqual(storage.secret_key, 'secret_key')
        self.assertEqual(storage.acl, 'custom-acl')

        # Swap in a mock client so store() records the upload call
        # instead of talking to S3.
        storage.s3_client = mock.MagicMock()
        yield storage.store(BytesIO(b'test file'))
        # The recorded put_object call must carry ACL='custom-acl'.
        put_object_kwargs = storage.s3_client.put_object.call_args[1]
        self.assertEqual(put_object_kwargs.get('ACL'), 'custom-acl')
Example #10
0
    def test_store_not_botocore_with_acl(self):
        """On the legacy boto path, the ACL must be passed as the
        `policy` keyword of Key.set_contents_from_file()."""
        storage = S3FeedStorage('s3://mybucket/export.csv', 'access_key',
                                'secret_key', 'custom-acl')
        self.assertEqual(storage.access_key, 'access_key')
        self.assertEqual(storage.secret_key, 'secret_key')
        self.assertEqual(storage.acl, 'custom-acl')

        # Force the non-botocore code path and stub out the connection
        # factory so no network traffic happens.
        storage.is_botocore = False
        storage.connect_s3 = mock.MagicMock()
        self.assertFalse(storage.is_botocore)

        yield storage.store(BytesIO(b'test file'))

        # Re-walk the mock chain the same way store() did to reach the
        # key object, then inspect the recorded upload call.
        connection = storage.connect_s3(*storage.connect_s3.call_args)
        bucket = connection.get_bucket(*connection.get_bucket.call_args)
        key = bucket.new_key(*bucket.new_key.call_args)
        self.assertIn({'policy': 'custom-acl'},
                      key.set_contents_from_file.call_args)