 def test_persist(self):
     assert_aws_environ()
     uri = os.environ.get('S3_TEST_FILE_URI')
     if not uri:
         raise unittest.SkipTest("No S3 URI available for testing")
     data = b"TestS3FilesStore: \xe2\x98\x83"
     buf = BytesIO(data)
     meta = {'foo': 'bar'}
     path = ''
     store = S3FilesStore(uri)
     yield store.persist_file(
         path, buf, info=None, meta=meta,
         headers={'Content-Type': 'image/png'})
     s = yield store.stat_file(path, info=None)
     self.assertIn('last_modified', s)
     self.assertIn('checksum', s)
     self.assertEqual(s['checksum'], '3187896a9657a28163abb31667df64c8')
     u = urlparse(uri)
     content, key = get_s3_content_and_delete(
         u.hostname, u.path[1:], with_key=True)
     self.assertEqual(content, data)
     if is_botocore():
         self.assertEqual(key['Metadata'], {'foo': 'bar'})
         self.assertEqual(
             key['CacheControl'], S3FilesStore.HEADERS['Cache-Control'])
         self.assertEqual(key['ContentType'], 'image/png')
     else:
         self.assertEqual(key.metadata, {'foo': 'bar'})
         self.assertEqual(
             key.cache_control, S3FilesStore.HEADERS['Cache-Control'])
         self.assertEqual(key.content_type, 'image/png')
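In the original test class this method is decorated with Twisted's @defer.inlineCallbacks, which is what lets the yield calls wait on the Deferreds returned by persist_file() and stat_file(). A minimal sketch of driving the store the same way outside the test harness, assuming the Scrapy 1.x import path and a hypothetical bucket URI (it needs a running Twisted reactor and valid AWS credentials, just like the test):

from io import BytesIO

from twisted.internet import defer

from scrapy.pipelines.files import S3FilesStore  # assumed Scrapy 1.x import path


@defer.inlineCallbacks
def persist_example():
    store = S3FilesStore('s3://my-test-bucket/prefix/')  # hypothetical bucket URI
    yield store.persist_file('example.png', BytesIO(b'payload'), info=None,
                             meta={'foo': 'bar'},
                             headers={'Content-Type': 'image/png'})
    stat = yield store.stat_file('example.png', info=None)
    # stat carries the same keys the test asserts on: 'checksum', 'last_modified'
    defer.returnValue(stat)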
Example #2
 def __init__(self, uri, access_key=None, secret_key=None):
     # BEGIN Backwards compatibility for initialising without keys (and
     # without using from_crawler)
     no_defaults = access_key is None and secret_key is None
     if no_defaults:
         from scrapy.conf import settings
         if 'AWS_ACCESS_KEY_ID' in settings or 'AWS_SECRET_ACCESS_KEY' in settings:
             import warnings
             from scrapy.exceptions import ScrapyDeprecationWarning
             warnings.warn(
                 "Initialising `scrapy.extensions.feedexport.S3FeedStorage` "
                 "without AWS keys is deprecated. Please supply credentials or "
                 "use the `from_crawler()` constructor.",
                 category=ScrapyDeprecationWarning,
                 stacklevel=2
             )
             access_key = settings['AWS_ACCESS_KEY_ID']
             secret_key = settings['AWS_SECRET_ACCESS_KEY']
     # END Backwards compatibility
     u = urlparse(uri)
     self.bucketname = u.hostname
     self.access_key = u.username or access_key
     self.secret_key = u.password or secret_key
     self.is_botocore = is_botocore()
     self.keyname = u.path[1:]  # remove first "/"
     if self.is_botocore:
         import botocore.session
         session = botocore.session.get_session()
         self.s3_client = session.create_client(
             's3', aws_access_key_id=self.access_key,
             aws_secret_access_key=self.secret_key)
     else:
         import boto
         self.connect_s3 = boto.connect_s3
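The deprecation warning above points callers at from_crawler(). A hedged sketch of what that companion constructor looks like for this storage class (the exact source may differ between Scrapy releases): it reads the two AWS settings from the crawler and passes them explicitly, so the fallback branch above never runs.

    @classmethod
    def from_crawler(cls, crawler, uri):
        # Sketch: pull the credentials from crawler.settings and pass them in
        # explicitly instead of relying on the deprecated global-settings fallback.
        return cls(
            uri,
            access_key=crawler.settings['AWS_ACCESS_KEY_ID'],
            secret_key=crawler.settings['AWS_SECRET_ACCESS_KEY'],
        )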
Example #3
 def test_persist(self):
     assert_aws_environ()
     uri = os.environ.get("S3_TEST_FILE_URI")
     if not uri:
         raise unittest.SkipTest("No S3 URI available for testing")
     data = b"TestS3FilesStore: \xe2\x98\x83"
     buf = BytesIO(data)
     meta = {"foo": "bar"}
     path = ""
     store = S3FilesStore(uri)
     yield store.persist_file(path, buf, info=None, meta=meta, headers={"Content-Type": "image/png"})
     s = yield store.stat_file(path, info=None)
     self.assertIn("last_modified", s)
     self.assertIn("checksum", s)
     self.assertEqual(s["checksum"], "3187896a9657a28163abb31667df64c8")
     u = urlparse(uri)
     content, key = get_s3_content_and_delete(u.hostname, u.path[1:], with_key=True)
     self.assertEqual(content, data)
     if is_botocore():
         self.assertEqual(key["Metadata"], {"foo": "bar"})
         self.assertEqual(key["CacheControl"], S3FilesStore.HEADERS["Cache-Control"])
         self.assertEqual(key["ContentType"], "image/png")
     else:
         self.assertEqual(key.metadata, {"foo": "bar"})
         self.assertEqual(key.cache_control, S3FilesStore.HEADERS["Cache-Control"])
         self.assertEqual(key.content_type, "image/png")
Example #4
 def __init__(self, uri):
     self.is_botocore = is_botocore()
     if self.is_botocore:
         import botocore.session
         session = botocore.session.get_session()
         self.s3_client = session.create_client(
             's3', aws_access_key_id=self.AWS_ACCESS_KEY_ID,
             aws_secret_access_key=self.AWS_SECRET_ACCESS_KEY)
     else:
         from boto.s3.connection import S3Connection
         self.S3Connection = S3Connection
     assert uri.startswith('s3://')
     self.bucket, self.prefix = uri[5:].split('/', 1)
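Here the credentials come from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY class attributes rather than constructor arguments. A hedged sketch of how the files pipeline typically fills them in from the project settings before building the store (the settings values and bucket URI below are hypothetical):

from scrapy.settings import Settings
from scrapy.pipelines.files import S3FilesStore  # assuming this snippet is Scrapy's S3FilesStore

settings = Settings({'AWS_ACCESS_KEY_ID': 'AKIA-example',        # hypothetical values
                     'AWS_SECRET_ACCESS_KEY': 'example-secret'})
# The pipeline copies the credentials onto the store class, then builds the store.
S3FilesStore.AWS_ACCESS_KEY_ID = settings['AWS_ACCESS_KEY_ID']
S3FilesStore.AWS_SECRET_ACCESS_KEY = settings['AWS_SECRET_ACCESS_KEY']
store = S3FilesStore('s3://my-bucket/images/full/')  # hypothetical bucket/prefix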
Example #5
 def __init__(self, uri):
     from scrapy.conf import settings
     u = urlparse(uri)
     self.bucketname = u.hostname
     self.access_key = u.username or settings['AWS_ACCESS_KEY_ID']
     self.secret_key = u.password or settings['AWS_SECRET_ACCESS_KEY']
     self.is_botocore = is_botocore()
     self.keyname = u.path[1:]  # remove first "/"
     if self.is_botocore:
         import botocore.session
         session = botocore.session.get_session()
         self.s3_client = session.create_client(
             's3', aws_access_key_id=self.access_key,
             aws_secret_access_key=self.secret_key)
     else:
         import boto
         self.connect_s3 = boto.connect_s3
Example #6
def get_s3_content_and_delete(bucket, path, with_key=False):
    """Get content from an S3 key and delete the key afterwards."""
    if is_botocore():
        import botocore.session
        session = botocore.session.get_session()
        client = session.create_client('s3')
        key = client.get_object(Bucket=bucket, Key=path)
        content = key['Body'].read()
        client.delete_object(Bucket=bucket, Key=path)
    else:
        import boto
        # assuming boto=2.2.2
        bucket = boto.connect_s3().get_bucket(bucket, validate=False)
        key = bucket.get_key(path)
        content = key.get_contents_as_string()
        bucket.delete_key(path)
    return (content, key) if with_key else content
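This is the helper the persist tests above rely on to fetch the uploaded object and clean it up in one call; the usual pattern splits the bucket and key out of the S3 URI:

from urllib.parse import urlparse

u = urlparse('s3://my-test-bucket/some/key')  # hypothetical test URI
content, key = get_s3_content_and_delete(u.hostname, u.path[1:], with_key=True)
# content is the object body (bytes); key is the get_object response dict
# (botocore) or the boto Key object (boto).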
Example #7
 def __init__(self, uri):
     self.is_botocore = is_botocore()
     if self.is_botocore:
         import botocore.session
         session = botocore.session.get_session()
         self.s3_client = session.create_client(
             's3',
             aws_access_key_id=self.AWS_ACCESS_KEY_ID,
             aws_secret_access_key=self.AWS_SECRET_ACCESS_KEY,
             endpoint_url=self.AWS_ENDPOINT_URL,
             region_name=self.AWS_REGION_NAME,
             use_ssl=self.AWS_USE_SSL,
             verify=self.AWS_VERIFY
         )
     else:
         from boto.s3.connection import S3Connection
         self.S3Connection = S3Connection
     assert uri.startswith('s3://')
     self.bucket, self.prefix = uri[5:].split('/', 1)
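Compared with Example #4, this constructor also honours endpoint, region, SSL and certificate-verification attributes, so the store can target an S3-compatible service instead of AWS. A sketch under the assumption that these class attributes are populated from settings the same way as the credentials (the endpoint and keys below are hypothetical):

S3FilesStore.AWS_ACCESS_KEY_ID = 'minio-access-key'       # hypothetical
S3FilesStore.AWS_SECRET_ACCESS_KEY = 'minio-secret-key'   # hypothetical
S3FilesStore.AWS_ENDPOINT_URL = 'http://localhost:9000'   # e.g. a local MinIO
S3FilesStore.AWS_REGION_NAME = 'us-east-1'
S3FilesStore.AWS_USE_SSL = False
S3FilesStore.AWS_VERIFY = False
store = S3FilesStore('s3://my-bucket/files/full/')        # hypothetical bucket/prefix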
Example #8
File: s3.py Project: runt18/scrapy
    def __init__(self, settings, aws_access_key_id=None, aws_secret_access_key=None, \
            httpdownloadhandler=HTTPDownloadHandler, **kw):

        if not aws_access_key_id:
            aws_access_key_id = settings['AWS_ACCESS_KEY_ID']
        if not aws_secret_access_key:
            aws_secret_access_key = settings['AWS_SECRET_ACCESS_KEY']

        # If no credentials could be found anywhere,
        # consider this an anonymous connection request by default;
        # unless 'anon' was set explicitly (True/False).
        anon = kw.get('anon')
        if anon is None and not aws_access_key_id and not aws_secret_access_key:
            kw['anon'] = True
        self.anon = kw.get('anon')

        self._signer = None
        if is_botocore():
            import botocore.auth
            import botocore.credentials
            kw.pop('anon', None)
            if kw:
                raise TypeError('Unexpected keyword arguments: {0!s}'.format(kw))
            if not self.anon:
                SignerCls = botocore.auth.AUTH_TYPE_MAPS['s3']
                self._signer = SignerCls(botocore.credentials.Credentials(
                    aws_access_key_id, aws_secret_access_key))
        else:
            _S3Connection = _get_boto_connection()
            try:
                self.conn = _S3Connection(
                    aws_access_key_id, aws_secret_access_key, **kw)
            except Exception as ex:
                raise NotConfigured(str(ex))

        self._download_http = httpdownloadhandler(settings).download_request
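The handler is normally not instantiated by hand; it is wired in through the DOWNLOAD_HANDLERS setting. A minimal settings sketch using the mainline Scrapy path for the s3 scheme (the fork named above may use a different module path):

# settings.py sketch: map the s3:// scheme to the handler shown above.
DOWNLOAD_HANDLERS = {
    's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
}
# Leave both unset to get the anonymous-connection behaviour shown above.
AWS_ACCESS_KEY_ID = None
AWS_SECRET_ACCESS_KEY = None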
Example #9
def skip_if_no_boto():
    try:
        is_botocore()
    except NotConfigured as e:
        raise SkipTest(e)
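is_botocore() raises NotConfigured when neither botocore nor boto can be imported, so this guard turns a missing dependency into a test skip instead of an error. A typical (hypothetical) use is to call it at the top of setUp so every test in the case is skipped:

import unittest

class S3HelpersTest(unittest.TestCase):   # hypothetical test case name
    def setUp(self):
        skip_if_no_boto()   # raises SkipTest, so each test in the case is skipped

    def test_uses_s3(self):
        ...                 # any test that needs boto/botocore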