Example 1
def upload_file_on_s3(file_path, file, params=None):
    '''Upload a file to S3 using boto3

    Args:
        file_path: string
            file path on S3 (key)
        file: bytes | BytesIO | File
            file to upload
        params: None | dict
            parameters to pass to boto3.upload_fileobj() as ExtraArgs
    '''
    s3 = get_s3_resource()
    obj = s3.Object(settings.AWS_STORAGE_BUCKET_NAME, file_path)
    if isinstance(file, bytes):
        file = BytesIO(file)
    if params is not None:
        extra_args = params
    else:
        extra_args = {
            'Metadata': {
                'sha256': hashlib.sha256(file.read()).hexdigest()
            },
            "CacheControl":
            f"max-age={settings.STORAGE_ASSETS_CACHE_SECONDS}, public"
        }
        # Hashing consumed the stream; rewind it so that upload_fileobj()
        # reads the file from the start.
        file.seek(0)
    obj.upload_fileobj(file, ExtraArgs=extra_args)
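A hedged usage sketch for the helper above; the keys and payloads are made up, and note that a params override replaces the default ExtraArgs wholesale (no sha256 metadata is added in that case):

upload_file_on_s3('collection-1/item-1/asset-1.txt', b'some asset data')

upload_file_on_s3(
    'collection-1/item-1/asset-1.json',
    BytesIO(b'{}'),
    params={'ContentType': 'application/json'},
)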
Example 2
def assertS3ObjectNotExists(self, path):  # pylint: disable=invalid-name
    s3 = get_s3_resource()
    with self.assertRaises(
            botocore.exceptions.ClientError) as exception_context:
        s3.Object(settings.AWS_STORAGE_BUCKET_NAME, path).load()
    error = exception_context.exception
    self.assertEqual(error.response['Error']['Code'], "404")
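A minimal usage sketch inside a test case mixing in this helper; delete_asset() is a hypothetical stand-in for the code under test:

def test_asset_file_is_removed(self):
    path = 'collection-1/item-1/asset-1.txt'
    delete_asset(path)  # hypothetical code under test
    self.assertS3ObjectNotExists(path)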
Example 3
def clean(self):
    self.print_warning("Deleting all assets with prefix %s on S3...",
                       PREFIX)
    s3 = get_s3_resource()
    obj_iter = s3.Bucket(
        settings.AWS_STORAGE_BUCKET_NAME).objects.filter(Prefix=PREFIX)
    for obj in obj_iter:
        obj.delete()
    self.print_success('Done')
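Deleting keys one at a time issues a request per object. As a sketch of an alternative under the same assumptions (same bucket and PREFIX), boto3 collections also expose a batch delete() action that groups keys into DeleteObjects calls of up to 1000 keys per request:

s3.Bucket(settings.AWS_STORAGE_BUCKET_NAME).objects.filter(
    Prefix=PREFIX).delete()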
Example 4
def assertS3ObjectExists(self, path):  # pylint: disable=invalid-name
    s3 = get_s3_resource()

    try:
        s3.Object(settings.AWS_STORAGE_BUCKET_NAME, path).load()
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == "404":
            # Object was not found
            self.fail("the object was not found at the expected location")
        self.fail(f"object lookup failed for unexpected reason: {error}")
Example 5
def _create_file_on_s3(self, file_path, file):
    s3 = get_s3_resource()
    obj = s3.Object(settings.AWS_STORAGE_BUCKET_NAME, file_path)
    # Compute the checksum first, then rewind so that upload_fileobj()
    # reads the file from the start.
    sha256 = hashlib.sha256(file.read()).hexdigest()
    file.seek(0)
    obj.upload_fileobj(
        file,
        ExtraArgs={
            'Metadata': {
                'sha256': sha256
            },
            "CacheControl":
            f"max-age={settings.STORAGE_ASSETS_CACHE_SECONDS}, public"
        })
Example 6
def mock_s3_bucket():
    '''Mock an S3 bucket

    This function checks whether the S3 bucket exists and creates it if not.
    It can be used to mock the bucket for unit tests.
    '''
    start = time.time()
    s3 = get_s3_resource()
    try:
        s3.meta.client.head_bucket(Bucket=settings.AWS_STORAGE_BUCKET_NAME)
    except botocore.exceptions.ClientError as error:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        error_code = error.response['Error']['Code']
        if error_code != '404':
            # Anything other than a missing bucket is unexpected here.
            raise
        # We need to create the bucket since this is all in Moto's 'virtual'
        # AWS account
        s3.create_bucket(Bucket=settings.AWS_STORAGE_BUCKET_NAME,
                         CreateBucketConfiguration={
                             'LocationConstraint':
                             settings.AWS_S3_REGION_NAME
                         })
        logger.debug('Mock S3 bucket created in %fs', time.time() - start)
    logger.debug('Mock S3 bucket in %fs', time.time() - start)
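A sketch of how this helper might be wired into a test, assuming Django's TestCase and the moto library (the pre-5.x mock_s3 decorator); the test class name is made up:

from django.test import TestCase
from moto import mock_s3

@mock_s3
class AssetUploadTestCase(TestCase):

    def setUp(self):
        # Ensure the fake bucket exists before each test.
        mock_s3_bucket()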
Example 7
def upload(self):
    number_of_assets = (self.options['collections'] *
                        self.options['items'] * self.options['assets'])
    self.print_warning("Uploading %s assets on S3...", number_of_assets)
    self.print('-' * 100, level=2)
    s3 = get_s3_resource()
    for collection_id in map(lambda i: f'{PREFIX}collection-{i}',
                             range(1, self.options['collections'] + 1)):
        for item_id in map(lambda i: f'{PREFIX}item-{i}',
                           range(1, self.options['items'] + 1)):
            for asset_id in map(lambda i: f'{PREFIX}asset-{i}',
                                range(1, self.options['assets'] + 1)):
                if self.options['assets'] == 1:
                    media_extension = '.txt'
                else:
                    media_extension = random.choice(
                        random.choice(MEDIA_TYPES)[2])

                file = f'{collection_id}/{item_id}/{asset_id}{media_extension}'
                obj = s3.Object(settings.AWS_STORAGE_BUCKET_NAME, file)
                content = f'Dummy Asset data: {uuid.uuid4()}'.encode()
                filelike = BytesIO(content)
                obj.upload_fileobj(
                    filelike,
                    ExtraArgs={
                        'Metadata': {
                            'sha256': hashlib.sha256(content).hexdigest()
                        },
                        "CacheControl":
                        f"max-age={settings.STORAGE_ASSETS_CACHE_SECONDS}, public"
                    })
                self.print('%s,%s',
                           file,
                           get_sha256_multihash(content),
                           level=2)
    self.print('-' * 100, level=2)
    self.print_success('Done')
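get_sha256_multihash() is used above but not shown; a minimal sketch of what it might look like, assuming the py-multihash package and a hex-encoded sha2-256 multihash:

import hashlib

import multihash

def get_sha256_multihash(content):
    # Encode the raw sha2-256 digest of `content` as a hex multihash string.
    digest = hashlib.sha256(content).digest()
    return multihash.to_hex_string(multihash.encode(digest, 'sha2-256'))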
Example 8
def validate_asset_file(href, original_name, attrs):
    '''Validate Asset file

    Validate the Asset file located at href. The file must exist and match the
    multihash. The file hash is retrieved by doing an HTTP HEAD request at href.

    Args:
        href: string
            Asset file href to validate
        original_name: string
            Asset original name in case of renaming
        attrs: dict
            validated serializer attributes; may contain an optional
            'checksum_multihash' entry (must be a sha2-256 multihash!)

    Raises:
        rest_framework.serializers.ValidationError:
            in case of an invalid Asset (asset doesn't exist or hash doesn't match)
        rest_framework.exceptions.APIException:
            in case of other networking errors
    '''
    logger.debug('Validate asset file at %s with attrs %s', href, attrs)

    asset_path = get_asset_path(attrs['item'], original_name)
    try:
        s3 = get_s3_resource()
        obj = s3.Object(settings.AWS_STORAGE_BUCKET_NAME, asset_path)
        obj.load()
        logger.debug('S3 obj %s etag=%s, metadata=%s', asset_path, obj.e_tag,
                     obj.metadata)
    except botocore.exceptions.ClientError as error:
        logger.error('Failed to retrieve S3 object %s metadata: %s',
                     asset_path, error)
        if error.response.get('Error', {}).get('Code', None) == '404':
            logger.error("Asset at href %s doesn't exist", href)
            raise serializers.ValidationError(
                {'href': _(f"Asset doesn't exist at href {href}")}) from error
        raise APIException({'href': _("Error while checking href existence")
                            }) from error

    # Get the hash from response
    asset_multihash = None
    asset_sha256 = obj.metadata.get('sha256', None)
    logger.debug('Asset file %s checksums from headers: sha256=%s', href,
                 asset_sha256)
    if asset_sha256:
        asset_multihash = create_multihash(asset_sha256, 'sha2-256')

    if asset_multihash is None:
        logger.error(
            "Asset at href %s doesn't provide a mandatory checksum header "
            "(x-amz-meta-sha256) for validation", href)
        raise serializers.ValidationError({
            'href':
            _(f"Asset at href {href} doesn't provide a mandatory checksum header "
              "(x-amz-meta-sha256) for validation")
        }) from None

    expected_multihash = attrs.get('checksum_multihash', None)
    if expected_multihash is None:
        # checksum_multihash attribute not found in the attributes, therefore
        # set it to the multihash created from the HEAD header and terminate
        # the validation
        attrs['checksum_multihash'] = create_multihash_string(
            asset_multihash.digest, asset_multihash.code)
        return attrs

    # When a checksum_multihash is found in the attributes, make sure that it
    # matches the checksum found in the HEAD header.

    _validate_asset_file_checksum(href, expected_multihash, asset_multihash)

    return attrs
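_validate_asset_file_checksum() is referenced above but not shown; a hedged sketch of what it might do, assuming py-multihash again and that expected_multihash is a hex-encoded multihash string:

def _validate_asset_file_checksum(href, expected_multihash, asset_multihash):
    # Decode the expected hex multihash string and compare the raw digests.
    expected = multihash.decode(multihash.from_hex_string(expected_multihash))
    if expected.digest != asset_multihash.digest:
        logger.error('Asset at href %s has an invalid checksum', href)
        raise serializers.ValidationError(
            {'href': _(f"Asset at href {href} has an invalid checksum")})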