Example #1
    def write(self, b):
        """
        Write the given bytes (binary string) to the S3 file opened in the constructor.

        Note there's buffering happening under the covers, so this may not actually
        do any HTTP transfer right away.

        """
        if isinstance(b, six.text_type):
            # not part of API: also accept unicode => encode it as utf8
            b = b.encode('utf8')

        if not isinstance(b, six.binary_type):
            raise TypeError("input must be a binary string")

        self.lines.append(b)
        self.chunk_bytes += len(b)
        self.total_size += len(b)

        if self.chunk_bytes >= self.min_part_size:
            buff = b"".join(self.lines)
            logger.info("uploading part #%i, %i bytes (total %.3fGB)" %
                        (self.parts, len(buff), self.total_size / 1024.0**3))
            # boto part numbers are 1-based, hence self.parts + 1
            self.mp.upload_part_from_file(BytesIO(buff),
                                          part_num=self.parts + 1)
            logger.debug("upload of part #%i finished" % self.parts)
            self.parts += 1
            self.lines, self.chunk_bytes = [], 0
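
For reference, the same buffer-accumulate-flush pattern maps onto the modern boto3 client. The sketch below is illustrative only, not the API used above (which is the legacy boto multipart object); the bucket/key names and the 5 MB threshold are assumptions.

import boto3

MIN_PART_SIZE = 5 * 1024 ** 2  # S3 requires >= 5 MB per part (except the last)

class BufferedS3Writer:
    """Minimal sketch of the buffer-then-upload-part pattern with boto3."""

    def __init__(self, bucket, key):
        self.s3 = boto3.client('s3')
        self.bucket, self.key = bucket, key
        self.upload_id = self.s3.create_multipart_upload(
            Bucket=bucket, Key=key)['UploadId']
        self.buffer, self.parts = [], []

    def write(self, b):
        self.buffer.append(b)
        if sum(map(len, self.buffer)) >= MIN_PART_SIZE:
            self._flush()

    def _flush(self):
        # Upload the accumulated bytes as one part; part numbers are 1-based
        resp = self.s3.upload_part(
            Bucket=self.bucket, Key=self.key, UploadId=self.upload_id,
            PartNumber=len(self.parts) + 1, Body=b''.join(self.buffer))
        self.parts.append({'ETag': resp['ETag'],
                           'PartNumber': len(self.parts) + 1})
        self.buffer = []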
Example #2
    def close(self):
        buff = b"".join(self.lines)
        if buff:
            logger.info("uploading last part #%i, %i bytes (total %.3fGB)" % (self.parts, len(buff), self.total_size / 1024.0 ** 3))
            self.mp.upload_part_from_file(BytesIO(buff), part_num=self.parts + 1)
            logger.debug("upload of last part #%i finished" % self.parts)

        if self.total_size:
            self.mp.complete_upload()
        else:
            # AWS complains with "The XML you provided was not well-formed or did not validate against our published schema"
            # when the input is completely empty => abort the upload, no file created
            # TODO: or create the empty file some other way?
            logger.info("empty input, ignoring multipart upload")
            self.outbucket.cancel_multipart_upload(self.mp.key_name, self.mp.id)
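
The same complete-or-abort decision, sketched in boto3 terms, including one possible answer to the TODO above (creating the empty object directly). Here s3, bucket, key, upload_id, and parts are assumed to come from the hypothetical writer sketched under Example #1.

def finish_upload(s3, bucket, key, upload_id, parts):
    """Complete the multipart upload, or abort it when nothing was written."""
    if parts:
        s3.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts})
    else:
        # S3 rejects a CompleteMultipartUpload with zero parts, so abort...
        s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        # ...and, if an empty file is wanted, create the object directly:
        s3.put_object(Bucket=bucket, Key=key, Body=b'')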
Example #3
    async def _complete_multipart_upload(self, path, session_upload_id,
                                         parts_metadata):
        """This operation completes a multipart upload by assembling previously uploaded parts.

        Docs: https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html
        """

        payload = ''.join([
            '<?xml version="1.0" encoding="UTF-8"?><CompleteMultipartUpload>',
            ''.join([
                '<Part><PartNumber>{}</PartNumber><ETag>{}</ETag></Part>'.format(
                    i + 1, xml.sax.saxutils.escape(part['ETAG']))
                for i, part in enumerate(parts_metadata)
            ]),
            '</CompleteMultipartUpload>',
        ]).encode('utf-8')
        headers = {
            'Content-Length': str(len(payload)),
            'Content-MD5': compute_md5(BytesIO(payload))[1],
            'Content-Type': 'text/xml',
        }
        params = {'uploadId': session_upload_id}
        complete_url = functools.partial(
            self.bucket.new_key(path.path).generate_url,
            settings.TEMP_URL_SECS,
            'POST',
            query_parameters=params,
            headers=headers,
        )

        resp = await self.make_request(
            'POST',
            complete_url,
            data=payload,
            headers=headers,
            params=params,
            expects=(
                200,
                201,
            ),
            throws=exceptions.UploadError,
        )
        await resp.release()
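
Pulled out on its own, the payload construction above is easy to sanity-check. A standalone sketch, keeping the snippet's assumption that each part's metadata carries an 'ETAG' key:

import xml.sax.saxutils

def build_complete_payload(parts_metadata):
    # One <Part> element per uploaded part, numbered from 1
    parts = ''.join(
        '<Part><PartNumber>{}</PartNumber><ETag>{}</ETag></Part>'.format(
            i + 1, xml.sax.saxutils.escape(part['ETAG']))
        for i, part in enumerate(parts_metadata))
    return ('<?xml version="1.0" encoding="UTF-8"?>'
            '<CompleteMultipartUpload>{}</CompleteMultipartUpload>'
            .format(parts)).encode('utf-8')

print(build_complete_payload([{'ETAG': '"9bb58f26192e4ba00f01e2e7b136bbd8"'}]))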
Example #4
    async def _delete_folder(self, path, **kwargs):
        """Query for recursive contents of folder and delete in batches of 1000

        Called from: func: delete if not path.is_file

        Calls: func: self._check_region
               func: self.make_request
               func: self.bucket.generate_url

        :param *ProviderPath path: Path to be deleted

        On S3, folders are not first-class objects, but are instead inferred
        from the names of their children.  A regular DELETE request issued
        against a folder will not work unless that folder is completely empty.
        To fully delete an occupied folder, we must delete all of the comprising
        objects.  Amazon provides a bulk delete operation to simplify this.
        """
        await self._check_region()

        more_to_come = True
        content_keys = []
        query_params = {'prefix': path.path}
        marker = None

        while more_to_come:
            if marker is not None:
                query_params['marker'] = marker

            resp = await self.make_request(
                'GET',
                self.bucket.generate_url(settings.TEMP_URL_SECS,
                                         'GET',
                                         query_parameters=query_params),
                params=query_params,
                expects=(200, ),
                throws=exceptions.MetadataError,
            )

            contents = await resp.read()
            parsed = xmltodict.parse(
                contents, strip_whitespace=False)['ListBucketResult']
            more_to_come = parsed.get('IsTruncated') == 'true'
            contents = parsed.get('Contents', [])

            if isinstance(contents, dict):
                contents = [contents]

            content_keys.extend([content['Key'] for content in contents])
            if len(content_keys) > 0:
                marker = content_keys[-1]

        # A query against a non-existent folder does not return a 404
        if len(content_keys) == 0:
            raise exceptions.NotFoundError(str(path))

        while len(content_keys) > 0:
            key_batch = content_keys[:1000]
            del content_keys[:1000]

            payload = '<?xml version="1.0" encoding="UTF-8"?>'
            payload += '<Delete>'
            payload += ''.join(
                '<Object><Key>{}</Key></Object>'.format(
                    xml.sax.saxutils.escape(key))
                for key in key_batch)
            payload += '</Delete>'
            payload = payload.encode('utf-8')
            md5 = compute_md5(BytesIO(payload))

            query_params = {'delete': ''}
            headers = {
                'Content-Length': str(len(payload)),
                'Content-MD5': md5[1],
                'Content-Type': 'text/xml',
            }

            # We depend on a customized version of boto that can make query parameters part of
            # the signature.
            url = functools.partial(
                self.bucket.generate_url,
                settings.TEMP_URL_SECS,
                'POST',
                query_parameters=query_params,
                headers=headers,
            )
            resp = await self.make_request(
                'POST',
                url,
                params=query_params,
                data=payload,
                headers=headers,
                expects=(
                    200,
                    204,
                ),
                throws=exceptions.DeleteError,
            )
            await resp.release()
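
The list-then-bulk-delete flow above maps directly onto boto3, which exposes the same 1000-key limit through delete_objects; a minimal sketch, with bucket and prefix as placeholders:

import boto3

def delete_prefix(bucket, prefix):
    """List every key under prefix, then delete them in batches of 1000."""
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    keys = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        keys.extend(obj['Key'] for obj in page.get('Contents', []))
    if not keys:
        # As noted above, listing an absent prefix is not a 404; surface it ourselves
        raise FileNotFoundError(prefix)
    for i in range(0, len(keys), 1000):  # DeleteObjects accepts at most 1000 keys
        batch = [{'Key': k} for k in keys[i:i + 1000]]
        s3.delete_objects(Bucket=bucket, Delete={'Objects': batch})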
Example #5
    def test_compute_hash_bytesio(self):
        # Compute a hash from a file-like BytesIO object; the test passes as
        # long as no exception is raised.
        f = BytesIO(self._gen_data())
        compute_hashes_from_fileobj(f, chunk_size=512)
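
compute_hashes_from_fileobj here appears to be boto's Glacier hashing helper; the essence of the test is that a file-like object can be consumed in fixed-size chunks. A simplified stand-in showing that chunked-read pattern (the real helper does more, e.g. tree hashing):

import hashlib
from io import BytesIO

def sha256_fileobj(f, chunk_size=512):
    # Read the file-like object in fixed-size chunks so that arbitrarily
    # large inputs never need to fit in memory at once.
    h = hashlib.sha256()
    for chunk in iter(lambda: f.read(chunk_size), b''):
        h.update(chunk)
    return h.hexdigest()

print(sha256_fileobj(BytesIO(b'some test data')))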