def upload_object(self, file_path, container, object_name, extra=None,
                  verify_hash=True, headers=None):
    """
    Upload an object.

    Note: This will overwrite an existing object with the same name.
    """
    # Note: We don't use any of the base driver functions since Backblaze
    # API requires you to provide SHA1 hash upfront and the base methods
    # don't support that
    with open(file_path, 'rb') as fp:
        iterator = iter(fp)
        iterator = read_in_chunks(iterator=iterator)
        data = exhaust_iterator(iterator=iterator)

    obj = self._perform_upload(data=data, container=container,
                               object_name=object_name, extra=extra,
                               verify_hash=verify_hash, headers=headers)

    return obj
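# The upload above leans on two small helpers, read_in_chunks and
# exhaust_iterator, to buffer the whole file before hashing it. A minimal,
# illustrative sketch of helpers with similar semantics follows; the
# *_sketch names are hypothetical and the real helpers (in
# libcloud.utils.files) accept more options.
import io


def read_in_chunks_sketch(stream, chunk_size=8096):
    # Yield data from a file-like object in chunk_size pieces.
    while True:
        chunk = stream.read(chunk_size)
        if not chunk:
            break
        yield chunk


def exhaust_iterator_sketch(iterator):
    # Concatenate everything the iterator yields into one bytes object.
    return b''.join(iterator)


# Example: buffer a small in-memory "file" the same way upload_object does.
fp = io.BytesIO(b'hello world' * 3)
data = exhaust_iterator_sketch(read_in_chunks_sketch(fp, chunk_size=8))
assert data == b'hello world' * 3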
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
        }

        if len(chunk) > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)

        result = self.connection.request(path, method='PUT', data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join([k + '=' + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    :param obj: Object instance
    :type obj: :class:`Object`

    :param chunk_size: Optional chunk size (in bytes).
    :type chunk_size: ``int``

    :return: A stream of binary chunks of data.
    :rtype: ``object``
    """
    path = self.get_object_cdn_url(obj)
    with open(path, 'rb') as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    :param obj: Object instance
    :type obj: :class:`Object`

    :param chunk_size: Optional chunk size (in bytes).
    :type chunk_size: ``int``

    :return: A stream of binary chunks of data.
    :rtype: ``object``
    """
    path = self.get_object_cdn_url(obj)
    with open(path, "rb") as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
def download_object_as_stream(self, obj, chunk_size=None):
    """
    Return a generator which yields object data.

    @param obj: Object instance
    @type obj: L{Object}

    @param chunk_size: Optional chunk size (in bytes).
    @type chunk_size: C{int}

    @rtype: C{object}
    """
    path = self.get_object_cdn_url(obj)
    with open(path, 'rb') as obj_file:
        for data in read_in_chunks(obj_file, chunk_size=chunk_size):
            yield data
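# The streaming download variants above are normally consumed through the
# public driver API. A hedged usage sketch, assuming the local-storage
# backend; the storage root, container name, object name, and output path
# are placeholders, and the local driver needs its optional locking
# dependency installed.
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver

cls = get_driver(Provider.LOCAL)
driver = cls('/tmp/storage')  # hypothetical storage root

container = driver.get_container(container_name='backups')
obj = driver.get_object(container_name='backups',
                        object_name='dump.tar.gz')

# Stream the object to disk in 1 MiB chunks without buffering it in memory.
with open('/tmp/dump.tar.gz', 'wb') as out_fp:
    for chunk in driver.download_object_as_stream(obj,
                                                  chunk_size=1024 * 1024):
        out_fp.write(chunk)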
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None, headers=None):
    """
    Upload an object.

    Note: Backblaze does not yet support true streaming uploads, so this
    method buffers the entire object in memory and performs a regular
    upload internally.
    """
    iterator = read_in_chunks(iterator=iterator)
    data = exhaust_iterator(iterator=iterator)

    obj = self._perform_upload(data=data, container=container,
                               object_name=object_name, extra=extra,
                               headers=headers)

    return obj
def _upload_in_chunks(self, response, data, iterator, object_path,
                      blob_type, lease, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Blob
    Storage.

    :param response: Response object from the initial POST request
    :type response: :class:`RawResponse`

    :param data: Any data from the initial POST request
    :type data: ``str``

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param blob_type: The blob type being uploaded
    :type blob_type: ``str``

    :param lease: The lease object to be used for renewal
    :type lease: :class:`AzureBlobLease`

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (status, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    # Get the upload id from the response xml
    if response.status != httplib.CREATED:
        raise LibcloudError('Error initializing upload. Code: %d' %
                            (response.status), driver=self)

    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = {}

    lease.update_headers(headers)

    if blob_type == 'BlockBlob':
        params = {'comp': 'block'}
    else:
        params = {'comp': 'page'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(iterator, AZURE_CHUNK_SIZE):
        data = b(data)
        content_length = len(data)
        offset = bytes_transferred
        bytes_transferred += content_length

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = str(content_length)

        if blob_type == 'BlockBlob':
            # Block id can be any unique string that is base64 encoded
            # A 10 digit number can hold the max value of 50000 blocks
            # that are allowed for azure
            block_id = base64.b64encode(b('%10d' % (count)))
            block_id = block_id.decode('utf-8')
            params['blockid'] = block_id

            # Keep this data for a later commit
            chunks.append(block_id)
        else:
            headers['x-ms-page-write'] = 'update'
            headers['x-ms-range'] = 'bytes=%d-%d' % \
                (offset, (bytes_transferred - 1))

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    if blob_type == 'BlockBlob':
        self._commit_blocks(object_path, chunks, lease)

    # The Azure service does not return a hash immediately for
    # chunked uploads. It takes some time for the data to get synced
    response.headers['content-md5'] = None

    return (True, data_hash, bytes_transferred)
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # This provides an extra level of data check and is recommended
        # by amazon
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def _upload_in_chunks(self, stream, object_path, lease, meta_data,
                      content_type, object_name, file_path, verify_hash,
                      headers):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Storage.
    """
    data_hash = None
    if verify_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = headers or {}

    lease.update_headers(headers)

    params = {'comp': 'block'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(stream, AZURE_UPLOAD_CHUNK_SIZE,
                               fill_size=True):
        data = b(data)
        content_length = len(data)
        bytes_transferred += content_length

        if verify_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = str(content_length)

        # Block id can be any unique string that is base64 encoded
        # A 10 digit number can hold the max value of 50000 blocks
        # that are allowed for azure
        block_id = base64.b64encode(b('%10d' % (count)))
        block_id = block_id.decode('utf-8')
        params['blockid'] = block_id

        # Keep this data for a later commit
        chunks.append(block_id)

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if verify_hash:
        data_hash = base64.b64encode(b(data_hash.digest()))
        data_hash = data_hash.decode('utf-8')

    response = self._commit_blocks(object_path=object_path,
                                   chunks=chunks,
                                   lease=lease,
                                   meta_data=meta_data,
                                   content_type=content_type,
                                   data_hash=data_hash,
                                   object_name=object_name,
                                   file_path=file_path)

    # According to the Azure docs:
    # > This header refers to the content of the request, meaning, in this
    # > case, the list of blocks, and not the content of the blob itself.
    # However, the validation code assumes that the content-md5 in the
    # server response refers to the object so we must discard the value
    response.headers['content-md5'] = None

    return {
        'response': response,
        'data_hash': data_hash,
        'bytes_transferred': bytes_transferred,
    }
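# The block-id scheme used by the Azure uploaders above is easy to
# reproduce in isolation. A self-contained sketch; the constant and
# function names are illustrative, while the 50000-block limit and the
# space-padded 10 digit counter mirror the comments in the code.
import base64

MAX_AZURE_BLOCKS = 50000  # documented per-blob block limit


def make_block_id(count):
    # Block ids only need to be unique within the blob and base64 encoded;
    # a 10 digit counter comfortably covers 50000 blocks.
    return base64.b64encode(b'%10d' % count).decode('utf-8')


print(make_block_id(1))      # 'ICAgICAgICAgMQ=='
print(make_block_id(50000))  # base64 of b'     50000'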
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)
    method = 'PUT'

    if extra is not None:
        content_type = extra.get('content_type', None)
    else:
        content_type = None

    if not content_type:
        content_type, _ = guess_file_mime_type(object_name)

        if not content_type:
            raise AttributeError(
                'File content-type could not be guessed and' +
                ' no content_type value provided')

    try:
        self.connection.request(path + '?metadata/system')
    except AtmosError:
        e = sys.exc_info()[1]
        if e.code != 1003:
            raise
        method = 'POST'

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
            'Content-Type': content_type,
        }

        if len(chunk) > 0 and bytes_transferred > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)
            method = 'PUT'

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join([k + '=' + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def _upload_in_chunks(self, response, data, iterator, object_path,
                      blob_type, lease, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to Azure Blob
    Storage.

    :param response: Response object from the initial POST request
    :type response: :class:`RawResponse`

    :param data: Any data from the initial POST request
    :type data: ``str``

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param blob_type: The blob type being uploaded
    :type blob_type: ``str``

    :param lease: The lease object to be used for renewal
    :type lease: :class:`AzureBlobLease`

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (status, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    # Get the upload id from the response xml
    if response.status != httplib.CREATED:
        raise LibcloudError('Error initializing upload. Code: %d' %
                            (response.status), driver=self)

    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    headers = {}

    lease.update_headers(headers)

    if blob_type == 'BlockBlob':
        params = {'comp': 'block'}
    else:
        params = {'comp': 'page'}

    # Read the input data in chunk sizes suitable for Azure
    for data in read_in_chunks(iterator, AZURE_CHUNK_SIZE):
        data = b(data)
        content_length = len(data)
        offset = bytes_transferred
        bytes_transferred += content_length

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(b(chunk_hash.digest()))

        headers['Content-MD5'] = chunk_hash.decode('utf-8')
        headers['Content-Length'] = content_length

        if blob_type == 'BlockBlob':
            # Block id can be any unique string that is base64 encoded
            # A 10 digit number can hold the max value of 50000 blocks
            # that are allowed for azure
            block_id = base64.b64encode(b('%10d' % (count)))
            block_id = block_id.decode('utf-8')
            params['blockid'] = block_id

            # Keep this data for a later commit
            chunks.append(block_id)
        else:
            headers['x-ms-page-write'] = 'update'
            headers['x-ms-range'] = 'bytes=%d-%d' % \
                (offset, bytes_transferred - 1)

        # Renew lease before updating
        lease.renew()

        resp = self.connection.request(object_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.CREATED:
            resp.parse_error()
            raise LibcloudError('Error uploading chunk %d. Code: %d' %
                                (count, resp.status), driver=self)

        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    if blob_type == 'BlockBlob':
        self._commit_blocks(object_path, chunks, lease)

    # The Azure service does not return a hash immediately for
    # chunked uploads. It takes some time for the data to get synced
    response.headers['content-md5'] = None

    return (True, data_hash, bytes_transferred)
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ""

    path = self._namespace_path(container.name + "/" + object_name)
    method = "PUT"

    if extra is not None:
        content_type = extra.get("content_type", None)
    else:
        content_type = None

    if not content_type:
        content_type, _ = guess_file_mime_type(object_name)

        if not content_type:
            raise AttributeError("File content-type could not be guessed and"
                                 + " no content_type value provided")

    try:
        self.connection.request(path + "?metadata/system")
    except AtmosError:
        e = sys.exc_info()[1]
        if e.code != 1003:
            raise
        method = "POST"

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {"x-emc-meta": "md5=" + data_hash.hexdigest(),
                   "Content-Type": content_type}

        if len(chunk) > 0 and bytes_transferred > 0:
            headers["Range"] = "Bytes=%d-%d" % (bytes_transferred, end)
            method = "PUT"

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get("meta_data", {})
    meta_data["md5"] = data_hash
    user_meta = ", ".join([k + "=" + str(v) for k, v in
                           list(meta_data.items())])
    self.connection.request(path + "?metadata/user", method="POST",
                            headers={"x-emc-meta": user_meta})
    result = self.connection.request(path + "?metadata/system")

    meta = self._emc_meta(result)
    extra = {"object_id": meta["objectid"], "meta_data": meta_data}

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def upload_object(self, file_path, container, object_name, extra=None,
                  verify_hash=True, headers=None):
    """
    Upload an object.

    Note: This will overwrite an existing object with the same name.
    """
    # Note: We don't use any of the base driver functions since Backblaze
    # API requires you to provide SHA1 hash upfront and the base methods
    # don't support that
    with open(file_path, 'rb') as fp:
        iterator = iter(fp)
        iterator = read_in_chunks(iterator=iterator)
        data = exhaust_iterator(iterator=iterator)

    extra = extra or {}
    content_type = extra.get('content_type', 'b2/x-auto')
    meta_data = extra.get('meta_data', {})

    # Note: Backblaze API doesn't support chunked encoding and we need to
    # provide Content-Length up front (this is done inside _upload_object)
    headers = headers or {}
    headers['X-Bz-File-Name'] = object_name
    headers['Content-Type'] = content_type

    sha1 = hashlib.sha1()
    sha1.update(b(data))
    headers['X-Bz-Content-Sha1'] = sha1.hexdigest()

    # Include optional meta-data (up to 10 items)
    for key, value in meta_data.items():
        # TODO: Encode / escape key
        headers['X-Bz-Info-%s' % (key)] = value

    upload_data = self.ex_get_upload_data(
        container_id=container.extra['id'])
    upload_token = upload_data['authorizationToken']
    parsed_url = urlparse.urlparse(upload_data['uploadUrl'])

    upload_host = parsed_url.netloc
    request_path = parsed_url.path

    response = self.connection.upload_request(action=request_path,
                                              headers=headers,
                                              upload_host=upload_host,
                                              auth_token=upload_token,
                                              data=data)

    if response.status == httplib.OK:
        obj = self._to_object(item=response.object, container=container)
        return obj
    else:
        body = response.response.read()
        raise LibcloudError('Upload failed. status_code=%s, body=%s' %
                            (response.status, body), driver=self)
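# For context, a hedged usage sketch of upload_object through the public
# libcloud driver API; the credentials, container name, and file path are
# placeholders.
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver

cls = get_driver(Provider.BACKBLAZE_B2)
driver = cls('application_key_id', 'application_key')  # placeholder creds

container = driver.get_container(container_name='my-bucket')

# The whole file is read into memory so its SHA1 can be sent upfront.
obj = driver.upload_object(file_path='/tmp/backup.tar.gz',
                           container=container,
                           object_name='backup.tar.gz',
                           extra={'content_type': 'application/gzip',
                                  'meta_data': {'source': 'nightly-job'}})
print(obj.name, obj.size)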
def _upload_object(self, object_name, content_type, upload_func,
                   upload_func_kwargs, request_path, request_method='PUT',
                   headers=None, file_path=None, iterator=None,
                   container=None):
    """
    Helper function for setting common request headers and calling the
    passed in callback which uploads an object.
    """
    headers = headers or {}

    if file_path and not os.path.exists(file_path):
        raise OSError('File %s does not exist' % (file_path))

    if iterator is not None and not hasattr(iterator, 'next') and not \
            hasattr(iterator, '__next__'):
        raise AttributeError('iterator object must implement next() ' +
                             'method.')

    if not content_type:
        if file_path:
            name = file_path
        else:
            name = object_name

        content_type, _ = guess_file_mime_type(name)

        if not content_type:
            if self.strict_mode:
                raise AttributeError('File content-type could not be '
                                     'guessed and no content_type value '
                                     'is provided')
            else:
                # Fallback to a content-type
                content_type = DEFAULT_CONTENT_TYPE

    file_size = None

    if iterator:
        if self.supports_chunked_encoding:
            headers['Transfer-Encoding'] = 'chunked'
            upload_func_kwargs['chunked'] = True
        else:
            # Chunked transfer encoding is not supported. Need to buffer
            # all the data in memory so we can determine file size.
            iterator = read_in_chunks(iterator=iterator)
            data = exhaust_iterator(iterator=iterator)

            file_size = len(data)
            upload_func_kwargs['data'] = data
    else:
        file_size = os.path.getsize(file_path)
        upload_func_kwargs['chunked'] = False

    if file_size is not None and 'Content-Length' not in headers:
        headers['Content-Length'] = file_size

    headers['Content-Type'] = content_type
    response = self.connection.request(request_path,
                                       method=request_method, data=None,
                                       headers=headers, raw=True,
                                       container=container)

    upload_func_kwargs['response'] = response
    success, data_hash, bytes_transferred = upload_func(
        **upload_func_kwargs)

    if not success:
        raise LibcloudError(
            value='Object upload failed, Perhaps a timeout?', driver=self)

    result_dict = {
        'response': response,
        'data_hash': data_hash,
        'bytes_transferred': bytes_transferred
    }

    return result_dict
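# The content-type fallback in _upload_object mirrors what the standard
# library's mimetypes module provides. A standalone sketch of the same
# decision; pick_content_type is an illustrative name and the fallback
# value matches libcloud's DEFAULT_CONTENT_TYPE (assumed to be
# 'application/octet-stream').
import mimetypes

DEFAULT_CONTENT_TYPE = 'application/octet-stream'


def pick_content_type(name, provided=None, strict_mode=False):
    # Prefer an explicitly provided type, then a guess based on the file
    # name, then either fail (strict mode) or fall back to a generic type.
    if provided:
        return provided
    guessed, _ = mimetypes.guess_type(name)
    if guessed:
        return guessed
    if strict_mode:
        raise AttributeError('File content-type could not be guessed and '
                             'no content_type value is provided')
    return DEFAULT_CONTENT_TYPE


print(pick_content_type('report.pdf'))    # application/pdf
print(pick_content_type('blob.unknown'))  # application/octet-stream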
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    @param iterator: The generator for fetching the upload data
    @type iterator: C{generator}

    @param object_path: The path of the object to which we are uploading
    @type object_path: C{str}

    @param upload_id: The upload id allocated for this multipart upload
    @type upload_id: C{str}

    @keyword calculate_hash: Indicates if we must calculate the data hash
    @type calculate_hash: C{bool}

    @return: A tuple of (chunk info, checksum, bytes transferred)
    @rtype: C{tuple}
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # This provides an extra level of data check and is recommended
        # by amazon
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def _upload_from_iterator(self, iterator, object_path, upload_id,
                          calculate_hash=True, container=None):
    """
    Uploads data from an iterator in fixed sized chunks to OSS

    :param iterator: The generator for fetching the upload data
    :type iterator: ``generator``

    :param object_path: The path of the object to which we are uploading
    :type object_path: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :keyword container: the container object to upload object to
    :type container: :class:`Container`

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    # Read the input data in chunk sizes suitable for OSS
    for data in read_in_chunks(iterator, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # OSS will calculate hash of the uploaded data and
        # check this header.
        headers = {'Content-MD5': chunk_hash}

        params['partNumber'] = count

        request_path = '?'.join((object_path, urlencode(params)))

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers,
                                       container=container)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag']

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None, headers=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    extra_headers = headers or {}
    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ""

    path = self._namespace_path(container.name + "/" + object_name)
    method = "PUT"

    if extra is not None:
        content_type = extra.get("content_type", None)
    else:
        content_type = None

    content_type = self._determine_content_type(content_type, object_name)

    try:
        self.connection.request(path + "?metadata/system")
    except AtmosError as e:
        if e.code != 1003:
            raise
        method = "POST"

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = dict(extra_headers)
        headers.update({
            "x-emc-meta": "md5=" + data_hash.hexdigest(),
            "Content-Type": content_type,
        })

        if len(chunk) > 0 and bytes_transferred > 0:
            headers["Range"] = "Bytes=%d-%d" % (bytes_transferred, end)
            method = "PUT"

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get("meta_data", {})
    meta_data["md5"] = data_hash
    user_meta = ", ".join(
        [k + "=" + str(v) for k, v in list(meta_data.items())])
    self.connection.request(path + "?metadata/user", method="POST",
                            headers={"x-emc-meta": user_meta})
    result = self.connection.request(path + "?metadata/system")

    meta = self._emc_meta(result)
    extra = {
        "object_id": meta["objectid"],
        "meta_data": meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)
def _upload_multipart_chunks(self, container, object_name, upload_id,
                             stream, calculate_hash=True):
    """
    Uploads data from an iterator in fixed sized chunks to S3

    :param container: The destination container
    :type container: :class:`Container`

    :param object_name: The name of the object which we are uploading
    :type object_name: ``str``

    :param upload_id: The upload id allocated for this multipart upload
    :type upload_id: ``str``

    :param stream: The generator for fetching the upload data
    :type stream: ``generator``

    :keyword calculate_hash: Indicates if we must calculate the data hash
    :type calculate_hash: ``bool``

    :return: A tuple of (chunk info, checksum, bytes transferred)
    :rtype: ``tuple``
    """
    data_hash = None
    if calculate_hash:
        data_hash = self._get_hash_function()

    bytes_transferred = 0
    count = 1
    chunks = []
    params = {'uploadId': upload_id}

    request_path = self._get_object_path(container, object_name)

    # Read the input data in chunk sizes suitable for AWS
    for data in read_in_chunks(stream, chunk_size=CHUNK_SIZE,
                               fill_size=True, yield_empty=True):
        bytes_transferred += len(data)

        if calculate_hash:
            data_hash.update(data)

        chunk_hash = self._get_hash_function()
        chunk_hash.update(data)
        chunk_hash = base64.b64encode(chunk_hash.digest()).decode('utf-8')

        # The Content-MD5 header provides an extra level of data check and
        # is recommended by amazon
        headers = {
            'Content-Length': len(data),
            'Content-MD5': chunk_hash,
        }

        params['partNumber'] = count

        resp = self.connection.request(request_path, method='PUT',
                                       data=data, headers=headers,
                                       params=params)

        if resp.status != httplib.OK:
            raise LibcloudError('Error uploading chunk', driver=self)

        server_hash = resp.headers['etag'].replace('"', '')

        # Keep this data for a later commit
        chunks.append((count, server_hash))
        count += 1

    if calculate_hash:
        data_hash = data_hash.hexdigest()

    return (chunks, data_hash, bytes_transferred)
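# The per-chunk Content-MD5 value used by the S3 and OSS uploaders above is
# simply the base64-encoded raw MD5 digest of the chunk body (not the hex
# digest). A self-contained sketch; chunk_content_md5 is an illustrative
# name and hashlib.md5 stands in for the driver's _get_hash_function.
import base64
import hashlib


def chunk_content_md5(data):
    return base64.b64encode(hashlib.md5(data).digest()).decode('utf-8')


part = b'x' * 1024
print(chunk_content_md5(part))  # a 24-character base64 string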
def upload_object_via_stream(self, iterator, container, object_name,
                             extra=None):
    if isinstance(iterator, file):
        iterator = iter(iterator)

    data_hash = hashlib.md5()
    generator = read_in_chunks(iterator, CHUNK_SIZE, True)
    bytes_transferred = 0
    try:
        chunk = next(generator)
    except StopIteration:
        chunk = ''

    path = self._namespace_path(container.name + '/' + object_name)
    method = 'PUT'

    if extra is not None:
        content_type = extra.get('content_type', None)
    else:
        content_type = None

    content_type = self._determine_content_type(content_type, object_name)

    try:
        self.connection.request(path + '?metadata/system')
    except AtmosError as e:
        if e.code != 1003:
            raise
        method = 'POST'

    while True:
        end = bytes_transferred + len(chunk) - 1
        data_hash.update(b(chunk))
        headers = {
            'x-emc-meta': 'md5=' + data_hash.hexdigest(),
            'Content-Type': content_type,
        }

        if len(chunk) > 0 and bytes_transferred > 0:
            headers['Range'] = 'Bytes=%d-%d' % (bytes_transferred, end)
            method = 'PUT'

        result = self.connection.request(path, method=method, data=chunk,
                                         headers=headers)
        bytes_transferred += len(chunk)

        try:
            chunk = next(generator)
        except StopIteration:
            break
        if len(chunk) == 0:
            break

    data_hash = data_hash.hexdigest()

    if extra is None:
        meta_data = {}
    else:
        meta_data = extra.get('meta_data', {})
    meta_data['md5'] = data_hash
    user_meta = ', '.join(
        [k + '=' + str(v) for k, v in list(meta_data.items())])
    self.connection.request(path + '?metadata/user', method='POST',
                            headers={'x-emc-meta': user_meta})
    result = self.connection.request(path + '?metadata/system')

    meta = self._emc_meta(result)
    extra = {
        'object_id': meta['objectid'],
        'meta_data': meta_data,
    }

    return Object(object_name, bytes_transferred, data_hash, extra,
                  meta_data, container, self)