def test_blob_download_stream(binary_blob, temp_file):
    """Download a blob into an open binary stream and verify its checksum."""
    with open(temp_file, 'wb') as stream:
        binary_blob.download(stream)

    digest = file_checksum(
        temp_file, hash_type=binary_blob.driver.hash_type).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
# Example 2
    def _make_blob(self, container: Container, object_name: str) -> Blob:
        """Convert local file name to a Cloud Storage Blob.

        Reads content type, content disposition, and user metadata from
        the file's extended attributes when the file system supports them.

        :param container: Container instance.
        :type container: :class:`.Container`

        :param object_name: Filename.
        :type object_name: str

        :return: Blob instance.
        :rtype: :class:`.Blob`

        :raises NotFoundError: If the file does not exist.
        """
        full_path = os.path.join(self.base_path, container.name, object_name)

        object_path = pathlib.Path(full_path)

        try:
            stat = os.stat(str(object_path))
        except FileNotFoundError:
            raise NotFoundError(blob_not_found % (object_name, container.name))

        meta_data = {}
        content_type = None
        content_disposition = None

        try:
            x = xattr.xattr(full_path)

            for attr_key, attr_value in x.items():
                value_str = attr_value.decode('utf-8')

                if attr_key.startswith(self._OBJECT_META_PREFIX + 'metadata'):
                    # User metadata keys are namespaced; keep only the last
                    # dotted segment as the metadata key.
                    meta_key = attr_key.split('.')[-1]
                    meta_data[meta_key] = value_str
                elif attr_key.endswith('content-type'):
                    content_type = value_str
                elif attr_key.endswith('content-disposition'):
                    content_disposition = value_str
                else:
                    logger.warning("Unknown file attribute '%s'" % attr_key)
        except OSError:
            # File system (or mount) does not support extended attributes.
            logger.warning(local_no_attributes)

        # TODO: QUESTION: Option to disable checksum for large files?
        # TODO: QUESTION: Save a .hash file for each file?
        # BUG FIX: store the hex digest string rather than the hash object,
        # so the checksum compares equal to hex-string constants (matches
        # the other local driver implementation of this method).
        checksum = file_checksum(
            full_path, hash_type=self.hash_type).hexdigest()

        # ETag is derived from the file path, not its contents.
        etag = hashlib.sha1(full_path.encode('utf-8')).hexdigest()
        created_at = datetime.fromtimestamp(stat.st_ctime, timezone.utc)
        modified_at = datetime.fromtimestamp(stat.st_mtime, timezone.utc)
        return Blob(name=object_path.name, checksum=checksum, etag=etag,
                    size=stat.st_size, container=container, driver=self,
                    acl=None, meta_data=meta_data,
                    content_disposition=content_disposition,
                    content_type=content_type, created_at=created_at,
                    modified_at=modified_at)
# Example 3
    def upload_blob(
        self,
        container: Container,
        filename: FileLike,
        blob_name: str = None,
        acl: str = None,
        meta_data: MetaData = None,
        content_type: str = None,
        content_disposition: str = None,
        cache_control: str = None,
        chunk_size: int = 1024,
        extra: ExtraOptions = None,
    ) -> Blob:
        """Upload a local file path or an open stream as an Azure blob.

        Computes a local MD5-style checksum and passes it along as
        ``content_md5`` because the Azure backend does not set it itself.
        """
        if acl:
            logger.info(messages.OPTION_NOT_SUPPORTED, "acl")

        if meta_data is None:
            meta_data = {}
        if extra is None:
            extra = {}

        options = self._normalize_parameters(extra, self._PUT_OBJECT_KEYS)
        options.setdefault("content_type", content_type)
        options.setdefault("content_disposition", content_disposition)
        options.setdefault("cache_control", cache_control)

        azure_container = self._get_azure_container(container.name)
        blob_name = blob_name or validate_file_or_path(filename)

        # azure does not set content_md5 on backend
        digest = file_checksum(filename, hash_type=self.hash_type).digest()
        encoded = base64.b64encode(digest).decode("utf-8").strip()
        options.setdefault("content_md5", encoded)

        content_settings = ContentSettings(**options)

        # Arguments shared by both the path and the stream upload calls.
        common_kwargs = dict(
            container_name=azure_container.name,
            blob_name=blob_name,
            content_settings=content_settings,
            metadata=meta_data,
            validate_content=True,
        )

        if isinstance(filename, str):
            self.service.create_blob_from_path(
                file_path=filename, **common_kwargs)
        else:
            self.service.create_blob_from_stream(
                stream=filename, **common_kwargs)

        azure_blob = self._get_azure_blob(azure_container.name, blob_name)
        return self._convert_azure_blob(container, azure_blob)
# Example 4
def test_blob_upload_path(container, text_filename, temp_file):
    """Upload from a file path, then download and verify the checksum."""
    uploaded = container.upload_blob(text_filename)
    assert uploaded.name == settings.TEXT_FILENAME

    uploaded.download(temp_file)

    digest = file_checksum(
        temp_file, hash_type=uploaded.driver.hash_type).hexdigest()
    assert digest == settings.TEXT_MD5_CHECKSUM
# Example 5
def test_blob_upload_stream(container, binary_stream, temp_file):
    """Upload from an open stream, then download and verify the checksum."""
    uploaded = container.upload_blob(filename=binary_stream,
                                     blob_name=BINARY_STREAM_FILENAME,
                                     **BINARY_OPTIONS)
    assert uploaded.name == BINARY_STREAM_FILENAME

    uploaded.download(temp_file)

    digest = file_checksum(
        temp_file, hash_type=uploaded.driver.hash_type).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
# Example 6
def test_blob_upload_options(container, binary_stream, temp_file):
    """Verify upload options (metadata, content type) survive a round trip."""
    uploaded = container.upload_blob(filename=binary_stream,
                                     blob_name=BINARY_STREAM_FILENAME,
                                     **BINARY_OPTIONS)
    assert uploaded.name == BINARY_STREAM_FILENAME
    assert uploaded.meta_data == BINARY_OPTIONS['meta_data']
    assert uploaded.content_type == BINARY_OPTIONS['content_type']

    uploaded.download(temp_file)

    digest = file_checksum(
        temp_file, hash_type=uploaded.driver.hash_type).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
def test_blob_generate_download_url(binary_blob, temp_file):
    """Generate a signed URL, fetch it, and verify headers and checksum."""
    content_disposition = BINARY_OPTIONS.get('content_disposition')
    download_url = binary_blob.generate_download_url(
        content_disposition=content_disposition)
    assert uri_validator(download_url)

    response = requests.get(download_url)
    assert response.status_code == HTTPStatus.OK, response.text
    assert response.headers['content-disposition'] == content_disposition

    with open(temp_file, 'wb') as target:
        for chunk in response.iter_content(chunk_size=128):
            target.write(chunk)

    digest = file_checksum(
        temp_file, hash_type=binary_blob.driver.hash_type).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
def test_blob_generate_download_url(binary_blob, temp_file):
    """Generate a signed download URL and verify disposition and checksum.

    The Content-Disposition header is re-parsed because some providers
    append extra parameters beyond the value the caller requested.
    """
    content_disposition = BINARY_OPTIONS.get('content_disposition')
    download_url = binary_blob.generate_download_url(
        content_disposition=content_disposition)
    assert uri_validator(download_url)

    response = requests.get(download_url)
    assert response.status_code == HTTPStatus.OK, response.text
    # Rackspace adds extra garbage to the header
    # 'attachment; filename=avatar-attachment.png;
    #  filename*=UTF-8\\'\\'avatar-attachment.png'
    parsed_disposition = parse_headers(response.headers['content-disposition'])
    response_disposition = '{}; filename={}'.format(
        parsed_disposition.disposition, parsed_disposition.filename_unsafe)
    assert response_disposition == content_disposition

    with open(temp_file, 'wb') as f:
        for chunk in response.iter_content(chunk_size=128):
            f.write(chunk)

    hash_type = binary_blob.driver.hash_type
    download_hash = file_checksum(temp_file, hash_type=hash_type)
    # BUG FIX: compare the hex digest string, not the hash object itself,
    # to the hex-string constant (matches every sibling download test).
    assert download_hash.hexdigest() == BINARY_MD5_CHECKSUM
# Example 9
def test_blob_generate_download_url(binary_blob, temp_file):
    """Generate a signed URL, normalize the disposition header, verify hash."""
    expected_disposition = settings.BINARY_OPTIONS.get("content_disposition")
    url = binary_blob.generate_download_url(
        content_disposition=expected_disposition)
    assert uri_validator(url)

    response = requests.get(url)
    assert response.status_code == HTTPStatus.OK, response.text
    # Rackspace adds extra garbage to the header
    # 'attachment; filename=avatar-attachment.png;
    #  filename*=UTF-8\\'\\'avatar-attachment.png'
    disposition, params = parse_content_disposition(
        response.headers["content-disposition"])
    normalized = "{}; filename={}".format(disposition, params["filename"])
    assert normalized == expected_disposition

    with open(temp_file, "wb") as target:
        for chunk in response.iter_content(chunk_size=128):
            target.write(chunk)

    digest = file_checksum(
        temp_file, hash_type=binary_blob.driver.hash_type).hexdigest()
    assert digest == settings.BINARY_MD5_CHECKSUM
def test_blob_download_path(binary_blob, temp_file):
    """Download a blob to a file path and verify its checksum."""
    binary_blob.download(temp_file)
    digest = file_checksum(
        temp_file, hash_type=binary_blob.driver.hash_type).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
# Example 11
def test_file_checksum(text_filename):
    """Checksum of a file by path matches the known MD5 hex constant."""
    checksum = file_checksum(text_filename, hash_type='md5', block_size=32)
    # BUG FIX: file_checksum returns a hash object; compare its hex digest
    # to the hex-string constant (see test_file_checksum_filename).
    assert checksum.hexdigest() == TEXT_MD5_CHECKSUM
# Example 12
    def _make_blob(self, container: Container, object_name: str) -> Blob:
        """Convert local file name to a Cloud Storage Blob.

        Reads content type, content disposition, cache control, and user
        metadata from the file's extended attributes when available.

        :param container: Container instance.
        :type container: :class:`.Container`

        :param object_name: Filename.
        :type object_name: str

        :return: Blob instance.
        :rtype: :class:`.Blob`

        :raises NotFoundError: If the path is not accessible or the file
          does not exist.
        """
        full_path = os.path.join(self.base_path, container.name, object_name)
        if not self._check_path_accessible(full_path):
            raise NotFoundError(messages.BLOB_NOT_FOUND %
                                (object_name, container.name))

        object_path = pathlib.Path(full_path)

        # The file can disappear between the accessibility check above and
        # this stat call, so a missing file is reported the same way.
        try:
            stat = os.stat(str(object_path))
        except FileNotFoundError:
            raise NotFoundError(messages.BLOB_NOT_FOUND %
                                (object_name, container.name))

        meta_data = {}
        content_type = None
        content_disposition = None
        cache_control = None

        try:
            attributes = self._make_xattr(full_path)

            for attr_key, attr_value in attributes.items():
                value_str = None

                # Attribute values that are not valid UTF-8 are recorded as
                # None instead of aborting the whole attribute scan.
                try:
                    value_str = attr_value.decode("utf-8")
                except UnicodeDecodeError:
                    pass

                if attr_key.startswith(self._OBJECT_META_PREFIX + "metadata"):
                    # User metadata keys are namespaced; keep only the last
                    # dotted segment as the metadata key.
                    meta_key = attr_key.split(".")[-1]
                    meta_data[meta_key] = value_str
                elif attr_key.endswith("content_type"):
                    content_type = value_str
                elif attr_key.endswith("content_disposition"):
                    content_disposition = value_str
                elif attr_key.endswith("cache_control"):
                    cache_control = value_str
                else:
                    logger.warning("Unknown file attribute '%s'", attr_key)
        except OSError:
            # File system (or mount) does not support extended attributes.
            logger.warning(messages.LOCAL_NO_ATTRIBUTES)

        # TODO: QUESTION: Option to disable checksum for large files?
        # TODO: QUESTION: Save a .hash file for each file?
        file_hash = file_checksum(full_path, hash_type=self.hash_type)
        checksum = file_hash.hexdigest()

        # ETag is derived from the file path, not its contents.
        etag = hashlib.sha1(full_path.encode("utf-8")).hexdigest()
        created_at = datetime.fromtimestamp(stat.st_ctime, timezone.utc)
        modified_at = datetime.fromtimestamp(stat.st_mtime, timezone.utc)

        return Blob(
            name=object_name,
            checksum=checksum,
            etag=etag,
            size=stat.st_size,
            container=container,
            driver=self,
            acl=None,
            meta_data=meta_data,
            content_disposition=content_disposition,
            content_type=content_type,
            cache_control=cache_control,
            created_at=created_at,
            modified_at=modified_at,
        )
# Example 13
def test_file_checksum_stream(binary_stream):
    """Checksumming a stream leaves its position back at the start."""
    digest = file_checksum(
        binary_stream, hash_type='md5', block_size=32).hexdigest()
    assert digest == BINARY_MD5_CHECKSUM
    assert binary_stream.tell() == 0
# Example 14
def test_file_checksum_filename(text_filename):
    """Checksum of a file given by path matches the known MD5 constant."""
    digest = file_checksum(
        text_filename, hash_type='md5', block_size=32).hexdigest()
    assert digest == TEXT_MD5_CHECKSUM