Example #1
    def test_using_directory_sas_to_read(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        client = self._create_directory_and_get_directory_client()
        directory_name = client.path_name

        # generate a token with directory level read permission
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        access_control = directory_client.get_access_control()

        self.assertIsNotNone(access_control)
Example #2
def __upload_file(directory_client: DataLakeDirectoryClient, filename: str, file_data: bytes):
    # NOTE: Using get_file_client as a context manager will close the parent DirectoryClient on __exit__
    file_client = directory_client.get_file_client(filename)
    try:
        if len(file_data) == 0:
            message = 'ERROR: Unexpected error - received an empty (0-byte) file'
            raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, detail=message)

        file_client.upload_data(file_data, overwrite=True)

        # Verify the file was written as expected (guards against the intermittent 0-byte file issue)
        verification_client = directory_client.get_file_client(filename)
        file_properties = verification_client.get_file_properties()

        if file_properties.size != len(file_data):
            message = f'ERROR: Azure write of ({file_client.path_name}) upload: {len(file_data)}' + \
                      f' stored: {file_properties.size} last_modified: {file_properties.last_modified}'
            raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, detail=message)
        # End check

    except ResourceModifiedError as error:
        message = f'({type(error).__name__}) Upload failed (blob modified while writing): {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error

    except HttpResponseError as error:
        message = f'({type(error).__name__}) An error occurred while uploading file: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error
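A minimal usage sketch for the helper above; the connection string, file system, directory, and payload are placeholders for illustration only:

# Hypothetical caller for __upload_file; every name below is illustrative.
from azure.storage.filedatalake import DataLakeDirectoryClient

directory_client = DataLakeDirectoryClient.from_connection_string(
    "<your-connection-string>",
    "myfilesystem",
    "mydirectory")
__upload_file(directory_client, "report.csv", b"id,value\n1,42\n")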
Example #3
    def test_using_oauth_token_credential_to_create_directory(self):
        # an OAuth token credential is used here instead of a SAS token
        directory_name = self._get_directory_reference()
        token_credential = self.generate_oauth_token()
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token_credential)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
Example #4
    def test_file_sas_only_applies_to_file_level(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file, which lives under the directory created above
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()
Example #5
def __check_directory_exist(directory_client: DataLakeDirectoryClient):
    try:
        directory_client.get_directory_properties()
    except ResourceNotFoundError as error:
        message = f'({type(error).__name__}) The given dataset does not exist: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error
    except ClientAuthenticationError as error:
        message = f'({type(error).__name__}) You do not have permission to access the dataset: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error
    except HttpResponseError as error:
        message = f'({type(error).__name__}) An error occurred while checking whether the dataset exists: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error
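Recent versions of azure-storage-file-datalake also expose an exists() probe on path clients. If that method is available in the installed SDK, the existence check can be written more compactly; a sketch, not the project's actual code:

def __check_directory_exist_compact(directory_client: DataLakeDirectoryClient):
    # exists() handles ResourceNotFoundError internally and returns a bool;
    # authentication failures still raise, so keep broader handling if needed
    if not directory_client.exists():
        message = 'The given dataset does not exist'
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=message)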
Example #6
def __get_destination_directory_client_based_on_event_time(directory_client: DataLakeDirectoryClient,
                                                           event_time: str) -> DataLakeDirectoryClient:
    event_time_obj, time_resolution = parse_date_str(event_time)

    path = get_file_path_with_respect_to_time_resolution(event_time_obj, time_resolution, '')

    return directory_client.get_sub_directory_client(path)
Example #7
def __download_file(filename: str, directory_client: DataLakeDirectoryClient) -> StorageStreamDownloader:
    file_client = directory_client.get_file_client(filename)
    try:
        downloaded_file = file_client.download_file()
        return downloaded_file
    except ResourceNotFoundError as error:
        message = f'({type(error).__name__}) The given dataset does not exist: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error
    except HttpResponseError as error:
        message = f'({type(error).__name__}) File could not be downloaded: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error
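The StorageStreamDownloader returned above is then consumed by the caller; a short sketch (the filename is illustrative, and directory_client is assumed to be in scope):

# Illustrative consumption of the downloader returned by __download_file.
downloader = __download_file('data.json', directory_client)
payload = downloader.readall()  # loads the whole file into memory;
                                # for large files prefer downloader.readinto(stream)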
Example #8
    def test_using_directory_sas_to_create(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
Example #9
    def test_rename_dir_with_file_system_sas(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # create a directory client authorized with the file system SAS token
        dir_client = DataLakeDirectoryClient(self.dsc.url,
                                             self.file_system_name,
                                             "olddirectory",
                                             credential=token)
        dir_client.create_directory()
        new_client = dir_client.rename_directory(dir_client.file_system_name +
                                                 '/' + 'newdirectory')

        new_client.get_directory_properties()
        self.assertEqual(new_client.path_name, "newdirectory")
Example #10
def __validate_json_with_schema(directory_client: DataLakeDirectoryClient, json_schema_file_path: str, data_dict: Dict):
    file_client = directory_client.get_file_client(json_schema_file_path)
    schema = __get_validation_schema(file_client)
    try:
        fastjsonschema.validate(schema, data_dict)
    except (TypeError, fastjsonschema.JsonSchemaDefinitionException, AttributeError) as error:
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f'Invalid schema definition: {getattr(error, "message", error)}') from error
    except fastjsonschema.JsonSchemaValueException as error:
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
                            detail={
                                'message': f'JSON Schema validation error: {error.message}',
                                'name': f'{error.name}',
                                'rule': f'{error.rule}',
                                'rule_definition': f'{error.rule_definition}'
                            }) from error
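When many payloads are validated against the same schema, fastjsonschema can compile the schema once and reuse the resulting callable. A sketch of that variation; the function name and the List import are assumptions, and __get_validation_schema comes from the surrounding module:

from typing import List

def __validate_json_batch(directory_client: DataLakeDirectoryClient,
                          json_schema_file_path: str, payloads: List[Dict]):
    file_client = directory_client.get_file_client(json_schema_file_path)
    validate = fastjsonschema.compile(__get_validation_schema(file_client))  # compile once
    for payload in payloads:
        validate(payload)  # raises JsonSchemaValueException on the first invalid payload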
Example #11
def __get_directory_client(token: str, guid: str) -> DataLakeDirectoryClient:
    account_url = config['Azure Storage']['account_url']
    filesystem_name = config['Azure Storage']['filesystem_name']
    credential = AzureCredential(token)

    return DataLakeDirectoryClient(account_url, filesystem_name, guid, credential=credential)
Example #12
def __get_destination_directory_client_based_on_ingress_time(directory_client: DataLakeDirectoryClient) \
        -> DataLakeDirectoryClient:
    now = datetime.utcnow()

    path = f'year={now.year:02d}/month={now.month:02d}/day={now.day:02d}/hour={now.hour:02d}'
    return directory_client.get_sub_directory_client(path)
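For a fixed timestamp the helper above resolves a Hive-style partition path; an illustrative check:

# Illustrative: the partition path produced for 2021-06-01 09:30 UTC.
from datetime import datetime
moment = datetime(2021, 6, 1, 9, 30)
path = f'year={moment.year:02d}/month={moment.month:02d}/day={moment.day:02d}/hour={moment.hour:02d}'
assert path == 'year=2021/month=06/day=01/hour=09'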
Example #13
def instantiate_directory_client_from_conn_str():
    # [START instantiate_directory_client_from_conn_str]
    import os
    from azure.storage.filedatalake import DataLakeDirectoryClient

    # read the connection string from the environment rather than hard-coding it
    connection_string = os.getenv('STORAGE_CONNECTION_STRING')
    directory_client = DataLakeDirectoryClient.from_connection_string(connection_string,
                                                                      "myfilesystem",
                                                                      "mydirectory")
    # [END instantiate_directory_client_from_conn_str]