def __upload_file(directory_client: DataLakeDirectoryClient, filename: str, file_data: bytes):
    """Upload ``file_data`` as ``filename`` under ``directory_client`` and verify the write.

    Raises:
        HTTPException: 406 when the payload is empty or the stored size does not
            match the uploaded size; 500 when the blob was modified during the
            write or any other Azure storage error occurs.
    """
    # NOTE: Using get_file_client as a context manager will close the parent
    # DirectoryClient on __exit__ — so the client is used without `with`.
    file_client = directory_client.get_file_client(filename)
    try:
        # Guard against writing a 0-byte file (idiomatic emptiness check;
        # len() of bytes can never be negative).
        if not file_data:
            message = f'ERROR: Unexpected error - File size 0 (or negative) - NOT POSSIBLE ({len(file_data)})'
            raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, detail=message)

        file_client.upload_data(file_data, overwrite=True)

        # Check if file is written as expected: due to the 0-byte file error.
        # Reuse the existing client — get_file_properties() issues a fresh
        # service call, so a second get_file_client() was redundant.
        file_properties = file_client.get_file_properties()
        if file_properties.size != len(file_data):
            message = f'ERROR: Azure write of ({file_client.path_name}) upload: {len(file_data)}' + \
                      f' stored: {file_properties.size} last_modified: {file_properties.last_modified}'
            raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE, detail=message)
        # End check
    except ResourceModifiedError as error:
        message = f'({type(error).__name__}) Upload failed (blob modified while writing): {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error
    except HttpResponseError as error:
        message = f'({type(error).__name__}) An error occurred while uploading file: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error
def __download_file(filename: str, directory_client: DataLakeDirectoryClient) -> StorageStreamDownloader:
    """Return a streaming downloader for ``filename`` within ``directory_client``.

    Raises:
        HTTPException: mirrors the Azure status code when the file does not
            exist; 500 for any other download failure.
    """
    file_client = directory_client.get_file_client(filename)
    try:
        # Return the stream directly — no need for an intermediate variable.
        return file_client.download_file()
    except ResourceNotFoundError as error:
        message = f'({type(error).__name__}) The given dataset doesnt exist: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error
    except HttpResponseError as error:
        message = f'({type(error).__name__}) File could not be downloaded: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error
def __validate_json_with_schema(directory_client: DataLakeDirectoryClient, json_schema_file_path: str, data_dict: Dict):
    """Validate ``data_dict`` against the JSON schema stored at ``json_schema_file_path``.

    Raises:
        HTTPException: 500 when the schema definition itself is invalid;
            400 (with a structured detail dict) when the data fails validation.
    """
    schema_client = directory_client.get_file_client(json_schema_file_path)
    schema = __get_validation_schema(schema_client)
    try:
        fastjsonschema.validate(schema, data_dict)
    except (TypeError, fastjsonschema.JsonSchemaDefinitionException, AttributeError) as error:
        # A broken schema is a server-side problem, not the caller's fault.
        detail = f'Invalid schema definition: {getattr(error, "message", error)}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=detail) from error
    except fastjsonschema.JsonSchemaValueException as error:
        # Data that violates the schema is a client error — report the
        # offending field, rule, and rule definition in a structured payload.
        detail = {
            'message': f'JSON Schema validation error: {error.message}',
            'name': f'{error.name}',
            'rule': f'{error.rule}',
            'rule_definition': f'{error.rule_definition}',
        }
        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=detail) from error