def __init__(
        self, client, lease_id=None
):  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
    # type: (Union[FileSystemClient, DataLakeDirectoryClient, DataLakeFileClient], Optional[str]) -> None
    """Bind a lease client to the blob-level client wrapped by *client*.

    :param client:
        The FileSystemClient, DataLakeDirectoryClient, or DataLakeFileClient
        whose underlying blob/container client the lease operates on.
    :param str lease_id:
        Optional existing lease ID; a fresh UUID is generated when omitted.
    :raises TypeError:
        If *client* exposes neither ``_blob_client`` nor ``_container_client``.
    """
    self.id = lease_id or str(uuid.uuid4())
    self.last_modified = None
    self.etag = None

    # DataLake clients delegate to an underlying blob-level client; the
    # lease is actually managed through that wrapped client.
    if hasattr(client, '_blob_client'):
        _client = client._blob_client  # type: ignore # pylint: disable=protected-access
    elif hasattr(client, '_container_client'):
        _client = client._container_client  # type: ignore # pylint: disable=protected-access
    else:
        # Fixed: the original message was missing the comma after
        # "FileSystemClient".
        raise TypeError("Lease must use any of FileSystemClient, DataLakeDirectoryClient, or DataLakeFileClient.")

    self._blob_lease_client = BlobLeaseClient(_client, lease_id=lease_id)
def tearDown(self):
    """Best-effort cleanup: delete every container created during the test.

    A container with an active lease cannot be deleted directly, so on an
    ``HttpResponseError`` we break the lease (period ``0`` = immediately)
    and retry once.  Failures are swallowed deliberately -- cleanup must
    never fail the test run -- but we catch ``Exception`` rather than using
    a bare ``except`` so ``KeyboardInterrupt``/``SystemExit`` still
    propagate.
    """
    if not self.is_playback():
        for container_name in self.test_containers:
            try:
                container = self.bsc.get_container_client(container_name)
                container.delete_container()
            except HttpResponseError:
                try:
                    # Deletion likely failed because of an active lease;
                    # break it and retry the delete once.
                    lease = BlobLeaseClient(container)
                    lease.break_lease(0)
                    container.delete_container()
                except Exception:
                    pass
            except Exception:
                pass
    return super(StorageContainerTest, self).tearDown()
def copy_blob(storage_account_conn_str, source_container_name, source_blob_name, dest_container_name):
    """
    Copy a blob from one container to another within the same storage account.

    The source blob is leased for the duration of the copy to prevent another
    client from modifying it.  The lease is always broken afterwards -- the
    original implementation leaked an infinite lease on the source blob when
    any call between ``acquire()`` and the final lease check raised.

    :param str storage_account_conn_str: Storage-account connection string.
    :param str source_container_name: Container holding the source blob.
    :param str source_blob_name: Name of the blob to copy.
    :param str dest_container_name: Container to copy the blob into.
    """
    lease = None
    source_blob_client = None
    try:
        # Create the blob object representing the source
        source_blob_client = BlobClient.from_connection_string(
            conn_str=storage_account_conn_str,
            container_name=source_container_name,
            blob_name=source_blob_name)

        # Create the blob object representing the destination
        # (same blob name, different container).
        dest_blob_client = BlobClient.from_connection_string(
            conn_str=storage_account_conn_str,
            container_name=dest_container_name,
            blob_name=source_blob_name)

        print('Copying a Blob from a Blob container to another one ... ')

        # Lease the source blob for the copy operation
        # to prevent another client from modifying it.
        lease = BlobLeaseClient(source_blob_client)
        lease.acquire()

        # Get the source blob's properties and display the lease state.
        source_props = source_blob_client.get_blob_properties()
        print("Lease state: " + source_props.lease.state)

        # Start the copy operation.
        dest_blob_client.start_copy_from_url(source_blob_client.url)

        # Get the destination blob's properties to check the copy status.
        properties = dest_blob_client.get_blob_properties()
        copy_props = properties.copy

        # Display the copy status.
        print("Copy status: " + copy_props["status"])
        print("Copy progress: " + copy_props["progress"])
        print("Completion time: " + str(copy_props["completion_time"]))
        print("Total bytes: " + str(properties.size))
        print('\nCopied')
    except Exception as e:
        print("\nError:")
        print(e)
    finally:
        # Always break the lease so the source blob is not left leased
        # forever when the copy fails part-way through.
        if lease is not None and source_blob_client is not None:
            try:
                source_props = source_blob_client.get_blob_properties()
                if source_props.lease.state == "leased":
                    lease.break_lease()
                    # Refresh the properties to show the new lease state.
                    source_props = source_blob_client.get_blob_properties()
                    print("Lease state: " + source_props.lease.state)
            except Exception as cleanup_error:
                print("Lease cleanup failed: " + str(cleanup_error))
    return
def blob_copy(self, container_name, blob_name):
    """Copy *blob_name* within *container_name* to a uniquely named blob.

    The source blob is leased during the copy to prevent another client
    from modifying it.  The lease is broken in a ``finally`` block -- the
    original leaked an infinite lease on the source blob whenever
    ``ResourceNotFoundError``/``ServiceRequestError`` was raised after the
    lease was acquired.

    :param str container_name: Container holding the source blob.
    :param str blob_name: Name of the blob to copy.
    """
    # Create a BlobClient from a connection string retrieved from an
    # environment variable named AZURE_STORAGE_CONNECTION_STRING.
    source_blob = BlobClient.from_connection_string(
        os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
        container_name, blob_name
    )

    lease = None
    try:
        # Lease the source blob for the copy operation
        # to prevent another client from modifying it.
        lease = BlobLeaseClient(source_blob)
        lease.acquire()

        # Get the source blob's properties and display the lease state.
        source_props = source_blob.get_blob_properties()
        print("Lease state: " + source_props.lease.state)

        # Create a BlobClient representing the
        # destination blob with a unique name.
        dest_blob = BlobClient.from_connection_string(
            os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
            container_name,
            str(uuid.uuid4()) + "-" + blob_name
        )

        # Start the copy operation.
        dest_blob.start_copy_from_url(source_blob.url)

        # Get the destination blob's properties to check the copy status.
        properties = dest_blob.get_blob_properties()
        copy_props = properties.copy

        # Display the copy status.
        print("Copy status: " + copy_props["status"])
        print("Copy progress: " + copy_props["progress"])
        print("Completion time: " + str(copy_props["completion_time"]))
        print("Total bytes: " + str(properties.size))
    except ResourceNotFoundError as ex:
        print("ResourceNotFoundError: ", ex.message)
    except ServiceRequestError as ex:
        print("ServiceRequestError: ", ex.message)
    finally:
        # Break the lease even when the copy failed, so the source blob is
        # not left permanently leased.
        if lease is not None:
            try:
                source_props = source_blob.get_blob_properties()
                if source_props.lease.state == "leased":
                    lease.break_lease()
                    # Refresh the properties to show the new lease state.
                    source_props = source_blob.get_blob_properties()
                    print("Lease state: " + source_props.lease.state)
            except (ResourceNotFoundError, ServiceRequestError):
                pass
class DataLakeLeaseClient(object):
    """Creates a new DataLakeLeaseClient.

    This client provides lease operations on a FileSystemClient, DataLakeDirectoryClient
    or DataLakeFileClient.

    :ivar str id:
        The ID of the lease currently being maintained. This will be `None` if no
        lease has yet been acquired.
    :ivar str etag:
        The ETag of the lease currently being maintained. This will be `None` if no
        lease has yet been acquired or modified.
    :ivar ~datetime.datetime last_modified:
        The last modified timestamp of the lease currently being maintained.
        This will be `None` if no lease has yet been acquired or modified.

    :param client:
        The client of the file system, directory, or file to lease.
    :type client: ~azure.storage.filedatalake.FileSystemClient or
        ~azure.storage.filedatalake.DataLakeDirectoryClient or
        ~azure.storage.filedatalake.DataLakeFileClient
    :param str lease_id:
        A string representing the lease ID of an existing lease. This value does not
        need to be specified in order to acquire a new lease, or break one.
    """

    def __init__(
            self, client, lease_id=None
    ):  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
        # type: (Union[FileSystemClient, DataLakeDirectoryClient, DataLakeFileClient], Optional[str]) -> None
        self.id = lease_id or str(uuid.uuid4())
        self.last_modified = None
        self.etag = None

        # DataLake clients delegate to an underlying blob-level client; the
        # lease is actually managed through that wrapped client.
        if hasattr(client, '_blob_client'):
            _client = client._blob_client  # type: ignore # pylint: disable=protected-access
        elif hasattr(client, '_container_client'):
            _client = client._container_client  # type: ignore # pylint: disable=protected-access
        else:
            # Fixed: the original message was missing the comma after
            # "FileSystemClient".
            raise TypeError("Lease must use any of FileSystemClient, DataLakeDirectoryClient, or DataLakeFileClient.")

        self._blob_lease_client = BlobLeaseClient(_client, lease_id=lease_id)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Releasing on exit lets the client be used as a context manager.
        self.release()

    def acquire(self, lease_duration=-1, **kwargs):
        # type: (int, **Any) -> None
        """Requests a new lease.

        If the file/file system does not have an active lease, the DataLake service
        creates a lease on the file/file system and returns a new lease ID.

        :param int lease_duration:
            Specifies the duration of the lease, in seconds, or negative one
            (-1) for a lease that never expires. A non-infinite lease can be
            between 15 and 60 seconds. A lease duration cannot be changed
            using renew or change. Default is -1 (infinite lease).
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: None
        """
        self._blob_lease_client.acquire(lease_duration=lease_duration, **kwargs)
        self._update_lease_client_attributes()

    def renew(self, **kwargs):
        # type: (Any) -> None
        """Renews the lease.

        The lease can be renewed if the lease ID specified in the
        lease client matches that associated with the file system or file. Note that
        the lease may be renewed even if it has expired as long as the file system
        or file has not been leased again since the expiration of that lease. When you
        renew a lease, the lease duration clock resets.

        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.renew(**kwargs)
        self._update_lease_client_attributes()

    def release(self, **kwargs):
        # type: (Any) -> None
        """Release the lease.

        The lease may be released if the client lease id specified matches
        that associated with the file system or file. Releasing the lease allows another client
        to immediately acquire the lease for the file system or file as soon as the release is complete.

        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.release(**kwargs)
        self._update_lease_client_attributes()

    def change(self, proposed_lease_id, **kwargs):
        # type: (str, Any) -> None
        """Change the lease ID of an active lease.

        :param str proposed_lease_id:
            Proposed lease ID, in a GUID string format. The DataLake service returns 400
            (Invalid request) if the proposed lease ID is not in the correct format.
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.change(proposed_lease_id=proposed_lease_id, **kwargs)
        self._update_lease_client_attributes()

    def break_lease(self, lease_break_period=None, **kwargs):
        # type: (Optional[int], Any) -> int
        """Break the lease, if the file system or file has an active lease.

        Once a lease is broken, it cannot be renewed. Any authorized request can break the lease;
        the request is not required to specify a matching lease ID. When a lease
        is broken, the lease break period is allowed to elapse, during which time
        no lease operation except break and release can be performed on the file system or file.
        When a lease is successfully broken, the response indicates the interval
        in seconds until a new lease can be acquired.

        :param int lease_break_period:
            This is the proposed duration of seconds that the lease
            should continue before it is broken, between 0 and 60 seconds. This
            break period is only used if it is shorter than the time remaining
            on the lease. If longer, the time remaining on the lease is used.
            A new lease will not be available before the break period has
            expired, but the lease may be held for longer than the break
            period. If this header does not appear with a break
            operation, a fixed-duration lease breaks after the remaining lease
            period elapses, and an infinite lease breaks immediately.
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: Approximate time remaining in the lease period, in seconds.
        :rtype: int
        """
        # Fixed: the original discarded the wrapped client's response, so
        # callers always received None instead of the documented number of
        # seconds remaining in the lease period.
        return self._blob_lease_client.break_lease(lease_break_period=lease_break_period, **kwargs)

    def _update_lease_client_attributes(self):
        # Mirror the state of the wrapped blob lease client onto this object.
        self.id = self._blob_lease_client.id  # type: str
        self.last_modified = self._blob_lease_client.last_modified  # type: datetime
        self.etag = self._blob_lease_client.etag  # type: str
def update(self, instance, validated_data):
    """Swap the file references on *instance* for fresh copies of the files
    named in *validated_data*, then delete the previously linked files.

    Each value in *validated_data* is a storage reference; the file is
    duplicated in the active storage backend (S3, Azure, or shared FS) and
    the resulting ``RelatedFile`` is attached to *instance*.

    :param instance: Model instance whose file fields are being updated.
    :param validated_data: Mapping of field name -> storage reference.
    :return: The saved *instance*.
    """
    files_for_removal = list()
    for field in validated_data:
        content_type = self.get_content_type(validated_data[field])

        # S3 storage - File copy needed
        if hasattr(default_storage, 'bucket'):
            fname = path.basename(validated_data[field])
            new_file = ContentFile(b'')
            new_file.name = default_storage.get_alternative_name(fname, '')
            new_related_file = RelatedFile.objects.create(
                file=new_file,
                filename=fname,
                content_type=content_type,
                creator=self.context['request'].user,
                store_as_filename=True,
            )
            bucket = default_storage.bucket
            stored_file = default_storage.open(new_related_file.file.name)
            stored_file.obj.copy({
                "Bucket": bucket.name,
                "Key": validated_data[field]
            })
            stored_file.obj.wait_until_exists()

        elif hasattr(default_storage, 'azure_container'):
            # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
            fname = path.basename(validated_data[field])
            new_file_name = default_storage.get_alternative_name(
                validated_data[field], '')
            new_blobname = path.basename(new_file_name)

            # Copies a blob asynchronously.
            source_blob = default_storage.client.get_blob_client(
                validated_data[field])
            dest_blob = default_storage.client.get_blob_client(
                new_file_name)

            lease = None
            try:
                # Lease the source blob so it cannot change mid-copy.
                lease = BlobLeaseClient(source_blob)
                lease.acquire()
                dest_blob.start_copy_from_url(source_blob.url)
                wait_for_blob_copy(dest_blob)
                lease.break_lease()
            except Exception:
                # Copy failed: break the lease only if it was actually
                # acquired (the original referenced `lease` before
                # assignment when BlobLeaseClient() itself raised), then
                # re-raise preserving the original traceback.
                if lease is not None:
                    lease.break_lease()
                raise

            stored_blob = default_storage.open(new_blobname)
            new_related_file = RelatedFile.objects.create(
                file=File(stored_blob, name=new_blobname),
                filename=fname,
                content_type=content_type,
                creator=self.context['request'].user,
                store_as_filename=True,
            )

        # Shared-fs
        else:
            stored_file = default_storage.open(validated_data[field])
            new_file = File(stored_file, name=validated_data[field])
            new_related_file = RelatedFile.objects.create(
                file=new_file,
                filename=validated_data[field],
                content_type=content_type,
                creator=self.context['request'].user,
                store_as_filename=True,
            )

        # Mark previous reference for deletion if it exists
        if hasattr(instance, field):
            prev_file = getattr(instance, field)
            if prev_file:
                files_for_removal.append(prev_file)

        # Set new file ref
        setattr(instance, field, new_related_file)

    # Update & delete previously linked files
    instance.save(update_fields=[k for k in validated_data])
    for f in files_for_removal:
        f.delete()
    return instance
def store_file(reference, content_type, creator, required=True, filename=None):
    """
    Returns a `RelatedFile` object to store

    :param reference: Storage reference of file (url or file path)
    :type reference: string
    :param content_type: Mime type of file
    :type content_type: string
    :param creator: Id of Django user
    :type creator: int
    :param required: Allow for None returns if set to false
    :type required: boolean
    :param filename: Optional explicit filename to store the file under
    :type filename: string
    :return: Model Object holding a Django file
    :rtype: RelatedFile
    """
    # Download data from URL
    if is_valid_url(reference):
        response = urlopen(reference)
        fdata = response.read()

        # Find file name: prefer Content-Disposition, fall back to URL path.
        header_fname = response.headers.get('Content-Disposition', '').split('filename=')[-1]
        ref = header_fname if header_fname else os.path.basename(urlparse(reference).path)
        fname = filename if filename else ref
        logger.info('Store file: {}'.format(ref))

        # Create temp file, download content and store
        with TemporaryFile() as tmp_file:
            tmp_file.write(fdata)
            tmp_file.seek(0)
            return RelatedFile.objects.create(
                file=File(tmp_file, name=fname),
                filename=fname,
                content_type=content_type,
                creator=creator,
                store_as_filename=True,
            )

    # Default name for non-URL references: the basename of the storage
    # reference.  (The original read an undefined `ref` in the branches
    # below, raising NameError whenever `filename` was not supplied.)
    ref = os.path.basename(reference)

    # Issue S3 object Copy
    if is_in_bucket(reference):
        fname = filename if filename else ref
        new_file = ContentFile(b'')
        new_file.name = fname
        new_related_file = RelatedFile.objects.create(
            file=new_file,
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True,
        )
        stored_file = default_storage.open(new_related_file.file.name)
        stored_file.obj.copy({"Bucket": default_storage.bucket.name, "Key": reference})
        stored_file.obj.wait_until_exists()
        return new_related_file

    # Issue Azure object Copy
    if is_in_container(reference):
        new_filename = filename if filename else ref
        fname = default_storage._get_valid_path(new_filename)
        source_blob = default_storage.client.get_blob_client(reference)
        dest_blob = default_storage.client.get_blob_client(fname)

        lease = None
        try:
            # Lease the source blob so it cannot change mid-copy.
            lease = BlobLeaseClient(source_blob)
            lease.acquire()
            dest_blob.start_copy_from_url(source_blob.url)
            wait_for_blob_copy(dest_blob)
            lease.break_lease()
        except Exception:
            # Copy failed: break the lease only if it was actually acquired
            # (the original referenced `lease` before assignment when
            # BlobLeaseClient() itself raised), then re-raise preserving
            # the original traceback.
            if lease is not None:
                lease.break_lease()
            raise

        stored_blob = default_storage.open(os.path.basename(fname))
        new_related_file = RelatedFile.objects.create(
            file=File(stored_blob, name=fname),
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True)
        return new_related_file

    try:
        # Copy via shared FS
        fname = filename if filename else ref
        return RelatedFile.objects.create(
            file=ref,
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True,
        )
    except TypeError as e:
        if not required:
            logger.warning(f'Failed to store file reference: {reference} - {e}')
            return None
        else:
            # Bare raise preserves the original traceback (the original
            # used `raise e`, which truncates it).
            raise
def copy_files(self, from_location, to_location, file_pattern):
    """Copy every blob matching *file_pattern* from one location to another
    within the configured storage container.

    Each source blob is leased during its copy to guard against concurrent
    modification.  On success the lease is broken; on failure the lease is
    deliberately left in place (matching the original behaviour).

    :param from_location: Source folder/prefix inside the container.
    :param to_location: Target folder/prefix inside the container.
    :param file_pattern: Pattern used by ``list_files`` to select blobs.
    :return: A message dict from ``messages.message`` describing the outcome.
    """
    result, sources, source_names = self.list_files(location=from_location, file_pattern=file_pattern)
    if sources is None:
        print("no files found in source >" + from_location + "<.")
        result = messages.message["ok"]
        result["reference"] = "No files in source: " + from_location
        return result

    for file in sources:
        # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
        src_file = file
        tgt_file = to_location + "/" + os.path.basename(src_file)
        src_blob = BlobClient(
            self.blob_service_client.url,
            container_name=self.settings.storage_container,
            blob_name=src_file,
            credential=self.sas_token
        )

        # Lease the source blob for the copy operation
        # to prevent another client from modifying it.
        lease = BlobLeaseClient(src_blob)
        lease.acquire()
        source_props = src_blob.get_blob_properties()
        print("Lease state for source file %s: %s" % (src_file, source_props.lease.state))

        print("Copying %s.%s to %s using url %s"
              % (self.settings.storage_container, src_file, tgt_file, src_blob.url))
        self.tgt = self.blob_service_client.get_blob_client(self.settings.storage_container, tgt_file)

        try:
            self.tgt.start_copy_from_url(src_blob.url)
            # Poll until the copy completes or the timeout elapses.
            self.wait_condition(condition=self.check_copy_status,
                                timeout=self.max_wait_in_sec,
                                granularity=self.recheck)
            properties = self.tgt.get_blob_properties()
            print("Total bytes: " + str(properties.size))
            copy_props = properties.copy
            if copy_props["status"] != "success":
                self.tgt.abort_copy(copy_id=copy_props["id"])
                print("Unable to copy blob %s to %s. Status: %s"
                      % (src_file, tgt_file, copy_props["status"]))
                result = messages.message["copy_files_failed"]
                break
            # Note: we do not release the lease in case of errors.
            if source_props.lease.state == "leased":
                # Break the lease on the source blob.
                lease.break_lease()
                # Update the source blob's properties to check the lease state.
                source_props = src_blob.get_blob_properties()
                print("Lease state: " + source_props.lease.state)
            result = messages.message["ok"]
        except exceptions.ResourceNotFoundError as e:
            print("Azure reported a resource not found error: ", e)
            result = messages.message["resource_not_found"]
            result["reference"] = "source: %s, target: %s" % (src_file, tgt_file)
    return result