import json

from azure.storage.blob import BlobBlock


def get_current_blocks(self, test_container_name, test_filename):
    blocks = []
    if self.block_blob_service.exists(test_container_name, test_filename):
        blockslist = self.block_blob_service.get_block_list(
            test_container_name, test_filename, None, 'all')
        for block in blockslist.committed_blocks:
            blocks.append(BlobBlock(id=block.id))
    return blocks
def create_or_update_blockblob(self, container_name, file_name, datalist, blocks):
    block_id = self.get_random_name()
    file_bytes = ''.join(datalist).encode()
    self.block_blob_service.put_block(container_name, file_name, file_bytes, block_id)
    blocks.append(BlobBlock(id=block_id))
    self.block_blob_service.put_block_list(container_name, file_name, blocks)
    return blocks
def insert_empty_json(self, container_name, file_name):
    json_data = ['{"records":[', ']}']
    blocks = []
    for file_bytes in json_data:
        file_bytes = file_bytes.encode()
        block_id = self.get_random_name()
        self.block_blob_service.put_block(container_name, file_name, file_bytes, block_id)
        blocks.append(BlobBlock(id=block_id))
    self.block_blob_service.put_block_list(container_name, file_name, blocks)
    return blocks
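# --- Usage sketch (not part of the original code) ---
# The three helpers above read like methods of a test-helper class that owns a
# `block_blob_service` (the legacy azure-storage-blob v2 BlockBlobService) and
# a `get_random_name` helper. The class name, credentials, and the
# `get_random_name` implementation below are illustrative assumptions.
import uuid

from azure.storage.blob import BlockBlobService


class BlobTestHelper:  # hypothetical home for the methods above
    def __init__(self, account_name, account_key):
        self.block_blob_service = BlockBlobService(account_name=account_name,
                                                   account_key=account_key)

    def get_random_name(self, length=32):
        # Block IDs must be unique within a blob; a random hex string suffices.
        return uuid.uuid4().hex[:length]

    # get_current_blocks, create_or_update_blockblob, and insert_empty_json
    # from above would live here.


# helper = BlobTestHelper('<account-name>', '<account-key>')
# blocks = helper.insert_empty_json('test-container', 'records.json')
# blocks = helper.get_current_blocks('test-container', 'records.json')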
def readFrom(self, readable):
    blocks = []
    try:
        while True:
            buf = readable.read(maxBlockSize)
            if len(buf) == 0:
                # We're safe to break here even if we never read anything, since
                # putting an empty block list creates an empty blob.
                break
            if encrypted:
                buf = encryption.encrypt(buf, store.keyPath)
            blockID = store._newFileID()
            container.put_block(blob_name=bytes(jobStoreFileID),
                                block=buf,
                                block_id=blockID)
            blocks.append(BlobBlock(blockID))
    except:
        with panic(log=logger):
            # This is guaranteed to delete any uncommitted blocks.
            container.delete_blob(blob_name=bytes(jobStoreFileID))

    if checkForModification and expectedVersion is not None:
        # Acquire a (60-second) write lock,
        leaseID = container.acquire_blob_lease(blob_name=bytes(jobStoreFileID),
                                               lease_duration=60)
        # check for modification,
        blob = container.get_blob_properties(blob_name=bytes(jobStoreFileID))
        if blob.properties.etag != expectedVersion:
            container.release_blob_lease(blob_name=bytes(jobStoreFileID),
                                         lease_id=leaseID)
            raise ConcurrentFileModificationException(jobStoreFileID)
        # commit the file,
        container.put_block_list(blob_name=bytes(jobStoreFileID),
                                 block_list=blocks,
                                 lease_id=leaseID,
                                 metadata=dict(encrypted=str(encrypted)))
        # then release the lock.
        container.release_blob_lease(blob_name=bytes(jobStoreFileID),
                                     lease_id=leaseID)
    else:
        # No need to check for modification, just blindly write over whatever
        # was there.
        container.put_block_list(blob_name=bytes(jobStoreFileID),
                                 block_list=blocks,
                                 metadata=dict(encrypted=str(encrypted)))
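# --- Illustrative sketch (not from the original source) ---
# The `container` object above appears to be a wrapper that pins the container
# name. The same lease-guarded, etag-checked commit written directly against
# the azure-storage v2 BlockBlobService might look like this; the function
# name and the error type raised are assumptions.
from azure.storage.blob import BlockBlobService


def commit_if_unmodified(service, container_name, blob_name, blocks, expected_etag):
    # A short-lived lease keeps other writers from committing underneath us.
    lease_id = service.acquire_blob_lease(container_name, blob_name,
                                          lease_duration=60)
    try:
        props = service.get_blob_properties(container_name, blob_name)
        if props.properties.etag != expected_etag:
            raise RuntimeError('blob was modified concurrently')
        # Committing the staged block list atomically replaces the blob body.
        service.put_block_list(container_name, blob_name, blocks,
                               lease_id=lease_id)
    finally:
        service.release_blob_lease(container_name, blob_name, lease_id)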
def insert_mock_json_in_BlobStorage(self):
    test_filename = self.test_filename + ".json"
    # Todo refactor this to get current blocks
    blocks = self.insert_empty_json(self.test_container_name, test_filename)
    for i, data_block in enumerate(self.get_json_data()):
        block_id = self.get_random_name()
        file_bytes = json.dumps(data_block)
        # Drop the outer brackets so the records concatenate inside the
        # '{"records":[' ... ']}' envelope, comma-separated after the first.
        file_bytes = (file_bytes[1:-1] if i == 0 else "," + file_bytes[1:-1]).encode()
        self.block_blob_service.put_block(self.test_container_name, test_filename,
                                          file_bytes, block_id)
        # Insert before the final block so the closing ']}' stays last.
        blocks.insert(len(blocks) - 1, BlobBlock(id=block_id))
    self.block_blob_service.put_block_list(self.test_container_name, test_filename,
                                           blocks)
    print("inserted %s" % blocks)
def block_blob_operations(self, account):
    file_to_upload = "HelloWorld.png"
    block_size = 1024

    # Create a block blob service object
    blockblob_service = account.create_block_blob_service()
    container_name = 'blockblobcontainer' + self.random_data.get_random_name(6)

    try:
        # Create a new container
        print('1. Create a container with name - ' + container_name)
        blockblob_service.create_container(container_name)

        blocks = []
        # Read the file in block_size chunks and stage each chunk as a block
        print('2. Upload file to block blob')
        with open(file_to_upload, "rb") as file:
            file_bytes = file.read(block_size)
            while len(file_bytes) > 0:
                block_id = self.random_data.get_random_name(32)
                blockblob_service.put_block(container_name, file_to_upload,
                                            file_bytes, block_id)
                blocks.append(BlobBlock(id=block_id))
                file_bytes = file.read(block_size)
            blockblob_service.put_block_list(container_name, file_to_upload, blocks)

        print('3. Get the block list')
        blockslist = blockblob_service.get_block_list(container_name, file_to_upload,
                                                      None, 'all')
        blocks = blockslist.committed_blocks

        print('4. Enumerate blocks in block blob')
        for block in blocks:
            print('Block ' + block.id)
    finally:
        print('5. Delete container')
        if blockblob_service.exists(container_name):
            blockblob_service.delete_container(container_name)
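# --- Usage sketch (not part of the original sample) ---
# This method follows the layout of the azure-storage v2 samples, where
# `account` is a CloudStorageAccount and `self.random_data` supplies random
# names. An assumed invocation; class name and credentials are placeholders.
from azure.storage.common import CloudStorageAccount

account = CloudStorageAccount(account_name='<name>', account_key='<key>')
# sample = BlockBlobSamples()  # hypothetical class holding the method above
# sample.block_blob_operations(account)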
def block_blob_operations():
    file_to_upload = "10MB.zip"
    block_size = 2097152  # 2 MiB per block

    # Create a block blob service object
    #blockblob_service = account.create_block_blob_service()
    #container_name = 'python-course'

    try:
        # Create a new container
        #print('1. Create a container with name - ' + container_name)
        #blockblob_service.create_container(container_name)

        blocks = []
        # Read the file
        print('2. Upload file to block blob')
        with open(file_to_upload, "rb") as file:
            file_bytes = file.read(block_size)
            while len(file_bytes) > 0:
                block_id = randomString(10)
                blockblob_service.put_block(container_name, file_to_upload,
                                            file_bytes, block_id)
                blocks.append(BlobBlock(id=block_id))
                file_bytes = file.read(block_size)
            blockblob_service.put_block_list(container_name, file_to_upload, blocks)

        print('3. Get the block list')
        blockslist = blockblob_service.get_block_list(container_name, file_to_upload,
                                                      None, 'all')
        blocks = blockslist.committed_blocks

        print('4. Enumerate blocks in block blob')
        for block in blocks:
            print('Block ' + block.id)
    finally:
        print("EXECUTED")
        print(blocks)
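# --- Assumed module-level setup (not in the original snippet) ---
# The function above relies on `blockblob_service`, `container_name`, and
# `randomString` being defined elsewhere in the module; one plausible wiring,
# following the commented-out hints, would be:
import random
import string

from azure.storage.blob import BlockBlobService

blockblob_service = BlockBlobService(connection_string='<connection-string>')
container_name = 'python-course'  # name taken from the commented-out line above


def randomString(length):
    # Random lowercase string used as a block ID; implementation is a guess.
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))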
import base64

import azure.functions as func


def main(queuemsg: func.QueueMessage, msg: func.Out[func.QueueMessage]):
    # Extract message
    msg1 = extract_message(queuemsg)
    count, batchsize = extract_parameters(msg1)[7:]
    print('Process gradient ', count, ' of ', batchsize)

    if count < batchsize:
        try:
            # Get second message
            msg2_b64 = queue_service.get_messages('gradientqueue',
                                                  visibility_timeout=10,
                                                  num_messages=1)
            print('Found ', len(msg2_b64), ' extra message(s).')
            msg2 = base64.b64decode(msg2_b64[0].content).decode()
            queue_service.delete_message('gradientqueue', msg2_b64[0].id,
                                         msg2_b64[0].pop_receipt)

            # Get gradient parameters
            (container1, partial_path1, full_path1, grad_name1, idx1,
             iter1, maxiter1, count1, batchsize1) = extract_parameters(msg1)
            (container2, partial_path2, full_path2, grad_name2, idx2,
             iter2, maxiter2, count2, batchsize2) = extract_parameters(msg2)

            # New block blob gradient
            num_parts, desired_part_size, residual_bytes, file_size = \
                get_multipart_file_params(container1, partial_path1 + grad_name1 + idx1)
            idx3 = get_random_name(16)
            blob_name = partial_path2 + grad_name2 + idx3

            # Loop over blocks
            byte_count = 0
            blocks = []
            count = 1
            for part in range(num_parts):
                print('Process ', count, ' of ', num_parts, ' blocks.')
                count += 1

                # Get byte range
                byte_start = byte_count  # byte start
                if residual_bytes is not None and part == (num_parts - 1):
                    byte_end = byte_count + residual_bytes - 1
                else:
                    byte_end = byte_count + desired_part_size - 1  # read until end of blob
                byte_count = byte_end + 1  # advance to the next block's start

                # Get current gradients and sum
                g = array_get(container1, partial_path1 + grad_name1 + idx1,
                              start_range=byte_start, end_range=byte_end)
                g += array_get(container2, partial_path2 + grad_name2 + idx2,
                               start_range=byte_start, end_range=byte_end)

                # Write back to blob storage
                block_id = get_random_name(32)
                blob_service.put_block(container1, blob_name, g.tobytes(), block_id)
                blocks.append(BlobBlock(id=block_id))

            # Finalize block blob and send message to queue
            print('Finalize block blob')
            blob_service.put_block_list(container1, blob_name, blocks)

            # Delete previous blobs
            blob_service.delete_blob(container1, partial_path1 + grad_name1 + idx1)
            blob_service.delete_blob(container2, partial_path2 + grad_name2 + idx2)

            # Out message
            msg_out = (container1 + '&' + partial_path1 + '&' + full_path1 + '&' +
                       grad_name1 + '&' + idx3 + '&' + str(iter1) + '&' +
                       str(maxiter1) + '&' + str(count1 + count2) + '&' +
                       str(batchsize1))
            print('Out message: ', msg_out, '\n')
            msg.set(msg_out)
        except:
            print('No other messages found. Return message to queue.')
            #time.sleep(2)
            msg.set(msg1)
    else:
        print("Gradient reduction terminated.\n")

        # Move final gradient to other directory
        (container1, partial_path1, full_path1, grad_name1, idx1,
         iter1, maxiter1, count1, batchsize1) = extract_parameters(msg1)

        # New block blob gradient
        num_parts, desired_part_size, residual_bytes, file_size = \
            get_multipart_file_params(container1, partial_path1 + grad_name1 + idx1)
        idx_full = 'full_iteration_' + str(iter1)
        blob_name = full_path1 + grad_name1 + idx_full

        # Loop over blocks
        byte_count = 0
        blocks = []
        count = 1
        for part in range(num_parts):
            print('Process ', count, ' of ', num_parts, ' blocks.')
            count += 1

            # Get byte range
            byte_start = byte_count  # byte start
            if residual_bytes is not None and part == (num_parts - 1):
                byte_end = byte_count + residual_bytes - 1
            else:
                byte_end = byte_count + desired_part_size - 1  # read until end of blob
            byte_count = byte_end + 1  # advance to the next block's start

            # Get current gradient
            g = array_get(container1, partial_path1 + grad_name1 + idx1,
                          start_range=byte_start, end_range=byte_end)

            # Write back to blob storage
            block_id = get_random_name(32)
            blob_service.put_block(container1, blob_name, g.tobytes(), block_id)
            blocks.append(BlobBlock(id=block_id))

        # Finalize block blob and send message to iteration queue
        print('Finalize block blob')
        blob_service.put_block_list(container1, blob_name, blocks)
        blob_service.delete_blob(container1, partial_path1 + grad_name1 + idx1)

        # Out message to iteration queue
        msg_out = (container1 + '&' + partial_path1 + '&' + full_path1 + '&' +
                   grad_name1 + '&' + str(iter1 + 1) + '&' + str(maxiter1) + '&' +
                   str(batchsize1))
        print('Out message: ', msg_out, '\n')
        queue_service.put_message('iterationqueue', msg_out)
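# --- Hedged reconstructions (assumptions, not the original helpers) ---
# The '&'-joined out-messages above imply a nine-field gradient-queue format
# (the iteration-queue message carries fewer fields). The two helpers the
# function depends on might therefore look like this; both are guesses
# reconstructed from usage.
def extract_message(queuemsg):
    # azure.functions.QueueMessage.get_body() returns the raw payload as bytes.
    return queuemsg.get_body().decode()


def extract_parameters(message):
    (container, partial_path, full_path, grad_name, idx,
     iteration, maxiter, count, batchsize) = message.split('&')
    return (container, partial_path, full_path, grad_name, idx,
            int(iteration), int(maxiter), int(count), int(batchsize))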