def zzztest_big_files_fileshare(self):  #!!! needs to be updated
    logging.info("test_big_files_fileshare")
    from onemil.AzureP2P import AzureP2P, ip_address_local
    from onemil.file_cache import AzureStorage, PeerToPeer

    directory = AzureStorage(folder="testazureshardcontainer/fileshare2",
                             local_lambda=ip_address_local,
                             prefix="AzureDirectory",
                             storage_account=self.storage_account,
                             storage_key=self.storage_key)
    storage = PeerToPeer(directory=directory, local_lambda=ip_address_local)
    big_size = int(4e9)
    update_python_path = "once"
    double_it = True

    # One-time run that pushes the Python path to the pool before the timed sweeps.
    AzureBatch(task_count=1, pool_id="d14v2_300", update_python_path=update_python_path)

    while True:
        for n in [16, 4, 14, 2, 12, 10, 8]:
            storage.directory.rmtree()
            runner = AzureBatch(task_count=n * 2 if double_it else n,
                                pool_id="d14v2_300",
                                update_python_path="no")
            #runner = Local()
            #runner = LocalMultiThread(n*2 if double_it else n)
            mbps_list = self._big_files_fileshare_internal(big_size, n, runner, storage, double_it)
            logging.info("{0}\t{1}".format(n, mbps_list))
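The `_big_files_fileshare_internal` helper invoked above is not shown here; it returns the per-run throughput figures that the loop logs. For orientation, a figure of that kind could be computed as in the sketch below. This is a minimal sketch only: the `measure_mbps` name, its callback argument, and the reading of "mbps" as megabits per second are assumptions, not part of the library.

```python
import time

def measure_mbps(byte_count, transfer_fn):
    # Hypothetical helper: time one transfer of byte_count bytes and report
    # megabits per second (one plausible reading of 'mbps').
    start = time.time()
    transfer_fn()  # assumed to move byte_count bytes when called
    elapsed = time.time() - start
    return byte_count * 8 / elapsed / 1e6
```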
def storage_closure():
    temp_dir = self._temp_dir()  # increments self.count
    count = self.count

    def local_lambda():
        unique_name, root = "{0}.{1}".format(os.environ['COMPUTERNAME'], count), temp_dir + "/storage"
        return unique_name, root

    storage = PeerToPeer(directory=AzureStorage("/flstor/testazurep2p/fileshare/directory",
                                                local_lambda=lambda: (None, directory)),
                         local_lambda=local_lambda)
    return storage
def storage_closure():
    temp_dir = self._temp_dir()  # increments self.count
    count = self.count

    def local_lambda():
        unique_name, root = "{0}.{1}".format(os.environ['COMPUTERNAME'], count), temp_dir + "/storage"
        return unique_name, root

    storage = PeerToPeer(directory=directory, local_lambda=local_lambda)
    return storage
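Both closures simulate multiple peers on a single machine: each call mints a fresh identity of the form `COMPUTERNAME.count` with its own scratch root, while all peers share one directory. Below is a sketch of how a test might exercise two such peers; the `open_write`/`file_exists` calls follow the `FileCache` context-manager convention used elsewhere in this module, and the file name is illustrative.

```python
# Hypothetical usage: two simulated peers backed by the same shared directory.
peer_a = storage_closure()  # identity e.g. "MYMACHINE.1"
peer_b = storage_closure()  # identity e.g. "MYMACHINE.2"

# Peer A publishes a file; the context manager yields a local path to write into.
with peer_a.open_write("shared/example.txt") as local_name:
    with open(local_name, "w") as f:
        f.write("hello from peer A")

# Peer B sees the file via the shared directory and can copy it peer-to-peer.
assert peer_b.file_exists("shared/example.txt")
```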
import logging
import os
from contextlib import contextmanager
# FileCache, AzureStorage, PeerToPeer, DibLib, and path_join are assumed to come
# from this package (the tests above import AzureStorage and PeerToPeer from
# onemil.file_cache); their exact module paths are not shown in this fragment.


class AzureP2P(FileCache):
    '''
    A class that subclasses :class:`FileCache` to provide peer-to-peer file sharing
    backed up by Azure Storage.

    **Constructor:**
        :Parameters: * **azure_root** (*string*) -- The path on Azure Storage under which data will be stored. The form of the path is: /STORAGEACCOUNT/CONTAINER/morepath.
                     * **local_lambda** (*a zero-argument lambda*) -- When called, tells where to store data locally. See :func:`ip_address_local`.
                     * **leave_space** (*integer*) -- (default 0) The minimum amount of local storage space that should be kept free.
                     * **storage_credential** (*:class:`StorageCredential`*) -- (default '~/azurebatch/cred.txt') Keys and names of the Azure Storage and Azure Batch accounts to use.
                     * **subpath** (*string*) -- (default '.') Additional path to append to **azure_root**.

    **All the methods of FileCache plus these:**
    '''

    def __init__(self, azure_root, local_lambda, leave_space=0, storage_credential=None, subpath="."):
        super(AzureP2P, self).__init__()
        self.azure_root = azure_root
        self.subpath = subpath
        # When called, local_lambda returns a unique name (e.g. 172.10.12.3) and a root for
        # storage (e.g. \\172.10.12.3\scratch). Repeated calls are expected to return the
        # same two values. #!!! document this
        self.local_lambda = local_lambda
        self.leave_space = leave_space
        self.storage_credential = storage_credential

        self.azure_storage = AzureStorage(path_join(self.azure_root, "storage", subpath),
                                          local_lambda=self.local_lambda,
                                          storage_credential=self.storage_credential)
        self.dib_directory = AzureStorage(path_join(self.azure_root, "dibs", subpath),
                                          local_lambda=self.local_lambda,
                                          storage_credential=storage_credential)

        def storage_lambda():
            unique_name, root = self.local_lambda()
            return unique_name, root + path_join(self.azure_root, "storage", subpath)

        azure_directory = AzureStorage(path_join(self.azure_root, "directory", subpath),
                                       local_lambda=self.local_lambda,
                                       storage_credential=self.storage_credential)
        self.file_share = PeerToPeer(directory=azure_directory,
                                     local_lambda=storage_lambda,
                                     leave_space=self.leave_space)

    def _simple_join(self, path):
        assert not self.azure_storage.file_exists(path), "Can't treat an existing file as a directory"
        return AzureP2P(self.azure_root,
                        local_lambda=self.local_lambda,
                        leave_space=self.leave_space,
                        storage_credential=self.storage_credential,
                        subpath=path_join(self.subpath, path))

    def __repr__(self):
        return "{0}('{1}')".format(self.__class__.__name__, self.name)

    @property
    def name(self):
        return self.azure_root

    def _simple_file_exists(self, simple_file_name):
        return self.azure_storage._simple_file_exists(simple_file_name)

    @contextmanager
    def _simple_open_write(self, simple_file_name, size=0, updater=None):
        subhandle_as = self.azure_storage.open_write(simple_file_name, size=size, updater=updater)
        subhandle_as_file_name = subhandle_as.__enter__()

        if self.file_share._simple_file_exists(simple_file_name):
            logging.warning("The AzureStorage doesn't already have the file that is being written, "
                            "but the PeerToPeer does, so removing it from the PeerToPeer. {0},'{1}'"
                            .format(self.file_share, simple_file_name))
            self.file_share._simple_remove(simple_file_name)

        subhandle_fs = self.file_share.open_write(simple_file_name, size=size, updater=updater)
        subhandle_fs_file_name = subhandle_fs.__enter__()
        assert os.path.normpath(subhandle_fs_file_name) == os.path.normpath(subhandle_as_file_name), \
            "Expect the two ways of distributing files to agree on the local file name"

        yield subhandle_fs_file_name

        subhandle_fs.__exit__(None, None, None)
        subhandle_as.__exit__(None, None, None)  # This one must be last because it sets the file date

    @contextmanager
    def _simple_open_read(self, simple_file_name, updater=None):
        # We assume that we could use either sub-storage and that the only problem that could
        # happen is that the file_share's remote machine containing the "main" file-share copy
        # could have been recycled, so the file would be missing.
        is_ok = False
        try:
            #!!! should the file share be self-repairing? If the "main" copy is gone, pick one of the others.
            subhandle1 = self.file_share._simple_open_read(simple_file_name)
            subhandle1_file_name = subhandle1.__enter__()
            is_ok = True
        except Exception as e:
            logging.info("AzureP2P - machine-to-machine copy of '{0}' failed, so reading from AzureStorage. Exception='{1}'"
                         .format(simple_file_name, e))
        if is_ok:
            yield subhandle1_file_name
            subhandle1.__exit__(None, None, None)
            return

        # We are now in a situation in which multiple readers have failed to find the file in the
        # PeerToPeer. We would like one of them to download from AzureStorage while the others
        # wait; when that one has finished, the others can get the file from the PeerToPeer.
        #
        # Dib the file and see if you are first.
        #   If so, double-check that the PeerToPeer doesn't have it now (in case someone already
        #   fixed everything; if it does, use it); otherwise download and share.
        #   If not, wait until the first dib is gone, then clear your dib and use the file share.
        unique_name = self.local_lambda()[0]
        dib_path = self.dib_directory.join(simple_file_name)
        dir_path = self.file_share.directory.join(simple_file_name)
        dib_lib = DibLib(unique_name, dib_path, dir_path, "azure_storage_dib")
        status = dib_lib.wait_for_turn()
        logging.info("status is '{0}'".format(status))

        if status == 'fixed':
            logging.info("After waiting for someone else to fix the problem, can now read the file with PeerToPeer")
            #!!! should the file share be self-repairing? If the "main" copy is gone, pick one of the others.
            read_handle = self.file_share._simple_open_read(simple_file_name)
            yield read_handle.__enter__()
            read_handle.__exit__(None, None, None)
            dib_lib.remove_dibs()
            return
        elif status == 'azure':
            is_ok = False
            try:
                logging.info("Before I try Azure, let's try the file_share again")
                #!!! should the file share be self-repairing? If the "main" copy is gone, pick one of the others.
                read_handle = self.file_share._simple_open_read(simple_file_name)
                file_name2 = read_handle.__enter__()
                is_ok = True
            except Exception as e:
                logging.info("2nd try of reading from PeerToPeer failed with message '{0}'".format(e))
            if is_ok:
                yield file_name2
                read_handle.__exit__(None, None, None)
                dib_lib.remove_dibs()
                return

            logging.info("downloading from Azure")
            read_handle = self.azure_storage._simple_open_read(simple_file_name)
            file_name3 = read_handle.__enter__()
            self._simple_register_with_peer_to_peer(simple_file_name, file_name3)
            yield file_name3
            read_handle.__exit__(None, None, None)
            dib_lib.remove_dibs()
            return
        else:
            raise Exception("Don't know status '{0}'".format(status))

    def _simple_register_with_peer_to_peer(self, simple_file_name, local_file_name):
        # Removes any current peer-to-peer entry and adds a new one.
        temp_name = local_file_name + "." + format(hash(os.times())) + ".temp"
        os.rename(local_file_name, temp_name)  # Rename so that the remove below doesn't delete it
        if self.file_share._simple_file_exists(simple_file_name):
            self.file_share._simple_remove(simple_file_name)

        # size=0 because we already have the space allocated.
        with self.file_share._simple_open_write(simple_file_name, size=0) as file_name:
            os.rename(temp_name, local_file_name)  # Put the file back
            assert os.path.normpath(local_file_name) == os.path.normpath(file_name), \
                "Expect the two ways of distributing files to agree on the local file name"

    def _simple_getmtime(self, simple_file_name):
        return self.azure_storage._simple_getmtime(simple_file_name)

    def _simple_rmtree(self, log_writer=None):
        # If all three sub-stores share a common directory, kill that directory.
        # Why this could be a bad idea: the azure_shard_containers could differ, or there could
        # be another directory of interest in it.
        # Why this could be a great idea: if this is a top-level Azure Storage container,
        # deleting it will take a second instead of an hour.
        if isinstance(self.file_share.directory, AzureStorage):

            def drop_end(folder):
                return '/'.join(folder.split('/')[:-1])

            folder1 = drop_end(self.file_share.directory.folder)
            folder2 = drop_end(self.dib_directory.folder)
            folder3 = drop_end(self.azure_storage.folder)
            if folder1 == folder2 and folder2 == folder3:
                if log_writer is not None:
                    log_writer("Fast rmtreeing '{0}'".format(folder1))
                self.azure_storage.azure_shard_container.rmtree(folder1)
                return

        self.file_share._simple_rmtree(log_writer=log_writer)
        self.azure_storage._simple_rmtree(log_writer=log_writer)
        self.dib_directory._simple_rmtree(log_writer=log_writer)

    def _simple_remove(self, simple_file_name, log_writer=None):
        if self.file_share._simple_file_exists(simple_file_name):
            self.file_share._simple_remove(simple_file_name)
        self.azure_storage._simple_remove(simple_file_name, log_writer=log_writer)

    def _simple_walk(self):
        return self.azure_storage._simple_walk()

    def azure_storage_only(self, path=None, log_writer=None):
        '''
        Remove everything except the AzureStorage copy of the file system
        '''
        self.dib_directory.rmtree()  # Remove everything from the dib directory
        self.file_share._remove_local_if_any(path)
        self.file_share.directory.rmtree(path, log_writer=log_writer)

    def remove_from_azure_storage(self, path=None, log_writer=None):
        '''
        Remove everything from AzureStorage (local copies will remain, but will be ignored)
        '''
        if path is None:
            # This will remove azure_storage and the directory (and the dibs, again)
            self.rmtree(log_writer=log_writer)
        elif self.azure_storage.file_exists(path):
            self.azure_storage.remove(path)
        # This will remove any local files (and the dibs, again)
        self.azure_storage_only(path, log_writer=log_writer)
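To show how the pieces fit together end to end, here is a usage sketch. The storage path is the placeholder form from the docstring, `ip_address_local` is the helper imported in the test above, and the `open_write`/`open_read` context managers (which yield a local file name) follow the `FileCache` convention seen in the methods above; treat this as an illustration under those assumptions, not canonical API documentation.

```python
from onemil.AzureP2P import AzureP2P, ip_address_local

# Placeholder path of the documented /STORAGEACCOUNT/CONTAINER/morepath form.
p2p = AzureP2P("/STORAGEACCOUNT/CONTAINER/morepath", local_lambda=ip_address_local)

# Writing goes to AzureStorage and is registered with the PeerToPeer file
# share at the same time (see _simple_open_write above).
with p2p.open_write("data/part0.bin") as local_name:
    with open(local_name, "wb") as f:
        f.write(b"\x00" * 1024)

# Reading tries a machine-to-machine copy first; if the "main" copy's machine
# was recycled, the DibLib handshake elects one reader to download from
# AzureStorage and re-share it while the others wait (see _simple_open_read).
with p2p.open_read("data/part0.bin") as local_name:
    print(local_name)
```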