def _bcast_action(q1: Queue, q2: Queue, tk: int, action: Any, args: List[Any], kwargs: Dict[str, Any]) -> Any:
    """
    Run ``action`` as one participant of a broadcast.

    :param q1: Queue that receives ``tk`` before the action is run
    :param q2: Queue from which one item is taken after the action completes
               (also when the action raises)
    :param tk: Token to identify this task
    :param action: Callable
    :param args: Ordered arguments to action
    :param kwargs: Named arguments to action

    :returns: result of calling action(*args, **kwargs)
    """
    # Tell the main thread that this task has started.
    q1.put(tk)

    try:
        outcome = action(*args, **kwargs)
    finally:
        # Wait for all threads to start
        # (q2 is expected to be empty until q1 is filled).
        q2.get()

    return outcome
class DistStatusReporter(object):
    """Report status through the training scheduler.

    Reports are pushed onto an internal queue; after each report the caller
    waits on an internal semaphore until :meth:`move_on` releases it.

    Example:
        >>> @autogluon_method
        >>> def train_func(config, reporter):
        >>>     reporter(accuracy=0.1)
    """

    def __init__(self):
        self._queue = Queue()
        self._continue_semaphore = DistSemaphore(0)
        self._last_report_time = time.time()

    def __call__(self, **kwargs):
        """Report updated training status.

        Pass in `done=True` when the training job is completed.

        Args:
            kwargs: Latest training result status. ``time_this_iter`` is
                filled in with the time elapsed since the previous report
                when the caller did not supply it.

        Example:
            >>> reporter(accuracy=1, training_iters=4)
        """
        report_time = time.time()
        if 'time_this_iter' not in kwargs:
            kwargs['time_this_iter'] = report_time - self._last_report_time
        self._last_report_time = report_time

        try:
            # Enqueue a copy so later changes to ``kwargs`` cannot affect it.
            self._queue.put(kwargs.copy())
        except RuntimeError:
            # Best effort: if the report cannot be enqueued, return without
            # waiting on the semaphore.
            return
        self._continue_semaphore.acquire()

    def fetch(self, block=True):
        """Return the next reported status dict.

        Args:
            block: Accepted for interface compatibility; the current
                implementation does not use it.

        Returns:
            The next item from the internal queue, or an empty dict when the
            queue raises ``CommClosedError``.
        """
        try:
            kwargs = self._queue.get()
        except CommClosedError:
            return {}
        return kwargs

    def move_on(self):
        """Release the semaphore that ``__call__`` waits on after reporting."""
        self._continue_semaphore.release()

    def _start(self):
        """Adjust the real starting time."""
        self._last_report_time = time.time()

    def save_dict(self, **state_dict):
        """Not implemented for this reporter.

        Raises:
            NotImplementedError: always.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError

    def get_dict(self):
        """Not implemented for this reporter.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def __repr__(self):
        reprstr = self.__class__.__name__
        return reprstr
def __init__(self, dask=None, client=None, logger=None):
    """Attach to a Dask scheduler and open the ``"praetor"`` queue.

    :param dask: scheduler address passed to ``Client(address=...)`` when no
        ``client`` is given; ``"tcp://localhost:8786"`` is used when both
        ``dask`` and ``client`` are ``None``
    :param client: existing client to use; when given, ``dask`` is not used
    :param logger: logger stored on the instance; defaults to
        ``logging.getLogger("praetor")``
    """
    if client is None:
        address = "tcp://localhost:8786" if dask is None else dask
        client = Client(address=address)

    self.logger = logging.getLogger("praetor") if logger is None else logger
    self.queue = Queue("praetor", client=client)
class DaskPraetorClient(PraetorClient):
    """Praetor client that sends messages through the ``"praetor"`` Dask queue."""

    def __init__(self, address=None, client=None):
        """Open the queue using ``address`` or an existing ``client``.

        :param address: scheduler address; when not ``None`` a new ``Client``
            is created from it, even if ``client`` was also supplied
        :param client: existing client, used when ``address`` is ``None``
        :raises ValueError: when ``client or address`` evaluates to ``None``
        """
        chosen = client or address
        if chosen is None:
            raise ValueError("address or client must be provided")

        if address is not None:
            client = Client(address)
        self.queue = Queue("praetor", client=client)

    def send_message(self, message):
        """Log the message's ``cls`` entry at debug level and enqueue the message."""
        log = logging.getLogger(__name__)
        log.debug(f"Sending message about {message.get('cls')}")
        self.queue.put(message)
class DaskPraetorCollector(PraetorCollector):
    """Praetor collector that reads messages from the ``"praetor"`` Dask queue."""

    def __init__(self, address):
        """Connect a new ``Client`` to ``address`` and open the queue.

        The queue is assigned before the base-class initializer runs.
        """
        self.queue = Queue("praetor", client=Client(address))
        super().__init__()

    def receive_messages(self):
        """Return the result of ``queue.get(timeout=1, batch=True)``."""
        return self.queue.get(timeout=1, batch=True)
class DistSemaphore(object):
    """Counting semaphore built on a queue: each queued item is one permit."""

    def __init__(self, value):
        """Create the queue and pre-load it with ``value`` permits."""
        self._queue = Queue()
        for _ in range(value):
            self._queue.put(1)

    def acquire(self):
        """Take one permit from the queue.

        A ``CommClosedError`` raised by the queue is deliberately ignored.
        """
        try:
            self._queue.get()
        except distributed.comm.core.CommClosedError:
            pass

    def release(self):
        """Return one permit to the queue."""
        self._queue.put(1)

    def __repr__(self):
        return self.__class__.__name__
def get_client(self):
    """Create a Client connected to the dask-scheduler at ``self.address``.

    Also sets the worker count ``self.n_workers`` and creates two queues:
    ``self.process_queue`` (bounded to ``self.n_workers`` entries) and
    ``self.result_queue``, used for running processes and results
    respectively.

    :return: the new client, the primary entry point for users of
        dask.distributed.
    :rtype: distributed.Client
    """
    from dask.distributed import Client, Queue

    client = Client(address=self.address)
    workers = client.scheduler_info()["workers"]
    self.n_workers = len(workers)

    self.process_queue = Queue(client=client, maxsize=self.n_workers)
    self.result_queue = Queue(client=client)
    return client
def pool_broadcast(client: Client, action: Any, *args: List[Any], **kwargs: Dict[str, Any]):
    """Call ``action(*args, **kwargs)`` on every worker thread.

    This function blocks until all tasks are complete. The expectation is
    that it is called at the very beginning on an empty pool; if called on a
    busy pool it will block until all active tasks are complete.

    Broadcast is achieved by blocking every task until all tasks have
    started. Every worker does the following:

    1. Let the primary task know this task has started
    2. Perform action
    3. Wait for all other tasks to start
    4. Finish

    Steps (1) and (3) are achieved using distributed Queues: step (1) is a
    non-blocking ``put`` and step (3) is a blocking ``get``.

    :param client: Dask client object
    :param action: Callable `action(*args, **kwargs)`
    :param args: Ordered arguments to action
    :param kwargs: Named arguments to action
    :returns: list with the result of every submitted task
    """
    # A random suffix is appended to the queue names and task keys of this call.
    suffix = "-{:02x}".format(randint(0, 1 << 64))
    n_threads = sum(client.ncores().values())

    q1 = Queue("q1" + suffix, client=client, maxsize=n_threads)
    q2 = Queue("q2" + suffix, client=client, maxsize=n_threads)

    futures = []
    for idx in range(n_threads):
        task_key = "broadcast_action_{:04d}{}".format(idx, suffix)
        futures.append(
            client.submit(_bcast_action, q1, q2, idx, action, args, kwargs, key=task_key)
        )

    # Blocking: collect one start token per task.
    started = {q1.get() for _ in range(n_threads)}
    assert len(started) == n_threads

    # At this point all workers have launched; allow them to continue.
    for idx in range(n_threads):
        q2.put(idx)  # should not block

    # Block until all are done and return the results.
    return [fut.result() for fut in futures]
class Praetor:
    """Publishes flow, flow-run and task-run objects to the ``"praetor"`` queue."""

    def __init__(self, dask=None, client=None, logger=None):
        """Attach to a Dask scheduler and open the ``"praetor"`` queue.

        :param dask: scheduler address passed to ``Client(address=...)`` when
            no ``client`` is given; ``"tcp://localhost:8786"`` is used when
            both ``dask`` and ``client`` are ``None``
        :param client: existing client to use; when given, ``dask`` is not used
        :param logger: logger stored on the instance; defaults to
            ``logging.getLogger("praetor")``
        """
        if client is None:
            address = "tcp://localhost:8786" if dask is None else dask
            client = Client(address=address)

        self.logger = logging.getLogger("praetor") if logger is None else logger
        self.queue = Queue("praetor", client=client)

    def obj_to_message(self, obj):
        """Build the message dict (``cls``, ``endpoint``, ``obj``) for ``obj``.

        :raises KeyError: when the class name of ``obj`` has no endpoint entry
        """
        cls_name = obj.__class__.__name__
        endpoints = {
            "NaiveFlow": "flows/",
            "NaiveFlowRun": "flow_runs/",
            "NaiveTaskRun": "task_runs/",
        }
        return {
            "cls": cls_name,
            "endpoint": endpoints[cls_name],
            "obj": obj.dict(),
        }

    def _enqueue(self, obj):
        # Shared by the public post/shutdown methods: convert, then put with a 3 s timeout.
        self.queue.put(self.obj_to_message(obj), timeout=3)

    def post_flow(self, flow: NaiveFlow):
        """Enqueue the message for ``flow``."""
        self._enqueue(flow)

    def shutdown_flow(self, flow: NaiveFlow):
        """Set ``flow.is_online`` to ``False`` (on the passed object) and enqueue it."""
        flow.is_online = False
        self._enqueue(flow)

    def post_flow_run(self, flow_run: NaiveFlowRun):
        """Enqueue the message for ``flow_run``."""
        self._enqueue(flow_run)

    def post_task_run(self, task_run: NaiveTaskRun):
        """Enqueue the message for ``task_run``."""
        self._enqueue(task_run)

    def close(self):
        """Close the queue, then the client the queue is attached to."""
        self.queue.close()
        self.queue.client.close()
def get_data(self, as_list=False):
    """Build one array per entry of the description taken from the ``"Arrays"`` queue.

    The description is stored on ``self.arrays_desc``. For every name in it,
    ``self.create_array`` (or ``self.create_array_list`` when ``as_list`` is
    true) is called with the entry's ``sizes``, ``subsizes``, ``dtype`` and
    ``timedim``. After all arrays are created the ``"Done"`` event is set.

    :param as_list: select ``create_array_list`` instead of ``create_array``
    :return: dict mapping each name to what the selected builder returned
    """
    self.arrays_desc = Queue("Arrays").get()

    arrays = dict()
    for name in self.arrays_desc:
        desc = self.arrays_desc[name]
        # TODO test this (the create_array_list path)
        build = self.create_array_list if as_list else self.create_array
        arrays[name] = build(
            name,
            desc["sizes"],
            desc["subsizes"],
            desc["dtype"],
            desc["timedim"],
        )

    # Barrier after the creation of all the dask arrays
    done = Event("Done")
    done.set()
    return arrays
def __init__(self, Client, Ssize, rank, arrays, deisa_arrays_dtype):
    """Pick this rank's workers and normalise the array descriptions.

    :param Client: stored as ``self.client``
    :param Ssize: number of processes; compared against the number of workers
    :param rank: index of this process
    :param arrays: dict of array descriptions; it is modified in place
        (``dtype`` becomes a string, ``timedim`` its first element)
    :param deisa_arrays_dtype: mapping from array name to dtype

    NOTE(review): the collapsed source does not show nesting; ``self.position``
    is assumed to be assigned inside the loop and the ``rank == 0`` put to
    happen once after it. Confirm against the original file.
    """
    self.client = Client
    self.rank = rank

    listw = Variable("workers").get()
    n_workers = len(listw)
    if Ssize > n_workers:
        # more processes than workers
        self.workers = [listw[rank % n_workers]]
    else:
        # more workers than processes
        k = n_workers // Ssize
        first = rank * k
        self.workers = listw[first:first + k]

    self.arrays = arrays
    for ele in self.arrays:
        desc = self.arrays[ele]
        desc["dtype"] = str(deisa_arrays_dtype[ele])
        desc["timedim"] = desc["timedim"][0]
        ndim = len(np.array(desc["sizes"]))
        self.position = [
            desc["starts"][i] // desc["subsizes"][i]
            for i in range(ndim)
        ]

    if rank == 0:
        # If and only if I have a perfect domain decomposition
        Queue("Arrays").put(self.arrays)
def calculate_small_parsimony(inq, outq, stopiter, treefile, matfile,bootstrap_replicates, row_index, iolock , verbose = False ):
    """Worker loop: take columns from the 'inq' queue, run both small-parsimony
    passes on a tree, and put the collected events on the 'outq' queue.

    :param inq: ignored; rebound below to the named queue ``Queue('inq')``
    :param outq: ignored; rebound below to the named queue ``Queue('outq')``
    :param stopiter: loop flag; the loop runs while ``stopiter == False`` or
        'inq' still reports items
    :param treefile: path of the newick tree read with dendropy
    :param matfile: path of the HDF5 file holding the ``'MSA2array'`` dataset
    :param bootstrap_replicates: replicate count (see the NOTE on the typo below)
    :param row_index: mapping from taxon label (quotes stripped) to a row of
        the alignment array
    :param iolock: lock acquired around the verbose print
    :param verbose: print a message for leaves missing from ``row_index``

    NOTE(review): the source of this function arrived with its whitespace
    collapsed; the nesting below is a reconstruction and must be confirmed
    against the original file.
    NOTE(review): ``allowed_symbols``, ``bootstrap``, ``aln_column``,
    ``process_node_smallpars_1`` and ``process_node_smallpars_2`` are not
    defined in this function — presumably module-level names; verify.
    """
    # The queue arguments are replaced by the shared named queues.
    inq = Queue('inq')
    outq = Queue('outq')
    #setup the tree and matrix for each worker
    with h5py.File(matfile) as hf:
        align_array = hf['MSA2array']
        missing = 0
        sys.setrecursionlimit( 10 **8 )
        t = dendropy.Tree.get( path=treefile, schema='newick')
        #init the blank tree
        for i,n in enumerate(t.nodes()):
            n.matrow = i
            n.symbols = None
            n.scores = None
            n.event = None
            n.char = None
            n.eventype = None
            n.AAevent = 0
        for i,l in enumerate(t.leaf_nodes()):
            l.event = {}
            l.scores = {}
            l.symbols = {}
            l.char= {}
            l.calc = {}
        #work on a fresh tree each time
        # NOTE(review): stopiter is compared with False directly; if callers
        # pass something other than a plain bool (e.g. a distributed Variable)
        # this comparison may not behave as intended — confirm.
        while stopiter == False or inq.qsize()>0:
            codon ,pos = inq.get()
            #assign leaf values
            #repeat here for bootstrap
            # NOTE(review): `bootsrap_replicates` is spelled differently from
            # the `bootstrap_replicates` parameter; unless a global of that
            # name exists this raises NameError.
            for i in range(bootsrap_replicates):
                #select portion of random genomes to take out
                if bootstrap_replicates >1:
                    del_genomes = set(np.random.randint( align_array.shape[0], size= int(align_array.shape[0]*bootstrap) ) )
                else:
                    del_genomes = set([])
                #change a subset of leaves to ambiguous characters
                # NOTE(review): this loop rebinds `pos` (the value taken from
                # the queue) while iterating over it.
                for pos,col in enumerate(pos):
                    for l in t.leaf_nodes():
                        # NOTE(review): type(...) is never None, so this test
                        # is always true and the else branch is unreachable;
                        # `col[1] is not None` may have been intended.
                        if type(col[1]) is not None:
                            #column has no events
                            l.calc[pos] = False
                            char = col[1]
                            l.event[pos] = 0
                            l.scores[pos] = { c:10**10 for c in allowed_symbols }
                            if char.upper() in allowed_symbols:
                                l.symbols[pos] = { char }
                                l.scores[pos][char] = 0
                            else:
                                #ambiguous leaf
                                l.symbols[pos] = allowed_symbols
                        else:
                            #setup for small_pars1
                            l.calc[pos] = True
                            l.event[pos] = 0
                            l.scores[pos] = { c:10**10 for c in allowed_symbols }
                            if str(l.taxon).replace("'", '') in row_index:
                                char = align_array[ row_index[str(l.taxon).replace("'", '')] , col[0] ]
                                if char.upper() in allowed_symbols:
                                    l.symbols[pos] = { char }
                                    l.scores[pos][char] = 0
                                elif col[0] in del_genomes:
                                    l.symbols[pos] = allowed_symbols
                                else:
                                    #ambiguous leaf
                                    l.symbols[pos] = allowed_symbols
                            else:
                                missing += 1
                                char = None
                                l.symbols[pos] = allowed_symbols
                                if verbose == True:
                                    iolock.acquire()
                                    print( 'err ! alncol: ', l.taxon , aln_column )
                                    iolock.release()
                        # Record the symbol with the lowest score for this position.
                        l.char[pos] = min(l.scores[pos], key=l.scores[pos].get)
                #done tree init
                #up
                process_node_smallpars_1(t.seed_node)
                #down
                process_node_smallpars_2(t.seed_node)
                #collect events
                eventdict = {}
                for pos in [0,1,2]:
                    eventindex = [ n.matrow for n in t.nodes() if n.event[pos] > 0 ]
                    eventtypes = [ n.eventype[pos] for n in t.nodes() if n.event[pos] > 0 ]
                    eventdict[pos] = { 'type': eventtypes , 'index' : eventindex }
                AAeventindex = [ n.matrow for n in t.nodes() if n.AAevent ]
                AAeventypes = [ n.AAevent for n in t.nodes() if n.AAevent ]
                # NOTE(review): four positional arguments are passed to put();
                # presumably a single tuple was intended — check the Queue.put
                # signature and the consumer of 'outq'.
                outq.put(col, eventdict , AAeventindex , AAeventypes)
count +=1 print('FINAL SAVE !') save_mats(count, runName, AA_mutation,nucleotide_mutation) print('DONE ! ') brake.set(False) return None #######start the sankof algo here ####################### print('starting sankof') #scale cluster #scatter the blank tree and row index for each process #remote_tree = client.scatter(tree) remote_index = client.scatter(IDindex) inq = Queue('inq') outq = Queue('outq') lock = Lock('x') stopiter = Variable(False) brake = Variable(True) saver_started = False workers_started = False #start workers for workers in range(NCORE*ncpu ): w = client.submit( calculate_small_parsimony , inq= None ,outq = None ,stopiter= stopiter , treefile=treefile , bootstrap_replicates = bootstrap_replicates, matfile= alnfile+'.h5' , row_index= remote_index , iolock = lock, verbose = False ) fire_and_forget(w)
AA_mutation = sparseND.COO( coords = (AAeventindex , np.ones(len(AAeventindex)) * column , AAeventypes ) , data = np.ones(len(AAeventindex) , ) , shape = (matsize[0] , matsize[1] ,len(transitiondict_AA ) ) , dtype = np.int32 ) count +=1 print('FINAL SAVE !') save_mats(count, runName, AA_mutation,nucleotide_mutation) print('DONE ! ') return None #######start the sankof algo here ####################### print('starting sankof') #scale cluster #scatter the blank tree and row index for each process #remote_tree = client.scatter(tree) remote_index = client.scatter(IDindex) inq = Queue() outq = Queue() lock = Lock('x') saver_started = False workers_started = False stopiter = Variable(False) #big for loop here generating the mats with futures for n in range(bootstrap_replicates): #select portion of random genomes to take out if bootstrap_replicates >1: del_genomes = np.random.randint( align_array.shape[0], size= int(align_array.shape[0]*bootstrap) ) for annot_index,annot_row in annotation.iterrows(): #indexing starts at 1 for blast #####switch to sending the coordinates and masking for the matrix
sys.path.append('..') import serialio import os import argparse import numpy as np import pickle from dask.distributed import Client, Queue import tensorflow as tf import tensorflow.keras as keras from tensorflow.keras.callbacks import ModelCheckpoint dask_client = Client(processes=False) # use threads dask_queue = Queue() # ensure that training is executed sequentially # Global model checkpoint accessed from the queue # A workaround for a pickling issue in TF 2.0 # https://github.com/tensorflow/tensorflow/issues/33283 checkpoint = None save_model = None def load_model(): """manually creates a model and loads its weights, instead of using tensorflow.keras.load_model() A workaround for a pickling issue in TF 2.0 https://github.com/tensorflow/tensorflow/issues/33283 """
def __init__(self, value):
    """Create the backing queue and pre-load it with ``value`` items."""
    self._queue = Queue()
    for _ in range(value):
        # Each queued item is the constant 1.
        self._queue.put(1)
def __init__(self, address=None, client=None):
    """Open the ``"praetor"`` queue using ``address`` or an existing ``client``.

    :param address: scheduler address; when not ``None`` a new ``Client`` is
        created from it, even if ``client`` was also supplied
    :param client: existing client, used when ``address`` is ``None``
    :raises ValueError: when ``client or address`` evaluates to ``None``
    """
    chosen = client or address
    if chosen is None:
        raise ValueError("address or client must be provided")

    if address is not None:
        client = Client(address)
    self.queue = Queue("praetor", client=client)
class ClusterDaskDistributor(DistributorBaseClass):
    """Distributor using a dask cluster, meaning that the calculation is spread over a cluster.

    :param str address: The `address` of dask-scheduler, e.g. `tcp://127.0.0.1:8786`.
    """

    def __init__(self, address):
        """Set up a distributor that connects to a dask-scheduler to distribute the calculation.

        :param address: the ip address and port number of the dask-scheduler.
        :type address: str
        """
        self.address = address
        self.future_set = set()
        self._queue_lock = Lock()

    def get_client(self):
        """Create a Client connected to the dask-scheduler at ``self.address``.

        Also sets the worker count ``self.n_workers`` and creates two queues:
        ``self.process_queue`` (bounded to ``self.n_workers`` entries) and
        ``self.result_queue``, used for running processes and results
        respectively.

        :return: the new client, the primary entry point for users of dask.distributed.
        :rtype: distributed.Client
        """
        from dask.distributed import Client, Queue

        client = Client(address=self.address)
        workers = client.scheduler_info()["workers"]
        self.n_workers = len(workers)
        self.process_queue = Queue(client=client, maxsize=self.n_workers)
        self.result_queue = Queue(client=client)
        return client

    def get_worker_count(self):
        """Get the worker count recorded by :meth:`get_client`.

        :return: the worker count of the current Client in dask-scheduler.
        :rtype: int
        """
        return self.n_workers

    def update_queues(self):
        """Move finished futures from the tracked set into the result queue."""
        with self._queue_lock:
            finished = set()
            for entry in self.future_set:
                pid, future = entry[0], entry[1]
                if not future.done():
                    continue
                self.result_queue.put((pid, future.result()))
                # One slot of the process queue is released per finished task.
                self.process_queue.get()
                finished.add(entry)
            # Removal happens after the iteration so the set is not modified while looping.
            for entry in finished:
                self.future_set.remove(entry)

    def result_queue_empty(self):
        """Update the queues, then report whether the result queue is empty.

        :return: whether the result queue is empty.
        :rtype: bool
        """
        self.update_queues()
        return self.result_queue.qsize() == 0

    def result_queue_get(self):
        """Get a (pid, result) pair from the result queue if it is not empty.

        :return: first (pid, result) pair in the result queue, or ``(None, None)``.
        :rtype: (str or int or None, a user-defined result or None)
        """
        self.update_queues()
        if self.result_queue.qsize() == 0:
            return None, None
        pid, result = self.result_queue.get()
        return pid, result

    def process_queue_full(self):
        """Check whether the process queue is full.

        :return: True if the process queue holds ``n_workers`` entries, otherwise False.
        :rtype: bool
        """
        self.update_queues()
        return self.process_queue.qsize() == self.n_workers

    def process_queue_empty(self):
        """Check whether the process queue is empty.

        :return: True if the process queue is empty, otherwise False.
        :rtype: bool
        """
        self.update_queues()
        return self.process_queue.qsize() == 0

    def distribute(self, client, pid, func, kwargs):
        """Submit a calculation task to the cluster.

        The task is submitted through ``client.submit``; the resulting future
        is tracked together with ``pid`` and ``pid`` is put on the process queue.

        :param distributed.Client client: the target `client` to run this task.
        :param pid: unique `pid` describing this task.
        :type pid: str or int (defined by user).
        :param func: A serializable function or callable object that performs
            the calculation to be distributed.
        :type func: function or object.
        :param dict kwargs: Parameters of `func`.
        """
        future = client.submit(func, **kwargs)
        self.future_set.add((pid, future))
        self.process_queue.put(pid)

    def close(self, client):
        """Close the connection to the Dask scheduler.

        :param distributed.Client client: the target `client` to close.
        """
        client.close()

    def join(self):
        """Wait for all processes in the process queue to finish."""
        # Poll every 0.1 s until the process queue has drained.
        while not self.process_queue_empty():
            time.sleep(0.1)
        return
def __init__(self, value, remote=None):
    """Create the backing queue on ``remote`` and pre-load it with ``value`` items.

    :param value: number of items put on the queue up front
    :param remote: passed to ``Queue`` as its ``client`` argument
    """
    self._queue = Queue(client=remote)
    for _ in range(value):
        # Each queued item is the constant 1.
        self._queue.put(1)
def __init__(self, address):
    """Connect a new ``Client`` to ``address`` and open the ``"praetor"`` queue.

    The queue is assigned before the base-class initializer runs.
    """
    self.queue = Queue("praetor", client=Client(address))
    super().__init__()
def __init__(self) -> None:
    """Create the report queue, the continue-semaphore and the report clock."""
    # Queue that carries the reported status dicts.
    self._queue = Queue()
    # Semaphore created with an initial value of 0.
    self._continue_semaphore = DistSemaphore(0)
    # Timestamp used as the reference point for the next report.
    self._last_report_time = time.time()
def __init__(self, remote=None) -> None:
    """Create the report queue on ``remote``, the continue-semaphore and the report clock.

    :param remote: passed to ``Queue`` as its ``client`` argument
    """
    # Queue that carries the reported status dicts.
    self._queue = Queue(client=remote)
    # Semaphore created with an initial value of 0.
    self._continue_semaphore = DistSemaphore(0)
    # Timestamp used as the reference point for the next report.
    self._last_report_time = time.time()