Ejemplo n.º 1
0
def _bcast_action(q1: Queue,
                  q2: Queue,
                  tk: int,
                  action: Any,
                  args: List[Any],
                  kwargs: Dict[str, Any]) -> Any:
    """Run ``action`` as one participant of a broadcast.

    The token ``tk`` is put on ``q1`` before the action is invoked. After the
    action finishes -- whether it returned or raised -- one item is taken
    from ``q2`` before this function completes.

    :param q1: Queue that receives ``tk`` before the action is invoked
    :param q2: Queue from which one item is taken once the action is done
    :param tk: Token to identify this task
    :param action: Callable
    :param args: Ordered arguments to action
    :param kwargs: Named arguments to action

    :returns: result of calling action(*args, **kwargs)
    """
    # Announce to the main thread that this task is running.
    q1.put(tk)
    try:
        return action(*args, **kwargs)
    finally:
        # Hold here until an item is available on q2
        # (q2 is expected to be empty until q1 is filled).
        q2.get()
Ejemplo n.º 2
0
class DistStatusReporter(object):
    """Report status through the training scheduler.

    Calling the reporter puts the given keyword arguments on an internal
    queue and then acquires the continue-semaphore; :meth:`move_on` releases
    that semaphore.

    Example:
        >>> @autogluon_method
        >>> def train_func(config, reporter):
        >>>     reporter(accuracy=0.1)
    """
    def __init__(self):
        self._queue = Queue()
        self._continue_semaphore = DistSemaphore(0)
        self._last_report_time = time.time()

    def __call__(self, **kwargs):
        """Report updated training status.
        Pass in `done=True` when the training job is completed.

        If ``time_this_iter`` is not supplied it is filled in with the time
        elapsed since the previous report (or since construction / `_start`).

        Args:
            kwargs: Latest training result status.

        Example:
            >>> reporter(accuracy=1, training_iters=4)
        """
        report_time = time.time()
        if 'time_this_iter' not in kwargs:
            kwargs['time_this_iter'] = report_time - self._last_report_time
        self._last_report_time = report_time

        try:
            self._queue.put(kwargs.copy())
        except RuntimeError:
            # Best effort: if the report cannot be queued, return without
            # waiting on the semaphore.
            return
        self._continue_semaphore.acquire()

    def fetch(self, block=True):
        """Return the next reported status dict from the queue.

        Args:
            block: Accepted for interface compatibility; currently unused.

        Returns:
            The reported kwargs, or ``{}`` if the queue's communication
            channel has been closed.
        """
        try:
            kwargs = self._queue.get()
        except CommClosedError:
            return {}
        return kwargs

    def move_on(self):
        """Release the continue-semaphore acquired at the end of a report."""
        self._continue_semaphore.release()

    def _start(self):
        """Adjust the real starting time
        """
        self._last_report_time = time.time()

    def save_dict(self, **state_dict):
        """Not implemented by this reporter.

        Raises:
            NotImplementedError: always.
        """
        # ``raise NotImplemented`` would fail with TypeError because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError

    def get_dict(self):
        """Not implemented by this reporter.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def __repr__(self):
        reprstr = self.__class__.__name__
        return reprstr
Ejemplo n.º 3
0
 def __init__(self, dask=None, client=None, logger=None):
     """Pick a client and a logger, then open the ``"praetor"`` queue.

     :param dask: Scheduler address used to build a client when ``client``
         is not given; ``"tcp://localhost:8786"`` if both are omitted.
     :param client: Existing client to use; when given, ``dask`` is ignored.
     :param logger: Logger stored on the instance; the ``"praetor"`` logger
         is used when omitted.
     """
     if client is None:
         # Only build a client when the caller did not hand one in.
         address = "tcp://localhost:8786" if dask is None else dask
         client = Client(address=address)
     self.logger = logging.getLogger("praetor") if logger is None else logger
     self.queue = Queue("praetor", client=client)
Ejemplo n.º 4
0
class DaskPraetorClient(PraetorClient):
    """Praetor client that puts messages on the ``"praetor"`` queue."""

    def __init__(self, address=None, client=None):
        """Open the ``"praetor"`` queue on a client.

        :param address: Address passed to ``Client``; when given it takes
            precedence over ``client``.
        :param client: Existing client, used only when ``address`` is None.
        :raises ValueError: if both ``address`` and ``client`` are None.
        """
        # Compare against None explicitly: ``(client or address) is None``
        # would also reject a client object that merely evaluates as falsy.
        if client is None and address is None:
            raise ValueError("address or client must be provided")
        if address is not None:
            client = Client(address)
        self.queue = Queue("praetor", client=client)

    def send_message(self, message):
        """Log the message's ``'cls'`` entry at debug level and enqueue it.

        :param message: Mapping to put on the queue; ``message.get('cls')``
            is used for the log line.
        """
        logger = logging.getLogger(__name__)
        # Lazy %-formatting: the text is only built if debug is enabled.
        logger.debug("Sending message about %s", message.get('cls'))
        self.queue.put(message)
Ejemplo n.º 5
0
class DaskPraetorCollector(PraetorCollector):
    """Collector that reads messages from the ``"praetor"`` queue."""

    def __init__(self, address):
        """Connect to ``address`` and open the queue before base-class init.

        :param address: Address passed to ``Client``.
        """
        self.queue = Queue("praetor", client=Client(address))
        super().__init__()

    def receive_messages(self):
        """Fetch a batch from the queue, using a timeout of 1.

        :returns: whatever ``queue.get(timeout=1, batch=True)`` yields.
        """
        batch = self.queue.get(timeout=1, batch=True)
        return batch
Ejemplo n.º 6
0
class DistSemaphore(object):
    """Counting semaphore built on a queue: one queued item per permit."""

    def __init__(self, value):
        """Create the queue and pre-load it with ``value`` permits."""
        self._queue = Queue()
        for _ in range(value):
            self._queue.put(1)

    def acquire(self):
        """Take one permit from the queue.

        A ``CommClosedError`` raised by the queue is swallowed and the call
        returns normally.
        """
        try:
            self._queue.get()
        except distributed.comm.core.CommClosedError:
            return

    def release(self):
        """Return one permit to the queue."""
        self._queue.put(1)

    def __repr__(self):
        return self.__class__.__name__
Ejemplo n.º 7
0
    def get_client(self):
        """Connect to the dask-scheduler at ``self.address``.

        Besides creating the client this sets ``self.n_workers`` to the
        number of workers the scheduler reports and creates two queues:
        ``self.process_queue`` (bounded to ``n_workers`` entries) and
        ``self.result_queue`` (unbounded), for running processes and results
        respectively.

        :return: the newly created client.
        :rtype: distributed.Client

        """
        from dask.distributed import Client, Queue

        client = Client(address=self.address)
        workers = client.scheduler_info()["workers"]
        self.n_workers = len(workers)
        self.process_queue = Queue(client=client, maxsize=self.n_workers)
        self.result_queue = Queue(client=client)
        return client
Ejemplo n.º 8
0
def pool_broadcast(client: Client, action: Any, *args: List[Any],
                   **kwargs: Dict[str, Any]):
    """Call ``action(*args, **kwargs)`` on every worker thread.

    One task is submitted per worker thread reported by the client. The call
    blocks until every task has finished, so it is meant to be used on an
    idle pool; on a busy pool it waits for the active tasks first.

    Every task is held back until all tasks have started:

    1. The task reports its token on the first queue (non-blocking ``put``)
    2. The task performs the action
    3. The task waits on the second queue (blocking ``get``)
    4. The task finishes

    The caller collects one token per task from the first queue and only
    then feeds the second queue, releasing all tasks.

    :param client: Dask client object
    :param action: Callable `action(*args, **kwargs)`
    :param args: Ordered arguments to action
    :param kwargs: Named arguments to action
    :returns: list with the result of each submitted task

    """
    # Random suffix keeps queue names and task keys unique per broadcast.
    suffix = "-{:02x}".format(randint(0, 1 << 64))
    n_threads = sum(client.ncores().values())
    started = Queue("q1" + suffix, client=client, maxsize=n_threads)
    release = Queue("q2" + suffix, client=client, maxsize=n_threads)

    futures = []
    for idx in range(n_threads):
        task_key = "broadcast_action_{:04d}{}".format(idx, suffix)
        futures.append(
            client.submit(_bcast_action, started, release, idx, action, args,
                          kwargs, key=task_key))

    # Blocking: one token is expected from every task.
    seen = {started.get() for _ in range(n_threads)}
    assert len(seen) == n_threads

    # Every task has launched; hand out one release item per task
    # (should not block, the queue holds n_threads items).
    for idx in range(n_threads):
        release.put(idx)

    # Block until all tasks are done and gather their results in order.
    results = []
    for future in futures:
        results.append(future.result())
    return results
Ejemplo n.º 9
0
class Praetor:
    """Publishes flow, flow-run and task-run objects on the ``"praetor"`` queue."""

    def __init__(self, dask=None, client=None, logger=None):
        """Pick a client and a logger, then open the ``"praetor"`` queue.

        :param dask: Scheduler address used to build a client when ``client``
            is not given; ``"tcp://localhost:8786"`` if both are omitted.
        :param client: Existing client to use; when given, ``dask`` is ignored.
        :param logger: Logger stored on the instance; the ``"praetor"``
            logger is used when omitted.
        """
        if client is None:
            address = "tcp://localhost:8786" if dask is None else dask
            client = Client(address=address)
        self.logger = logging.getLogger("praetor") if logger is None else logger
        self.queue = Queue("praetor", client=client)

    def obj_to_message(self, obj):
        """Build the queue message for ``obj``.

        :param obj: Object whose class name is one of ``NaiveFlow``,
            ``NaiveFlowRun`` or ``NaiveTaskRun`` and which provides ``dict()``.
        :returns: dict with keys ``cls``, ``endpoint`` and ``obj``.
        :raises KeyError: if the class name has no endpoint mapping.
        """
        endpoints = {
            "NaiveFlow": "flows/",
            "NaiveFlowRun": "flow_runs/",
            "NaiveTaskRun": "task_runs/",
        }
        cls_name = obj.__class__.__name__
        endpoint = endpoints[cls_name]
        return {"cls": cls_name, "endpoint": endpoint, "obj": obj.dict()}

    def _publish(self, obj):
        """Put the message for ``obj`` on the queue with a timeout of 3."""
        self.queue.put(self.obj_to_message(obj), timeout=3)

    def post_flow(self, flow: NaiveFlow):
        """Publish ``flow``."""
        self._publish(flow)

    def shutdown_flow(self, flow: NaiveFlow):
        """Mark ``flow`` as offline (mutates it) and publish it."""
        flow.is_online = False
        self._publish(flow)

    def post_flow_run(self, flow_run: NaiveFlowRun):
        """Publish ``flow_run``."""
        self._publish(flow_run)

    def post_task_run(self, task_run: NaiveTaskRun):
        """Publish ``task_run``."""
        self._publish(task_run)

    def close(self):
        """Close the queue, then the client attached to it."""
        queue = self.queue
        queue.close()
        queue.client.close()
Ejemplo n.º 10
0
 def get_data(self, as_list=False):
     """Build one array per entry of the description read from the queue.

     The description is taken from the queue named ``"Arrays"`` and stored in
     ``self.arrays_desc``. Each entry is handed to ``self.create_array`` or,
     when ``as_list`` is true, to ``self.create_array_list``. Afterwards the
     event named ``"Done"`` is set.

     :param as_list: Select ``create_array_list`` instead of ``create_array``.
     :returns: dict mapping each array name to what the builder returned.
     """
     arrays = {}
     self.arrays_desc = Queue("Arrays").get()
     for name in self.arrays_desc:
         desc = self.arrays_desc[name]
         # TODO: the as_list path is untested.
         build = self.create_array_list if as_list else self.create_array
         arrays[name] = build(name, desc["sizes"], desc["subsizes"],
                              desc["dtype"], desc["timedim"])
     # Barrier after the creation of all the dask arrays.
     done = Event("Done")
     done.set()
     return arrays
Ejemplo n.º 11
0
 def __init__(self, Client, Ssize, rank, arrays, deisa_arrays_dtype):
     """Select this rank's workers and normalise the array descriptions.

     :param Client: Stored as ``self.client``.
     :param Ssize: Compared against the number of workers to decide how
         workers are assigned (presumably the number of processes).
     :param rank: Index of this process; rank 0 publishes the descriptions.
     :param arrays: Mapping of array name to description; each description is
         modified in place (``dtype`` and ``timedim``).
     :param deisa_arrays_dtype: Mapping of array name to dtype.
     """
     self.client = Client
     self.rank = rank
     available = Variable("workers").get()
     n_available = len(available)
     if Ssize > n_available:
         # More processes than workers: one worker, chosen round-robin.
         self.workers = [available[rank % n_available]]
     else:
         # More workers than processes: a contiguous slice per rank.
         per_rank = n_available // Ssize
         first = rank * per_rank
         self.workers = available[first:first + per_rank]
     self.arrays = arrays
     for name in self.arrays:
         desc = self.arrays[name]
         desc["dtype"] = str(deisa_arrays_dtype[name])
         desc["timedim"] = desc["timedim"][0]
         ndim = len(np.array(desc["sizes"]))
         # NOTE(review): self.position is overwritten on every iteration, so
         # only the last array's position survives -- confirm this is intended.
         self.position = [
             desc["starts"][i] // desc["subsizes"][i] for i in range(ndim)
         ]
     if rank == 0:
         # If and only if I have a perfect domain decomposition
         Queue("Arrays").put(self.arrays)
    def calculate_small_parsimony(inq, outq, stopiter, treefile, matfile, bootstrap_replicates, row_index, iolock, verbose=False):
        """Worker task: run the two small-parsimony passes on queued columns.

        Items are read from the distributed queue named ``'inq'``; for each
        bootstrap replicate one tuple
        ``(col, eventdict, AAeventindex, AAeventypes)`` is put on the queue
        named ``'outq'``.

        Several names used here are not defined in this function and must be
        provided by an enclosing scope: ``allowed_symbols``, ``bootstrap``,
        ``process_node_smallpars_1``, ``process_node_smallpars_2``,
        ``save_mats``, ``runName``, ``AA_mutation``, ``nucleotide_mutation``,
        ``brake`` and ``aln_column``.

        :param inq: Ignored; rebound to ``Queue('inq')``.
        :param outq: Ignored; rebound to ``Queue('outq')``.
        :param stopiter: Stop flag compared against ``False`` by the main loop.
        :param treefile: Path of a Newick tree file.
        :param matfile: Path of an HDF5 file with a ``'MSA2array'`` dataset.
        :param bootstrap_replicates: Number of replicates run per queue item.
        :param row_index: Maps a taxon label to its row in the alignment.
        :param iolock: Lock held while printing diagnostics.
        :param verbose: Print a line for every leaf missing from ``row_index``.
        :returns: None
        """
        inq = Queue('inq')
        outq = Queue('outq')
        # Was never initialised, so ``count += 1`` raised UnboundLocalError.
        count = 0
        #setup the tree and matrix for each worker
        with h5py.File(matfile) as hf:
            align_array = hf['MSA2array']
            missing = 0
            sys.setrecursionlimit(10 ** 8)
            t = dendropy.Tree.get(
                    path=treefile,
                    schema='newick')
            #init the blank tree
            for i, n in enumerate(t.nodes()):
                n.matrow = i
                n.symbols = None
                n.scores = None
                n.event = None
                n.char = None
                n.eventype = None
                n.AAevent = 0

            for l in t.leaf_nodes():
                l.event = {}
                l.scores = {}
                l.symbols = {}
                l.char = {}
                l.calc = {}

            #work on a fresh tree each time
            # NOTE(review): if ``stopiter`` is a distributed Variable object,
            # ``stopiter == False`` never holds and only the queue size
            # drives this loop -- confirm whether ``stopiter.get()`` was meant.
            while stopiter == False or inq.qsize() > 0:
                # Renamed from ``pos`` so the inner loop index no longer
                # shadows the sequence it iterates over.
                codon, columns = inq.get()

                #repeat here for bootstrap
                # Fixed typo: the parameter is ``bootstrap_replicates``.
                for _ in range(bootstrap_replicates):
                    #select portion of random genomes to take out
                    if bootstrap_replicates > 1:
                        del_genomes = set(np.random.randint(align_array.shape[0], size=int(align_array.shape[0] * bootstrap)))
                    else:
                        del_genomes = set()

                    #change a subset of leaves to ambiguous characters
                    for pos, col in enumerate(columns):
                        for l in t.leaf_nodes():
                            # ``type(col[1]) is not None`` was always true,
                            # which made the else branch unreachable.
                            if col[1] is not None:
                                #column has no events
                                l.calc[pos] = False
                                char = col[1]
                                l.event[pos] = 0
                                l.scores[pos] = {c: 10 ** 10 for c in allowed_symbols}
                                if char.upper() in allowed_symbols:
                                    l.symbols[pos] = {char}
                                    l.scores[pos][char] = 0
                                else:
                                    #ambiguous leaf
                                    l.symbols[pos] = allowed_symbols
                            else:
                                #setup for small_pars1
                                l.calc[pos] = True
                                l.event[pos] = 0
                                l.scores[pos] = {c: 10 ** 10 for c in allowed_symbols}
                                taxon = str(l.taxon).replace("'", '')
                                if taxon in row_index:
                                    char = align_array[row_index[taxon], col[0]]
                                    if char.upper() in allowed_symbols:
                                        l.symbols[pos] = {char}
                                        l.scores[pos][char] = 0
                                    else:
                                        # Covers both the original
                                        # ``col[0] in del_genomes`` case and
                                        # the ambiguous-leaf case: they
                                        # assigned the same value.
                                        l.symbols[pos] = allowed_symbols
                                else:
                                    missing += 1
                                    char = None
                                    l.symbols[pos] = allowed_symbols
                                    if verbose == True:
                                        iolock.acquire()
                                        try:
                                            # NOTE(review): ``aln_column`` is
                                            # not defined in this function.
                                            print('err ! alncol: ', l.taxon, aln_column)
                                        finally:
                                            iolock.release()
                                l.char[pos] = min(l.scores[pos], key=l.scores[pos].get)
                    #done tree init
                    #up
                    process_node_smallpars_1(t.seed_node)
                    #down
                    process_node_smallpars_2(t.seed_node)
                    #collect events
                    eventdict = {}
                    for pos in [0, 1, 2]:
                        eventindex = [n.matrow for n in t.nodes() if n.event[pos] > 0]
                        eventtypes = [n.eventype[pos] for n in t.nodes() if n.event[pos] > 0]
                        eventdict[pos] = {'type': eventtypes, 'index': eventindex}
                    AAeventindex = [n.matrow for n in t.nodes() if n.AAevent]
                    AAeventypes = [n.AAevent for n in t.nodes() if n.AAevent]
                    # A queue put takes a single value, so the four results
                    # are sent as one tuple.
                    # NOTE(review): ``col`` is the last column of the loop
                    # above; ``codon`` may have been intended -- confirm.
                    outq.put((col, eventdict, AAeventindex, AAeventypes))
                count += 1
        print('FINAL SAVE !')
        save_mats(count, runName, AA_mutation, nucleotide_mutation)
        print('DONE ! ')
        brake.set(False)
        return None

    ####### start the Sankoff algorithm here #######################
    print('starting sankof')
    # Scatter the row index to the cluster so the submitted tasks share it.
    # (Scattering the blank tree as well is currently disabled.)
    #remote_tree = client.scatter(tree)

    remote_index = client.scatter(IDindex)

    # Named distributed queues and lock; the worker function re-opens the
    # queues under the same names 'inq' and 'outq'.
    inq = Queue('inq')
    outq = Queue('outq')
    lock = Lock('x')

    # NOTE(review): the first positional argument of a distributed Variable
    # is presumably its name, not its initial value -- confirm that these
    # should not be created first and then assigned with ``.set(...)``.
    stopiter = Variable(False)
    brake = Variable(True)


    # Flags initialised here; their use is outside the visible code.
    saver_started = False
    workers_started = False

    # Start workers: one fire-and-forget task per core (NCORE * ncpu).
    # The queues are passed as None; only the stop flag, file paths, the
    # scattered index and the lock are handed over.
    for workers in range(NCORE*ncpu ):
        w = client.submit(  calculate_small_parsimony , inq= None ,outq = None  ,stopiter= stopiter ,  treefile=treefile , bootstrap_replicates = bootstrap_replicates,
        matfile= alnfile+'.h5' ,  row_index= remote_index , iolock = lock, verbose  = False  )
        fire_and_forget(w)
Ejemplo n.º 14
0
                        AA_mutation  = sparseND.COO( coords =  (AAeventindex , np.ones(len(AAeventindex)) * column , AAeventypes ) , data = np.ones(len(AAeventindex)  ,  ) , shape = (matsize[0] , matsize[1] ,len(transitiondict_AA ) )   ,  dtype = np.int32 )
                count +=1
        print('FINAL SAVE !')
        save_mats(count, runName, AA_mutation,nucleotide_mutation)
        print('DONE ! ')
        return None



    #######start the sankof algo here #######################
    print('starting sankof')
    #scale cluster
    #scatter the blank tree and row index for each process
    #remote_tree = client.scatter(tree)
    remote_index = client.scatter(IDindex)
    inq = Queue()
    outq = Queue()
    lock = Lock('x')
    saver_started = False
    workers_started = False
    stopiter = Variable(False)
    #big for loop here generating the mats with futures
    for n in range(bootstrap_replicates):
        #select portion of random genomes to take out
        if bootstrap_replicates >1:
            del_genomes = np.random.randint( align_array.shape[0], size= int(align_array.shape[0]*bootstrap) )


        for annot_index,annot_row in annotation.iterrows():
            #indexing starts at 1 for blast
            #####switch to sending the coordinates and masking for the matrix
Ejemplo n.º 15
0
sys.path.append('..')
import serialio

import os
import argparse
import numpy as np
import pickle

from dask.distributed import Client, Queue

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.callbacks import ModelCheckpoint

# Module-level Dask client, created with processes=False.
dask_client = Client(processes=False)  # use threads
# Module-level queue; per the original author it is there to make training
# run sequentially (the code that uses it is not part of this excerpt).
dask_queue = Queue()  # ensure that training is executed sequentially

# Global model checkpoint accessed from the queue.
# Kept as module globals as a workaround for a pickling issue in TF 2.0:
#  https://github.com/tensorflow/tensorflow/issues/33283
# Both start out as None; where they are assigned is not shown here.
checkpoint = None
save_model = None


def load_model():
    """Manually create a model and load its weights.

    This is done instead of calling ``tensorflow.keras.load_model()``, as a
    workaround for a pickling issue in TF 2.0:
    https://github.com/tensorflow/tensorflow/issues/33283
    """
Ejemplo n.º 16
0
 def __init__(self, value):
     """Create the backing queue and pre-load it with ``value`` permits."""
     self._queue = Queue()
     for _ in range(value):
         # One queued item stands for one available permit.
         self._queue.put(1)
Ejemplo n.º 17
0
 def __init__(self, address=None, client=None):
     """Open the ``"praetor"`` queue on a client.

     :param address: Address passed to ``Client``; when given it takes
         precedence over ``client``.
     :param client: Existing client, used only when ``address`` is None.
     :raises ValueError: if both ``address`` and ``client`` are None.
     """
     # Compare against None explicitly: ``(client or address) is None``
     # would also reject a client object that merely evaluates as falsy.
     if client is None and address is None:
         raise ValueError("address or client must be provided")
     if address is not None:
         client = Client(address)
     self.queue = Queue("praetor", client=client)
Ejemplo n.º 18
0
class ClusterDaskDistributor(DistributorBaseClass):
    """Distributor that spreads calculations over a dask cluster.

    :param str address: Address of the dask-scheduler,
        e.g. ``tcp://127.0.0.1:8786``.

    """
    def __init__(self, address):
        """Store the scheduler address and create the local bookkeeping.

        No connection is opened here; the queues and the worker count are
        only created by :meth:`get_client`.

        :param address: the ip address and port number of the dask-scheduler.
        :type address: str
        """
        self.address = address
        self.future_set = set()
        self._queue_lock = Lock()

    def get_client(self):
        """Connect to the dask-scheduler at ``self.address``.

        Besides creating the client this sets ``self.n_workers`` to the
        number of workers the scheduler reports and creates two queues:
        ``self.process_queue`` (bounded to ``n_workers`` entries) and
        ``self.result_queue`` (unbounded), for running processes and results
        respectively.

        :return: the newly created client.
        :rtype: distributed.Client

        """
        from dask.distributed import Client, Queue

        client = Client(address=self.address)
        workers = client.scheduler_info()["workers"]
        self.n_workers = len(workers)
        self.process_queue = Queue(client=client, maxsize=self.n_workers)
        self.result_queue = Queue(client=client)
        return client

    def get_worker_count(self):
        """Return the worker count recorded by :meth:`get_client`.

        :return: the number of workers.
        :rtype: int

        """
        return self.n_workers

    def update_queues(self):
        """Move every finished future from the pending set to the result queue.

        For each finished future its ``(pid, result)`` pair is put on the
        result queue and one entry is taken off the process queue. The whole
        update runs under ``self._queue_lock``.
        """
        with self._queue_lock:
            completed = set()
            for entry in self.future_set:
                pid, future = entry
                if not future.done():
                    continue
                self.result_queue.put((pid, future.result()))
                # Frees one slot; the entry removed is whichever is next in
                # the queue, not necessarily this pid.
                self.process_queue.get()
                completed.add(entry)
            self.future_set.difference_update(completed)

    def result_queue_empty(self):
        """Refresh the queues and report whether no result is waiting.

        :return: True if the result queue is empty.
        :rtype: bool

        """
        self.update_queues()
        pending_results = self.result_queue.qsize()
        return pending_results == 0

    def result_queue_get(self):
        """Refresh the queues and pop the first ``(pid, result)`` pair.

        :return: first (pid, result) pair in the result queue, or
            ``(None, None)`` when the queue is empty.
        :rtype: (str or int or None, a user-defined result or None)

        """
        self.update_queues()
        if self.result_queue.qsize() == 0:
            return None, None
        pid, result = self.result_queue.get()
        return pid, result

    def process_queue_full(self):
        """Refresh the queues and report whether every worker slot is taken.

        :return: True if the process queue holds ``n_workers`` entries.
        :rtype: bool

        """
        self.update_queues()
        running = self.process_queue.qsize()
        return running == self.n_workers

    def process_queue_empty(self):
        """Refresh the queues and report whether nothing is running.

        :return: True if the process queue is empty.
        :rtype: bool

        """
        self.update_queues()
        running = self.process_queue.qsize()
        return running == 0

    def distribute(self, client, pid, func, kwargs):
        """Submit a calculation task to the cluster.

        The task is submitted through ``client`` as ``func(**kwargs)``. The
        ``(pid, future)`` pair is remembered in ``self.future_set`` and
        ``pid`` is put on the process queue.

        :param distributed.Client client: the target `client` to run this task.
        :param pid: unique `pid` to describe this task.
        :type pid: str or int(defined by user).
        :param func: A serializable function or callable object that
            performs the calculation.
        :type func: function or object.
        :param dict kwargs: Parameters of `func`.

        """
        future = client.submit(func, **kwargs)
        self.future_set.add((pid, future))
        self.process_queue.put(pid)

    def close(self, client):
        """Close the given client.

        :param distributed.Client client: the target `client` to close.

        """
        client.close()

    def join(self):
        """Poll every 0.1 s until the process queue is empty."""
        while True:
            if self.process_queue_empty():
                return
            time.sleep(0.1)
Ejemplo n.º 19
0
 def __init__(self, value, remote=None):
     """Create the backing queue on ``remote`` and pre-load ``value`` permits.

     :param value: Number of permits initially available.
     :param remote: Passed to the queue as its ``client`` argument.
     """
     self._queue = Queue(client=remote)
     for _ in range(value):
         # One queued item stands for one available permit.
         self._queue.put(1)
Ejemplo n.º 20
0
 def __init__(self, address):
     """Connect to ``address`` and open the queue before base-class init.

     :param address: Address passed to ``Client``.
     """
     self.queue = Queue("praetor", client=Client(address))
     super().__init__()
Ejemplo n.º 21
0
 def __init__(self):
     """Create the report queue, the continue-semaphore and the report timer."""
     # Queue created with default arguments.
     self._queue = Queue()
     # Semaphore starts with zero permits.
     self._continue_semaphore = DistSemaphore(0)
     # Timestamp taken at construction.
     self._last_report_time = time.time()
Ejemplo n.º 22
0
 def __init__(self, remote=None):
     """Create the report queue, the continue-semaphore and the report timer.

     :param remote: Passed to the queue as its ``client`` argument.
     """
     self._queue = Queue(client=remote)
     # Semaphore starts with zero permits.
     # NOTE(review): ``remote`` is not forwarded to the semaphore -- confirm
     # whether DistSemaphore should be created on the same client.
     self._continue_semaphore = DistSemaphore(0)
     # Timestamp taken at construction.
     self._last_report_time = time.time()