Example #1
    def save_executed_notebook(self, builderSelf):
        error_results = []

        builderSelf.dask_log[
            'scheduler_info'] = builderSelf.client.scheduler_info()
        builderSelf.dask_log['futures'] = []

        ## create an instance of the converter class if the config option is set
        if (builderSelf.config['jupyter_generate_html']):
            builderSelf._convert_class = convertToHtmlWriter(builderSelf)

        # this for loop gathers results in the background
        total_count = len(builderSelf.futures)
        count = 0
        update_count_delayed = 1
        for future, nb in as_completed(builderSelf.futures,
                                       with_results=True,
                                       raise_errors=False):
            count += 1
            builderSelf._execute_notebook_class.check_execution_completion(
                builderSelf, future, nb, error_results, count, total_count,
                'futures')

        for future, nb in as_completed(builderSelf.delayed_futures,
                                       with_results=True,
                                       raise_errors=False):
            count += 1
            if update_count_delayed == 1:
                update_count_delayed = 0
                total_count += len(builderSelf.delayed_futures)
            builderSelf._execute_notebook_class.check_execution_completion(
                builderSelf, future, nb, error_results, count, total_count,
                'delayed_futures')

        return error_results
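
A minimal sketch of the same error-tolerant gathering pattern used above, assuming an already-connected dask.distributed client; gather_with_errors, func and inputs are hypothetical names. With with_results=True and raise_errors=False the loop keeps running when a task fails, and the failure can be inspected on the future itself.

from dask.distributed import as_completed

def gather_with_errors(client, func, inputs):
    # one task per input element
    futures = client.map(func, inputs)
    ok, failed = [], []
    # raise_errors=False keeps the loop alive when a task raises
    for future, result in as_completed(futures, with_results=True,
                                       raise_errors=False):
        if future.status == 'error':
            failed.append(future.exception())
        else:
            ok.append(result)
    return ok, failed
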
Example #2
 def run_job(self, job):
     futures = [
         self.client.submit(task, workers=task.get_locations())
         for task in job.get_tasks()
     ]
     for future, result in dd.as_completed(futures, with_results=True):
         yield result
Example #3
def process_futures_into_list(future_list):
    """
    Take a list of futures and turn them into a list of results
    Results must be of the form i, data (where i is the output order)
    :param future_list: list(Futures)
    :return output_list: list(Data)
    """

    DaskController = MPControl.client
    output_list = [None] * len(future_list)
    complete_gen = distributed.as_completed(future_list)

    for finished_future in complete_gen:

        # Jobs can be cancelled in certain situations
        if finished_future.cancelled():
            # Restart cancelled futures and put them back into the work pile
            DaskController.client.retry(finished_future)
            complete_gen.update([finished_future])

        # More likely is jobs erroring as a result of cluster instability
        elif finished_future.status == "error":
            error = finished_future.exception()
            utils.Debug.vprint("Restarting job (Error: {er})".format(er=error),
                               level=1)
            # Restart errored futures and put them back into the work pile
            DaskController.client.retry(finished_future)
            complete_gen.update([finished_future])

        # In the event of success, get the data
        else:
            i, result_data = finished_future.result()
            output_list[i] = result_data

    return output_list
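
The retry logic above, stripped down to a hypothetical sketch (with no retry cap, unlike careful production code): cancelled or errored futures are resubmitted with Client.retry and pushed back into the iterator with update().

import distributed

def run_with_retries(client, futures):
    # an as_completed object accepts new futures while it is being iterated
    pending = distributed.as_completed(futures)
    results = []
    for future in pending:
        if future.cancelled() or future.status == 'error':
            client.retry([future])      # resubmit the same future ...
            pending.update([future])    # ... and keep watching it
        else:
            results.append(future.result())
    return results
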
Example #4
def distributed_main():
    """
    Create a large 2D numpy array, do some expensive computation on every element ***IN PARALLEL***,
    return the sum.
    """
    two_d_array = np.random.rand(10000, Y_DIM)

    # Split the large array into smaller arrays along the Y axis
    # Submit each smaller array as a job
    futures = []
    for i in range(NUM_JOBS):
        start = (i * Y_DIM) // NUM_JOBS
        end = ((i + 1) * Y_DIM) // NUM_JOBS
        print([start, end])
        # Sends lots of data over the network to each worker
        future = client.submit(parallel_func, two_d_array[:, start:end])
        futures.append(future)

    progress(futures)

    total = 0
    for future in as_completed(futures):
        total += future.result()

    print(total)
    return total
Example #5
    def main(self, gmrecords):
        """Process data using steps defined in configuration file.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        # get the process tag from the user or define by current datetime
        self.process_tag = (gmrecords.args.label
                            or datetime.utcnow().strftime(TAG_FMT))
        logging.info('Processing tag: %s' % self.process_tag)

        if gmrecords.args.num_processes:
            # parallelize processing on events
            try:
                client = Client(n_workers=gmrecords.args.num_processes)
            except BaseException as ex:
                print(ex)
                print("Could not create a dask client.")
                print("To turn off paralleization, use '--num-processes 0'.")
                sys.exit(1)
            futures = client.map(self._process_event, self.events)
            for result in as_completed(futures, with_results=True):
                print(result)
                # print('Completed event: %s' % result)
        else:
            for event in self.events:
                self._process_event(event)

        self._summarize_files_created()
Example #6
    async def pick_frame(self, dataset_uuid, x, y):
        ds = self.data.get_dataset(dataset_uuid)
        x = int(x)
        y = int(y)
        slice_ = Slice(origin=(y, x, 0, 0),
                       shape=(1, 1, ds.shape[2], ds.shape[3]))
        job = PickFrameJob(dataset=ds, slice_=slice_)

        executor = self.data.get_executor()

        log.info("picking %d/%d from %s", x, y, dataset_uuid)

        futures = []
        for task in job.get_tasks():
            submit_kwargs = {}
            futures.append(executor.client.submit(task, **submit_kwargs))

        full_result = np.zeros(shape=ds.shape[2:])
        async for future, result in dd.as_completed(futures,
                                                    with_results=True):
            for tile in result:
                tile.copy_to_result(full_result)
        log.info("picking done, encoding image (dtype=%s)", full_result.dtype)
        image = await run_blocking(
            _encode_image,
            full_result,
            colormap=cm.gist_earth,
            save_kwargs={'format': 'png'},
        )
        log.info("image encoded, sending response")
        return image.read()
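
In a coroutine such as the handler above, dd.as_completed can be consumed with async for. A minimal sketch, assuming each task is a callable returning an array that broadcasts against out_shape (sum_partial_results is a hypothetical name):

import numpy as np
import dask.distributed as dd

async def sum_partial_results(client, tasks, out_shape):
    futures = [client.submit(task) for task in tasks]
    total = np.zeros(out_shape)
    # fold partial results in completion order
    async for future, result in dd.as_completed(futures, with_results=True):
        total += result
    return total
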
Example #7
    def main(self, gmrecords):
        """
        Assemble data and organize it into an ASDF file.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)
        self.gmrecords = gmrecords

        self._get_events()
        print(self.events)

        logging.info('Number of events to assemble: %s' % len(self.events))

        if gmrecords.args.num_processes:
            # parallelize processing on events
            try:
                client = Client(n_workers=gmrecords.args.num_processes)
            except BaseException as ex:
                print(ex)
                print("Could not create a dask client.")
                print("To turn off paralleization, use '--num-processes 0'.")
                sys.exit(1)
            futures = client.map(self._assemble_event, self.events)
            for result in as_completed(futures, with_results=True):
                print(result)
                # print('Completed event: %s' % result)
        else:
            for event in self.events:
                self._assemble_event(event)

        self._summarize_files_created()
Example #8
    def run_tasks(self, tasks, cancel_id):
        tasks = list(tasks)
        tasks_wrapped = []

        def _id_to_task(task_id):
            return tasks[task_id]

        for idx, orig_task in enumerate(tasks):
            tasks_wrapped.append(TaskProxy(orig_task, idx))

        futures = self._get_futures(tasks_wrapped)
        self._futures[cancel_id] = futures

        try:
            for future, result_wrap in dd.as_completed(futures,
                                                       with_results=True):
                if future.cancelled():
                    del self._futures[cancel_id]
                    raise JobCancelledError()
                result = result_wrap['task_result']
                task = _id_to_task(result_wrap['task_id'])
                yield result, task
        finally:
            if cancel_id in self._futures:
                del self._futures[cancel_id]
Example #9
 def as_completed(self, drain=True):
     """Emit submitted jobs as completed,
     drain all from the work queue if specified"""
     if drain:
         while self.primed:
             self.submit()
     yield from map(lambda fut: fut.result(), as_completed(self.__running))
Example #10
    def main(self, gmrecords):
        """Compute waveform metrics.

        Args:
            gmrecords:
                GMrecordsApp instance.
        """
        logging.info('Running subcommand \'%s\'' % self.command_name)

        self.gmrecords = gmrecords
        self._get_events()

        if gmrecords.args.num_processes:
            # parallelize processing on events
            try:
                client = Client(n_workers=gmrecords.args.num_processes)
            except BaseException as ex:
                print(ex)
                print("Could not create a dask client.")
                print("To turn off paralleization, use '--num-processes 0'.")
                sys.exit(1)
            futures = client.map(self._compute_event_waveforms, self.events)
            for result in as_completed(futures, with_results=True):
                print(result)
        else:
            for event in self.events:
                self._compute_event_waveforms(event)

        self._summarize_files_created()
Example #11
 def _iter_dask(self):
     safefunc = functools.partial(safely_call, self.task_func)
     allargs = list(self._genargs())
     yield len(allargs)
     cl = self.dask_client
     for fut in as_completed(cl.map(safefunc, cl.scatter(allargs))):
         yield fut.result()
Example #12
 def norm(self, N=2):
     """Function to compute vector N-norm"""
     norms = self.client.map(_call_norm, self.vecDask, N=N, pure=False)
     norm = 0.0
     for future, result in daskD.as_completed(norms, with_results=True):
         norm += np.power(np.float64(result), N)
     return np.power(norm, 1. / N)
Example #13
    async def get_preview_image(self, dataset_uuid):
        ds = self.data.get_dataset(dataset_uuid)
        job = SumFramesJob(dataset=ds)

        executor = self.data.get_executor()

        log.info("creating preview for dataset %s" % dataset_uuid)

        futures = []
        for task in job.get_tasks():
            submit_kwargs = {}
            futures.append(executor.client.submit(task, **submit_kwargs))
        log.info("preview futures created")

        full_result = np.zeros(shape=ds.shape[2:])
        async for future, result in dd.as_completed(futures,
                                                    with_results=True):
            for tile in result:
                tile.copy_to_result(full_result)
        log.info("preview done, encoding image (dtype=%s)", full_result.dtype)
        image = await run_blocking(
            _encode_image,
            full_result,
            colormap=cm.gist_earth,
            save_kwargs={'format': 'png'},
        )
        log.info("image encoded, sending response")
        return image.read()
Example #14
    def collect_results(self, **kwa):
        """ collect (and log) results as they become available
            (this will block)
        """

        if kwa.get('all'):
            self.logger.info('collect all results')
            futures = as_completed(self.futures)
        else:
            self.logger.info('collect already done results only')
            futures = filter(lambda f: f.done(), self.futures)

        # for xi, future in enumerate (as_completed (self.futures)):
        for xi, future in enumerate(futures):
            self.results_collected += 1
            result = future.result()
            key = future.key
            # future.cancel()

            self.logger.debug('[{xi}] future {key} yielded {result}'.format(
                xi=xi, key=key, result=result))

            self.results.append(dict(
                index=xi,
                result=result,
            ))

        self.log_status('collect_results')
Example #15
 def min(self):
     """Function to obtain minimum value within a vector"""
     mins = self.client.map(_call_min, self.vecDask, pure=False)
     min_val = np.inf
     for future, result in daskD.as_completed(mins, with_results=True):
         if result < min_val:
             min_val = result
     return min_val
Example #16
 def max(self):
     """Function to obtain maximum value within a vector"""
     maxs = self.client.map(_call_max, self.vecDask, pure=False)
     max_val = -np.inf
     for future, result in daskD.as_completed(maxs, with_results=True):
         if result > max_val:
             max_val = result
     return max_val
Example #17
 def run_tasks(self, tasks, cancel_id):
     futures = self._get_futures(tasks)
     self._futures[cancel_id] = futures
     for future, result in dd.as_completed(futures, with_results=True):
         if future.cancelled():
             raise JobCancelledError()
         yield result
     del self._futures[cancel_id]
Example #18
def task_result_thread(ntasks, futures, runs):
    run_map = dict(zip(futures, runs))
    for i, cf in enumerate(as_completed(futures)):
        run = run_map[cf]
        status, addr, elap, loadavg = cf.result()
        if status == 'canceled':
            continue
        logging.info('{:>3}/{:<3} {:<9} {:<27} {:<22} {:4.1f}s {}'.format(
            i + 1, ntasks, status, addr, format_loadavg(loadavg), elap, run))
Example #19
 def run_job(self, job):
     futures = []
     for task in job.get_tasks():
         submit_kwargs = {}
         if not self.is_local:
             submit_kwargs['workers'] = task.get_locations()
         futures.append(self.client.submit(task, **submit_kwargs))
     for future, result in dd.as_completed(futures, with_results=True):
         yield result
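
Examples #2 and #19 restrict where a task may run. A hypothetical sketch of the same affinity idea with a single submit call; submit_with_affinity, locations and is_local are assumed names:

def submit_with_affinity(client, task, locations, is_local):
    # on a real cluster, pin the task to the workers holding its data;
    # on a local executor the hint adds nothing, so it is skipped
    submit_kwargs = {}
    if not is_local and locations:
        submit_kwargs['workers'] = locations  # e.g. ['tcp://10.0.0.5:39217']
        # optional: let the scheduler fall back if those workers are busy
        submit_kwargs['allow_other_workers'] = True
    return client.submit(task, **submit_kwargs)
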
Example #20
    def results(self):
        """Blocks until complete"""
        # submit remaining jobs
        while self.primed:
            self.submit()
        for fut in as_completed(self.__running):
            self.__results.append(fut.result())

        return self.__results
Example #21
def batch_submit(
    func,
    *iterables,
    batch_size=None,
    return_results=False,
    raise_error=False,
    **kwargs,
):
    if not all_equal(len(iterable) for iterable in iterables):
        raise ValueError("iterables do not all have the same length")

    with get_client(auto_spawn=False) as client:
        batch_size = batch_size if batch_size is not None else len(
            client.ncores())
        logger.debug(f"batch submission, size={batch_size}")

        # jump start
        iterables = zip(*iterables)
        futures = []
        for i in range(batch_size):
            try:
                futures.append(client.submit(func, *next(iterables), **kwargs))
            except StopIteration:
                logger.warning(
                    f"batch size ({batch_size}) is larger than number of iterable elements"
                )
                break

        if return_results:
            results = []
        queue = as_completed(futures, with_results=False)
        while queue.count():
            for batches in queue.batches():
                n = len(batches)
                for future in batches:
                    try:
                        result = future.result()
                    except Exception as err:
                        if raise_error:
                            raise
                        elif return_results:
                            result = err

                    if return_results:
                        results.append(result)
                    del future  # release the future

                # submit new task if there is any
                for i in range(n):
                    try:
                        queue.add(
                            client.submit(func, *next(iterables), **kwargs))
                    except StopIteration:
                        break
        if return_results:
            return results
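
The batching idea above, reduced to a hypothetical sketch without the resubmission logic: batches() wakes the consumer once per round of completions instead of once per future, which is cheaper when many small tasks finish close together.

from dask.distributed import as_completed

def drain_in_batches(futures):
    results = []
    for batch in as_completed(futures).batches():
        # everything that finished since the last wake-up arrives as one list
        results.extend(future.result() for future in batch)
    return results
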
Example #22
def collect_output(futures, output_path, **kwargs):
    '''
    Collects the output from the list of futures and merges them into a dataframe.
    The Dataframe will then be written to a file as specified by the output_path.
    The dataframe df_started_runs is joined with the job outputs to get the real ontime.
    '''
    if output_path is not None:
        logger.info(
            'Concatenating results from each job into {}'.format(output_path))

    n_success = 0
    n_total = 0

    result_iterator = as_completed(futures,
                                   with_results=True,
                                   raise_errors=False)
    with Writer(output_path) as writer:
        for (future, result) in result_iterator:

            if isinstance(result, tuple):
                exc_type, exc_value, tb = result
                logger.error('Exception running job: {}'.format(result[1]))
                logger.error('\n'.join(format_tb(tb)))
                continue

            if not result['success']:
                logger.error('Job errored with reason "{}"'.format(
                    result["reason"]))
                continue

            n_success += 1
            events = result.get('events')

            if events is None:
                output = result['outputfile']
                logger.info('Job wrote output to local file {}'.format(output))
                continue

            n_events = len(events)
            logger.info('There are {} events in the result'.format(n_events))

            if n_events == 0:
                continue

            n_total += n_events

            events.columns = rename_columns(events.columns)
            add_theta_deg_columns(events)
            writer.append(events)

            logger.info('Result written successfully')

    if output_path is not None:
        logger.info(
            'Wrote a total of {} events from {} successful runs to {}'.format(
                n_total, n_success, output_path))
Example #23
def main():
    args = parse_args()
    logging.info(args)
    cluster = init_cluster(args)
    client = Client(cluster)
    future_list = client.map(dummy_function, range(args.n_jobs))
    logging.info(cluster.job_script())
    for future in as_completed(future_list):
        exception = future.exception()
        traceback.print_exception(type(exception), exception, future.traceback())
Example #24
    def run_search(self):
        syncWarmupFlag = self.HPOConfig['asyncWarmupFlag']
        startTime = time.time()

        if syncWarmupFlag:
            print('sync warmup')
            super().run_search(asyncInitializeFlag=True)
            print('sync warmup complete\n')
            print('continuing with async search')
        else:
            self.reset_swarm()
            self.scatter_data_to_workers()
            self.build_initial_particles()

        futureEvalParticles = self.client.compute(self.delayedEvalParticles)
        particleFutureSeq = as_completed(futureEvalParticles)

        # particleFutureSeq is an iterator of futures, to which we append newly updated particles
        for particleFuture in particleFutureSeq:
            testDataPerf, trainDataPerf, pID, nTrees, evalTime = \
                particleFuture.result()

            self.log_particle_history(testDataPerf, trainDataPerf, pID, nTrees,
                                      evalTime)
            self.update_particle(testDataPerf,
                                 pID,
                                 nTrees,
                                 evalTime,
                                 wExplore=0)

            self.swarmEvals += 1

            # termination condition
            approximateEpoch = self.swarmEvals // self.nParticles
            if approximateEpoch > self.nEpochs: break

            # create delayed evaluations for newly updated particles
            delayedParticle = delayed(evaluate_particle)(
                self.scatteredDataFutures,
                self.particles[pID].pos,
                self.paramRanges,
                self.particles[pID].pID,
                self.dataset.trainObjective,
                cpuFlag=self.cpuFlag)

            futureParticle = self.client.compute(delayedParticle)
            particleFutureSeq.add(futureParticle)

            # print progress update via approximate epoch
            if self.swarmEvals % self.nParticles == 0:
                print(f'> async epoch {approximateEpoch} of {self.nEpochs}')

        self.elapsedTime = time.time() - startTime
        self.report_final_params()
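
The same grow-the-queue pattern in a generic, hypothetical form (propose and evaluate are placeholders): keep one evaluation in flight per worker, and let every finished result trigger submission of the next candidate until the budget is spent.

from dask.distributed import as_completed

def run_search_loop(client, propose, evaluate, n_workers, max_evals):
    seq = as_completed([client.submit(evaluate, propose(), pure=False)
                        for _ in range(n_workers)])
    best = None
    for n_done, future in enumerate(seq, start=1):
        score = future.result()
        best = score if best is None else max(best, score)
        if n_done >= max_evals:
            break
        # feed the iterator with a new evaluation for the freed worker
        seq.add(client.submit(evaluate, propose(), pure=False))
    return best
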
Example #25
File: train.py  Project: wx-b/rlbc
def train_mcts(mcts_dir, num_transforms, train, dataset, model, trainset_dir,
               evalset_dir, max_demos_train, max_demos_eval, cluster, num_gpus,
               exploration_cst, score_name, resume, **unused_kwargs):
    # run all the transformations and save them in <mcts_dir>/transformations
    utils.test_transformations(train, dataset, model, trainset_dir,
                               evalset_dir, mcts_dir)

    print('Initializing GPU workers...')
    client = cluster_utils.make_client(cluster,
                                       'gpu',
                                       num_gpus,
                                       mcts_dir,
                                       no_nanny=True)

    print('Starting the training...')
    mc_tree = make_tree(mcts_dir, num_transforms, exploration_cst, score_name,
                        resume)
    iter_start = mc_tree.iterations

    print('MCTS of depth {} and exploration constant {}'.format(
        num_transforms, exploration_cst))

    args_worker = [
        train, dataset, model, trainset_dir, evalset_dir, mcts_dir,
        max_demos_train, max_demos_eval
    ]
    args_workers_all = [[arg] * num_gpus for arg in [mc_tree] + args_worker]
    # log_ids will be the list of iter_mcts as well (in the beginning)
    log_ids = list(range(iter_start, iter_start + num_gpus))
    futures = client.map(get_score, *args_workers_all, log_ids, log_ids)
    jobs_queue = distributed.as_completed(futures)

    for iter_mcts, future in enumerate(jobs_queue):
        try:
            path, policy_scores = future.result()
            mc_tree.add_path(path, policy_scores)
            mc_tree.save(mcts_dir)
            print('MCTS iteration {}'.format(iter_start + iter_mcts))
            print('\tpath {}\n\tscore {:.3f}\n\terror {:.3f}cm\n'.format(
                path, policy_scores[score_name],
                (1 - policy_scores[score_name]) * 10))
        except Exception:
            print(
                'WARNING: one of the MCTS workers died, gracefully ignoring it'
            )

        iter_mcts_future = iter_start + iter_mcts + num_gpus
        log_id = iter_mcts_future % num_gpus
        new_future = client.submit(get_score, mc_tree, *args_worker,
                                   iter_mcts_future, log_id)
        jobs_queue.add(new_future)

        if iter_mcts > 0 and iter_mcts % 10 == 0:
            utils.print_mcts_score(mc_tree)
Example #26
    def collect_result(self, futures, results, box, submission_time):
        """Compile results from completed workers and recompiles their sub outputs into the output
        for the complete box being worked on.
        :param futures: list(dask.Future), list of futures representing future dask worker calculations
        :param results: list[numpy.nd.array], arrays of the appropriate structure representing
               the final output of processed box (need to be in the same order as the function passed in
               submit_workers returns in)
        :param box: numpy.nd.array, the initial complete box being processed
        :param submission_time: time, the time of submission of the dask workers (used to determine worker
               runtimes as a performance diagnostic)
        :return: results: tuple(numpy.nd.arrays), the processed results of the box
        """
        from dask.distributed import as_completed

        num_future = 0
        for future, sub_results in as_completed(futures, with_results=True):

            # message
            num_future += 1
            sub_t = time.time() - submission_time
            print("FUTURE #{} complete. Time used: {:.0f} seconds".format(
                num_future, sub_t))

            # catch result - sub_box
            # and convert the absolute sub_box into local col/row start/end relative to the primary box
            # to assemble the result from each worker
            sub_box = sub_results[-1]
            x0, y0, x1, y1 = sub_box
            x0 -= box[0]
            x1 -= box[0]
            y0 -= box[1]
            y1 -= box[1]

            # catch result - matrices
            # and loop across all of the returned data to rebuild complete box
            for i, sub_result in enumerate(sub_results[:-1]):
                num_dim = sub_result.ndim

                if num_dim == 4:
                    results[i][:, :, y0:y1, x0:x1] = sub_result

                elif num_dim == 3:
                    results[i][:, y0:y1, x0:x1] = sub_result

                elif num_dim == 2:
                    results[i][y0:y1, x0:x1] = sub_result

                else:
                    msg = "worker result has unexpected dimension: {}".format(
                        num_dim)
                    msg += '\nit should be either 2 or 3 or 4!'
                    raise Exception(msg)

        return results
Example #27
    def __init__(self,
                 scheduler_host=None,
                 scatter=None,
                 client=None,
                 loop=None,
                 wait_for_workers_timeout=10,
                 **submit_kwargs):
        super().__init__()

        if distributed is None:
            msg = ("You are trying to use 'dask' as a joblib parallel backend "
                   "but dask is not installed. Please install dask "
                   "to fix this error.")
            raise ValueError(msg)

        if client is None:
            if scheduler_host:
                client = Client(scheduler_host,
                                loop=loop,
                                set_as_default=False)
            else:
                try:
                    client = get_client()
                except ValueError as e:
                    msg = ("To use Joblib with Dask first create a Dask Client"
                           "\n\n"
                           "    from dask.distributed import Client\n"
                           "    client = Client()\n"
                           "or\n"
                           "    client = Client('scheduler-address:8786')")
                    raise ValueError(msg) from e

        self.client = client

        if scatter is not None and not isinstance(scatter, (list, tuple)):
            raise TypeError("scatter must be a list/tuple, got "
                            "`%s`" % type(scatter).__name__)

        if scatter is not None and len(scatter) > 0:
            # Keep a reference to the scattered data to keep the ids the same
            self._scatter = list(scatter)
            scattered = self.client.scatter(scatter, broadcast=True)
            self.data_futures = {id(x): f for x, f in zip(scatter, scattered)}
        else:
            self._scatter = []
            self.data_futures = {}
        self.wait_for_workers_timeout = wait_for_workers_timeout
        self.submit_kwargs = submit_kwargs
        self.waiting_futures = as_completed([],
                                            loop=client.loop,
                                            with_results=True,
                                            raise_errors=False)
        self._results = {}
        self._callbacks = {}
Example #28
 def dot(self, other):
     """Function to compute dot product between two vectors"""
     checkVector(self, other)
     dots = self.client.map(_call_dot,
                            self.vecDask,
                            other.vecDask,
                            pure=False)
     # Adding all the results together
     dot = 0.0
     for future, result in daskD.as_completed(dots, with_results=True):
         dot += np.float64(result)
     return dot
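
norm, min, max and dot above all follow one shape; a generic, hypothetical reduction helper in the same spirit, where combine must be order-independent because results arrive in completion order:

import dask.distributed as daskD

def reduce_chunks(client, func, chunks, combine, initial):
    # apply func to every chunk and fold the partial results as they complete
    futures = client.map(func, chunks, pure=False)
    acc = initial
    for future, result in daskD.as_completed(futures, with_results=True):
        acc = combine(acc, result)
    return acc
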
Example #29
    async def run_job(self, uuid, ds, job, full_result):
        self.data.register_job(uuid=uuid, job=job)
        executor = self.data.get_executor()

        futures = []
        for task in job.get_tasks():
            submit_kwargs = {}
            futures.append(executor.client.submit(task, **submit_kwargs))
        self.write(Message(self.data).start_job(job_id=uuid))
        self.finish()
        msg = Message(self.data).start_job(job_id=uuid, )
        log_message(msg)
        self.event_registry.broadcast_event(msg)

        async for future, result in dd.as_completed(futures,
                                                    with_results=True):
            # TODO:
            # + only send PNG of area that has changed (bounding box of all result tiles!)
            # + normalize each channel (per channel: keep running min/max, map data to [0, 1])
            # + if min/max changes, send whole channel (all results up to this point re-normalized)
            # + maybe saturate up to some point (20% over current max => keep current max) and send
            #   whole result image once finished
            # + maybe use visualization framework in-browser (example: GR)

            # TODO: update task_result message:
            # + send bbox for blitting

            for tile in result:
                tile.copy_to_result(full_result)
            images = yield full_result

            # NOTE: make sure the following broadcast_event messages are sent atomically!
            # (that is: keep the code below synchronous, and only send the messages
            # once the images have finished encoding, and then send all at once)
            msg = Message(self.data).task_result(
                job_id=uuid,
                num_images=len(images),
            )
            log_message(msg)
            self.event_registry.broadcast_event(msg)
            for image in images:
                raw_bytes = image.read()
                self.event_registry.broadcast_event(raw_bytes, binary=True)
        images = yield full_result
        msg = Message(self.data).finish_job(
            job_id=uuid,
            num_images=len(images),
        )
        log_message(msg)
        self.event_registry.broadcast_event(msg)
        for image in images:
            raw_bytes = image.read()
            self.event_registry.broadcast_event(raw_bytes, binary=True)
Example #30
    def run_tasks(
        self,
        tasks: Iterable[TaskProtocol],
        params_handle: Any,
        cancel_id: Any,
    ):
        tasks = list(tasks)
        tasks_w_index = list(enumerate(tasks))

        def _id_to_task(task_id):
            return tasks[task_id]

        workers = self.get_available_workers()
        threaded_executor = workers.has_threaded_workers()

        self._futures[cancel_id] = []
        initial = []

        for w in range(int(len(workers))):
            if not tasks_w_index:
                break
            idx, wrapped_task = tasks_w_index.pop(0)
            future = self._get_future(wrapped_task, workers, idx,
                                      params_handle, threaded_executor)
            initial.append(future)
            self._futures[cancel_id].append(future)

        try:
            as_completed = dd.as_completed(initial,
                                           with_results=True,
                                           loop=self.client.loop)
            for future, result_wrap in as_completed:
                if future.cancelled():
                    del self._futures[cancel_id]
                    raise JobCancelledError()
                result = result_wrap['task_result']
                task = _id_to_task(result_wrap['task_id'])
                if tasks_w_index:
                    idx, wrapped_task = tasks_w_index.pop(0)
                    future = self._get_future(
                        wrapped_task,
                        workers,
                        idx,
                        params_handle,
                        threaded_executor,
                    )
                    as_completed.add(future)
                    self._futures[cancel_id].append(future)
                yield result, task
        finally:
            if cancel_id in self._futures:
                del self._futures[cancel_id]