import multiprocessing
from functools import partial


def parallel_runs(data_list):
    # dask_df and symbol_unique_dates are expected to exist at module
    # level; partial() binds them so pool.map only passes data_list items.
    prod_x = partial(read_process_store,
                     dask_df=dask_df,
                     symbol_unique_dates=symbol_unique_dates)
    # The context manager terminates the pool's workers on exit.
    with multiprocessing.Pool(4) as pool:
        try:
            results = pool.map(prod_x, data_list)
            print(results)
        except KeyError:
            pass  # Ignore lookups that fail inside a worker
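
A minimal usage sketch, assuming hypothetical module-level dask_df and
symbol_unique_dates objects and a hypothetical read_process_store worker
(none of these are defined in the snippet above):

dask_df = None  # Placeholder; a real run would hold a Dask DataFrame
symbol_unique_dates = {'AAPL': ['2021-01-04'], 'MSFT': []}

def read_process_store(item, dask_df, symbol_unique_dates):
    # Hypothetical worker: count the known dates for one symbol
    return len(symbol_unique_dates.get(item, []))

if __name__ == '__main__':
    # The guard is required on platforms that spawn, not fork, workers
    parallel_runs(['AAPL', 'MSFT', 'GOOG'])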
Example #2
def create_dask_client(args):
    """Create and install a Dask distributed client using args from a
    Namespace, supporting the following attributes:

    - .scheduler: Address of the distributed scheduler,
      'multithreading' or 'multiprocessing' for a local pool, or the
      empty string to start one locally
    - .num_workers: Number of workers; values <= 0 are an offset from
      the physical CPU count
    - .num_threads_per_worker: Number of threads per worker

    """
    import dask
    import psutil

    scheduler = getattr(args, 'scheduler', None)
    num_workers = getattr(args, 'num_workers', 0)
    num_threads_per_worker = getattr(args, 'num_threads_per_worker', 0)

    if scheduler == 'multithreading':
        import dask.threaded
        from multiprocessing.pool import ThreadPool

        # A value <= 0 is an offset from the physical core count; a
        # positive value is used directly as the thread count.
        if num_threads_per_worker <= 0:
            num_workers = max(
                1,
                psutil.cpu_count(logical=False) + num_threads_per_worker)
        else:
            num_workers = num_threads_per_worker
        print('Starting dask thread pool with %d thread(s)' % num_workers)
        dask.config.set(pool=ThreadPool(num_workers))
        dask.config.set(scheduler='threads')
        return

    if scheduler == 'multiprocessing':
        import dask.multiprocessing
        import multiprocessing

        dask.config.set(scheduler='processes')
        # A value <= 0 is an offset from the physical core count
        if num_workers <= 0:
            num_workers = max(1, psutil.cpu_count(logical=False) + num_workers)

        print('Starting dask multiprocessing pool with %d worker(s)' %
              num_workers)
        dask.config.set(pool=multiprocessing.Pool(
            num_workers,
            initializer=dask.multiprocessing.initialize_worker_process))
        return

    import dask.distributed
    if not scheduler:
        # No address given: start a LocalCluster in this process
        if num_workers <= 0:
            num_workers = max(1, psutil.cpu_count(logical=False) + num_workers)
        num_threads_per_worker = (num_threads_per_worker
                                  if num_threads_per_worker >= 1 else None)

        print('Creating dask LocalCluster with %d worker(s), %r thread(s) per '
              'worker' % (num_workers, num_threads_per_worker))
        scheduler = dask.distributed.LocalCluster(
            ip='0.0.0.0',  # Allow reaching the diagnostics port externally
            scheduler_port=0,  # Don't expose the scheduler port
            n_workers=num_workers,
            memory_limit=0,  # Disable the per-worker memory limit
            threads_per_worker=num_threads_per_worker,
            silence_logs=False)

    return dask.distributed.Client(scheduler)
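
A usage sketch for the function above, built around an argparse-style
Namespace; the attribute values shown here are illustrative:

import argparse

args = argparse.Namespace(
    scheduler='',              # Empty string: start a LocalCluster
    num_workers=-1,            # One less than the physical core count
    num_threads_per_worker=2)
client = create_dask_client(args)
if client is not None:         # The thread/process pool branches return None
    print(client)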