Example #1
    def test_become_dask(self):
        executor = self.client.become_dask()
        reprs = self.client[:].apply_sync(repr, Reference('distributed_worker'))
        for r in reprs:
            self.assertIn("Worker", r)

        squares = executor.map(lambda x: x * x, range(10))
        tot = executor.submit(sum, squares)
        self.assertEqual(tot.result(), 285)

        # cleanup
        self.client.stop_distributed()
        ar = self.client[:].apply_async(lambda x: x, Reference('distributed_worker'))
        self.assertRaisesRemote(NameError, ar.get)
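Outside the test harness, the same workflow can be run directly against a cluster; a minimal sketch, assuming a running `ipcluster` and that dask.distributed is installed:

# minimal standalone sketch of the become_dask workflow
from ipyparallel import Client

client = Client()
executor = client.become_dask()          # turn the engines into dask workers
squares = executor.map(lambda x: x * x, range(10))
total = executor.submit(sum, squares)
print(total.result())                    # 285, the sum of squares of 0..9
client.stop_distributed()                # tear the dask workers back down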
Example #2
def phistogram(view, a, bins=10, rng=None, normed=False):
    """Compute the histogram of a remote array a.
    
    Parameters
    ----------
        view
            IPython DirectView instance
        a : str
            String name of the remote array
        bins : int
            Number of histogram bins
        rng : (float, float)
            Tuple of min, max of the range to histogram
        normed : boolean
            Should the histogram counts be normalized to 1
    """
    nengines = len(view.targets)
    
    with view.sync_imports():
        import numpy
    # every engine histograms its local chunk; an explicit rng keeps the
    # bin edges identical across engines, so the counts can be summed
    rets = view.apply_sync(lambda a, b, rng: numpy.histogram(a, b, rng),
                           Reference(a), bins, rng)
    hists = [r[0] for r in rets]
    lower_edges = rets[0][1]
    hist_array = numpy.array(hists).reshape(nengines, -1)
    total_hist = numpy.sum(hist_array, 0)
    if normed:
        total_hist = total_hist / numpy.sum(total_hist, dtype=float)
    return total_hist, lower_edges
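A hypothetical usage sketch (the names `rc` and `data` are illustrative): scatter an array across the engines, then histogram it in parallel. Passing an explicit `rng` forces every engine onto identical bin edges, which is what makes the per-engine counts summable:

import numpy as np
from ipyparallel import Client

rc = Client()
view = rc[:]
data = np.random.randn(100000)
view.scatter('data', data)       # each engine receives a slice named 'data'
hist, edges = phistogram(view, 'data', bins=20, rng=(-4.0, 4.0))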
Example #3
def remote_iterator(view, name):
    """Return an iterator on an object living on a remote engine."""
    # create a remote iterator over the named object
    view.execute('it%s = iter(%s)' % (name, name), block=True)
    while True:
        try:
            result = view.apply_sync(lambda x: next(x), Reference('it' + name))
        except RemoteError as e:
            # the remote next() raising StopIteration ends the local loop;
            # returning (rather than re-raising StopIteration) keeps the
            # generator valid under PEP 479
            if e.ename == 'StopIteration':
                return
            else:
                raise
        else:
            yield result
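A hypothetical usage sketch: iterate locally over a list living on engine 0 (the names `rc` and `x` are illustrative):

from ipyparallel import Client

rc = Client()
e0 = rc[0]                   # DirectView on a single engine
e0['x'] = ['a', 'b', 'c']    # push a list named 'x' into its namespace
for item in remote_iterator(e0, 'x'):
    print(item)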
Example #4
def pwordfreq(view, fnames):
    """Parallel word frequency counter.

    view - An IPython DirectView
    fnames - The filenames containing the split data.
    """
    assert len(fnames) == len(view.targets)
    # give each engine exactly one filename to work on
    view.scatter('fname', fnames, flatten=True)
    ar = view.apply(wordfreq, Reference('fname'))
    freqs_list = ar.get()
    # merge the per-file counts into a single dict
    word_set = set()
    for f in freqs_list:
        word_set.update(f.keys())
    freqs = dict.fromkeys(word_set, 0)
    for f in freqs_list:
        for word, count in f.items():
            freqs[word] += count
    return freqs
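A hypothetical usage sketch, assuming a `wordfreq(fname)` helper that returns a dict of word counts for one file; it has to be pushed to the engines first, and the data pre-split into exactly one file per engine (the assert above enforces this):

from ipyparallel import Client

rc = Client()
view = rc[:]
view['wordfreq'] = wordfreq    # make the counter callable on the engines
fnames = ['part0.txt', 'part1.txt', 'part2.txt', 'part3.txt']  # one per engine
freqs = pwordfreq(view, fnames)
print(sorted(freqs.items(), key=lambda kv: kv[1], reverse=True)[:10])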
Example #5
pub_url = root.apply_sync(lambda: com.pub_url)

# gather the connection information into a dict
ar = view.apply_async(lambda: com.info)
peers = ar.get_dict()

# this is a dict, keyed by engine ID, of the connection info for the EngineCommunicators


# connect the engines to each other:
def connect(com, peers, tree, pub_url, root_id):
    """this function will be called on the engines"""
    com.connect(peers, tree, pub_url, root_id)


view.apply_sync(connect, Reference('com'), peers, btree, pub_url, root_id)


# functions that can be used for reductions
# max and min builtins can be used as well
def add(a, b):
    """cumulative sum reduction"""
    return a + b


def mul(a, b):
    """cumulative product reduction"""
    return a * b


view['add'] = add
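Item assignment on a DirectView is shorthand for a push, so the remaining reduction can be shipped the same way; a small equivalence sketch:

# these two lines are equivalent ways to put functions
# into each engine's namespace
view['mul'] = mul
view.push({'add': add, 'mul': mul})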
Example #6
    # scatter engine IDs
    view.scatter('my_id', list(range(num_procs)), flatten=True)

    # create the engine connectors
    view.execute('com = EngineCommunicator()')

    # gather the connection information into a single dict
    ar = view.apply_async(lambda: com.info)
    peers = ar.get_dict()
    # this is a dict, keyed by engine ID, of the connection info for the EngineCommunicators

    # setup remote partitioner
    # note that Reference means that the argument passed to setup_partitioner will be the
    # object named 'com' in the engine's namespace
    view.apply_sync(setup_partitioner, Reference('com'), peers,
                    Reference('my_id'), num_procs, grid, partition)
    time.sleep(1)
    # convenience lambda to call solver.solve:
    _solve = lambda *args, **kwargs: solver.solve(*args, **kwargs)

    if ns.scalar:
        impl['inner'] = 'scalar'
        # setup remote solvers
        view.apply_sync(setup_solver,
                        I,
                        f,
                        c,
                        bc,
                        Lx,
                        Ly,
Example #7
def gpu_job_runner(job_fnc, job_args, ipp_profile='ssh_gpu_py2', log_name=None, log_dir='~/logs/default',
                   status_interval=600, allow_engine_overlap=True, devices_assigned=False):
    """ Distribute a set of jobs across an IPyParallel 'GPU cluster'
    Requires that cluster has already been started with `ipcluster start --profile={}`.forat(ipp_profile)
    Checks on the jobs every status_interval seconds, logging status.

    Args:
      job_fnc: the function to distribute
        must accept `device` as a kwarg, as this  function is wrapped so that
        device is bound within the engine namespace
        returned values are ignored
      job_args: list of args passed to job_fnc - list
      ipp_profile: profile of GPU IPyParallel profile - str
      log_name: (optional) name for log
      log_dir: (optional), default is ~/logs/default which is created if it doesn't exist
      status_interval: (optional) the amount of time, in seconds, to wait before querying the AsyncResult
       object for the status of the jobs
      devices_assigned: (optional) set this to True if devices have already been assigned to
        the engines on this cluster

    """
    from ipyparallel import Client, RemoteError, Reference
    import inspect
    import os
    import socket
    import time

    import six

    # setup logging
    log_path = os.path.expanduser(log_dir)
    log_name = log_name or 'job_runner'
    logger = setup_logging(log_name, log_path)

    # TODO: this isn't strictly necessary
    # check that job_fnc accepts a device kwarg
    if 'device' not in inspect.getargspec(job_fnc)[0]:
        logger.critical("job_fnc does not accept a device kwarg. Halting.")
        return

    client = Client(profile=ipp_profile)

    logger.info("Succesfully initialized client on %s with %s engines", ipp_profile, len(client))


    if not devices_assigned:
        # assign each engine to a GPU
        engines_per_host = {}
        device_assignments = []
        engine_hosts = client[:].apply(socket.gethostname).get()

        for host in engine_hosts:
            if host in engines_per_host:
                device_assignments.append('/gpu:{}'.format(engines_per_host[host]))
                engines_per_host[host] += 1
            else:
                device_assignments.append('/gpu:0')
                engines_per_host[host] = 1

        logger.info("Engines per host: \n")

        if not allow_engine_overlap:
            # check that we haven't over-provisioned GPUs on any host
            for host, n_engines in six.iteritems(engines_per_host):
                logger.info("%s: %s", host, n_engines)
                if n_engines > WS_N_GPUS[host]:
                    logger.critical("Host %s has more engines than GPUs. Halting.", host)
                    return


        while True:
            try:
                # NOTE: could also be accomplished with process environment variables
                # broadcast device assignments and job_fnc
                for engine_id, engine_device in enumerate(device_assignments):
                    print("Pushing to engine {}: device: {}".format(engine_id, engine_device))
                    client[engine_id].push({'device': engine_device,
                                            'job_fnc': job_fnc})

                for engine_id, (host, assigned_device) in enumerate(zip(engine_hosts, device_assignments)):
                    remote_device = client[engine_id].pull('device').get()
                    logger.info("Engine %s: host = %s; device = %s, remote device = %s",
                                engine_id, host, assigned_device, remote_device)
                break
            except RemoteError as remote_err:
                logger.warn("Caught remote error: %s. Sleeping for 10s before retry", remote_err)
                time.sleep(10)
    else:
        try:
            device_assignments = client[:].pull('device').get()
        except RemoteError as remote_err:
            logger.warning("Caught remote error when checking device assignments: %s. "
                           "You may want to initialize device assignments", remote_err)

    logger.info("Dispatching jobs: %s", job_args)
    # dispatch jobs
    async_result = client[:].map(job_fnc, job_args, [Reference('device')] * len(job_args))

    start_time = time.time()

    while not async_result.ready():
        time.sleep(status_interval)
        n_finished = async_result.progress
        n_jobs = len(job_args)
        wall_time = time.time() - start_time
        logger.info("%s seconds elapsed. %s of %s jobs finished",
                    wall_time, n_finished, n_jobs)
    logger.info("All jobs finished in %s seconds!", async_result.wall_time)