Example #1
    def fit(self, X, y, sample_weight=None):
        """
        Run fit with all sets of parameters.

        :param X: array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and n_features is the number of features.

        :param y: array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target values relative to X.
        :param sample_weight: array-like, shape = [n_samples], optional
            Per-sample weights.
        """
        X, y, sample_weight = check_inputs(X,
                                           y,
                                           sample_weight=sample_weight,
                                           allow_none_weights=True)

        if self.parallel_profile is None:
            while self.evaluations_done < self.params_generator.n_evaluations:
                state_indices, state_dict = self.params_generator.generate_next_point(
                )
                status, value = apply_scorer(self.scorer, state_dict,
                                             self.base_estimator, X, y,
                                             sample_weight)
                assert status == 'success', 'Error during grid search ' + str(
                    value)
                self.params_generator.add_result(state_indices, value)
                self.evaluations_done += 1
                state_string = ", ".join(
                    [k + '=' + str(v) for k, v in state_dict.items()])
                self._log('{}: {}'.format(value, state_string))
        else:
            if self.parallel_profile.startswith('threads'):
                _, n_threads = self.parallel_profile.split('-')
                portion = int(n_threads)
                print("Performing grid search in {} threads".format(portion))
            else:
                from IPython.parallel import Client

                direct_view = Client(
                    profile=self.parallel_profile).direct_view()
                portion = len(direct_view)
                print(
                    "There are {0} cores in cluster, the portion equals {1}".
                    format(len(direct_view), portion))

            while self.evaluations_done < self.params_generator.n_evaluations:
                state_indices_array, state_dict_array = self.params_generator.generate_batch_points(
                    size=portion)
                current_portion = len(state_indices_array)
                result = map_on_cluster(
                    self.parallel_profile, apply_scorer,
                    [self.scorer] * current_portion, state_dict_array,
                    [self.base_estimator] * current_portion,
                    [X] * current_portion, [y] * current_portion,
                    [sample_weight] * current_portion)
                assert len(result) == current_portion, \
                    "Number of results does not match the number of submitted tasks"
                for state_indices, state_dict, (status, score) in zip(
                        state_indices_array, state_dict_array, result):
                    params = ", ".join(
                        [k + '=' + str(v) for k, v in state_dict.items()])
                    if status != 'success':
                        message = 'Failure during training on a node\nException: {exc}\nParameters: {params}'
                        self._log(message.format(exc=score, params=params),
                                  level=40)
                    else:
                        self.params_generator.add_result(state_indices, score)
                        self._log("{}: {}".format(score, params))
                self.evaluations_done += current_portion
                print("%i evaluations done" % self.evaluations_done)
        return self
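# A minimal, self-contained sketch (not from the original project) of the
# dispatch pattern used above: evaluate parameter points either serially, in
# local threads, or on an IPython cluster, keyed off a parallel_profile
# string. `evaluate` and `points` are hypothetical stand-ins for apply_scorer
# and the generated parameter sets.
from multiprocessing.pool import ThreadPool

def map_points(parallel_profile, evaluate, points):
    if parallel_profile is None:
        return [evaluate(p) for p in points]  # serial fallback
    if parallel_profile.startswith('threads'):
        _, n_threads = parallel_profile.split('-')  # e.g. 'threads-4'
        return ThreadPool(int(n_threads)).map(evaluate, points)
    from IPython.parallel import Client  # otherwise treat it as a cluster profile
    view = Client(profile=parallel_profile).direct_view()
    return view.map_sync(evaluate, points)

# e.g. map_points('threads-4', lambda p: p ** 2, range(8))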
Example #2
    grid = ns.grid
    partition = ns.partition
    Lx = ns.Lx
    Ly = ns.Ly
    c = ns.c
    tstop = ns.tstop
    if ns.save:
        user_action = wave_saver
    else:
        user_action = None

    num_cells = 1.0*(grid[0]-1)*(grid[1]-1)
    final_test = True

    # create the Client
    rc = Client(profile=ns.profile)
    num_procs = len(rc.ids)

    if partition is None:
        partition = [1,num_procs]

    assert partition[0]*partition[1] == num_procs, "can't map partition %s to %i engines"%(partition, num_procs)

    view = rc[:]
    print("Running %s system on %s processes until %f" % (grid, partition, tstop))

    # functions defining initial/boundary/source conditions
    def I(x,y):
        from numpy import exp
        return 1.5*exp(-100*((x-0.5)**2+(y-0.5)**2))
    def f(x,y,t):
        return 0.0  # no driving source term
Example #3
def load_client():
    global client, view
    client = Client()
    view = client.load_balanced_view()
    client.block = False
    client[:].use_dill()
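# Usage sketch for the snippet above (assumes a running `ipcluster` and that
# dill is installed on client and engines). use_dill() matters because the
# default pickle serializer cannot ship closures or lambdas to the engines.
#
#   load_client()
#   ar = view.apply_async(lambda x: x + 1, 41)
#   print(ar.get())  # -> 42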
Example #4
# <codecell>

strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

# <markdowncell>

# ## Parallel computation across strike prices and volatilities

# <markdowncell>

# The Client is used to setup the calculation and works with all engines.

# <codecell>

c = Client(profile=cluster_profile)

# <markdowncell>

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.

# <codecell>

view = c.load_balanced_view()

# <codecell>

print("Strike prices: ", strike_vals)
print("Volatilities: ", sigma_vals)
Example #5
        del grid
    except:
        pass
    return 0


md = '/lcrc/group/earthscience/radar/nexrad/chicago_floods/'
idir = md
filelist = os.listdir(md)
good_files = []
for fl in filelist:
    if 'KLOT' in fl:
        good_files.append(idir + fl)
good_files.sort()
t1 = time()
My_Cluster = Client()
My_View = My_Cluster[:]
print My_View
print len(My_View)

#Turn off blocking so all engines can work async
My_View.block = False

#on all engines do an import of Py-ART
My_View.execute('import matplotlib')
My_View.execute('matplotlib.use("agg")')

#Map the code and input to all workers
result = My_View.map_async(do_grid_map_gates_to_grid, good_files)

#Reduce the result to get a list of output
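# Likely continuation (truncated in this excerpt): block on the async map and
# collect the per-file output, e.g.
#   grids = result.get()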
Example #6
    X_L, X_D = state_tuple
    _do_analyze = crosscat.LocalEngine._do_analyze
    return _do_analyze(M_c, T, X_L, X_D, (), num_transitions, (), (), -1, -1,
                       SEED)


# set everything up
T, M_r, M_c = du.read_model_data_from_csv(filename, gen_seed=gen_seed)
num_rows = len(T)
num_cols = len(T[0])
col_names = numpy.array(
    [M_c['idx_to_name'][str(col_idx)] for col_idx in range(num_cols)])

## set up parallel
from IPython.parallel import Client
c = Client(ipython_parallel_config)
dview = c[:]
with dview.sync_imports():
    import crosscat
    import crosscat.LocalEngine
    import sys
if path_append is not None:
    dview.apply_sync(lambda: sys.path.append(path_append))
#
dview.push(dict(M_c=M_c, M_r=M_r, T=T, num_transitions=num_transitions))
seeds = range(num_chains)
async_result = dview.map_async(do_intialize, seeds)
initialized_states = async_result.get()
#
async_result = dview.map_async(do_analyze, zip(seeds, initialized_states))
chain_tuples = async_result.get()
Example #7
from IPython.parallel import Client

client = Client()
client.ids

def fct():
    return 'hello'

client[:].apply_sync(fct)

quit()
Example #8
File: snippet.py Project: szabo92/gistable
import numpy as np

from IPython.parallel import Client

from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import Scaler
from sklearn.utils import shuffle

digits = datasets.fetch_mldata("MNIST original")
X, y = digits.data, digits.target

X, y = shuffle(X, y)

X = Scaler().fit_transform(X)

params = dict(C=10. ** np.arange(-3, 3), gamma=10. ** np.arange(-3, 3))

rc = Client(profile='sge')
view = rc.load_balanced_view()


grid = GridSearchCV(SVC(), param_grid=params, cv=KFold(len(y), 4), n_jobs=view)

grid.fit(X, y)
print(grid.grid_scores_)
Example #9
from __future__ import division

from IPython.parallel import Client
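# Install a no-op `profile` decorator on every engine so that code decorated
# for line_profiler (@profile) can be imported there unchanged.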
Client()[:].execute('''
import __builtin__
__builtin__.__dict__['profile'] = lambda x: x
''')

import numpy as np
from matplotlib import pyplot as plt

from pyhsmm.models import HSMMIntNegBinVariant
from pyhsmm.basic.models import MixtureDistribution
from library_models import FrozenMixtureDistribution, LibraryHSMMIntNegBinVariant
from pyhsmm.basic.distributions import Gaussian, NegativeBinomialIntegerRVariantDuration
from pyhsmm.util.text import progprint_xrange

#############################
#  generate synthetic data  #
#############################

states_in_hsmm = 5
components_per_GMM = 3
component_hyperparameters = dict(mu_0=np.zeros(2),
                                 sigma_0=np.eye(2),
                                 kappa_0=0.025,
                                 nu_0=3)

GMMs = [
    MixtureDistribution(alpha_0=4.,
                        components=[
Example #10
    def ensure_controller(self, connect_only=False):
        """Make sure a controller is available, else start a local one.
        """
        if self._client:
            return self._client

        if self.profile is None:
            self._select_profile()
        if self.profile is None:
            return None
        print "parallelflow: using IPython profile %r" % self.profile

        try:
            self._client = Client(profile=self.profile)
            print "parallelflow: connected to controller"
            return self._client
        except error.TimeoutError:
            print "parallelflow: timeout when connecting to controller"
            if connect_only:
                start_ctrl = False
            elif qt_available:
                res = QtGui.QMessageBox.question(
                    None, "Start controller",
                    "Unable to connect to the configured IPython "
                    "controller. Do you want to start one?",
                    QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
                start_ctrl = res == QtGui.QMessageBox.Yes
            else:
                start_ctrl = True
        except IOError:
            print "parallelflow: didn't find a controller to connect to"
            if connect_only:
                start_ctrl = False
            elif qt_available:
                res = QtGui.QMessageBox.question(
                    None, "Start controller",
                    "No controller is configured in this IPython profile. "
                    "Do you want to start one?",
                    QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
                start_ctrl = res == QtGui.QMessageBox.Yes
            else:
                start_ctrl = True

        if start_ctrl:
            ctrl_pid = os.path.join(locate_profile(self.profile), 'pid',
                                    'ipcontroller.pid')
            if os.path.exists(ctrl_pid):
                os.remove(ctrl_pid)
            print "parallelflow: starting controller"
            proc, code = self.start_process(
                lambda: os.path.exists(ctrl_pid), sys.executable, '-m',
                'IPython.parallel.apps.ipcontrollerapp',
                '--profile=%s' % self.profile)
            if code is not None:
                if qt_available:
                    QtGui.QMessageBox.critical(
                        None, "Error", "Controller exited with code %d" % code)
                print(
                    "parallelflow: controller process exited with "
                    "code %d" % code)
                return None
            else:
                self.started_controller = proc
                print "parallelflow: controller started, connecting"
                self._client = Client(profile=self.profile)
                return self._client

        return None
Example #11
 def __init__(self):
     from moi import ctx_default
     self.demo = Client(profile=ctx_default)
     self.demo_lview = self.demo.load_balanced_view()
Example #12
 def __init__(self, **kwargs):
     self.client = Client(**kwargs)
Example #13
USAGE:

$FWR2D_Driver.py -h

This will print out the available options.

Make sure you modify 'run_No' in this script before submitting a new run. Otherwise the results from the last run may be overwritten.
"""

#The tag of RUN. Each new run should be assigned a new number.
run_No = '_140GHz_275t'

import time
from IPython.parallel import Client
c = Client(profile='pbs')

#the engines need time to start, so wait until all of them are connected before taking a direct view of the cluster.

desired_engine_num = 128 #Make sure this number is EXACTLY the same as the engine number you initiated with ipengine

waiting=0
while(len(c) < desired_engine_num and waiting<=86400):  # poll every 10 s until all requested engines are connected; give up after 86400 s (24 h)
    time.sleep(10)
    waiting += 10
    print('Waiting for connecting engines: {0} requested, {1} connected. {2}s passed.'.format(desired_engine_num, len(c), waiting))

if(len(c) != desired_engine_num):
    raise Exception('usable engine number is not the same as the desired engine number! usable:{0}, desired:{1}.\nCheck your cluster status and the desired number set in the Driver script.'.format(len(c),desired_engine_num))
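# The wait loop above, refactored as a reusable sketch (a hypothetical helper,
# not part of the original script):
def wait_for_engines(client, n_desired, timeout=86400, poll=10):
    """Block until n_desired engines are connected or timeout seconds pass."""
    waited = 0
    while len(client) < n_desired and waited <= timeout:
        time.sleep(poll)
        waited += poll
    if len(client) != n_desired:
        raise Exception('usable engines: {0}, desired: {1}'.format(
            len(client), n_desired))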

Example #14
def cluster_view(scheduler, queue, num_jobs, cores_per_job=1, profile=None,
                 start_wait=16, extra_params=None, retries=None, direct=False):
    """Provide a view on an ipython cluster for processing.

      - scheduler: The type of cluster to start (lsf, sge, pbs, torque).
      - num_jobs: Number of jobs to start.
      - cores_per_job: The number of cores to use for each job.
      - start_wait: How long to wait for the cluster to startup, in minutes.
        Defaults to 16 minutes. Set to longer for slow starting clusters.
      - retries: Number of retries to allow for failed tasks.
    """
    num_jobs = int(num_jobs)
    cores_per_job = int(cores_per_job)
    start_wait = int(start_wait)

    if extra_params is None:
        extra_params = {}
    max_delay = start_wait * 60
    delay = 5 if extra_params.get("run_local") else 30
    max_tries = 10
    if profile is None:
        has_throwaway = True
        profile = create_throwaway_profile()
    else:
        # ensure we have an .ipython directory to prevent issues
        # creating it during parallel startup
        cmd = [sys.executable, "-E", "-c", "from IPython import start_ipython; start_ipython()",
               "profile", "create", "--parallel"] + _get_profile_args(profile)
        subprocess.check_call(cmd)
        has_throwaway = False
    num_tries = 0

    cluster_id = str(uuid.uuid4())
    url_file = get_url_file(profile, cluster_id)

    while 1:
        try:
            if extra_params.get("run_local"):
                _start_local(cores_per_job, profile, cluster_id)
            else:
                _start(scheduler, profile, queue, num_jobs, cores_per_job, cluster_id, extra_params)
            break
        except subprocess.CalledProcessError:
            if num_tries > max_tries:
                raise
            num_tries += 1
            time.sleep(delay)
    try:
        client = None
        slept = 0
        max_up = 0
        up = 0
        while not up == num_jobs:
            up = _nengines_up(url_file)
            if up < max_up:
                print ("Engine(s) that were up have shutdown prematurely. "
                       "Aborting cluster startup.")
                _stop(profile, cluster_id)
                sys.exit(1)
            max_up = up
            time.sleep(delay)
            slept += delay
            if slept > max_delay:
                raise IOError("Cluster startup timed out.")
        client = Client(url_file, timeout=60)
        if direct:
            yield _get_direct_view(client, retries)
        else:
            yield _get_balanced_blocked_view(client, retries)
    finally:
        if client:
            _shutdown(client)
        _stop(profile, cluster_id)
        if has_throwaway:
            delete_profile(profile)
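# Usage sketch, assuming cluster_view is wrapped as a context manager upstream
# (the `yield` suggests @contextlib.contextmanager); the scheduler and queue
# values are illustrative:
#
#   with cluster_view("sge", "all.q", num_jobs=4, cores_per_job=1) as view:
#       results = view.map(process_sample, samples)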
Example #15
File: main.py Project: temporaer/wurzel
    minimum_radius_mm = 0.08 / 2
    maximum_radius_mm = 2.5 / 2
    sigma0 = minimum_radius_mm
    s = (maximum_radius_mm / minimum_radius_mm)**(1. / num_scales)
    sigmas = []
    sigmas.extend([sigma0 * s**i for i in xrange(0, num_scales)])
    print "Sigmas (mm) : ", sigmas
    sigmas = [x / D.info.scale for x in sigmas]  # convert from mm to vox
    print "Sigmas (vox): ", sigmas

    # ##########################################
    #  Prepare remote systems (load data, ...)
    # ##########################################
    if parallelize:
        from IPython.parallel import Client
        rc = Client(profile="wurzel_cluster")
        #rc = Client(profile_dir="/home/VI/staff/schulz/.ipython/profile_default")
        rc[:].execute("import os; os.chdir(\"%s\")" % os.getcwd())
        lview = rc.load_balanced_view()
        lview.block = True

    # ##########################################
    #  Execute cmdl remotely or locally
    # ##########################################
    print "Executing..."
    if parallelize:
        print "in parallel..."
        sato = lview.map(parallel_run, [(basename, s) for s in sigmas])
        print "done."
    else:
        print "Sequential, and finding maxima"
Example #16
    # print n_jobs, n_executed_jobs,
    rc = Client(profile=profile)
    n_clusters = len(rc)
    if n_executed_jobs == 0:
        n_executed_jobs = n_jobs
    elif n_executed_jobs < n_clusters:
        n_jobs = n_executed_jobs
    if n_jobs >= n_clusters:
        dview = rc[:]
    elif n_jobs == -1:
        dview = rc[:]
    elif n_jobs < n_clusters:
        dview = rc[list(np.random.permutation(n_clusters)[:n_executed_jobs])]
    # A = dview.queue_status()
    # print A.keys()
    return dview


if __name__ == "__main__":

    rc = Client(profile='net')
    A = rc.queue_status()
    for ii in range(len(rc)):
        print A[ii]

    # dview = random_rc('net', -1, 10)
    # A = dview.queue_status()
    # print len(dview)
    # for ii in A.keys():
        # print A[ii]
Example #17
def parfor(task,
           task_vec,
           args=None,
           client=None,
           view=None,
           show_scheduling=False,
           show_progressbar=False):
    """
    Call the function ``task`` for each value in ``task_vec`` using a cluster
    of IPython engines. The function ``task`` should have the signature
    ``task(value, args)`` or ``task(value)`` if ``args=None``.

    The ``client`` and ``view`` are the IPython.parallel client and
    load-balanced view that will be used in the parfor execution. If these
    are ``None``, new instances will be created.

    Parameters
    ----------

    task: a Python function
        The function that is to be called for each value in ``task_vec``.

    task_vec: array / list
        The list or array of values for which the ``task`` function is to be
        evaluated.

    args: list / dictionary
        The optional additional argument to the ``task`` function. For example
        a dictionary with parameter values.

    client: IPython.parallel.Client
        The IPython.parallel Client instance that will be used in the
        parfor execution.

    view: an IPython.parallel.Client view
        The view that is to be used in scheduling the tasks on the IPython
        cluster. Preferably a load-balanced view, which is obtained from the
        IPython.parallel.Client instance ``client`` by calling
        ``view = client.load_balanced_view()``.

    show_scheduling: bool {False, True}, default False
        Display a graph showing how the tasks (the evaluation of ``task``
        for each value in ``task_vec``) were scheduled on the IPython engine
        cluster.

    show_progressbar: bool {False, True}, default False
        Display an HTML-based progress bar during the execution of the parfor
        loop.

    Returns
    -------
    result : list
        The result list contains the value of ``task(value, args)`` for each
        value in ``task_vec``, that is, it should be equivalent to
        ``[task(v, args) for v in task_vec]``.

    """

    submitted = datetime.datetime.now()

    if client is None:
        client = Client()

        # make sure qutip is available at engines
        dview = client[:]
        dview.block = True
        dview.execute("from qutip import *")

    if view is None:
        view = client.load_balanced_view()

    if args is None:
        ar_list = [view.apply_async(task, x) for x in task_vec]
    else:
        ar_list = [view.apply_async(task, x, args) for x in task_vec]

    if show_progressbar:
        n = len(ar_list)
        pbar = HTMLProgressBar(n)
        while True:
            n_finished = sum([ar.progress for ar in ar_list])
            pbar.update(n_finished)

            if view.wait(ar_list, timeout=0.5):
                pbar.update(n)
                break
    else:
        view.wait(ar_list)

    if show_scheduling:
        metadata = [[
            ar.engine_id, (ar.started - submitted).total_seconds(),
            (ar.completed - submitted).total_seconds()
        ] for ar in ar_list]
        _visualize_parfor_data(metadata)

    return [ar.get() for ar in ar_list]
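# Usage sketch (requires a running IPython cluster, e.g. `ipcluster start`):
#
#   result = parfor(lambda x: x ** 2, range(10))
#   # equivalent to [x ** 2 for x in range(10)], evaluated on the engines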
Example #18
        Y.append(y)
    return Y

# set everything up
T, M_r, M_c = du.read_model_data_from_csv(filename, gen_seed=gen_seed)
num_rows = len(T)
num_cols = len(T[0])
col_names = numpy.array([M_c['idx_to_name'][str(col_idx)] for col_idx in range(num_cols)])
engine = LE.LocalEngine(inf_seed)


do_remote = False
if do_remote:
    ## set up parallel
    from IPython.parallel import Client
    c = Client(profile='ssh', sshserver='*****@*****.**')
    dview = c[:]
    dview.execute('import sys')
    dview.apply_sync(lambda: sys.path.append('/usr/local/'))
    #
    with dview.sync_imports(): 
        import tabular_predDB.LocalEngine as LE
    dview.push(dict(
            M_c=M_c,
            M_r=M_r,
            T=T))
    async_result = dview.map_async(lambda SEED: LE.do_initialize(M_c, M_r, T, 'from_the_prior', SEED), range(8))
    initialized_states = async_result.get()
    #
    async_result = dview.map_async(lambda (SEED, state_tuple): LE.do_analyze(M_c, T, state_tuple[0], state_tuple[1], (), 10, (), (), -1, -1, SEED), zip(range(len(initialized_states)), initialized_states))
    chain_tuples = async_result.get()
Example #19
from IPython.parallel import Client
c = Client(profile='mpi')
view = c[:]
view.activate() # enable magics
view.run('psum.py')
view.scatter('a',np.arange(16,dtype='float'))
view['a']
%px totalsum = psum(a)
view['totalsum']
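# psum.py itself is not shown here; a version along the lines of the IPython
# MPI documentation (requires mpi4py on every engine) would be:
from mpi4py import MPI
import numpy as np

def psum(a):
    locsum = np.sum(a)            # partial sum of this engine's chunk
    rcvBuf = np.array(0.0, 'd')
    MPI.COMM_WORLD.Allreduce([locsum, MPI.DOUBLE],
                             [rcvBuf, MPI.DOUBLE],
                             op=MPI.SUM)  # combine partial sums across engines
    return rcvBuf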
Example #20
def main(out):
    save_fn = os.path.join(Config['data_root'], PICKLE_FN)

    if os.path.exists(save_fn):
        out('Loading saved pickle from:\n%s'%save_fn)
        fd = file(save_fn, 'r')
        results = cPickle.load(fd)
        fd.close()

    else:

        # Set up the parallel engines
        raw_input('Make sure ipcluster has started... ')
        rc = Client()
        dview = rc[:]
        dview.block = True
        dview.execute('import cPickle, os, gc')
        dview.execute('from scanr.config import Config')
        dview.execute('from scanr.meta import get_tetrode_list, get_maze_list')
        dview.execute('from scanr.eeg import get_eeg_data, total_power, Theta, FullBand')

        lview = rc.load_balanced_view()

        @lview.remote(block=False)
        def compute_relative_theta(dataset):
            save_fn = os.path.join(Config['data_root'], 'rat%03d-%02d.pickle'%dataset)
            if os.path.exists(save_fn):
                fd = file(save_fn, 'r')
                res = cPickle.load(fd)
                fd.close()
            else:
                res = {}
                Theta.zero_lag = False
                for tt in get_tetrode_list(*dataset):
                    tt_id = tuple(dataset) + (tt,)
                    rtheta = []
                    for maze in get_maze_list(*dataset):
                        rds = tuple(dataset) + (maze,)
                        X = get_eeg_data(rds, tt)
                        if X is None:
                            continue
                        rtheta.append(
                            total_power(Theta.power(X), fs=Theta.fs) /
                                total_power(FullBand.power(X), fs=FullBand.fs)
                        )
                    res[tt_id] = rtheta
                    gc.collect()
                fd = file(save_fn, 'w')
                cPickle.dump(res, fd)
                fd.close()
            return res

        # Send out compute tasks and wait for completion
        out('Sending out tasks to the cluster...')
        async_results = map(compute_relative_theta, list(walk_days()))
        view_wait_with_status(lview, out, timeout=STATUS_PERIOD)

        # Collate results into flattened list of cell info dictionaries
        out('Collating results...')
        results = {}
        for async in async_results:
            results.update(async.get())

        # Save a pickle
        fd = file(save_fn, 'w')
        cPickle.dump(results, fd)
        fd.close()
        out('Saved intermediate pickle to:\n%s'%save_fn)

    # Recreate a fresh tetrodes table
    create_tetrode_table()
    tetrodes_table = get_node('/metadata', 'tetrodes')

    updated = 0
    out('Now updating %s with relative theta power data...'%tetrodes_table._v_pathname)

    for row in tetrodes_table.iterrows():
        tt_id = tuple(int(row[k]) for k in ('rat', 'day', 'tt'))
        tt_str = "rat%03d-%02d-Sc%d"%tt_id
        try:
            rtheta = results[tt_id]
        except KeyError:
            out('Results did not contain %s.'%tt_str, error=True)
            continue

        if len(rtheta) == 0:
            out('No EEG found for tetrode %s'%tt_str)
        else:
            rtheta_score = np.median(rtheta)
            out('Found %d sessions for tetrode %s, median = %.4f'%(
                len(rtheta), tt_str, rtheta_score))

            row['EEG'] = True
            row['relative_theta'] = rtheta_score
            row.update()

            updated += 1
            if updated % 100 == 0:
                out('Flushing updated table data...')
                tetrodes_table.flush()

    flush_file()
    close_file()
Example #21
def starmap(func, iterable, **kwargs):
    """
    A dynamic load balancing parallel implementation of itertools.starmap for IPython.parallel.
    
    The reason for its existence is twofold.
    First, the desire to easily submit a 'map' onto inputs
      already grouped in tuples in IPython.parallel.
    Second, the ability to submit a 'map' onto very large,
      potentially infinite, sequences.
    This function allows one to do that.  It is a generator function, so it is iterable.
    It maintains an internal list of returned results that are removed once yielded.
    The iterable passed as an argument need only have a next() method and raise StopIteration
      when it is finished iterating.

    Arguments
    ---------
    *func*   -   The function to be called (remotely) on each iterable.next()
    *iterable* - An iterable, generator, generator function...etc.  Something with a .next() that
                 will raise StopIteration when finished
    *profile*  -  (optional keyword argument.  Default = None) The ipython parallel cluster profile.
                  This function expects the cluster to already be 'up'.  Under the default of None,
                  this will start a client and load balanced view under the default profile, if
                  possible.  If the profile specified is not running, an IO error will be raised.
                  (Ignored if client keyword argument is specified)
    *client*   -  (optional keyword argument.  Default = None) An instance of
                  IPython.parallel.Client
    *max_fill*  - (optional keyword argument.  Default = 50000) The maximum number of
                  'jobs' to submit to the cluster before waiting for earlier jobs to finish.
    *wait*      - (optional keyword argument.  Default = 1)  Number of seconds to wait when
                  submission queue is full, and no further output may be yielded.
    *kwargs*    - Additional keyword arguments are treated as keyword arguments to func.


    A note on the profile and client keyword arguments:  If client is specified, the profile
    kwarg will be ignored.
    
    """
    profile = kwargs.pop('profile', None)
    rc = kwargs.pop('client', None)
    max_fill = kwargs.pop('max_fill', 50000)
    wait = kwargs.pop('wait', 1)
    if rc is None:
        rc = Client(profile=profile)
    elif not isinstance(rc, Client):
        raise ValueError(
            'client keyword value expected an instance of IPython.parallel.Client'
        )
    lbv = rc.load_balanced_view()

    async_results_list = []  #This will serve as our output queue

    while True:  #GO until StopIteration is raised

        if n_queued_jobs(lbv) < max_fill:
            #If there are less than the maximum number of jobs waiting to run,
            #submit the next job, unless we cannot.
            try:
                async_results_list.append(
                    lbv.apply(func, *iterable.next(), **kwargs))

            except StopIteration:
                if len(async_results_list) == 0:
                    raise

        while len(async_results_list) > 0 and async_results_list[0].ready():
            #If there are results ready to be read, pop them off
            yield async_results_list.pop(0).get()

        if n_queued_jobs(lbv) >= max_fill:
            time.sleep(wait)
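# Usage sketch (requires a running cluster; n_queued_jobs is a helper assumed
# to be defined elsewhere in the original module):
#
#   def add(a, b):
#       return a + b
#   for total in starmap(add, iter([(1, 2), (3, 4), (5, 6)])):
#       print(total)  # 3, 7, 11 -- results are yielded in submission order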
Example #22
def run_cell(shell, iopub, cell, timeout=300):
    if not hasattr(cell, 'input'):
        return [], False
    shell.execute(cell.input)
    # wait for finish, maximum 5min by default
    reply = shell.get_msg(timeout=timeout)['content']
    if reply['status'] == 'error':
        failed = True
        print("\nFAILURE:")
        print(cell.input)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))
    else:
        failed = False

    # Collect the outputs of the cell execution
    outs = []
    while True:
        try:
            msg = iopub.get_msg(timeout=0.2)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'pyin'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue

        content = msg['content']
        out = current.NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            out.stream = content['name']
            out.text = content['data']
        elif msg_type in ('display_data', 'pyout'):
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            if msg_type == 'pyout':
                out.prompt_number = content['execution_count']
        elif msg_type == 'pyerr':
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        else:
            print("unhandled iopub msg: %s" % msg_type)

        outs.append(out)

    # Special handling of ipcluster restarts
    if '!ipcluster stop' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) == 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("@")
            sys.stdout.flush()
            time.sleep(5)
    if '!ipcluster start' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) > 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("#")
            sys.stdout.flush()
            time.sleep(5)
    return outs, failed
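# Hypothetical usage sketch, following the old IPython 1.x kernel API that
# this helper expects (the channel attribute names are assumptions):
#
#   km = KernelManager()
#   km.start_kernel()
#   km.start_channels()
#   outs, failed = run_cell(km.shell_channel, km.iopub_channel, cell)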
Example #23
def proc_data(data_folder,
              h5name,
              multiproc=False,
              chunk_size=4,
              filetype='aia',
              reffile=None,
              fittype=None,
              calfile=None,
              picts=False,
              **kwargs):

    if filetype == 'aia':
        GcmsObj = gcf.AiaFile
        ends = ('CDF', 'AIA', 'cdf', 'aia')

    files = os.listdir(data_folder)
    files = [f for f in files if f.endswith(ends)]
    files = [os.path.join(data_folder, f) for f in files]

    ref = None
    if reffile:
        if reffile.endswith(('txt', 'TXT')):
            ref = gcr.TxtReference(reffile, **kwargs)

    fit = None
    if fittype:
        if fittype.lower() == 'nnls':
            fit = gcfit.Nnls(**kwargs)

    h5 = gcd.GcmsStore(h5name, **kwargs)

    if multiproc:
        try:
            client = Client()
        except Exception:
            error = "ERROR! You do not have an IPython Cluster running.\n\n"
            error += "Start cluster with: ipcluster start -n # &\n"
            error += "Where # == the number of processors.\n\n"
            error += "Stop cluster with: ipcluster stop"
            print(error)
            h5.close()
            return

        dview = client[:]
        dview.block = True
        dview['ref'] = ref
        dview['fit'] = fit
        dview['GcmsObj'] = GcmsObj
        chunk_size = len(dview)

    # Chunk the data so lots of data files aren't opened in memory.
    for chunk in _chunker(files, chunk_size):
        if multiproc:
            datafiles = dview.map_sync(_proc_file,
                                       [(i, kwargs) for i in chunk])
        else:
            datafiles = [GcmsObj(f, **kwargs) for f in chunk]
            if ref:
                ref(datafiles)
            if fit:
                fit(datafiles)

        h5.append_gcms(datafiles)

    if calfile:
        cal = gcc.Calibrate(h5, **kwargs)
        cal.curvegen(calfile, picts=picts, **kwargs)
        cal.datagen(picts=picts, **kwargs)

    h5.compress()
Example #24
from IPython.parallel import Client

rc = Client(
    '/Users/sven/.starcluster/ipcluster/'
    'SecurityGroup:@sc-smallcluster-us-east-1.json',
    sshkey='/Users/sven/.ssh/starclusterkey.rsa',
    packer='pickle',
)

view = rc[:]
results = view.map(lambda x: x**30, range(8))
print(results.get())
Example #25
def runScan(options):
    import os, sys, pickle
    import numpy as np

    scan_settings = pickle.load(
        open(
            os.path.join(user.scans_dir, options.NAME,
                         'OscFit_ScanSettings.pckl')))
    print '\nLLH scan: Running with the following fit settings'
    for one_key in scan_settings['fit_settings']:
        print '\t', one_key, '\t', scan_settings['fit_settings'][one_key]

    total_jobs = scan_settings['dm31_map'].size
    print '\nLLH scan: Total jobs ', total_jobs
    job_script = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'oscFit_oneJob_mctest.py')

    if len(options.JOBS) == 0:
        job_range = range(0, total_jobs)
    else:
        job_range = np.array(options.JOBS, dtype=int) - 1

    if options.TEST:
        print 'LLH scan: Running first point as a test!'
        print '\tYou will be asked if you wish to continue before moving on'
        os.system(' '.join(['python', job_script, '1', '1', options.NAME]))

        print 'LLH scan: Continue execution? ... '
        raw_input()

    if options.MODE == 'uge_farm':
        #print job_script
        farm_multiplicity = 1

        if len(options.JOBS) == 0:
            total_jobs = np.ceil(total_jobs * 1. / farm_multiplicity)
            job_array = '1-' + "%i" % total_jobs
            qsub_line = '  '.join([
                'qsub -t', job_array, './oscFit_farmScript.sh', job_script,
                "%i" % farm_multiplicity, options.NAME
            ])
            print qsub_line
            os.system(qsub_line)
        else:
            for job_array in options.JOBS:
                qsub_line = '  '.join([
                    'qsub -t', job_array, './oscFit_farmScript.sh', job_script,
                    "1", options.NAME
                ])
                print qsub_line
                os.system(qsub_line)

    elif options.MODE == 'iparallel':
        from IPython.parallel import Client, interactive
        import iparallel
        rc = Client(profile='sge')
        lview = rc.load_balanced_view()
        result = lview.map_async(doOscFit, [options.NAME] * len(job_range),
                                 job_range)
        iparallel.waitOn(result)

    elif options.MODE == 'local':
        import os
        for i in job_range:
            os.system('  '.join(
                ['python', job_script,
                 "%i" % i, '1', options.NAME]))
    print 'Finished!'
Example #26
def get_lb_view():
    client = Client()
    lb_view = client.load_balanced_view()
    return lb_view
Example #27
def _ipython_map(func, iterable, cfg):

    import IPython

    if IPython.version_info[0] < 4:
        from IPython.parallel import Client
    else:
        from ipyparallel import Client

    rc = Client()
    rc[:].clear()


    ### Make modules for all dependencies on the engines
    for dep in cfg['dependencies']:
        mod_name = os.path.splitext(
            os.path.basename(dep)
        )[0]


        with open(dep) as f:
            code = f.read()

        code = code.encode('string_escape')

        rc[:].execute(
"""
import imp
import sys

_mod = imp.new_module('{mod_name}')
sys.modules['{mod_name}'] = _mod

exec '''{code}''' in _mod.__dict__

del _mod
""".format(code=code, mod_name=mod_name),
            block=True
        )




    ### Make sure all definitions surrounding the func are present on
    ### the engines (evaluate the code from the file of the func)
    fname = inspect.getfile(func)
    with open(fname) as f:
        code = f.read()


    logger.info("IPython engine IDs: {}".format(rc.ids))


    ## Need to escape all ' and " in order to embed the code into
    ## execute string
    # code = code.replace("\'", "\\\'")
    # code = code.replace("\"", "\\\"")

    code = code.encode('string_escape')


    ## The trick with ``exec ... in {}`` is used because we want to avoid
    ## executing the module's ``__main__`` block on the engines
    rc[:].execute(
"""
_tmp_dict = dict()
exec '''{code}''' in _tmp_dict
globals().update(_tmp_dict)
del _tmp_dict
""".format(code=code),
        block=True
    )
    # status.wait()

    # res = rc[:].apply(dir)
    # print(res.get())

    wrap = _FuncWrap(func)
    pool = rc.load_balanced_view()

    results = []
    for args in iterable:
        results.append( pool.apply_async(wrap, args) )


    for result in results:
        yield result.get()
Example #28
                          SVC__C=np.logspace(-2, 8, 9, base=3),
                          SVC__gamma=np.logspace(-9, 3, 9, base=3),
                          SVC__kernel=['rbf', 'linear', 'sigmoid'],
                          SVC__class_weight=['balanced'],
                          RDF__n_estimators=range(10, 100, 10),
                          RDF__criterion=['gini', 'entropy'],
                          RDF__max_depth=range(1, len(X_train[0]), 1),
                          RDF__class_weight=['balanced'],
                          ADB__n_estimators=range(50, 500, 50),
                          ADB__learning_rate=np.logspace(-2, 8, 9, base=3),
                          LRC__Cs=range(0, 10, 1),
                          LRC__class_weight=['balanced'])

        best_classifiers = {}

        client = Client(packer="pickle")
        lb_view = client.load_balanced_view()

        for classifier in classifiers:
            ## Method for supplying just the parameter grid entries related to the classifier
            ## in the current iteration while excluding the other classifier parameters.
            # dict comprehension method courtesy of BernBarn at:
            # http://stackoverflow.com/questions/14507591/python-dictionary-comprehension
            param_for_class = {
                key: value
                for key, value in param_grid.iteritems()
                if re.search(key.split("_")[0], 'features ' + classifier)
            }

            lb_view.abort()
            time.sleep(4)
"""loader.py
~~~~~~~~~~

loader do cluster
e funcoes auxiliares
"""

%load_ext autoreload
%autoreload 2

import my_func
from IPython.parallel import Client
import numpy as np

rc = Client()
dv = rc[1,2,3,4]
rc.ids
dv
dv.block = True

dv.execute('%load_ext autoreload')
dv.execute('%autoreload 2')
dv.execute('import os')
dv.execute('os.chdir("/home/ubuntu/Dropbox/Research_Code/ML/Nielsen/My_Code")')
dv.execute('import my_func')
dv.execute('reload(my_func)')
dv.apply(my_func.network3_nbb.set_GPU,False)

dv.execute('os.system("rm my_func.pyc")')
dv.execute('os.system("rm network3_nbb.pyc")')
Example #30
 def new_client(self):
     if self._client:
         self._client.close()
     self._client = Client(profile=self.profile)