def fit(self, X, y, sample_weight=None):
    """
    Run fit with all sets of parameters.

    :param X: array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.
    :param y: array-like, shape = [n_samples] or [n_samples, n_output], optional
    :param sample_weight: array-like, shape = [n_samples], weight
    """
    X, y, sample_weight = check_inputs(X, y, sample_weight=sample_weight, allow_none_weights=True)

    if self.parallel_profile is None:
        while self.evaluations_done < self.params_generator.n_evaluations:
            state_indices, state_dict = self.params_generator.generate_next_point()
            status, value = apply_scorer(self.scorer, state_dict, self.base_estimator, X, y, sample_weight)
            assert status == 'success', 'Error during grid search ' + str(value)
            self.params_generator.add_result(state_indices, value)
            self.evaluations_done += 1
            state_string = ", ".join([k + '=' + str(v) for k, v in state_dict.items()])
            self._log('{}: {}'.format(value, state_string))
    else:
        if self.parallel_profile.startswith('threads'):
            _, n_threads = self.parallel_profile.split('-')
            portion = int(n_threads)
            print("Performing grid search in {} threads".format(portion))
        else:
            from IPython.parallel import Client
            direct_view = Client(profile=self.parallel_profile).direct_view()
            portion = len(direct_view)
            print("There are {0} cores in cluster, the portion equals {1}".format(len(direct_view), portion))

        while self.evaluations_done < self.params_generator.n_evaluations:
            state_indices_array, state_dict_array = self.params_generator.generate_batch_points(size=portion)
            current_portion = len(state_indices_array)
            result = map_on_cluster(self.parallel_profile, apply_scorer,
                                    [self.scorer] * current_portion,
                                    state_dict_array,
                                    [self.base_estimator] * current_portion,
                                    [X] * current_portion,
                                    [y] * current_portion,
                                    [sample_weight] * current_portion)
            assert len(result) == current_portion, \
                "Number of results does not match the number of submitted tasks"
            for state_indices, state_dict, (status, score) in zip(state_indices_array, state_dict_array, result):
                params = ", ".join([k + '=' + str(v) for k, v in state_dict.items()])
                if status != 'success':
                    message = 'Failure during training on a node\nException: {exc}\nParameters: {params}'
                    self._log(message.format(exc=score, params=params), level=40)
                else:
                    self.params_generator.add_result(state_indices, score)
                    self._log("{}: {}".format(score, params))
            self.evaluations_done += current_portion
            print("%i evaluations done" % self.evaluations_done)
    return self
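# For context: the (status, value) pairs consumed above imply that apply_scorer
# reports failures through its return value instead of raising. The real
# apply_scorer is defined elsewhere in this package; below is a hypothetical
# sketch of a compatible implementation (the names and the scorer call
# signature are assumptions, not the package's actual code):
def apply_scorer(scorer, params, base_estimator, X, y, sample_weight):
    try:
        from sklearn.base import clone
        estimator = clone(base_estimator).set_params(**params)
        return 'success', scorer(estimator, params, X, y, sample_weight)
    except Exception as exc:
        return 'fail', exc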
grid = ns.grid
partition = ns.partition
Lx = ns.Lx
Ly = ns.Ly
c = ns.c
tstop = ns.tstop
if ns.save:
    user_action = wave_saver
else:
    user_action = None

num_cells = 1.0 * (grid[0] - 1) * (grid[1] - 1)
final_test = True

# create the Client
rc = Client(profile=ns.profile)
num_procs = len(rc.ids)

if partition is None:
    partition = [1, num_procs]

assert partition[0] * partition[1] == num_procs, \
    "can't map partition %s to %i engines" % (partition, num_procs)

view = rc[:]
print("Running %s system on %s processes until %f" % (grid, partition, tstop))

# functions defining initial/boundary/source conditions
def I(x, y):
    from numpy import exp
    return 1.5 * exp(-100 * ((x - 0.5) ** 2 + (y - 0.5) ** 2))

def f(x, y, t):
    return 0.0  # no source term in this example
def load_client():
    global client, view
    client = Client()
    view = client.load_balanced_view()
    client.block = False
    # requires dill to be installed on the client and on every engine
    client[:].use_dill()
# <codecell>

strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

# <markdowncell>

# ## Parallel computation across strike prices and volatilities

# <markdowncell>

# The Client is used to set up the calculation and works with all engines.

# <codecell>

c = Client(profile=cluster_profile)

# <markdowncell>

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.

# <codecell>

view = c.load_balanced_view()

# <codecell>

print("Strike prices: ", strike_vals)
print("Volatilities: ", sigma_vals)
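# <markdowncell>

# A sketch of how the (strike, volatility) grid would then be submitted
# through the view. ``price_option`` stands in for the pricing function
# defined elsewhere in this notebook; the call pattern below is an
# assumption, not the notebook's verbatim code.

# <codecell>

async_results = []
for strike in strike_vals:
    for sigma in sigma_vals:
        async_results.append(view.apply_async(price_option, strike, sigma))

c.wait(async_results)  # block until every submitted task has finished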
    try:
        del grid
    except:
        pass
    return 0

md = '/lcrc/group/earthscience/radar/nexrad/chicago_floods/'
idir = md
filelist = os.listdir(md)
good_files = []
for fl in filelist:
    if 'KLOT' in fl:
        good_files.append(idir + fl)
good_files.sort()

t1 = time()
My_Cluster = Client()
My_View = My_Cluster[:]
print My_View
print len(My_View)

#Turn off blocking so all engines can work async
My_View.block = False

#on all engines do an import of Py-ART
My_View.execute('import matplotlib')
My_View.execute('matplotlib.use("agg")')

#Map the code and input to all workers
result = My_View.map_async(do_grid_map_gates_to_grid, good_files)

#Reduce the result to get a list of output
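#(Continuation sketch, not in the original source: collect the per-file
# outputs once the asynchronous map has finished; ``grids`` is a
# hypothetical name.)
grids = result.get()
print len(grids)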
    X_L, X_D = state_tuple
    _do_analyze = crosscat.LocalEngine._do_analyze
    return _do_analyze(M_c, T, X_L, X_D, (), num_transitions, (), (), -1, -1, SEED)

# set everything up
T, M_r, M_c = du.read_model_data_from_csv(filename, gen_seed=gen_seed)
num_rows = len(T)
num_cols = len(T[0])
col_names = numpy.array([M_c['idx_to_name'][str(col_idx)] for col_idx in range(num_cols)])

## set up parallel
from IPython.parallel import Client
c = Client(ipython_parallel_config)
dview = c[:]
with dview.sync_imports():
    import crosscat
    import crosscat.LocalEngine
    import sys
if path_append is not None:
    dview.apply_sync(lambda: sys.path.append(path_append))
#
dview.push(dict(M_c=M_c, M_r=M_r, T=T, num_transitions=num_transitions))
seeds = range(num_chains)
async_result = dview.map_async(do_intialize, seeds)
initialized_states = async_result.get()
#
async_result = dview.map_async(do_analyze, zip(seeds, initialized_states))
chain_tuples = async_result.get()
from IPython.parallel import Client

client = Client()
client.ids

def fct():
    return 'hello'

client[:].apply_sync(fct)
quit()
import numpy as np
from IPython.parallel import Client
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import Scaler
from sklearn.utils import shuffle

digits = datasets.fetch_mldata("MNIST original")
X, y = digits.data, digits.target
X, y = shuffle(X, y)
X = Scaler().fit_transform(X)

params = dict(C=10. ** np.arange(-3, 3), gamma=10. ** np.arange(-3, 3))

rc = Client(profile='sge')
view = rc.load_balanced_view()

grid = GridSearchCV(SVC(), param_grid=params, cv=KFold(len(y), 4), n_jobs=view)
grid.fit(X, y)
print(grid.grid_scores_)
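# Note: stock scikit-learn does not accept an IPython view for ``n_jobs``; the
# call above assumes an experimental fork with that integration. A hedged
# alternative that stays on vanilla scikit-learn and pushes each parameter
# combination through the load-balanced view (the call pattern is an
# assumption, not part of the original script):
from sklearn.grid_search import ParameterGrid

def score_params(p, X, y):
    from sklearn.svm import SVC
    from sklearn.cross_validation import cross_val_score
    return p, cross_val_score(SVC(**p), X, y, cv=4).mean()

async_results = [view.apply_async(score_params, p, X, y) for p in ParameterGrid(params)]
best = max((ar.get() for ar in async_results), key=lambda item: item[1])
print(best)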
from __future__ import division
from IPython.parallel import Client
# install a no-op ``profile`` into builtins on every engine, presumably so
# that code decorated for line_profiler's @profile still imports cleanly
Client()[:].execute('''
import __builtin__
__builtin__.__dict__['profile'] = lambda x: x
''')

import numpy as np
from matplotlib import pyplot as plt

from pyhsmm.models import HSMMIntNegBinVariant
from pyhsmm.basic.models import MixtureDistribution
from library_models import FrozenMixtureDistribution, LibraryHSMMIntNegBinVariant
from pyhsmm.basic.distributions import Gaussian, NegativeBinomialIntegerRVariantDuration
from pyhsmm.util.text import progprint_xrange

#############################
#  generate synthetic data  #
#############################

states_in_hsmm = 5
components_per_GMM = 3

component_hyperparameters = dict(mu_0=np.zeros(2), sigma_0=np.eye(2), kappa_0=0.025, nu_0=3)

GMMs = [
    MixtureDistribution(alpha_0=4., components=[
def ensure_controller(self, connect_only=False):
    """Make sure a controller is available, else start a local one.
    """
    if self._client:
        return self._client

    if self.profile is None:
        self._select_profile()
    if self.profile is None:
        return None
    print "parallelflow: using IPython profile %r" % self.profile

    try:
        self._client = Client(profile=self.profile)
        print "parallelflow: connected to controller"
        return self._client
    except error.TimeoutError:
        print "parallelflow: timeout when connecting to controller"
        if connect_only:
            start_ctrl = False
        elif qt_available:
            res = QtGui.QMessageBox.question(
                None,
                "Start controller",
                "Unable to connect to the configured IPython "
                "controller. Do you want to start one?",
                QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
            start_ctrl = res == QtGui.QMessageBox.Yes
        else:
            start_ctrl = True
    except IOError:
        print "parallelflow: didn't find a controller to connect to"
        if connect_only:
            start_ctrl = False
        elif qt_available:
            res = QtGui.QMessageBox.question(
                None,
                "Start controller",
                "No controller is configured in this IPython profile. "
                "Do you want to start one?",
                QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
            start_ctrl = res == QtGui.QMessageBox.Yes
        else:
            start_ctrl = True

    if start_ctrl:
        ctrl_pid = os.path.join(locate_profile(self.profile), 'pid', 'ipcontroller.pid')
        if os.path.exists(ctrl_pid):
            os.remove(ctrl_pid)
        print "parallelflow: starting controller"
        proc, code = self.start_process(
            lambda: os.path.exists(ctrl_pid),
            sys.executable,
            '-m', 'IPython.parallel.apps.ipcontrollerapp',
            '--profile=%s' % self.profile)
        if code is not None:
            if qt_available:
                QtGui.QMessageBox.critical(
                    None,
                    "Error",
                    "Controller exited with code %d" % code)
            print ("parallelflow: controller process exited with "
                   "code %d" % code)
            return None
        else:
            self.started_controller = proc
            print "parallelflow: controller started, connecting"
            self._client = Client(profile=self.profile)
            return self._client
    return None
def __init__(self):
    from moi import ctx_default

    self.demo = Client(profile=ctx_default)
    self.demo_lview = self.demo.load_balanced_view()
def __init__(self, **kwargs):
    self.client = Client(**kwargs)
USAGE:
$FWR2D_Driver.py -h

This will print out the available options. Make sure you modify 'run_No' in
this script before submitting a new run; otherwise the result of the last
run may be overwritten.
"""

#The tag of the RUN. Each new run should be assigned a new number.
run_No = '_140GHz_275t'

import time
from IPython.parallel import Client

c = Client(profile='pbs')

#The engines need time to start, so check that all of them are connected
#before taking a direct view of the cluster.
desired_engine_num = 128  #Make sure this number is EXACTLY the same as the engine number you initiated with ipengine

waiting = 0
#Check if the engines are ready; if they are still not ready after 24 hours
#(86400 s), something is probably wrong. Exit and raise an exception.
while(len(c) < desired_engine_num and waiting <= 86400):
    time.sleep(10)
    waiting += 10
    print('Waiting for connecting engines: {0} requested, {1} connected. {2}s passed.'.format(desired_engine_num, len(c), waiting))

if(len(c) != desired_engine_num):
    raise Exception('usable engine number is not the same as the desired engine number! usable:{0}, desired:{1}.\nCheck your cluster status and the desired number set in the Driver script.'.format(len(c), desired_engine_num))
def cluster_view(scheduler, queue, num_jobs, cores_per_job=1, profile=None,
                 start_wait=16, extra_params=None, retries=None, direct=False):
    """Provide a view on an ipython cluster for processing.

    - scheduler: The type of cluster to start (lsf, sge, pbs, torque).
    - queue: The queue to submit jobs to.
    - num_jobs: Number of jobs to start.
    - cores_per_job: The number of cores to use for each job.
    - start_wait: How long to wait for the cluster to start up, in minutes.
      Defaults to 16 minutes. Set longer for slow-starting clusters.
    - retries: Number of retries to allow for failed tasks.
    """
    num_jobs = int(num_jobs)
    cores_per_job = int(cores_per_job)
    start_wait = int(start_wait)

    if extra_params is None:
        extra_params = {}
    max_delay = start_wait * 60
    delay = 5 if extra_params.get("run_local") else 30
    max_tries = 10
    if profile is None:
        has_throwaway = True
        profile = create_throwaway_profile()
    else:
        # ensure we have an .ipython directory to prevent issues
        # creating it during parallel startup
        cmd = [sys.executable, "-E", "-c",
               "from IPython import start_ipython; start_ipython()",
               "profile", "create", "--parallel"] + _get_profile_args(profile)
        subprocess.check_call(cmd)
        has_throwaway = False
    num_tries = 0

    cluster_id = str(uuid.uuid4())
    url_file = get_url_file(profile, cluster_id)

    while 1:
        try:
            if extra_params.get("run_local"):
                _start_local(cores_per_job, profile, cluster_id)
            else:
                _start(scheduler, profile, queue, num_jobs, cores_per_job, cluster_id, extra_params)
            break
        except subprocess.CalledProcessError:
            if num_tries > max_tries:
                raise
            num_tries += 1
            time.sleep(delay)

    try:
        client = None
        slept = 0
        max_up = 0
        up = 0
        while not up == num_jobs:
            up = _nengines_up(url_file)
            if up < max_up:
                print ("Engine(s) that were up have shut down prematurely. "
                       "Aborting cluster startup.")
                _stop(profile, cluster_id)
                sys.exit(1)
            max_up = up
            time.sleep(delay)
            slept += delay
            if slept > max_delay:
                raise IOError("Cluster startup timed out.")
        client = Client(url_file, timeout=60)
        if direct:
            yield _get_direct_view(client, retries)
        else:
            yield _get_balanced_blocked_view(client, retries)
    finally:
        if client:
            _shutdown(client)
        _stop(profile, cluster_id)
        if has_throwaway:
            delete_profile(profile)
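# The yield/finally structure above suggests this generator is wrapped with
# contextlib.contextmanager elsewhere, so typical usage would be a with-block.
# A hedged sketch (scheduler, queue, and the mapped function are placeholders):
#
#     with cluster_view(scheduler="sge", queue="all.q", num_jobs=4) as view:
#         results = view.map(process_sample, samples)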
minimum_radius_mm = 0.08 / 2
maximum_radius_mm = 2.5 / 2
sigma0 = minimum_radius_mm
s = (maximum_radius_mm / minimum_radius_mm) ** (1. / num_scales)
sigmas = []
sigmas.extend([sigma0 * s ** i for i in xrange(0, num_scales)])
print "Sigmas (mm) : ", sigmas
sigmas = [x / D.info.scale for x in sigmas]  # convert from mm to vox
print "Sigmas (vox): ", sigmas

# ##########################################
#  Prepare remote systems (load data, ...) #
# ##########################################
if parallelize:
    from IPython.parallel import Client
    rc = Client(profile="wurzel_cluster")
    #rc = Client(profile_dir="/home/VI/staff/schulz/.ipython/profile_default")
    rc[:].execute("import os; os.chdir(\"%s\")" % os.getcwd())
    lview = rc.load_balanced_view()
    lview.block = True

# ##########################################
#  Execute cmdl remotely or locally        #
# ##########################################
print "Executing..."
if parallelize:
    print "in parallel..."
    sato = lview.map(parallel_run, [(basename, s) for s in sigmas])
    print "done."
else:
    print "Sequential, and finding maxima"
    # print n_jobs, n_executed_jobs
    rc = Client(profile=profile)
    n_clusters = len(rc)
    if n_executed_jobs == 0:
        n_executed_jobs = n_jobs
    elif n_executed_jobs < n_clusters:
        n_jobs = n_executed_jobs

    if n_jobs >= n_clusters:
        dview = rc[:]
    elif n_jobs == -1:
        dview = rc[:]
    elif n_jobs < n_clusters:
        dview = rc[list(np.random.permutation(n_clusters)[:n_executed_jobs])]
    # A = dview.queue_status()
    # print A.keys()
    return dview


if __name__ == "__main__":
    rc = Client(profile='net')
    A = rc.queue_status()
    for ii in range(len(rc)):
        print A[ii]
    # dview = random_rc('net', -1, 10)
    # A = dview.queue_status()
    # print len(dview)
    # for ii in A.keys():
    #     print A[ii]
def parfor(task, task_vec, args=None, client=None, view=None,
           show_scheduling=False, show_progressbar=False):
    """
    Call the function ``task`` for each value in ``task_vec`` using a cluster
    of IPython engines. The function ``task`` should have the signature
    ``task(value, args)`` or ``task(value)`` if ``args=None``.

    The ``client`` and ``view`` are the IPython.parallel client and
    load-balanced view that will be used in the parfor execution. If these
    are ``None``, new instances will be created.

    Parameters
    ----------
    task: a Python function
        The function that is to be called for each value in ``task_vec``.
    task_vec: array / list
        The list or array of values for which the ``task`` function is to be
        evaluated.
    args: list / dictionary
        The optional additional argument to the ``task`` function. For
        example a dictionary with parameter values.
    client: IPython.parallel.Client
        The IPython.parallel Client instance that will be used in the parfor
        execution.
    view: an IPython.parallel.Client view
        The view that is to be used in scheduling the tasks on the IPython
        cluster. Preferably a load-balanced view, which is obtained from the
        IPython.parallel.Client instance client by calling,
        view = client.load_balanced_view().
    show_scheduling: bool {False, True}, default False
        Display a graph showing how the tasks (the evaluation of ``task`` for
        the values in ``task_vec``) were scheduled on the IPython engine
        cluster.
    show_progressbar: bool {False, True}, default False
        Display an HTML-based progress bar during the execution of the parfor
        loop.

    Returns
    -------
    result : list
        The result list contains the value of ``task(value, args)`` for each
        value in ``task_vec``, that is, it should be equivalent to
        ``[task(v, args) for v in task_vec]``.
    """
    submitted = datetime.datetime.now()

    if client is None:
        client = Client()

    # make sure qutip is available at engines
    dview = client[:]
    dview.block = True
    dview.execute("from qutip import *")

    if view is None:
        view = client.load_balanced_view()

    if args is None:
        ar_list = [view.apply_async(task, x) for x in task_vec]
    else:
        ar_list = [view.apply_async(task, x, args) for x in task_vec]

    if show_progressbar:
        n = len(ar_list)
        pbar = HTMLProgressBar(n)
        while True:
            n_finished = sum([ar.progress for ar in ar_list])
            pbar.update(n_finished)
            if view.wait(ar_list, timeout=0.5):
                pbar.update(n)
                break
    else:
        view.wait(ar_list)

    if show_scheduling:
        metadata = [[ar.engine_id,
                     (ar.started - submitted).total_seconds(),
                     (ar.completed - submitted).total_seconds()]
                    for ar in ar_list]
        _visualize_parfor_data(metadata)

    return [ar.get() for ar in ar_list]
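# A minimal usage sketch for parfor (assumes an ipcluster is already running
# under the default profile; the squaring task is purely illustrative):
def _square(x):
    return x ** 2

result = parfor(_square, range(10))
# result == [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]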
        Y.append(y)
    return Y

# set everything up
T, M_r, M_c = du.read_model_data_from_csv(filename, gen_seed=gen_seed)
num_rows = len(T)
num_cols = len(T[0])
col_names = numpy.array([M_c['idx_to_name'][str(col_idx)] for col_idx in range(num_cols)])

engine = LE.LocalEngine(inf_seed)
do_remote = False
if do_remote:
    ## set up parallel
    from IPython.parallel import Client
    c = Client(profile='ssh', sshserver='*****@*****.**')
    dview = c[:]
    dview.execute('import sys')
    dview.apply_sync(lambda: sys.path.append('/usr/local/'))
    # with dview.sync_imports():
    import tabular_predDB.LocalEngine as LE
    dview.push(dict(M_c=M_c, M_r=M_r, T=T))
    async_result = dview.map_async(
        lambda SEED: LE.do_initialize(M_c, M_r, T, 'from_the_prior', SEED),
        range(8))
    initialized_states = async_result.get()
    #
    async_result = dview.map_async(
        lambda (SEED, state_tuple): LE.do_analyze(M_c, T, state_tuple[0], state_tuple[1],
                                                  (), 10, (), (), -1, -1, SEED),
        zip(range(len(initialized_states)), initialized_states))
    chain_tuples = async_result.get()
from IPython.parallel import Client

c = Client(profile='mpi')
view = c[:]
view.activate()  # enable magics

# run the contents of the file on each engine
view.run('psum.py')
view.scatter('a', np.arange(16, dtype='float'))
view['a']
%px totalsum = psum(a)
view['totalsum']
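# psum.py itself is not shown above; a sketch consistent with the classic
# IPython MPI example: each engine sums its local chunk of ``a``, then the
# partial sums are combined with an MPI all-reduce.
from mpi4py import MPI
import numpy as np

def psum(a):
    locsum = np.sum(a)           # sum of this engine's local chunk
    rcvBuf = np.array(0.0, 'd')  # receive buffer for the global sum
    MPI.COMM_WORLD.Allreduce([locsum, MPI.DOUBLE],
                             [rcvBuf, MPI.DOUBLE],
                             op=MPI.SUM)
    return rcvBuf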
def main(out):
    save_fn = os.path.join(Config['data_root'], PICKLE_FN)
    if os.path.exists(save_fn):
        out('Loading saved pickle from:\n%s' % save_fn)
        fd = file(save_fn, 'r')
        results = cPickle.load(fd)
        fd.close()
    else:
        # Set up the parallel engines
        raw_input('Make sure ipcluster has started... ')
        rc = Client()
        dview = rc[:]
        dview.block = True
        dview.execute('import cPickle, os, gc')
        dview.execute('from scanr.config import Config')
        dview.execute('from scanr.meta import get_tetrode_list, get_maze_list')
        dview.execute('from scanr.eeg import get_eeg_data, total_power, Theta, FullBand')
        lview = rc.load_balanced_view()

        @lview.remote(block=False)
        def compute_relative_theta(dataset):
            save_fn = os.path.join(Config['data_root'], 'rat%03d-%02d.pickle' % dataset)
            if os.path.exists(save_fn):
                fd = file(save_fn, 'r')
                res = cPickle.load(fd)
                fd.close()
            else:
                res = {}
                Theta.zero_lag = False
                for tt in get_tetrode_list(*dataset):
                    tt_id = tuple(dataset) + (tt,)
                    rtheta = []
                    for maze in get_maze_list(*dataset):
                        rds = tuple(dataset) + (maze,)
                        X = get_eeg_data(rds, tt)
                        if X is None:
                            continue
                        rtheta.append(
                            total_power(Theta.power(X), fs=Theta.fs) /
                            total_power(FullBand.power(X), fs=FullBand.fs))
                    res[tt_id] = rtheta
                    gc.collect()
                fd = file(save_fn, 'w')
                cPickle.dump(res, fd)
                fd.close()
            return res

        # Send out compute tasks and wait for completion
        out('Sending out tasks to the cluster...')
        async_results = map(compute_relative_theta, list(walk_days()))
        view_wait_with_status(lview, out, timeout=STATUS_PERIOD)

        # Collate results into flattened list of cell info dictionaries
        out('Collating results...')
        results = {}
        for async in async_results:
            results.update(async.get())

        # Save a pickle
        fd = file(save_fn, 'w')
        cPickle.dump(results, fd)
        fd.close()
        out('Saved intermediate pickle to:\n%s' % save_fn)

    # Recreate a fresh tetrodes table
    create_tetrode_table()
    tetrodes_table = get_node('/metadata', 'tetrodes')

    updated = 0
    out('Now updating %s with relative theta power data...' % tetrodes_table._v_pathname)
    for row in tetrodes_table.iterrows():
        tt_id = tuple(int(row[k]) for k in ('rat', 'day', 'tt'))
        tt_str = "rat%03d-%02d-Sc%d" % tt_id
        try:
            rtheta = results[tt_id]
        except KeyError:
            out('Results did not contain %s.' % tt_str, error=True)
            continue
        if len(rtheta) == 0:
            out('No EEG found for tetrode %s' % tt_str)
        else:
            rtheta_score = np.median(rtheta)
            out('Found %d sessions for tetrode %s, median = %.4f' % (
                len(rtheta), tt_str, rtheta_score))
            row['EEG'] = True
            row['relative_theta'] = rtheta_score
            row.update()
            updated += 1
            if updated % 100 == 0:
                out('Flushing updated table data...')
                tetrodes_table.flush()

    flush_file()
    close_file()
def starmap(func, iterable, **kwargs):
    """
    A dynamic load-balancing parallel implementation of itertools.starmap for
    IPython.parallel.

    The reason for its existence is twofold. First, the desire to easily
    submit a 'map' onto inputs already grouped in tuples in IPython.parallel.
    Second, the ability to submit a 'map' onto very large, potentially
    infinite, sequences. This function allows one to do that. It is a
    generator function, so it is iterable. It maintains an internal list of
    returned results that are removed once yielded. The iterable passed as an
    argument need only have a next() method and raise StopIteration when it
    is finished iterating.

    Arguments
    ---------
    *func* - The function to be called (remotely) on each iterable.next()

    *iterable* - An iterable, generator, generator function...etc. Something
        with a .next() that will raise StopIteration when finished

    *profile* - (optional keyword argument. Default = None) The ipython
        parallel cluster profile. This function expects the cluster to
        already be 'up'. Under the default of None, this will start a client
        and load-balanced view under the default profile, if possible. If the
        profile specified is not running, an IOError will be raised. (Ignored
        if the client keyword argument is specified)

    *client* - (optional keyword argument. Default = None) An instance of
        IPython.parallel.Client

    *max_fill* - (optional keyword argument. Default = 50000) The maximum
        number of 'jobs' to submit to the cluster before waiting for earlier
        jobs to finish.

    *wait* - (optional keyword argument. Default = 1) Number of seconds to
        wait when the submission queue is full and no further output may be
        yielded.

    *kwargs* - Additional keyword arguments are treated as keyword arguments
        to func.

    A note on the profile and client keyword arguments: if client is
    specified, the profile kwarg will be ignored.
    """
    profile = kwargs.pop('profile', None)
    rc = kwargs.pop('client', None)
    max_fill = kwargs.pop('max_fill', 50000)
    wait = kwargs.pop('wait', 1)

    if rc is None:
        rc = Client(profile=profile)
    elif not isinstance(rc, Client):
        raise ValueError('client keyword value expected an instance of IPython.parallel.Client')

    lbv = rc.load_balanced_view()
    async_results_list = []  # This will serve as our output queue

    while True:  # Go until StopIteration is raised
        if n_queued_jobs(lbv) < max_fill:
            # If there are fewer than the maximum number of jobs waiting to
            # run, submit the next job, unless we cannot.
            try:
                async_results_list.append(lbv.apply(func, *iterable.next(), **kwargs))
            except StopIteration:
                if len(async_results_list) == 0:
                    raise
        while len(async_results_list) > 0 and async_results_list[0].ready():
            # If there are results ready to be read, pop them off
            yield async_results_list.pop(0).get()
        if n_queued_jobs(lbv) >= max_fill:
            time.sleep(wait)
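# A minimal usage sketch (assumes an ipcluster is running under the default
# profile; the tuple source is purely illustrative):
import operator

for product in starmap(operator.mul, iter([(2, 3), (4, 5), (6, 7)])):
    print(product)  # yields 6, 20, 42 in submission order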
def run_cell(shell, iopub, cell, timeout=300):
    if not hasattr(cell, 'input'):
        return [], False
    shell.execute(cell.input)
    # wait for finish, maximum 5 min by default
    reply = shell.get_msg(timeout=timeout)['content']
    if reply['status'] == 'error':
        failed = True
        print("\nFAILURE:")
        print(cell.input)
        print('-----')
        print("raised:")
        print('\n'.join(reply['traceback']))
    else:
        failed = False

    # Collect the outputs of the cell execution
    outs = []
    while True:
        try:
            msg = iopub.get_msg(timeout=0.2)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'pyin'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue

        content = msg['content']
        out = current.NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            out.stream = content['name']
            out.text = content['data']
        elif msg_type in ('display_data', 'pyout'):
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+xml and plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            if msg_type == 'pyout':
                out.prompt_number = content['execution_count']
        elif msg_type == 'pyerr':
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        else:
            print("unhandled iopub msg: %s" % msg_type)
        outs.append(out)

    # Special handling of ipcluster restarts
    if '!ipcluster stop' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) == 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("@")
            sys.stdout.flush()
            time.sleep(5)
    if '!ipcluster start' in cell.input:
        # wait some time for cluster commands to complete
        for i in range(10):
            try:
                if len(Client()) > 0:
                    break
            except FileNotFoundError:
                pass
            sys.stdout.write("#")
            sys.stdout.flush()
            time.sleep(5)
    return outs, failed
def proc_data(data_folder, h5name, multiproc=False, chunk_size=4,
              filetype='aia', reffile=None, fittype=None, calfile=None,
              picts=False, **kwargs):
    if filetype == 'aia':
        GcmsObj = gcf.AiaFile
        ends = ('CDF', 'AIA', 'cdf', 'aia')

    files = os.listdir(data_folder)
    files = [f for f in files if f.endswith(ends)]
    files = [os.path.join(data_folder, f) for f in files]

    ref = None
    if reffile:
        if reffile.endswith(('txt', 'TXT')):
            ref = gcr.TxtReference(reffile, **kwargs)

    fit = None
    if fittype:
        if fittype.lower() == 'nnls':
            fit = gcfit.Nnls(**kwargs)

    h5 = gcd.GcmsStore(h5name, **kwargs)

    if multiproc:
        try:
            client = Client()
        except:
            error = "ERROR! You do not have an IPython Cluster running.\n\n"
            error += "Start cluster with: ipcluster start -n # &\n"
            error += "Where # == the number of processors.\n\n"
            error += "Stop cluster with: ipcluster stop"
            print(error)
            h5.close()
            return
        dview = client[:]
        dview.block = True
        dview['ref'] = ref
        dview['fit'] = fit
        dview['GcmsObj'] = GcmsObj
        chunk_size = len(dview)

    # Chunk the data so lots of data files aren't opened in memory.
    for chunk in _chunker(files, chunk_size):
        if multiproc:
            datafiles = dview.map_sync(_proc_file, [(i, kwargs) for i in chunk])
        else:
            datafiles = [GcmsObj(f, **kwargs) for f in chunk]
            if ref:
                ref(datafiles)
            if fit:
                fit(datafiles)
        h5.append_gcms(datafiles)

    if calfile:
        cal = gcc.Calibrate(h5, **kwargs)
        cal.curvegen(calfile, picts=picts, **kwargs)
        cal.datagen(picts=picts, **kwargs)

    h5.compress()
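# A hedged usage sketch (folder and file names below are hypothetical):
#
#     proc_data('data/run01', 'run01.h5', multiproc=True,
#               reffile='reference.txt', fittype='nnls', calfile='cal.csv')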
from IPython.parallel import Client

rc = Client(
    '/Users/sven/.starcluster/ipcluster/'
    'SecurityGroup:@sc-smallcluster-us-east-1.json',
    sshkey='/Users/sven/.ssh/starclusterkey.rsa',
    packer='pickle',
)
view = rc[:]
results = view.map(lambda x: x ** 30, range(8))
print(results.get())
def runScan(options):
    import os, sys, pickle
    import numpy as np

    scan_settings = pickle.load(open(os.path.join(user.scans_dir, options.NAME, 'OscFit_ScanSettings.pckl')))

    print '\nLLH scan: Running with the following fit settings'
    for one_key in scan_settings['fit_settings']:
        print '\t', one_key, '\t', scan_settings['fit_settings'][one_key]

    total_jobs = scan_settings['dm31_map'].size
    print '\nLLH scan: Total jobs ', total_jobs

    job_script = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'oscFit_oneJob_mctest.py')

    if len(options.JOBS) == 0:
        job_range = range(0, total_jobs)
    else:
        job_range = np.array(options.JOBS, dtype=int) - 1

    if options.TEST:
        print 'LLH scan: Running first point as a test!'
        print '\tYou will be asked if you wish to continue before moving on'
        os.system(' '.join(['python', job_script, '1', '1', options.NAME]))
        print 'LLH scan: Continue execution? ... '
        raw_input()

    if options.MODE == 'uge_farm':
        #print job_script
        farm_multiplicity = 1
        if len(options.JOBS) == 0:
            total_jobs = np.ceil(total_jobs * 1. / farm_multiplicity)
            job_array = '1-' + "%i" % total_jobs
            qsub_line = ' '.join(['qsub -t', job_array, './oscFit_farmScript.sh',
                                  job_script, "%i" % farm_multiplicity, options.NAME])
            print qsub_line
            os.system(qsub_line)
        else:
            for job_array in options.JOBS:
                qsub_line = ' '.join(['qsub -t', job_array, './oscFit_farmScript.sh',
                                      job_script, "1", options.NAME])
                print qsub_line
                os.system(qsub_line)
    elif options.MODE == 'iparallel':
        from IPython.parallel import Client, interactive
        import iparallel

        rc = Client(profile='sge')
        lview = rc.load_balanced_view()
        result = lview.map_async(doOscFit, [options.NAME] * len(job_range), job_range)
        iparallel.waitOn(result)
    elif options.MODE == 'local':
        import os
        for i in job_range:
            os.system(' '.join(['python', job_script, "%i" % i, '1', options.NAME]))

    print 'Finished!'
def get_lb_view():
    client = Client()
    # note: the view must be requested from the client instance, not the class
    lb_view = client.load_balanced_view()
    return lb_view
def _ipython_map(func, iterable, cfg):
    import IPython
    if IPython.version_info[0] < 4:
        from IPython.parallel import Client
    else:
        from ipyparallel import Client

    rc = Client()
    rc[:].clear()

    ### Make modules for all dependencies on the engines
    for dep in cfg['dependencies']:
        mod_name = os.path.splitext(os.path.basename(dep))[0]
        with open(dep) as f:
            code = f.read()
        code = code.encode('string_escape')
        rc[:].execute(
"""
import imp
import sys

_mod = imp.new_module('{mod_name}')
sys.modules['{mod_name}'] = _mod

exec '''{code}''' in _mod.__dict__

del _mod
""".format(code=code, mod_name=mod_name),
            block=True
        )

    ### Make sure all definitions surrounding the func are present on
    ### the engines (evaluate the code from the file of the func)
    fname = inspect.getfile(func)
    with open(fname) as f:
        code = f.read()

    logger.info("IPython engine IDs: {}".format(rc.ids))

    ## Need to escape all ' and " in order to embed the code into
    ## execute string
    # code = code.replace("\'", "\\\'")
    # code = code.replace("\"", "\\\"")
    code = code.encode('string_escape')

    ## The trick with `exec in {}' is done because we want to avoid
    ## executing `__main__'
    rc[:].execute(
"""
_tmp_dict = dict()
exec '''{code}''' in _tmp_dict
globals().update(_tmp_dict)
del _tmp_dict
""".format(code=code),
        block=True
    )

    # status.wait()
    # res = rc[:].apply(dir)
    # print(res.get())

    wrap = _FuncWrap(func)
    pool = rc.load_balanced_view()

    results = []
    for args in iterable:
        results.append(pool.apply_async(wrap, args))

    for result in results:
        yield result.get()
    SVC__C=np.logspace(-2, 8, 9, base=3),
    SVC__gamma=np.logspace(-9, 3, 9, base=3),
    SVC__kernel=['rbf', 'linear', 'sigmoid'],
    SVC__class_weight=['balanced'],
    RDF__n_estimators=range(10, 100, 10),
    RDF__criterion=['gini', 'entropy'],
    RDF__max_depth=range(1, len(X_train[0]), 1),
    RDF__class_weight=['balanced'],
    ADB__n_estimators=range(50, 500, 50),
    ADB__learning_rate=np.logspace(-2, 8, 9, base=3),
    LRC__Cs=range(0, 10, 1),
    LRC__class_weight=['balanced'])

best_classifiers = {}

client = Client(packer="pickle")
lb_view = client.load_balanced_view()

for classifier in classifiers:
    ## Method for supplying just the parameter grid entries related to the
    ## classifier in the current iteration while excluding the other
    ## classifier parameters.
    # dict comprehension method courtesy of BernBarn at:
    # http://stackoverflow.com/questions/14507591/python-dictionary-comprehension
    param_for_class = {key: value for key, value in param_grid.iteritems()
                       if re.search(key.split("_")[0], 'features ' + classifier)}

    lb_view.abort()
    time.sleep(4)
"""loader.py ~~~~~~~~~~ loader do cluster e funcoes auxiliares """ %load_ext autoreload %autoreload 2 import my_func from IPython.parallel import Client import numpy as np rc = Client() dv = rc[1,2,3,4] rc.ids dv dv.block = True dv.execute('%load_ext autoreload') dv.execute('%autoreload 2') dv.execute('import os') dv.execute('os.chdir("/home/ubuntu/Dropbox/Research_Code/ML/Nielsen/My_Code")') dv.execute('import my_func') dv.execute('reload(my_func)') dv.apply(my_func.network3_nbb.set_GPU,False) dv.execute('os.system("rm my_func.pyc")') dv.execute('os.system("rm network3_nbb.pyc")')
def new_client(self):
    if self._client:
        self._client.close()
    self._client = Client(profile=self.profile)