def setupJoblib(ipp_profile='default'):
    from sklearn.externals.joblib import Parallel, parallel_backend, register_parallel_backend
    import ipyparallel as ipp
    from ipyparallel.joblib import IPythonParallelBackend

    global joblib_rc, joblib_view, joblib_be
    joblib_rc = ipp.Client(profile=ipp_profile)
    joblib_view = joblib_rc.load_balanced_view()
    joblib_be = IPythonParallelBackend(view=joblib_view)
    register_parallel_backend('ipyparallel', lambda: joblib_be, make_default=True)
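A minimal usage sketch, assuming an ipcluster has already been started for the given profile (e.g. ipcluster start --profile=default). Because the backend is registered with make_default=True, a plain joblib Parallel call is dispatched to the cluster engines without any further setup:

import math
from sklearn.externals.joblib import Parallel, delayed

setupJoblib(ipp_profile='default')

# Runs on the ipyparallel engines, not local worker processes,
# since 'ipyparallel' was registered as the default backend.
results = Parallel(n_jobs=4)(delayed(math.sqrt)(i) for i in range(16))
print(results)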
def setupJoblib(self, ipp_profile='default', cluster_id=None):
    """
    Set the joblib backend to ipyparallel, attached to a running ipcluster.

    Arguments
    ---------
    ipp_profile : string
        Name of the ipcluster profile of the running ipcluster to attach to.
    cluster_id : string, optional
        Cluster id passed through to the ipyparallel Client, for profiles
        that run more than one cluster.
    """
    import ipyparallel as ipp
    from ipyparallel.joblib import IPythonParallelBackend
    # Needed for the registration call below; on scikit-learn >= 0.23
    # import this from joblib directly instead.
    from sklearn.externals.joblib import register_parallel_backend

    global joblib_rc, joblib_view, joblib_be
    joblib_rc = ipp.Client(profile=ipp_profile, cluster_id=cluster_id)
    joblib_view = joblib_rc.load_balanced_view()
    joblib_be = IPythonParallelBackend(view=joblib_view)
    register_parallel_backend('ipyparallel', lambda: joblib_be, make_default=True)
    self.backend = 'ipyparallel'
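A hedged sketch of how this method might be used: the owning object (obj below) and the cluster id are assumptions, not taken from the original code. The parallel_backend context manager routes an estimator's internal joblib parallelism through the cluster:

from sklearn.externals.joblib import parallel_backend
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# obj is whatever class instance this method lives on (hypothetical).
obj.setupJoblib(ipp_profile='default', cluster_id='myid')

X_train, y_train = make_classification(n_samples=500, random_state=0)
clf = RandomForestClassifier(n_estimators=200, n_jobs=-1)
# The tree fits are farmed out to the ipyparallel engines.
with parallel_backend('ipyparallel'):
    clf.fit(X_train, y_train)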
# Imports needed by this excerpt (args and FILE_DIR are assumed to be
# defined earlier: an argparse namespace and the script's directory).
import os
import logging
import numpy as np
from multiprocessing import cpu_count
from ipyparallel import Client
from ipyparallel.joblib import IPythonParallelBackend
from sklearn.externals.joblib import register_parallel_backend
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

profile = args.profile
logging.basicConfig(filename=os.path.join(FILE_DIR, profile + '.log'),
                    filemode='w',
                    level=logging.DEBUG)
logging.info("number of CPUs found: {0}".format(cpu_count()))
logging.info("args.profile: {0}".format(profile))

# Prepare the engines. The map call makes sure that each engine is running
# in the right working directory to access the custom function(s).
c = Client(profile=profile)
c[:].map(os.chdir, [FILE_DIR] * len(c))
logging.info("c.ids: {0}".format(str(c.ids)))
bview = c.load_balanced_view()
register_parallel_backend('ipyparallel',
                          lambda: IPythonParallelBackend(view=bview))

# Get data and prepare it for the custom function. A proper cross-validation
# scheme would be better here, but that is outside the scope of this tutorial.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target,
                                                    test_size=0.3)

# some parameters to test in parallel
param_space = {
    'C': np.logspace(-6, 6, 20),
    'gamma': np.logspace(-6, 1, 20)
}
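The excerpt stops just before the search itself. A sketch of the likely continuation follows; the SVC estimator and GridSearchCV call are assumptions, suggested by the C and gamma entries in param_space rather than taken from the original script:

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.externals.joblib import parallel_backend

svc = SVC()
# One joblib job per engine; the registered backend does the dispatching.
search = GridSearchCV(svc, param_space, cv=3, n_jobs=len(c))
with parallel_backend('ipyparallel'):
    search.fit(X_train, y_train)
logging.info("best parameters: {0}".format(search.best_params_))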
# use ipython parallel for optimization
if options.optimize:
    print('--- setting up optimization cluster')
    from sklearn.externals.joblib import Parallel, parallel_backend, register_parallel_backend
    import ipyparallel as ipp
    from ipyparallel import Client
    from ipyparallel.joblib import IPythonParallelBackend

    global joblib_rc, joblib_view, joblib_be
    joblib_rc = ipp.Client(profile=options.cluster)
    targets = None
    if options.cluster_nodes is not None:
        targets = [int(x) for x in options.cluster_nodes.split(",") if x != ""]
    joblib_view = joblib_rc.load_balanced_view(targets=targets)
    njobs = len(joblib_view)
    joblib_be = IPythonParallelBackend(view=joblib_view)
    register_parallel_backend('ipyparallel', lambda: joblib_be, make_default=True)
    print('will run %d jobs on %s (targets %s)' % (njobs, options.cluster, targets))
    print('\n')

# get features and target
X = df[features]  # .values
y = df['label']   # .values
w = df['wgt']     # .values

# instantiate classifier
from xgboost import XGBClassifier
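A hedged sketch of how the classifier might then be tuned on the cluster, assuming options.optimize was set so njobs is defined. The RandomizedSearchCV choice and the parameter grid are illustrative assumptions, not taken from the original script:

from sklearn.model_selection import RandomizedSearchCV
from sklearn.externals.joblib import parallel_backend

clf = XGBClassifier()
param_dist = {'max_depth': [3, 4, 5, 6],
              'learning_rate': [0.01, 0.05, 0.1],
              'n_estimators': [100, 300, 500]}
search = RandomizedSearchCV(clf, param_dist, n_iter=10, cv=3, n_jobs=njobs)
# sample_weight is forwarded to XGBClassifier.fit for each candidate.
with parallel_backend('ipyparallel'):
    search.fit(X, y, sample_weight=w)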