def dask_cluster():
    cluster = LocalCluster(n_workers=2, threads_per_worker=2)
    yield cluster
    cluster.close()
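# A hedged usage sketch: assuming the generator above is registered as a
# pytest fixture (e.g. decorated with @pytest.fixture), a test can consume it
# like this. test_cluster_submits_work is illustrative, not part of the original.
from dask.distributed import Client


def test_cluster_submits_work(dask_cluster):
    with Client(dask_cluster) as client:
        assert client.submit(lambda x: x + 1, 1).result() == 2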
def compare(X):
    if 'client' not in X:
        # start Dask if required
        c = LocalCluster(n_workers=X['workers'].value)
        X['client'] = Client(c)

    if X['Mtype'].value == 'Magnetisations':
        Mswitch = 0
    else:
        Mswitch = 1
    X['Mswitch'] = Mswitch

    # Create variables
    M = X['M']
    DM = X['DM']
    X['Hc'] = 0.5 * (X['H'] - X['Hr'])
    X['Hb'] = 0.5 * (X['H'] + X['Hr'])
    X['Mnorm'] = M / np.max(M)
    X['DMnorm'] = DM / np.max(DM)

    #X['Xlsq'] = np.column_stack((np.ones((X['Hc'].size,1)),X['Hc'],X['Hb'],X['Hc']**2,X['Hb']**2,X['Hc']*X['Hb'],X['Hc']**3,X['Hb']**3,X['Hc']**2*X['Hb'],X['Hc']*X['Hb']**2))
    X['Xlsq'] = np.column_stack(
        (np.ones((X['Hc'].size, 1)), X['H'], X['Hr'], X['H']**2, X['Hr']**2,
         X['H'] * X['Hr'], X['H']**3, X['Hr']**3, X['H']**2 * X['Hr'],
         X['H'] * X['Hr']**2))

    idx = np.argwhere(in_window(X, X['Hc'], X['Hb']) == True)
    X['Hc0'] = X['Hc'][idx]
    X['Hb0'] = X['Hb'][idx]

    # scatter variables
    D = {}
    D['Xlsq'] = X['Xlsq']
    D['M'] = X['Mnorm']
    D['DM'] = X['DMnorm']
    D['Hc'] = X['Hc']
    D['Hb'] = X['Hb']
    D['dH'] = X['dH']
    D['Hc0'] = X['Hc0']
    D['Hb0'] = X['Hb0']
    X['Ds'] = X['client'].scatter(D, broadcast=True)

    Ntot = np.size(X['Hc0'])
    np.random.seed(999)
    Didx = np.sort(np.random.choice(Ntot, X['Ndown'].value, replace=False))  # downsampled indices

    X = variforc_array(X)  # get smoothing parameters

    jobs = []
    for i in range(len(X['Sp_i'])):
        job = X['client'].submit(process_split, X['Ds'], X['Sp_i'][i], Didx, Mswitch)
        jobs.append(job)
    results = X['client'].gather(jobs)

    L = results[0]
    for i in range(len(results) - 1):
        L = np.concatenate((L, results[i + 1]))
    X['L'] = L

    # Make results plots
    i0 = np.argmax(L[:, 2])
    if Mswitch < 0.5:
        BF = regress_split(X['Xlsq'], X['Mnorm'], X['Hc'], X['Hb'], X['dH'],
                           X['Hc'], X['Hb'], X['Sp'][i0, 0], X['Sp'][i0, 1],
                           X['Sp'][i0, 4], X['Sp'][i0, 2], X['Sp'][i0, 3],
                           X['Sp'][i0, 4])
    else:
        BF = regress_split(X['Xlsq'], X['DMnorm'], X['Hc'], X['Hb'], X['dH'],
                           X['Hc'], X['Hb'], X['Sp'][i0, 0], X['Sp'][i0, 1],
                           X['Sp'][i0, 4], X['Sp'][i0, 2], X['Sp'][i0, 3],
                           X['Sp'][i0, 4])
    BF[np.isinf(BF)] = 1E200
    X['BF'] = BF
    X['Pr'] = np.exp(BF - logsumexp(BF, axis=1)[:, np.newaxis])

    # Lpt provides labels to points for selected model order
    Lpt = np.argmax(BF - [np.log(3), 0, np.log(3), np.log(3)], axis=1)
    Lpt[np.max(X['BF'], axis=1) < 1] = 0

    X = plot_model_selection(X, Lpt[idx])

    return X
def combine_probes_memory_efficient(probes_summary_dir, util_dir, n_workers):
    cluster = LocalCluster(n_workers=n_workers, threads_per_worker=1, memory_limit="0")
    client = Client(cluster)
    probes_filenames = glob.glob('{}/*_probes.csv'.format(probes_summary_dir))
    blast_lineage = pd.read_csv('{}/blast_lineage.tab'.format(util_dir), sep='\t')
    taxonomic_levels = ['phylum', 'class', 'order', 'family', 'genus', 'species']
    blast_lineage_slim = blast_lineage.loc[:, ['molecule_id'] + taxonomic_levels]
    index_list = np.arange(0, len(probes_filenames), 200)
    index_list = np.append(index_list, len(probes_filenames))
    extended_taxonomic_levels = [
        'superkingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species', 'molecule_id'
    ]
    probes_summary_full = []
    probes_properties_full = []
    for i in range(len(index_list) - 1):
        print('Summarizing probes in group {} out of {} groups...'.format(
            i + 1, len(index_list) - 1))
        probes = dd.read_csv(probes_filenames[index_list[i]:index_list[i + 1]])
        probes['molecule_id'] = probes.source.apply(get_molecule_id, meta=('str'))
        probes = probes.merge(blast_lineage_slim, on='molecule_id', how='left')
        probes['superkingdom'] = 2
        probes_taxa = probes.loc[:, ['seq'] + extended_taxonomic_levels]
        probes_summary = probes_taxa.groupby('seq').apply(
            calculate_source,
            meta=[('superkingdom', 'str'), ('phylum', 'str'), ('class', 'str'),
                  ('order', 'str'), ('family', 'str'), ('genus', 'str'),
                  ('species', 'str'), ('molecule_id', 'str')])
        probes_properties = probes.loc[:, [
            'seq', 'length', 'Tm', 'GC', 'N', 'self_any_th', 'self_end_th',
            'hair-pin', 'quality'
        ]]
        probes_summary = probes_summary.compute()
        probes_summary = probes_summary.reset_index()
        probes_properties = probes_properties.drop_duplicates().compute()
        probes_properties_full.append(probes_properties)
        probes_summary_full.append(probes_summary)
    probes_summary_full = pd.concat(probes_summary_full).drop_duplicates()
    probes_properties_full = pd.concat(probes_properties_full).drop_duplicates()
    probes_taxa_full_dd = dd.from_pandas(probes_summary_full, npartitions=1000)
    probes_summary_consolidate = probes_taxa_full_dd.groupby('seq').apply(
        consolidate_source,
        meta=[('superkingdom', 'str'), ('phylum', 'str'), ('class', 'str'),
              ('order', 'str'), ('family', 'int'), ('genus', 'str'),
              ('species', 'str'), ('molecule_id', 'str'),
              ('max_design_level_numeric', 'int'), ('max_design_level', 'str'),
              ('max_design_target', 'str')])
    probes_summary_compute = probes_summary_consolidate.reset_index()
    probes_summary_compute = probes_summary_compute.compute()
    probes_summary_compute = probes_summary_compute.merge(
        probes_properties_full, on='seq', how='left', copy=False)
    client.close()
    cluster.close()
    probe_summary_filename = '{}/probes_summary.h5'.format(probes_summary_dir)
    probes_summary_compute['max_design_target'] = probes_summary_compute.max_design_target.astype(str)
    taxonomic_levels = [
        'superkingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species', 'molecule_id'
    ]
    for i in range(8):
        probes_summary_working_design_level = probes_summary_compute.loc[
            probes_summary_compute.max_design_level_numeric.values >= i, :]
        probes_summary_working_design_level.loc[:, 'design_level'] = taxonomic_levels[i]
        probes_summary_working_design_level.loc[:, 'design_target'] = \
            probes_summary_working_design_level.loc[:, taxonomic_levels[i]]
        probes_summary_working_design_level.groupby(
            ['design_level', 'design_target']).apply(write_to_hdf, probe_summary_filename)
    return
import dask as da
from dask.distributed import LocalCluster, Client
from datetime import date
import glob
import numpy as np
import time
import xarray as xr

%pylab inline

local_dir = "/g/data/e14/cp3790/dask-workers"  # Replace this with your local directory
cluster = LocalCluster(processes=False, local_dir=local_dir)
client = Client(cluster)


def reshape_data(da):
    da_groupby = list(da.groupby('time.dayofyear'))
    dayofyear = []
    da_dayofyear = []
    for item in list(da_groupby):
        dayofyear.append(item[0])
        da_tmp = item[1]
        da_tmp['time'] = da_tmp['time.year']
        da_tmp = da_tmp.rename({'time': 'year'})
        da_tmp = da_tmp.assign_coords(dayofyear=item[0])
        da_dayofyear.append(da_tmp)
    da_reshaped = xr.concat(da_dayofyear, dim='dayofyear')
    return da_reshaped


files = sorted(glob.glob('/g/data/e14/cp3790/Charuni/ERA5-new/era5_dailytmax_*.nc'))
obs_aus = (xr.open_mfdataset(files, combine='nested', concat_dim='time',
                             chunks={'latitude': 10})
           .sel(time=slice('1983', '2012'), longitude=slice(113, 154),
                latitude=slice(-10, -44)))
def client():
    cluster = LocalCluster(n_workers=2)
    client = Client(cluster)
    yield client
    client.close()
    cluster.close()
for filename in os.listdir(folder):
    #print(filename)
    infilename = os.path.join(folder, filename)
    print(infilename)
    x = glob.glob(infilename + "/*/")
    list_stations_meteo = []
    for path in x:
        print(path)
        os.chdir(infilename + path.split(infilename)[1])
        var = path.split('/')[-2]
        #client = Client(n_workers=int(multiprocessing.cpu_count()))
        #client = Client(n_workers=3)
        cluster = LocalCluster(n_workers=3, processes=True, threads_per_worker=3)
        client = Client(cluster)
        print(os.getcwd())
        ds = xr.open_mfdataset(
            glob.glob(os.getcwd() + '/*' + var + '*.grib2'),
            concat_dim='valid_time',
            engine='cfgrib',
            combine='nested',
            parallel=True,
            chunks={"x": -1, "y": -1},
            coords='minimal',
            compat='override')  # do I need to chunk here?
# Print out the EntitySet
print(es_train)
print(es_test)

#%%
logging.debug("Writing TRAIN entity set".format())
es_train.to_pickle(os.path.join(PATH_OFFLINE, "entity set TRAIN.pck"))
logging.debug("Done writing TRAIN entity set".format())
logging.debug("Writing TEST entity set".format())
es_test.to_pickle(os.path.join(PATH_OFFLINE, "entity set TEST.pck"))
logging.debug("Done writing TEST entity set".format())

#%%
n_workers = 12
n_workers = 6
cluster = LocalCluster(n_workers=n_workers, silence_logs=False)
dir(cluster)
print(cluster)

#%% Feature generation
# All features to depth 2
def gen_features_all():
    # Default primitives from featuretools
    default_agg_primitives = [
        "sum", "std", "max", "skew", "min", "mean", "count", "percent_true",
        "num_unique", "mode"
    ]
    default_trans_primitives = [
        "day", "year", "month", "weekday", "haversine", "numwords",
        "characters"
ncfiles = glob.glob(path + '/**/grid*.nc', recursive=True)
ncfiles.sort()
d = xr.open_dataset(ncfiles[0])
relCOMS = d.RELCOM
ind_receptor = d.ind_receptor
d.close()

if e_time is None:
    e_time = pd.to_datetime(ncfiles[-1][-17:-3]).strftime('%Y-%m-%d')
if s_time is None:
    s_time = pd.to_datetime(ncfiles[0][-17:-3]).strftime('%Y-%m-%d')

print(create_client)
if create_client == True:
    cluster = LocalCluster(n_workers=32, threads_per_worker=1, memory_limit='16GB')
    client = Client(cluster)
    print(cluster)

date_slice = slice(s_time, e_time)

if ind_receptor == 1:
    f_name = 'Conc'
elif ind_receptor == 3:
    f_name = 'WetDep'
elif ind_receptor == 4:
    f_name = 'DryDep'
else:
    f_name = 'Unknown'

dir_p = outpath + '/' + f_name + '_mean_{}_{}'.format(s_time, e_time)
logging.info("Starting Analyzer: {}".format(sid)) if sid not in self._analyzers: raise RuntimeError("Analyzer not found") else: self._analyzers[sid].start() def on_stop(self, sid): logging.info("Stopping Analyzer: {}".format(sid)) if sid not in self._analyzers: raise RuntimeError("Analyzer not found") else: self._analyzers[sid].stop() if __name__ == "__main__": cluster = LocalCluster(n_workers=0) # Add GPU workers # TODO: Get the number of GPU from configuration file cluster.start_worker(name="GPU_WORKER-1", resources={"GPU": 1}) with cluster, Client(cluster.scheduler_address) as client: # Initialize GPU workers results = client.run(gpu_worker.init_worker, ".") assert all([v == "OK" for _, v in results.items() ]), "Failed to initialize GPU workers" # Start analyzer manager io_loop = asyncio.get_event_loop() manager = AnalyzerManager(cluster, io_loop, ["nats://localhost:4222"]) io_loop.run_forever()
        },
    )
    return {"wf": wf, "acc": acc, "mol": mol, "mf": mf,
            "descriptors": descriptors, "descriptors_tbdm": descriptors_tbdm}


if __name__ == "__main__":
    import pyqmc
    import pyqmc.dasktools
    from pyqmc.dasktools import line_minimization, cvmc_optimize
    from dask.distributed import Client, LocalCluster

    r = 1.1
    ncore = 2
    sys = setuph2(r)
    cluster = LocalCluster(n_workers=ncore, threads_per_worker=1)
    client = Client(cluster)

    # Set up calculation
    nconf = 800
    configs = pyqmc.initial_guess(sys["mol"], nconf)
    wf, df = line_minimization(
        sys["wf"],
        configs,
        pyqmc.gradient_generator(sys["mol"], sys["wf"]),
        client=client,
        maxiters=5,
    )
    forcing = {}
    obj = {}
def set_client(args, stack, log):
    from omegaconf import open_dict

    # number of threads per worker
    if args.nthreads is None:
        if args.host_address is not None:
            raise ValueError(
                "You have to specify nthreads when using a distributed scheduler"
            )
        import multiprocessing
        nthreads = multiprocessing.cpu_count()
        with open_dict(args):
            args.nthreads = nthreads
    else:
        nthreads = int(args.nthreads)

    # configure memory limit
    if args.mem_limit is None:
        if args.host_address is not None:
            raise ValueError(
                "You have to specify mem-limit when using a distributed scheduler"
            )
        import psutil
        mem_limit = int(psutil.virtual_memory()[1] / 1e9)  # all available memory by default
        with open_dict(args):
            args.mem_limit = mem_limit
    else:
        mem_limit = int(args.mem_limit)

    if args.nworkers is None:
        raise ValueError("You have to specify the number of workers")
    else:
        nworkers = args.nworkers

    if args.nthreads_per_worker is None:
        nthreads_per_worker = 1
        with open_dict(args):
            args.nthreads_per_worker = nthreads_per_worker
    else:
        nthreads_per_worker = int(args.nthreads_per_worker)

    # the number of chunks being read in simultaneously is equal to
    # the number of dask threads
    nthreads_dask = nworkers * nthreads_per_worker

    if args.nvthreads is None:
        if args.host_address is not None:
            nvthreads = nthreads // nthreads_per_worker
        else:
            nvthreads = nthreads // nthreads_dask
        with open_dict(args):
            args.nvthreads = nvthreads

    os.environ["OMP_NUM_THREADS"] = str(args.nvthreads)
    os.environ["OPENBLAS_NUM_THREADS"] = str(args.nvthreads)
    os.environ["MKL_NUM_THREADS"] = str(args.nvthreads)
    os.environ["VECLIB_MAXIMUM_THREADS"] = str(args.nvthreads)
    os.environ["NUMBA_NUM_THREADS"] = str(args.nvthreads)
    # TODO - does this result in thread over-subscription?
    os.environ["NUMEXPR_NUM_THREADS"] = str(args.nvthreads)

    # set up client
    if args.host_address is not None:
        from distributed import Client
        print("Initialising distributed client.", file=log)
        client = stack.enter_context(Client(args.host_address))
    else:
        if nthreads_dask * args.nvthreads > args.nthreads:
            print(
                "Warning - you are attempting to use more threads than available. "
                "This may lead to suboptimal performance.",
                file=log)
        from dask.distributed import Client, LocalCluster
        print("Initialising client with LocalCluster.", file=log)
        cluster = LocalCluster(processes=True,
                               n_workers=nworkers,
                               threads_per_worker=nthreads_per_worker,
                               memory_limit=str(mem_limit / nworkers) + 'GB')
        cluster = stack.enter_context(cluster)
        client = stack.enter_context(Client(cluster))

    from pfb.scheduling import install_plugin
    client.run_on_scheduler(install_plugin)

    # return updated args
    return args
import time
from dask.distributed import Client, LocalCluster
import sys

print(sys.argv)
worker = int(sys.argv[1])

if __name__ == '__main__':
    cluster = LocalCluster(
        n_workers=worker,
        scheduler_port=8786,
        host='0.0.0.0',
        dashboard_address='0.0.0.0:8787',
    )
    while True:
        time.sleep(600)
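# A hedged sketch (not part of the original script): because the scheduler
# port above is pinned to 8786, a separate process can attach to the
# long-running cluster like this; the hostname is illustrative.
from dask.distributed import Client

client = Client("tcp://127.0.0.1:8786")
print(client.submit(sum, [1, 2, 3]).result())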
def execute(self, pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.param_invariant(
        isinstance(pipeline_context.executor, DaskExecutor),
        'pipeline_context',
        'Expected executor to be DaskExecutor got {}'.format(pipeline_context.executor),
    )

    # Checks to ensure storage is compatible with Dask configuration
    storage = pipeline_context.run_config.get('storage')
    check.invariant(storage.keys(), 'Must specify storage to use Dask execution')

    check.invariant(
        pipeline_context.instance.is_persistent,
        'Dask execution requires a persistent DagsterInstance',
    )

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        'Cannot use in-memory storage with Dask, use filesystem, S3, or GCS',
    )

    step_levels = execution_plan.execution_step_levels()

    pipeline_name = pipeline_context.pipeline_def.name

    instance = pipeline_context.instance

    cluster_type = self.cluster_type
    if cluster_type == 'local':
        from dask.distributed import LocalCluster
        cluster = LocalCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'yarn':
        from dask_yarn import YarnCluster
        cluster = YarnCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'ssh':
        from dask.distributed import SSHCluster
        cluster = SSHCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'pbs':
        from dask_jobqueue import PBSCluster
        cluster = PBSCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'moab':
        from dask_jobqueue import MoabCluster
        cluster = MoabCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'sge':
        from dask_jobqueue import SGECluster
        cluster = SGECluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'lsf':
        from dask_jobqueue import LSFCluster
        cluster = LSFCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'slurm':
        from dask_jobqueue import SLURMCluster
        cluster = SLURMCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'oar':
        from dask_jobqueue import OARCluster
        cluster = OARCluster(**self.build_dict(pipeline_name))
    elif cluster_type == 'kube':
        from dask_kubernetes import KubeCluster
        cluster = KubeCluster(**self.build_dict(pipeline_name))
    else:
        raise ValueError(
            f"Must be providing one of the following ('local', 'yarn', 'ssh', 'pbs', 'moab', "
            f"'sge', 'lsf', 'slurm', 'oar', 'kube') not {cluster_type}"
        )

    with dask.distributed.Client(cluster) as client:
        execution_futures = []
        execution_futures_dict = {}

        for step_level in step_levels:
            for step in step_level:
                # We ensure correctness in sequencing by letting Dask schedule futures and
                # awaiting dependencies within each step.
                dependencies = []
                for step_input in step.step_inputs:
                    for key in step_input.dependency_keys:
                        dependencies.append(execution_futures_dict[key])

                run_config = dict(pipeline_context.run_config, execution={'in_process': {}})
                recon_repo = pipeline_context.pipeline.get_reconstructable_repository()
                variables = {
                    'executionParams': {
                        'selector': {
                            'pipelineName': pipeline_name,
                            'repositoryName': recon_repo.get_definition().name,
                            'repositoryLocationName': '<<in_process>>',
                        },
                        'runConfigData': run_config,
                        'mode': pipeline_context.mode_def.name,
                        'executionMetadata': {'runId': pipeline_context.pipeline_run.run_id},
                        'stepKeys': [step.key],
                    }
                }

                dask_task_name = '%s.%s' % (pipeline_name, step.key)

                workspace = create_in_process_ephemeral_workspace(
                    pointer=pipeline_context.pipeline.get_reconstructable_repository().pointer
                )

                future = client.submit(
                    query_on_dask_worker,
                    workspace,
                    variables,
                    dependencies,
                    instance.get_ref(),
                    key=dask_task_name,
                    resources=get_dask_resource_requirements(step.tags),
                )

                execution_futures.append(future)
                execution_futures_dict[step.key] = future

        # This tells Dask to await the step executions and retrieve their results to the master
        for future in dask.distributed.as_completed(execution_futures):
            for step_event in future.result():
                check.inst(step_event, DagsterEvent)
                yield step_event
'''Dask interface demo: Use scikit-learn regressor interface with CPU histogram tree method.'''

from dask.distributed import Client
from dask.distributed import LocalCluster
from dask import array as da
import xgboost

if __name__ == '__main__':
    cluster = LocalCluster(n_workers=2, silence_logs=False)  # or use any other clusters
    client = Client(cluster)

    n = 100
    m = 10000
    partition_size = 100
    X = da.random.random((m, n), partition_size)
    y = da.random.random(m, partition_size)

    regressor = xgboost.dask.DaskXGBRegressor(verbosity=2, n_estimators=2)
    regressor.set_params(tree_method='hist')
    regressor.client = client

    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)

    bst = regressor.get_booster()
    history = regressor.evals_result()

    print('Evaluation history:', history)
    assert isinstance(prediction, da.Array)
def main(prefetch_storage, block_size, n_files, reps, types, nworkers):
    types = list(types)
    header = ["vhs-bucket/hydi-header.trk"]

    fs = S3FileSystem()
    files = fs.glob("hydi-tractography/hydi_tracks.*.trk")[:n_files]
    print(files)

    results_path = "../results/"

    bfile = op.join(
        results_path,
        f"real_{n_files}f_{reps}r_{block_size}b_{nworkers}w-recobundles.out",
    )

    helpers.setup_bench(bfile)
    cluster = LocalCluster(n_workers=nworkers, resources={"CPU": 3})
    client = Client(cluster)

    for r in range(reps):
        # random.shuffle(types)
        for t in types:
            print("***", t, "***")
            helpers.drop_caches()
            print(client)

            data = {}
            results = []

            if t == "s3fs":
                print(t)
                for i in range(nworkers):
                    f_per_w = n_files // nworkers
                    print(files[i * f_per_w:(i + 1) * f_per_w])
                    seg = client.submit(
                        segmentation_s3fs,
                        files[i * f_per_w:(i + 1) * f_per_w],
                        False,
                        block_size,
                        **data,
                        bfile=bfile,
                    )
                    results.append(seg)
            else:
                print(t)
                for i in range(nworkers):
                    f_per_w = n_files // nworkers
                    print(files[i * f_per_w:(i + 1) * f_per_w])
                    seg = client.submit(
                        segmentation_prefetch,
                        header + files[i * f_per_w:(i + 1) * f_per_w],
                        False,
                        block_size,
                        prefetch_storage,
                        **data,
                        bfile=bfile,
                    )
                    results.append(seg)

            print(client.gather(results))
            system("pkill -f joblib")
#importlib.reload(ELMlib)

# + endofcell="--"
port_dict = {
    'mm': 8789,
    # 'hmetzler': 8790,  # change at will
    'hmetzler': 8888,  # change at will
    'cs': 8791  # change at will
}

my_user_name = getuser()
print(my_user_name)

my_port = port_dict[my_user_name]
print(my_port)

my_cluster = LocalCluster(dashboard_address='localhost:' + str(my_port))
# -

Client(my_cluster)
# --

ELMDataDir = "/home/hmetzler/SOIL-R/Manuscripts/Berkeley/2019/Data/"
runID = "14C_transient_holger_fire.2x2_small"
fn = runID + ".nc"
ds = xr.open_dataset(Path(ELMDataDir).joinpath(runID + ".nc"))
ds

ds_depth = xr.open_dataset(Path(ELMDataDir).joinpath('DZSOI.nc'))
parameter_set = ELMlib.load_parameter_set(nstep=1, ds_depth=ds_depth)
def run_tasks(pl_conf, task_type, task_fn, logging_init_fn):
    # Initialize local dask cluster
    logger.debug('Pipeline configuration: %s', pl_conf)
    cluster = LocalCluster(
        n_workers=pl_conf.n_workers,
        threads_per_worker=1,
        processes=True,
        memory_limit=pl_conf.memory_limit,
        ip='0.0.0.0')
    client = Client(cluster)

    # Split total region + tile indexes to process into separate lists for each worker
    # (by indexes of those combinations)
    tiles = pl_conf.region_tiles
    idx_batches = np.array_split(np.arange(len(tiles)), pl_conf.n_workers)

    # Assign gpus to tasks in round-robin fashion
    def get_gpu(i):
        if pl_conf.gpus is None:
            return None
        return pl_conf.gpus[i % len(pl_conf.gpus)]

    # Generate a single task configuration for each worker
    tasks = [
        pl_conf.get_task_config(
            region_indexes=tiles[idx_batch, 0],
            tile_indexes=tiles[idx_batch, 1],
            gpu=get_gpu(i))
        for i, idx_batch in enumerate(idx_batches)
    ]

    logger.info('Starting %s pipeline for %s tasks (%s workers)',
                task_type, len(tasks), pl_conf.n_workers)
    logger.debug('Task definitions:\n\t%s', '\n\t'.join([str(t) for t in tasks]))

    try:
        # Passing logging initialization operation, if given, to workers now
        # running in separate processes
        if logging_init_fn:
            client.run(logging_init_fn)

        # Disable the "auto_restart" feature of dask workers which is of no use in this context
        for worker in cluster.workers:
            worker.auto_restart = False

        # Pass tasks to each worker to execute in parallel
        res = client.map(task_fn, tasks)
        res = [r.result() for r in res]
        if len(res) != len(tasks):
            raise ValueError(
                'Parallel execution returned {} results but {} were expected'.format(
                    len(res), len(tasks)))
    finally:
        # Note that this often produces a non-critical error due to:
        # https://github.com/dask/distributed/issues/1969
        # but that closing these resources is necessary to avoid GPU oom in post-processing
        client.close()
        cluster.close()

    # Save measurement data to disk
    measure_data = concat(res)
    if measure_data:
        path = exec.record_processor_data(measure_data, pl_conf.output_dir)
        logging.info('%s complete; Measurement data saved to "%s"', task_type, path)
    else:
        logging.info('%s complete', task_type)
LOGGER = logger.get_logger(TEST_NAME)

# Specify some constants
URLPATH1 = "s3://dask-avro-data/application-data/app-1000*.avro"


def filter_func(data):
    return data['payload']['originationCountryCode'] == 'CAN'


for conf in [(1, 36), (4, 9), (12, 3), (36, 1)]:
    n_workers = conf[0]
    threads_per_worker = conf[1]
    test_name = "dsk_filter_pd_dist_{}_{}".format(n_workers, threads_per_worker)
    LOGGER.info('BEGIN: Running test: {}'.format(test_name))

    cluster = LocalCluster(n_workers=n_workers, threads_per_worker=threads_per_worker)
    client = Client(cluster)

    LOGGER.info('START: Creating dask bag with filter')
    bag = dask.bag.read_avro(
        URLPATH1,
        storage_options={
            'config_kwargs': {'max_pool_connections': 500}
        },
        blocksize=None
    )
    bag = bag.filter(filter_func)
    LOGGER.info('FINISH: Dask bag created')

    LOGGER.info('START: Creating dask dataframe')
    df = bag.to_dataframe(meta={'payload': 'object', 'metadata': 'object'})
RESULTS_DESTINATION = RESOURCES_DIR + "/results/python_dask/1/" + RUN_ID

if __name__ == "__main__":
    task_configs = [{
        "location": WAV_FILES_LOCATION,
        "name": file_metadata[0],
        "timestamp": parse(file_metadata[1]),
        "sample_rate": 1500.0,
        "wav_bits": 16,
        "n_samples": 3587,
        "n_channels": 1,
        "results_destination": RESULTS_DESTINATION,
        "calibration_factor": CALIBRATION_FACTOR,
        "segment_duration": SEGMENT_DURATION,
        "window_size": WINDOW_SIZE,
        "window_overlap": WINDOW_OVERLAP,
        "nfft": NFFT
    } for file_metadata in pd.read_csv(METADATA_FILE_PATH).values]

    ncpus = len(os.sched_getaffinity(0))
    cluster = LocalCluster(n_workers=1, threads_per_worker=ncpus, processes=False)
    client = Client(cluster)

    durations = client.map(single_file_handler.process_file, task_configs)
    avg_time = np.average(client.gather(durations))
def setUpClass(cls):
    cls.execution_path = os.path.dirname(os.path.abspath(__file__))
    cluster = LocalCluster(n_workers=1, threads_per_worker=2)
    client = Client(cluster)
    cls.lmp = LammpsLibrary(cores=2, mode='dask', client=client)
    cls.lmp.file(os.path.join(cls.execution_path, "in.simple"))
def main():
    """docstring for main"""
    # Load the images with ASE
    images = Trajectory("cu_training.traj")

    calc = Potentials.load(
        model="cu_training.ml4c",
        params="cu_training.params",
        preprocessor="model.scaler",
    )

    for atoms in images:
        energy = calc.get_potential_energy(atoms)
        print("ML4Chem predicted energy = {}".format(energy))
        print("              DFT energy = {}".format(atoms.get_potential_energy()))


if __name__ == "__main__":
    logging.basicConfig(
        filename="cu_inference.log",
        level=logging.INFO,
        format="%(filename)s:%(lineno)s %(levelname)s:%(message)s",
    )
    cluster = LocalCluster(n_workers=8, threads_per_worker=2)
    # a synchronous client is what main() expects here
    client = Client(cluster)
    main()
@author: donbo

https://docs.dask.org/en/latest/setup/single-distributed.html
http://localhost:8787/status
"""

# from dask.distributed import Client
# client = Client()

import pandas as pd
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

cluster = LocalCluster()
client = Client(cluster)

df = pd.DataFrame({
    'A': np.random.randint(1000, size=100000),
    'B': np.random.randint(1000, size=100000)
})
df

ddf = dd.from_pandas(df, npartitions=4)

client.close()
cluster.close()

# cluster.run_on_scheduler(lambda dask_scheduler=None:
#                          dask_scheduler.close() & sys.exit(0))
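# A hedged, self-contained sketch (not from the original file) of what one
# would typically do with the partitioned frame before closing the client:
# build a lazy aggregation and bring the result back with .compute().
import numpy as np
import pandas as pd
import dask.dataframe as dd

pdf = pd.DataFrame({'A': np.random.randint(1000, size=100000),
                    'B': np.random.randint(1000, size=100000)})
ddf_example = dd.from_pandas(pdf, npartitions=4)
print(ddf_example.groupby('A')['B'].mean().compute().head())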
##from dask.distributed import Client
##
##client = Client('localhost:8786')

from dask.distributed import Client, LocalCluster

cluster = LocalCluster(
    n_workers=4,
    ip='127.0.0.1',
)
client = Client(cluster)
def initialize_client():
    cluster = LocalCluster(n_workers=100, threads_per_worker=1)
    client = Client(cluster)
    return client
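# A hedged usage sketch (assumes initialize_client and dask.distributed are
# importable as above); the squared-sum workload is illustrative only.
client = initialize_client()
futures = client.map(lambda x: x ** 2, range(10))
print(sum(client.gather(futures)))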
class YarnCluster(object): """Start a Dask cluster on YARN. You can define default values for this in Dask's ``yarn.yaml`` configuration file. See http://docs.dask.org/en/latest/configuration.html for more information. Parameters ---------- environment : str, optional Path to an archived Python environment (either ``tar.gz`` or ``zip``). n_workers : int, optional The number of workers to initially start. worker_vcores : int, optional The number of virtual cores to allocate per worker. worker_memory : str, optional The amount of memory to allocate per worker. Accepts a unit suffix (e.g. '2 GiB' or '4096 MiB'). Will be rounded up to the nearest MiB. worker_restarts : int, optional The maximum number of worker restarts to allow before failing the application. Default is unlimited. worker_env : dict, optional A mapping of environment variables to their values. These will be set in the worker containers before starting the dask workers. scheduler_vcores : int, optional The number of virtual cores to allocate per scheduler. scheduler_memory : str, optional The amount of memory to allocate to the scheduler. Accepts a unit suffix (e.g. '2 GiB' or '4096 MiB'). Will be rounded up to the nearest MiB. deploy_mode : {'remote', 'local'}, optional The deploy mode to use. If ``'remote'``, the scheduler will be deployed in a YARN container. If ``'local'``, the scheduler will run locally, which can be nice for debugging. Default is ``'remote'``. name : str, optional The application name. queue : str, optional The queue to deploy to. tags : sequence, optional A set of strings to use as tags for this application. skein_client : skein.Client, optional The ``skein.Client`` to use. If not provided, one will be started. Examples -------- >>> cluster = YarnCluster(environment='my-env.tar.gz', ...) >>> cluster.scale(10) """ def __init__(self, environment=None, n_workers=None, worker_vcores=None, worker_memory=None, worker_restarts=None, worker_env=None, scheduler_vcores=None, scheduler_memory=None, deploy_mode=None, name=None, queue=None, tags=None, skein_client=None): spec = _make_specification(environment=environment, n_workers=n_workers, worker_vcores=worker_vcores, worker_memory=worker_memory, worker_restarts=worker_restarts, worker_env=worker_env, scheduler_vcores=scheduler_vcores, scheduler_memory=scheduler_memory, deploy_mode=deploy_mode, name=name, queue=queue, tags=tags) self._start_cluster(spec, skein_client) @cached_property def dashboard_link(self): """Link to the dask dashboard. None if dashboard isn't running""" if self._dashboard_address is None: return None template = dask.config.get('distributed.dashboard.link') dashboard = urlparse(self._dashboard_address) params = dict(os.environ) params.update({'host': dashboard.hostname, 'port': dashboard.port}) return template.format(**params) @classmethod def from_specification(cls, spec, skein_client=None): """Start a dask cluster from a skein specification. Parameters ---------- spec : skein.ApplicationSpec, dict, or filename The application specification to use. Must define at least one service: ``'dask.worker'``. If no ``'dask.scheduler'`` service is defined, a scheduler will be started locally. skein_client : skein.Client, optional The ``skein.Client`` to use. If not provided, one will be started. 
""" self = super(YarnCluster, cls).__new__(cls) if isinstance(spec, dict): spec = skein.ApplicationSpec.from_dict(spec) elif isinstance(spec, str): spec = skein.ApplicationSpec.from_file(spec) elif not isinstance(spec, skein.ApplicationSpec): raise TypeError("spec must be an ApplicationSpec, dict, or path, " "got %r" % type(spec).__name__) self._start_cluster(spec, skein_client) return self def _start_cluster(self, spec, skein_client=None): """Start the cluster and initialize state""" if 'dask.worker' not in spec.services: raise ValueError("Provided Skein specification must include a " "'dask.worker' service") skein_client = _get_skein_client(skein_client) if 'dask.scheduler' not in spec.services: # deploy_mode == 'local' self._local_cluster = LocalCluster(n_workers=0, ip='0.0.0.0', diagnostics_port=('', 0), scheduler_port=0) scheduler = self._local_cluster.scheduler scheduler_address = scheduler.address try: dashboard_port = scheduler.services['bokeh'].port except KeyError: dashboard_address = None else: dashboard_host = urlparse(scheduler_address).hostname dashboard_address = 'http://%s:%d' % (dashboard_host, dashboard_port) app = skein_client.submit_and_connect(spec) try: app.kv['dask.scheduler'] = scheduler_address.encode() if dashboard_address is not None: app.kv['dask.dashboard'] = dashboard_address.encode() except BaseException: # Failed to connect, kill the application and reraise skein_client.kill_application(app.id) raise else: # deploy_mode == 'remote' app = skein_client.submit_and_connect(spec) try: scheduler_address = app.kv.wait('dask.scheduler').decode() dashboard_address = app.kv.get('dask.dashboard') if dashboard_address is not None: dashboard_address = dashboard_address.decode() except BaseException: # Failed to connect, kill the application and reraise skein_client.kill_application(app.id) raise # Ensure application gets cleaned up self._finalizer = weakref.finalize(self, app.shutdown) self.scheduler_address = scheduler_address self._dashboard_address = dashboard_address self.app_id = app.id self.application_client = app @classmethod def from_current(cls): """Connect to an existing ``YarnCluster`` from inside the cluster. Returns ------- YarnCluster """ self = super(YarnCluster, cls).__new__(cls) app_id = os.environ.get('DASK_APPLICATION_ID', None) app_address = os.environ.get('DASK_APPMASTER_ADDRESS', None) if app_id is not None and app_address is not None: app = skein.ApplicationClient(app_address, app_id) else: app = skein.ApplicationClient.from_current() self._connect_existing(app) return self @classmethod def from_application_id(cls, app_id, skein_client=None): """Connect to an existing ``YarnCluster`` with a given application id. Parameters ---------- app_id : str The existing cluster's application id. skein_client : skein.Client The ``skein.Client`` to use. If not provided, one will be started. 
Returns ------- YarnCluster """ self = super(YarnCluster, cls).__new__(cls) skein_client = _get_skein_client(skein_client) app = skein_client.connect(app_id) self._connect_existing(app) return self def _connect_existing(self, app): spec = app.get_specification() if 'dask.worker' not in spec.services: raise ValueError("%r is not a valid dask cluster" % app.id) scheduler_address = app.kv.wait('dask.scheduler').decode() dashboard_address = app.kv.get('dask.dashboard') if dashboard_address is not None: dashboard_address = dashboard_address.decode() self.app_id = app.id self.application_client = app self.scheduler_address = scheduler_address self._dashboard_address = dashboard_address self._finalizer = None def __repr__(self): return 'YarnCluster<%s>' % self.app_id def _dask_client(self): if hasattr(self, '_dask_client_ref'): client = self._dask_client_ref() if client is not None: return client client = get_client(address=self.scheduler_address) self._dask_client_ref = weakref.ref(client) return client def shutdown(self, status='SUCCEEDED', diagnostics=None): """Shutdown the application. Parameters ---------- status : {'SUCCEEDED', 'FAILED', 'KILLED'}, optional The yarn application exit status. diagnostics : str, optional The application exit message, usually used for diagnosing failures. Can be seen in the YARN Web UI for completed applications under "diagnostics". If not provided, a default will be used. """ if self._finalizer is not None and self._finalizer.peek() is not None: self.application_client.shutdown(status=status, diagnostics=diagnostics) self._finalizer.detach() # don't call shutdown later # Shutdown in local deploy_mode if hasattr(self, '_local_cluster'): self._local_cluster.close() del self._local_cluster def close(self, **kwargs): """Close this cluster. An alias for ``shutdown``. See Also -------- shutdown """ self.shutdown(**kwargs) def __enter__(self): return self def __exit__(self, *args): self.close() def workers(self): """A list of all currently running worker containers.""" return self.application_client.get_containers(services=['dask.worker']) def scale_up(self, n, workers=None): """Ensure there are atleast n dask workers available for this cluster. No-op if ``n`` is less than the current number of workers. Examples -------- >>> cluster.scale_up(20) # ask for twenty workers """ if workers is None: workers = self.workers() if n > len(workers): self.application_client.scale(service='dask.worker', instances=n) def scale_down(self, workers): """Retire the selected workers. Parameters ---------- workers: list List of addresses of workers to close. """ self._dask_client().retire_workers(workers) def _select_workers_to_close(self, n): client = self._dask_client() worker_info = client.scheduler_info()['workers'] # Sort workers by memory used workers = sorted( (v['metrics']['memory'], k) for k, v in worker_info.items()) # Return just the ips return [w[1] for w in workers[:n]] def scale(self, n): """Scale cluster to n workers. Parameters ---------- n : int Target number of workers Examples -------- >>> cluster.scale(10) # scale cluster to ten workers """ workers = self.workers() if n >= len(workers): return self.scale_up(n, workers=workers) else: n_to_delete = len(workers) - n # Before trying to close running workers, check if there are any # pending containers and kill those first. 
pending = [ w for w in workers if w.state in ('waiting', 'requested') ] for c in pending[:n_to_delete]: self.application_client.kill_container(c.id) n_to_delete -= 1 if n_to_delete: to_close = self._select_workers_to_close(n_to_delete) self.scale_down(to_close) def _widget_status(self): client = self._dask_client() workers = client.scheduler_info()['workers'] n_workers = len(workers) cores = sum(w['ncores'] for w in workers.values()) memory = sum(w['memory_limit'] for w in workers.values()) text = """ <div> <style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style> <table style="text-align: right;"> <tr><th>Workers</th> <td>%d</td></tr> <tr><th>Cores</th> <td>%d</td></tr> <tr><th>Memory</th> <td>%s</td></tr> </table> </div> """ % (n_workers, cores, format_bytes(memory)) return text def _widget(self): """ Create IPython widget for display within a notebook """ try: return self._cached_widget except AttributeError: pass from ipywidgets import Layout, VBox, HBox, IntText, Button, HTML client = self._dask_client() layout = Layout(width='150px') title = HTML('<h2>YarnCluster</h2>') status = HTML(self._widget_status(), layout=Layout(min_width='150px')) request = IntText(0, description='Workers', layout=layout) scale = Button(description='Scale', layout=layout) @scale.on_click def scale_cb(b): with log_errors(): self.scale(request.value) elements = [title, HBox([status, request, scale])] if self.dashboard_link is not None: link = HTML('<p><b>Dashboard: </b><a href="%s" target="_blank">%s' '</a></p>\n' % (self.dashboard_link, self.dashboard_link)) elements.append(link) self._cached_widget = box = VBox(elements) def update(): status.value = self._widget_status() pc = PeriodicCallback(update, 500, io_loop=client.loop) pc.start() return box def _ipython_display_(self, **kwargs): try: return self._widget()._ipython_display_(**kwargs) except ImportError: print(self)
        i += 1


if __name__ == "__main__":
    logging.getLogger("tifffile").setLevel(logging.ERROR)
    coloredlogs.install(level="DEBUG",
                        fmt="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%H:%M:%S")
    logger = logging.getLogger(__name__)

    use_local = True
    if use_local:
        logger.info("using local cluster")
        cluster = LocalCluster(n_workers=4, threads_per_worker=4)
        client = Client(cluster)
    else:
        logger.info("using remote cluster")
        client = Client("10.109.20.6:8786")
    logger.info(client)

    src_ds = open_dataset("Y:/ARod/4F/20200317_No5_CamA")
    print(src_ds.inventory)

    logger.info(f"tile by {src_ds.tile_shape}")

    # INPUT (x, y, z) -> TRUE (z, x, y)
    src_ds.remap_tiling_axes({"x": "z", "y": "x", "z": "y"})
    src_ds.flip_tiling_axes(["x", "y"])
    merged.train(
        inputs=inputs,
        targets=targets,
        data=data_handler,
        regularization=regularization,
        convergence=convergence,
        optimizer=optimizer,
        device="cpu",
        batch_size=batch_size,
        lr_scheduler=lr_scheduler,
        lossfxn=losses,
        independent_loss=True,
    )

    for index, model in enumerate(merged.models):
        label = "{}_{}".format(index, model.name())
        Potentials.save(model, label=label)

    dump_ls = merged.models[0].get_latent_space(inputs[0])
    dump(dump_ls, filename="checkme.latent")


if __name__ == "__main__":
    logger()
    cluster = LocalCluster(n_workers=5, threads_per_worker=2, dashboard_address=8798)
    client = Client(cluster)
    # Let's do this
    hybrid()
def execute(self, pipeline_context, execution_plan):
    check.inst_param(pipeline_context, "pipeline_context", SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.param_invariant(
        isinstance(pipeline_context.executor, DaskExecutor),
        "pipeline_context",
        "Expected executor to be DaskExecutor got {}".format(pipeline_context.executor),
    )

    check.invariant(
        pipeline_context.instance.is_persistent,
        "Dask execution requires a persistent DagsterInstance",
    )

    step_levels = execution_plan.get_steps_to_execute_by_level()

    pipeline_name = pipeline_context.pipeline_name

    instance = pipeline_context.instance

    cluster_type = self.cluster_type
    if cluster_type == "existing":
        # address passed directly to Client() below to connect to existing Scheduler
        cluster = self.cluster_configuration["address"]
    elif cluster_type == "local":
        from dask.distributed import LocalCluster
        cluster = LocalCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "yarn":
        from dask_yarn import YarnCluster
        cluster = YarnCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "ssh":
        from dask.distributed import SSHCluster
        cluster = SSHCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "pbs":
        from dask_jobqueue import PBSCluster
        cluster = PBSCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "moab":
        from dask_jobqueue import MoabCluster
        cluster = MoabCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "sge":
        from dask_jobqueue import SGECluster
        cluster = SGECluster(**self.build_dict(pipeline_name))
    elif cluster_type == "lsf":
        from dask_jobqueue import LSFCluster
        cluster = LSFCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "slurm":
        from dask_jobqueue import SLURMCluster
        cluster = SLURMCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "oar":
        from dask_jobqueue import OARCluster
        cluster = OARCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "kube":
        from dask_kubernetes import KubeCluster
        cluster = KubeCluster(**self.build_dict(pipeline_name))
    else:
        raise ValueError(
            f"Must be providing one of the following ('existing', 'local', 'yarn', 'ssh', "
            f"'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube') not {cluster_type}"
        )

    with dask.distributed.Client(cluster) as client:
        execution_futures = []
        execution_futures_dict = {}

        for step_level in step_levels:
            for step in step_level:
                # We ensure correctness in sequencing by letting Dask schedule futures and
                # awaiting dependencies within each step.
                dependencies = []
                for step_input in step.step_inputs:
                    for key in step_input.dependency_keys:
                        dependencies.append(execution_futures_dict[key])

                run_config = dict(pipeline_context.run_config, execution={"in_process": {}})
                recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

                dask_task_name = "%s.%s" % (pipeline_name, step.key)

                recon_pipeline = recon_repo.get_reconstructable_pipeline(pipeline_name)

                future = client.submit(
                    query_on_dask_worker,
                    dependencies,
                    recon_pipeline,
                    pipeline_context.pipeline_run,
                    run_config,
                    [step.key],
                    pipeline_context.mode_def.name,
                    instance.get_ref(),
                    key=dask_task_name,
                    resources=get_dask_resource_requirements(step.tags),
                )

                execution_futures.append(future)
                execution_futures_dict[step.key] = future

        # This tells Dask to await the step executions and retrieve their results to the master
        futures = dask.distributed.as_completed(execution_futures, with_results=True)

        # Allow interrupts while waiting for the results from Dask
        for future, result in iterate_with_context(raise_execution_interrupts, futures):
            for step_event in result:
                check.inst(step_event, DagsterEvent)
                yield step_event
import modin.config as config
import modin.pandas as pd
from contexttimer import Timer
from docopt import docopt
from dask.distributed import Client, LocalCluster

if __name__ == "__main__":
    args = docopt(__doc__, version="1.0")
    conn = os.environ["POSTGRES_URL"]
    table = os.environ["POSTGRES_TABLE"]
    partitions = int(args["<num>"])

    config.NPartitions.put(partitions)
    cluster = LocalCluster(n_workers=partitions, scheduler_port=0, memory_limit="230G")
    client = Client(cluster)

    with Timer() as timer:
        df = pd.read_sql(
            f"SELECT * FROM {table}",
            conn,
            parse_dates=[
                "l_shipdate",
                "l_commitdate",
                "l_receiptdate",
            ],
        )
    print(f"[Total] {timer.elapsed:.2f}s")
    help='Chunk size in x y f dimensions. May be helpful in many-core, '
         'low-memory-per-core systems like Intel Xeon Phi.'
)
args = parser.parse_args()

# FIXED parameters
noise_dim = pd.Index(range(500), name='noise_field')
mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc')
if args.xyf_chunks is not None:
    chunks = {'x': args.xyf_chunks[0], 'y': args.xyf_chunks[1],
              'f': args.xyf_chunks[2]}
else:
    chunks = None

# Setup and start Dask Local Cluster
cluster = LocalCluster(n_workers=args.n_workers, processes=args.processes,
                       threads_per_worker=args.threads_per_worker,
                       scheduler_port=args.scheduler_port,
                       diagnostics_port=args.diagnostics_port)
client = Client(cluster)
print('Hostname: {:s}'.format(os.environ['HOSTNAME']))
print('Dask Scheduler address: {:s}'.format(cluster.scheduler_address))
print('Dask Dashboard link: {:s}'.format(cluster.dashboard_link))

# Loop over data parameters and perform calculation
for bw, fbw, t, s in itertools.product(
        args.bin_width, args.filter_bandwidth, args.theta, args.shift
):
    start_time = datetime.now()
    ds = xr.open_mfdataset(
        ['/scratch/pkittiwi/fg1p/binned_noise_map/bin{:.2f}MHz/'
         'fbw{:.2f}MHz/theta{:.1f}/shift{:d}/binned_noise_map_bin{:.2f}MHz_'
         'fbw{:.2f}MHz_theta{:.1f}_shift{:d}_{:03d}.nc'
def image_tikhonov(self, vis_arr, sphere, alpha, scale=True, usedask=False):
    n_s = sphere.pixels.shape[0]
    n_v = self.u_arr.shape[0]

    lambduh = alpha / np.sqrt(n_s)
    if not usedask:
        gamma = self.make_gamma(sphere)
        logger.info("Building Augmented Operator...")
        proj_operator_real = np.real(gamma).astype(np.float32)
        proj_operator_imag = np.imag(gamma).astype(np.float32)
        gamma = None
        proj_operator = np.block([[proj_operator_real], [proj_operator_imag]])
        proj_operator_real = None
        proj_operator_imag = None
        logger.info('augmented: {}'.format(proj_operator.shape))

        vis_aux = np.array(np.concatenate((np.real(vis_arr), np.imag(vis_arr))), dtype=np.float32)
        logger.info('vis mean: {} shape: {}'.format(np.mean(vis_aux), vis_aux.shape))

        logger.info("Solving...")
        reg = linear_model.ElasticNet(alpha=lambduh, l1_ratio=0.05, max_iter=10000, positive=True)
        reg.fit(proj_operator, vis_aux)
        sky = reg.coef_

        score = reg.score(proj_operator, vis_aux)
        logger.info('Loss function: {}'.format(score))
    else:
        from dask_ml.linear_model import LinearRegression
        import dask_glm
        import dask.array as da
        from dask.distributed import Client, LocalCluster
        from dask.diagnostics import ProgressBar
        import dask

        logger.info('Starting Dask Client')

        if True:
            cluster = LocalCluster(dashboard_address=':8231', processes=False)
            client = Client(cluster)
        else:
            client = Client('tcp://localhost:8786')

        logger.info("Client = {}".format(client))

        harmonic_list = []
        p2j = 2 * np.pi * 1.0j

        dl = sphere.l
        dm = sphere.m
        dn = sphere.n

        n_arr_minus_1 = dn - 1

        du = self.u_arr
        dv = self.v_arr
        dw = self.w_arr

        for u, v, w in zip(du, dv, dw):
            harmonic = da.from_array(
                np.exp(p2j * (u * dl + v * dm + w * n_arr_minus_1)) / np.sqrt(sphere.npix),
                chunks=(n_s,))
            harmonic = client.persist(harmonic)
            harmonic_list.append(harmonic)

        gamma = da.stack(harmonic_list)
        logger.info('Gamma Shape: {}'.format(gamma.shape))
        #gamma = gamma.reshape((n_v, n_s))
        gamma = gamma.conj()
        gamma = client.persist(gamma)

        logger.info('Gamma Shape: {}'.format(gamma.shape))

        logger.info("Building Augmented Operator...")
        proj_operator_real = da.real(gamma)
        proj_operator_imag = da.imag(gamma)
        proj_operator = da.block([[proj_operator_real], [proj_operator_imag]])
        proj_operator = client.persist(proj_operator)

        logger.info("Proj Operator shape {}".format(proj_operator.shape))
        vis_aux = da.from_array(np.array(np.concatenate((np.real(vis_arr), np.imag(vis_arr))), dtype=np.float32))

        #logger.info("Solving...")

        en = dask_glm.regularizers.ElasticNet(weight=0.01)
        en = dask_glm.regularizers.L2()
        #dT = da.from_array(proj_operator, chunks=(-1, 'auto'))
        ##dT = da.from_array(proj_operator, chunks=(-1, 'auto'))
        #dv = da.from_array(vis_aux)

        dask.config.set({'array.chunk-size': '1024MiB'})
        A = da.rechunk(proj_operator, chunks=('auto', n_s))
        A = client.persist(A)
        y = vis_aux  # da.rechunk(vis_aux, chunks=('auto', n_s))
        y = client.persist(y)
        #sky = dask_glm.algorithms.proximal_grad(A, y, regularizer=en, lambduh=alpha, max_iter=10000)

        logger.info("Rechunking completed.. A= {}.".format(A.shape))

        reg = LinearRegression(penalty=en,
                               C=1.0 / lambduh,
                               fit_intercept=False,
                               solver='lbfgs',
                               max_iter=1000,
                               tol=1e-8)
        sky = reg.fit(A, y)
        sky = reg.coef_
        score = reg.score(proj_operator, vis_aux)
        logger.info('Loss function: {}'.format(score.compute()))

        logger.info("Solving Complete: sky = {}".format(sky.shape))

    sphere.set_visible_pixels(sky, scale=True)
    return sky.reshape(-1, 1)