def _execute(self, future_or_delayed, *args, **kwargs):
    """
    Hand a dask task to the distributed client and remember its key on this job.

    A ``Delayed`` is computed through ``self.client`` to obtain a future; a
    ``Future`` is used as given. Extra positional and keyword arguments are
    accepted but not used.

    Args:
        future_or_delayed (dask.Delayed or dask.distributed.Future): dask task object to track using TethysJobs.

    Raises:
        ValueError: if ``future_or_delayed`` is neither a Delayed nor a Future.
    """  # noqa: E501
    if not isinstance(future_or_delayed, (Future, Delayed)):
        raise ValueError('Must pass a valid instance of Delayed or Future.')

    needs_compute = isinstance(future_or_delayed, Delayed)
    future = self.client.compute(future_or_delayed) if needs_compute else future_or_delayed

    self.key = future.key

    # NOTE: The status is set optimistically; there is no signal available
    # here for when the first node of the workflow really starts running.
    self._status = 'RUN'
    self.start_time = timezone.now()

    # Flag the key on the scheduler so it knows to report status for this job back to Tethys.
    self.client.set_metadata(self.key, True)

    # Persist the attributes updated above.
    self.save()

    # Without fire_and_forget the job would not keep running once the future goes out of scope.
    fire_and_forget(future)
def _get_temp_file(self) -> None:
    """
    Creates self.File and writes self._DataFrame to it.

    File will always use gzip compression and end in .gz.
    Overwriting may cause a critical error and data corruption.
    Will wait for any previous file write to complete.

    The work is submitted to ``self._client``; ``self._File`` holds the
    pending future until the done-callback replaces it with the task result.
    """

    def _get_temp_file_1(
            temp_folder_location: str = self._temporary_folder_location,
            dataset_name: str = self._dataset_name,
            log: callable = self.log,
            client: dd.Client = self._client,
            DataFrame: object = self._DataFrame) -> object:
        # Values are bound as defaults so the submitted task carries a
        # snapshot of the current state.
        try:
            file: object = NamedTemporaryFile(dir=temp_folder_location, suffix="__" + dataset_name + '.gz')
        except OSError as err:
            log(err, 'DEBUG')
            # Keep the underlying OS error attached as the cause.
            raise DatasetNameError(dataset_name) from err
        File: object = client.submit(DataFrame.to_parquet, file, compression='gzip')
        return File

    def _get_temp_file_2(future: dd.Future) -> None:
        # add_done_callback invokes the callback with the finished future as
        # its only argument, so the callback has to accept it.
        self._File = future.result()
        self.log("File saved")

    if self._File is not None:
        dd.wait(self._File)
    self._File = self._client.submit(_get_temp_file_1)
    self._File.add_done_callback(_get_temp_file_2)
    dd.fire_and_forget(self._File)
def plugin_f_and_f(dump, plugin, params, user_pk):
    """
    Submit ``run_plugin`` to the dask scheduler and return without waiting.

    A client is opened against ``settings.DASK_SCHEDULER_URL`` and the
    submitted future is released with ``fire_and_forget``.
    """
    scheduler_client = Client(settings.DASK_SCHEDULER_URL)
    pending = scheduler_client.submit(run_plugin, dump, plugin, params, user_pk)
    fire_and_forget(pending)
def submit(
    self,
    client: Client = None,
    scheduler_address: str = None,
    priority: int = None,
    resources: Dict[str, Any] = None,
    show_progress=False,
    **kwargs,
) -> None:
    """
    Send ``self.graph`` to a dask scheduler and detach from it.

    Args:
        client: client to submit through. When omitted, a new one is created
            from ``scheduler_address``.
        scheduler_address: address used only when no ``client`` is given.
        priority: task priority; falls back to ``self.priority`` when falsy.
        resources: resource constraints; falls back to ``self.resources``
            when falsy.
        show_progress: when true, ``progress`` is called on the computation.
        **kwargs: accepted for compatibility, not used.

    Side effects:
        Stores the scheduler address of the client in use on
        ``self.scheduler_address``.
    """
    if not priority:
        priority = self.priority
    if not resources:
        resources = self.resources

    # Remember who owns the client: a caller-provided client must never be
    # closed here, even if a scheduler_address was passed alongside it.
    created_client = not client
    if created_client:
        client = Client(scheduler_address)

    self.scheduler_address = client.scheduler.address

    computation = client.compute(self.graph, retries=3, priority=priority, resources=resources)
    if show_progress:
        progress(computation)
    fire_and_forget(computation)

    # As before, only a connection to an explicitly addressed scheduler is
    # closed; a client created without an address is left open.
    if created_client and scheduler_address:
        client.close()
    return None
def _get_data(self) -> None:
    """
    Retrieve self.DataFrame from connected database using self.sql and self.connection.

    If a backup file named ``<dataset_name>.gz`` exists in the backup folder
    it is read instead of querying the database. The work is submitted to
    ``self._client``; ``self._DataFrame`` holds the pending future until the
    done-callback replaces it with the resulting DataFrame.
    """

    def _get_data_1(
            backup_folder_location: str = self._backup_folder_location,
            dataset_name: str = self._dataset_name,
            log: callable = self.log,
            sql: str = self._sql,
            connection: object = self._connection) -> pd.DataFrame:
        file_location: str = os.path.join(backup_folder_location, dataset_name + '.gz')
        if not os.path.isfile(file_location):
            log("Querying database")
            DataFrame = pd.read_sql(sql, connection)
        else:
            log("Reading backup file")
            # Read the file that was just checked, not the whole folder.
            DataFrame = pd.read_parquet(file_location)
        return DataFrame

    def _get_data_2(future: dd.Future) -> None:
        # add_done_callback invokes the callback with the finished future as
        # its only argument, so the callback has to accept it.
        self._DataFrame = future.result()
        self.log("DataFrame retrieved")
        if len(self._DataFrame.index) == 0:
            self.log("Query was empty", 'WARNING')

    if self._DataFrame is not None:
        dd.wait(self._DataFrame)
    self._DataFrame = self._client.submit(_get_data_1)
    self._DataFrame.add_done_callback(_get_data_2)
    dd.fire_and_forget(self._DataFrame)
def _execute(self, future_or_delayed, *args, **kwargs):
    """
    Start tracking a dask task and store the key of its future.

    Delayed objects are computed via the distributed client to obtain a
    future; futures are tracked directly. Additional positional and keyword
    arguments are accepted but ignored.

    Args:
        future_or_delayed (dask.Delayed or dask.distributed.Future): dask task object to track using TethysJobs.

    Raises:
        ValueError: when the argument is neither a Delayed nor a Future.
    """  # noqa: E501
    if isinstance(future_or_delayed, Delayed):
        future = self.client.compute(future_or_delayed)
    elif isinstance(future_or_delayed, Future):
        future = future_or_delayed
    else:
        raise ValueError(
            'Must pass a valid instance of Delayed or Future.')

    self.key = future.key

    # NOTE: 'RUN' is recorded right away because there is no other way to
    # learn when the first node in the workflow actually starts.
    self._status = 'RUN'
    self.start_time = timezone.now()

    # Register the key with the dask scheduler so it sends status updates for this job to Tethys.
    self.client.set_metadata(self.key, True)

    # Store the changed attributes.
    self.save()

    # fire_and_forget keeps the job alive after the local future is dropped.
    fire_and_forget(future)
def refit_task(project_id, model_id, selected_models: dict):
    """
    Re-fit the prediction estimators for a model.

    When the ``IN_DOCKER`` environment variable is enabled, the work is
    submitted to the dask scheduler and not awaited; otherwise it runs
    synchronously in this process.

    Args:
        project_id: project identifier, converted with ``int``.
        model_id: model identifier, converted with ``int``.
        selected_models: passed through to
            ``DaskTasks.set_prediction_estimators`` unchanged.
    """
    # bool() of any non-empty string is True, so values such as "0" or
    # "false" used to switch docker mode on. Treat the usual negative
    # spellings (and an unset/empty variable) as disabled.
    flag = os.getenv("IN_DOCKER", "").strip().lower()
    docker = flag not in ("", "0", "false", "no", "off")
    if docker:
        client = Client(dask_scheduler)
        fire_and_forget(client.submit(DaskTasks.set_prediction_estimators, int(project_id), int(model_id), selected_models))
    else:
        DaskTasks.set_prediction_estimators(int(project_id), int(model_id), selected_models)
def index_f_and_f(dump_pk, user_pk):
    """
    Submit ``unzip_then_run`` for a new index to dask and return immediately.

    The Elasticsearch URL from settings is forwarded as the last task argument.
    """
    scheduler_client = Client(settings.DASK_SCHEDULER_URL)
    pending = scheduler_client.submit(
        unzip_then_run,
        dump_pk,
        user_pk,
        settings.ELASTICSEARCH_URL,
    )
    fire_and_forget(pending)
def plugin_f_and_f(dump, plugin, params):
    """
    Submit ``run_plugin`` to dask and return without waiting for the result.

    The Elasticsearch URL from settings is passed between ``plugin`` and
    ``params`` in the task arguments.
    """
    scheduler_client = Client(settings.DASK_SCHEDULER_URL)
    pending = scheduler_client.submit(
        run_plugin,
        dump,
        plugin,
        settings.ELASTICSEARCH_URL,
        params,
    )
    fire_and_forget(pending)
def setup_task(project_id, dataset_id, pipeline_id):
    """
    Execute a pipeline task for a dataset.

    When the ``IN_DOCKER`` environment variable is enabled, the work is
    submitted to the dask scheduler and not awaited; otherwise it runs
    synchronously in this process. All three identifiers are converted with
    ``int`` before use.
    """
    # bool() of any non-empty string is True, so values such as "0" or
    # "false" used to switch docker mode on. Treat the usual negative
    # spellings (and an unset/empty variable) as disabled.
    flag = os.getenv("IN_DOCKER", "").strip().lower()
    docker = flag not in ("", "0", "false", "no", "off")
    if docker:
        client = Client(dask_scheduler)
        fire_and_forget(client.submit(DaskTasks.execute_task, int(project_id), int(dataset_id), int(pipeline_id)))
    else:
        DaskTasks.execute_task(int(project_id), int(dataset_id), int(pipeline_id))
def load_valid_tiles(ds, save=False):
    """
    Collect the non-missing tiles of ``ds`` and link grid neighbours.

    Tiles are visited grouped by "tile_y" and then "tile_x"; the enumeration
    indices of those groups serve as the (j, i) grid position. When ``save``
    is true, a max projection along axis 0 of each tile is written as a TIFF
    through the dask client without waiting for completion.

    Returns:
        tuple: ``(images, links)`` where ``images`` is a list of
        ``((j, i), uuid)`` and ``links`` maps ``(pos_a, pos_b)`` (with
        ``pos_a < pos_b``) to ``(uuid_a, uuid_b)`` for positions that differ
        by one step along exactly one axis.
    """
    import imageio

    if save:
        dst_dir = create_dst_dir("mip")

    # generate thumbnails
    images = []
    for j, (_, row_group) in enumerate(ds.groupby("tile_y")):
        for i, (_, tile) in enumerate(row_group.groupby("tile_x")):
            print(f".. iter (i: {i}, j: {j})")

            uuid = tile.loc["488_1X"].values[0]
            # a falsy uuid marks a missing tile
            if uuid:
                if save:
                    data = ds[uuid].max(axis=0)
                    dst_path = os.path.join(dst_dir, f"tile-{i:03d}-{j:03d}_mip.tif")
                    pending = client.submit(imageio.imwrite, dst_path, data)
                    fire_and_forget(pending)

                images.append(((j, i), uuid))

    # generate neighbor linkage
    links = {}
    for (apos, auuid), (bpos, buuid) in product(images[:-1], images[1:]):
        if apos == bpos:
            continue

        # normalise the pair so the smaller position always comes first
        if bpos < apos:
            (apos, auuid), (bpos, buuid) = (bpos, buuid), (apos, auuid)
        print(f"{apos} <> {bpos}")

        pair = (apos, bpos)
        if pair in links:
            print(".. duplicate")
            continue

        (aj, ai), (bj, bi) = apos, bpos
        vertical_step = ai == bi and abs(aj - bj) == 1
        horizontal_step = aj == bj and abs(ai - bi) == 1
        if vertical_step or horizontal_step:
            print(".. NEW NEIGHBOR")
            links[pair] = (auuid, buuid)
        else:
            print(".. not neighbor")

    return images, links
def setup_task(dataset_id, amodel_id, prepro_id=None):
    """
    Attach a dataset to an analytical model and submit the task to dask.

    The dataset's stored bytes are decoded and parsed as CSV, the "ID" column
    is dropped, and the resulting frame is sent to ``DaskTasks.execute_task``
    without waiting for completion. ``prepro_id`` is accepted but not used.
    """
    dataset = Dataset.objects.get(id=int(dataset_id))
    amodel = AnalyticalModel.objects.get(id=int(amodel_id))

    amodel.dataset = dataset.id
    amodel.save()

    client = Client(dask_scheduler)

    csv_text = bytes(dataset.data).decode()
    frame = pd.read_csv(StringIO(csv_text))
    df = frame.drop("ID", axis=1)

    # add preprocessing to task
    pending = client.submit(
        DaskTasks.execute_task,
        df,
        int(amodel.id),
        str(amodel.name),
        int(dataset_id),
    )
    fire_and_forget(pending)
def run_and_score_submission(client, submission):
    """
    Runs public and private, plus scoring.

    The public run is chained first and the private run second, each
    depending on the delayed value produced by the previous step. The
    combined graph is computed on the cluster without waiting.

    Returns:
        The future of the submitted computation.
    """
    chain = dask.delayed(True)
    for public_flag in (True, False):
        chain = _trigger_submission_run(submission, chain, is_public=public_flag)

    if settings.VISUALIZE_DASK_GRAPH:
        chain.visualize(filename="task_graph.svg")

    compute_fn = chain.compute  # pylint:disable=no-member
    future = client.submit(compute_fn)
    logger.info("Future key: %s", future.key)

    dd.fire_and_forget(future)
    return future
def launch(s, jid, job):
    """Launch a task.

    jid is a job id, job is a db.Job. This modifies job.

    Caller should provide a transaction and commit if launch returns.
    Otherwise we may have a running job that is not in the database.

    The future and its cancellation variable are stored in ``s.tasks[jid]``
    and ``s.task_done`` is registered as the completion callback. A failure
    of the optional ``s.monitor`` hook is logged and does not abort the
    launch.
    """
    canc = Bogo_var()
    canc.set(False)
    spec = Task_spec(jid, job)
    fut = s.client.compute(model.task(spec, canc))
    s.tasks[jid] = fut, canc
    job.status = db.Job_status.SCHEDULED
    fut.add_done_callback(lambda f: s.task_done(jid, f))
    dd.fire_and_forget(fut)
    if s.monitor is not None:
        try:
            s.monitor(jid, fut)
        except Exception:
            # Monitoring is best-effort, but KeyboardInterrupt and SystemExit
            # must still propagate, hence no bare ``except``.
            s.logger.exception("monitor.launch failed for job %s", jid)
def train_model(id):
    """
    Kick off training for the model identified by ``id``.

    Returns:
        tuple: ``(payload, http_status)``. 409 with an error message when the
        model is already trained or currently training; otherwise 202 with
        the model status after the training task was submitted.
    """
    model, config, dataset = Dataset.model_from_id(id)

    # Refuse to start when training has finished or is under way.
    if model.status == "done":
        return {"error": "Model is already trained"}, 409
    restartable = ("not started", "error")
    if model.status not in restartable:
        return {"error": "Model is currently training"}, 409

    log = app.logger
    log.info(f"Starting training dataset {dataset.name}")
    log.info(f"config: {config.to_json()}")
    log.info(f"model: {model.to_json()}")
    log.info(f"Found configuration {config}")

    # update status
    model.status = "starting"
    dataset.save()

    training_future = client.submit(training.train_model, id)
    fire_and_forget(training_future)

    response = {"status": model.status}
    return response, 202
def submit(self, client: Client = None, client_address: str = None, priority: int = None) -> None:
    """
    Send ``self.graph`` to dask, mark the workflow submitted and queue it.

    Args:
        client: client to submit through. When omitted, a new one is created
            from ``client_address``.
        client_address: address used only when no ``client`` is given.
        priority: task priority; falls back to ``self.priority`` when falsy.

    Side effects:
        Sets ``self.status`` to "submitted" and calls
        ``self.submit_to_queue`` with the engine taken from the
        ``QUEUE_ENGINE`` environment variable (or the built-in default).
    """
    if not priority:
        priority = self.priority

    # Decide ownership before ``client`` is rebound; checking it afterwards
    # can never be true, so the client created here was never closed.
    created_client = not client
    if created_client:
        client = Client(client_address)

    computation = client.compute(self.graph, retries=3, priority=priority)
    fire_and_forget(computation)
    self.status = "submitted"

    # If a client is provided, we assume the user will close it on their end.
    # A client created without an explicit address is also left open.
    if created_client and client_address:
        client.close()

    schema = "administration"
    table = "workflow_queue"
    engine = os.getenv("QUEUE_ENGINE") or "mssql+pyodbc://redshift_acoe"
    self.submit_to_queue(engine, schema, table, priority)
    return None
def run(
    self,
    pars,
    sims,
    sim_status,
    indices,
    collect_in_memory: bool = True,
    batch_size: Optional[int] = None,
):
    """Run the simulator on the input parameters.

    Nothing is returned; results are written into ``sims`` and ``sim_status``.

    Args:
        pars: array with all the input parameters. Should have shape (num.
            samples, num. parameters)
        sims: dictionary of arrays where to store the simulation output.
            All arrays should have the number of samples as the size of the
            first dimension
        sim_status: array where to store the simulation status (size should
            be equal to the number of samples)
        indices: indices of the samples that need to be run by the
            simulator
        collect_in_memory: if True, collect the simulation output in
            memory; if False, instruct Dask workers to save the output to
            the corresponding arrays. The latter option is asynchronous,
            thus this method immediately returns.
        batch_size: simulations will be submitted in batches of the
            specified size
    """
    self.set_dask_cluster(self.cluster)

    # open parameter array as Dask array
    chunks = getattr(pars, "chunks", "auto")
    z = da.from_array(pars, chunks=chunks)
    idx = da.from_array(indices, chunks=(batch_size or -1, ))

    z = z[idx]
    z = z.persist()  # load the parameters in the distributed memory

    # NOTE: the builtin ``object`` / ``int`` / ``float`` are used as dtypes
    # below. The ``np.object`` / ``np.int`` / ``np.float`` names were plain
    # aliases of these builtins and no longer exist in current NumPy.

    # block-wise run the model function on the parameter array
    out = da.map_blocks(
        _run_model_chunk,
        z,
        model=self.model,
        sim_shapes=self.sim_shapes,
        fail_on_non_finite=self.fail_on_non_finite,
        drop_axis=1,
        dtype=object,
    )

    # split result dictionary and simulation status array
    results = out.map_blocks(getitem, 0, dtype=object)
    status = out.map_blocks(getitem, 1, meta=np.array(()), dtype=int)

    # unpack array of dictionaries to dictionary of arrays
    result_dict = {}
    for obs, shape in self.sim_shapes.items():
        result_dict[obs] = results.map_blocks(
            getitem,
            obs,
            new_axis=[i + 1 for i in range(len(shape))],
            chunks=(z.chunks[0], *shape),
            meta=np.array(()),
            dtype=float,
        )

    sources = [result_dict[k] for k in self.sim_shapes.keys()]
    targets = [sims[k] for k in self.sim_shapes.keys()]

    if collect_in_memory:
        # submit computation and collect results
        *sources, status = self.client.compute([*sources, status], sync=True)

        # update simulation results
        for source, target in zip(sources, targets):
            target[indices.tolist()] = source

        # finally, update the simulation status
        sim_status[indices.tolist()] = status

    else:
        sources = da.store(
            sources=sources,
            targets=targets,
            regions=(indices.tolist(), ),
            lock=False,
            compute=False,
            return_stored=True,
        )

        # submit computation
        *sources, status = self.client.persist([*sources, status])

        # the following dummy array is generated after results are stored.
        zeros_when_done = [
            source.map_blocks(
                lambda x: np.zeros(x.shape[0], dtype=int),
                chunks=(source.chunks[0], ),
                drop_axis=[i for i in range(1, source.ndim)],
                meta=np.array((), dtype=int),
                dtype=int,
            ) for source in sources
        ]
        status = sum([*zeros_when_done, status])
        status = status.store(
            target=sim_status,
            regions=(indices.tolist(), ),
            lock=False,
            compute=False,
            return_stored=True,
        )
        # when the simulation results are stored, we can update the status
        status = self.client.persist(status)
        fire_and_forget(status)
async def send_computation_tasks(
    self,
    user_id: UserID,
    project_id: ProjectID,
    cluster_id: ClusterID,
    tasks: Dict[NodeID, Image],
    callback: Callable[[], None],
    remote_fct: Callable = None,
):
    """actually sends the function remote_fct to be remotely executed. if None is kept then the default
    function that runs container will be started.

    One dask task is submitted per entry of ``tasks``. Each future is kept in
    ``self._taskid_to_future_map`` until it completes, at which point the
    entry is removed and ``callback`` is invoked.

    Args:
        user_id: forwarded to the remote function.
        project_id: forwarded to the remote function and embedded in the job id.
        cluster_id: appended to the configured cluster-id prefix to build an
            extra dask resource key for the task.
        tasks: mapping of node id to the image to run for that node.
        callback: zero-argument callable invoked whenever a task's future is done.
        remote_fct: function executed remotely; defaults to the sidecar runner.

    Raises:
        Whatever the connection / cluster checks raise, or the error raised
        by ``client.submit`` when the scheduler connection itself is fine.
    """

    def _done_dask_callback(dask_future: Future):
        job_id = dask_future.key
        logger.debug("Dask future %s completed", job_id)
        # remove the future from the dict to remove any handle to the future, so the worker can free the memory
        # (tolerate a missing entry so that ``callback`` is always notified)
        self._taskid_to_future_map.pop(job_id, None)
        callback()

    def _comp_sidecar_fct(job_id: str, user_id: int, project_id: ProjectID, node_id: NodeID) -> None:
        """This function is serialized by the Dask client and sent over to the Dask sidecar(s)
        Therefore, (screaming here) DO NOT MOVE THAT IMPORT ANYWHERE ELSE EVER!!"""
        from simcore_service_dask_sidecar.tasks import (
            run_task_in_service,  # type: ignore
        )

        run_task_in_service(job_id, user_id, project_id, node_id)

    if remote_fct is None:
        remote_fct = _comp_sidecar_fct

    for node_id, node_image in tasks.items():
        # NOTE: the job id is used to create a folder in the sidecar,
        # so it must be a valid file name too
        # Also, it must be unique
        # and it is shown in the Dask scheduler dashboard website
        job_id = f"{node_image.name}_{node_image.tag}__projectid_{project_id}__nodeid_{node_id}__{uuid4()}"
        dask_resources = _from_node_reqs_to_dask_resources(
            node_image.node_requirements)
        # add the cluster ID here
        dask_resources.update({
            f"{self.settings.DASK_CLUSTER_ID_PREFIX}{cluster_id}":
            CLUSTER_RESOURCE_MOCK_USAGE
        })

        _check_valid_connection_to_scheduler(self.client)
        _check_cluster_able_to_run_pipeline(
            node_id=node_id,
            scheduler_info=self.client.scheduler_info(),
            task_resources=dask_resources,
            node_image=node_image,
            cluster_id_prefix=self.settings.DASK_CLUSTER_ID_PREFIX,  # type: ignore
            cluster_id=cluster_id,
        )

        try:
            task_future = self.client.submit(
                remote_fct,
                job_id,
                user_id,
                project_id,
                node_id,
                key=job_id,
                resources=dask_resources,
                retries=0,
            )
        except Exception:
            # Dask raises a base Exception here in case of connection error, this will raise a more precise one
            _check_valid_connection_to_scheduler(self.client)
            # if the connection is good, then the problem is different, so we re-raise
            raise

        # Register the future BEFORE attaching the callback: the callback
        # removes the entry, so attaching it first lets a task that finishes
        # immediately run the removal ahead of the insertion and leave a
        # stale entry behind.
        self._taskid_to_future_map[job_id] = task_future
        task_future.add_done_callback(_done_dask_callback)
        fire_and_forget(
            task_future
        )  # this should ensure the task will run even if the future goes out of scope
        logger.debug("Dask task %s started", task_future.key)
"/nfs/paper-big-data-engines/utils.py") # Allow workers to use module client.upload_file( "/nfs/paper-big-data-engines/incrementation/Increment.py") # Read images paths = crawl_dir(os.path.abspath(args.bb_dir)) client.scatter(paths) results = [] for path in paths: img = client.submit(read_img, path, start=start, args=args) # Increment the data n time: for _ in range(args.iterations): img = client.submit(increment, img, delay=args.delay, start=start, args=args) # Save the data results.append(client.submit(save_results, img, start=start, args=args)) # Execute the tasks fire_and_forget(img) client.gather(results) client.close()
inq = Queue('inq') outq = Queue('outq') lock = Lock('x') stopiter = Variable(False) brake = Variable(True) saver_started = False workers_started = False #start workers for workers in range(NCORE*ncpu ): w = client.submit( calculate_small_parsimony , inq= None ,outq = None ,stopiter= stopiter , treefile=treefile , bootstrap_replicates = bootstrap_replicates, matfile= alnfile+'.h5' , row_index= remote_index , iolock = lock, verbose = False ) fire_and_forget(w) s = client.submit( collect_futures , queue= None , stopiter=stopiter , brake = brake, runName= runName , nucleotides_only =False ) saver_started = True fire_and_forget(s) for annot_index,annot_row in annotation.iterrows(): #indexing starts at 1 for blast #####switch to sending the coordinates and masking for the matrix for j,codon in enumerate(range(annot_row.qstart-1, annot_row.qend-1 , 3 )): positions = [] for col in [codon, codon+1 , codon+2]: if col in informativesites: positions.append( (col, None) ) else: #just add the alignment character if it doesnt change.
from dask import delayed

# Cluster and client are created at import time, as in the original script.
cluster = SLURMCluster(cores=40, processes=40, memory='250GB', queue='scavenger', walltime='02:00')
cluster.start_workers(2)
client = Client(cluster)

if __name__ == '__main__':
    guest = load_molecule('aaa.res')
    trial_xyz, trial_rad = perturb_mol(guest)
    trial_xyz = delayed(trial_xyz)
    trial_rad = delayed(trial_rad)
    structures = load_crystals()
    bag = db.from_sequence([(structure, trial_xyz, trial_rad) for structure in structures], partition_size=40)
    guest_hits = client.map(gen_guests, bag)
    # fire_and_forget accepts only the future(s) to detach, so the save step
    # has to be submitted first.
    # NOTE(review): assumes save_expansion handles one hit at a time, like the
    # neighbouring client.map calls; confirm against its definition.
    fire_and_forget(client.map(save_expansion, guest_hits))
    inserts = client.map(insert_guests, guest_hits)
    reopts = client.map(reoptimize_inserts, inserts)