import ast


class ClusterShareMemory(ShareMemory):
    """Share Memory for dask cluster."""

    def __init__(self, name):
        from dask.distributed import Variable
        self.var = Variable(name, client=ShareMemoryClient().client)

    def put(self, value):
        """Put value into shared data."""
        self.var.set(str(value))

    def get(self):
        """Get value from shared data."""
        # TODO: get() blocks (and then raises) when the variable holds no data yet.
        return ast.literal_eval(self.var.get(timeout=2))

    def delete(self):
        """Delete data according to name."""
        self.var.delete()

    def close(self):
        """Close Share Memory."""
        ShareMemoryClient().close()
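# A minimal usage sketch for the class above, assuming a dask.distributed
# scheduler is reachable and ShareMemoryClient() wraps a connected Client.
# The key name "train_status" is illustrative only. Because values round-trip
# through str() and ast.literal_eval, only Python literals (numbers, strings,
# lists, dicts, ...) can be stored.
shm = ClusterShareMemory("train_status")
shm.put({"epoch": 3, "loss": 0.42})
state = shm.get()            # -> {'epoch': 3, 'loss': 0.42}
shm.delete()
shm.close()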
def run_indri_cluster(scheduler, indri, params, runs, overwrite):
    client = Client(scheduler)
    available_workers = get_worker_load(client)
    ntasks = len(params)

    for w in available_workers:
        logging.info('{:<27} {:<22}'.format(w[0], format_loadavg(w[1:])))
    logging.info('{} tasks in total'.format(len(params)))
    logging.info('{} workers in total'.format(len(available_workers)))

    cancel = Variable('cancel', client)
    cancel.set(False)

    def signal_handler(sig, frame):
        cancel.set(True)
        logging.info('CTRL-C received. It may take a while to kill running tasks.')

    signal.signal(signal.SIGINT, signal_handler)

    indri_args = [(str(indri.resolve()), str(p.resolve())) for p in params]
    fp_runs = [str(r.resolve()) for r in runs]
    overwrite = [overwrite] * len(runs)
    schedule_loop(client, ntasks, cancel, runs, indri_args, fp_runs, overwrite)
def run_test_with_timeout(
    test_config: TestConfig,
    incoming_state: dict,
    hostnames: List[str],
    duration: int = 15,
) -> dict:
    """
    Calls run_test with a timeout and signals run_test to end gracefully
    if the timeout elapses first.

    Args:
        test_config: Config of the test to run
        incoming_state: Initial state to run actions/asserts in
        hostnames: List of runner hostnames
        duration: Optional timeout in seconds to run the test within
            (this makes the function convenient to call from runners)

    Returns:
        New state after running actions and asserts
    """
    if duration is None or duration < 0:
        return run_test(test_config, incoming_state, hostnames)

    # NOTE: use a dask cluster scheduler?
    client = get_client()

    # NOTE: may improve the way this timeout is signalled
    timeout_signal_name = f"keep-going-{uuid.uuid4()}"
    keep_going = Variable(timeout_signal_name)
    keep_going.set(True)

    run_test_task: Future = client.submit(
        run_test,
        test_config=test_config,
        incoming_state=incoming_state,
        hostnames=hostnames,
        timeout_signal_name=timeout_signal_name,
    )

    LOGGER.debug("Test duration config: %d seconds", duration)

    def distributed_timeout():
        # A timeout left over from a previous test keeps running (it cannot be
        # cancelled), and if it did keep running it could end another test early,
        # so it also watches the shared Variable to know when to return.
        end_time = datetime.now() + timedelta(seconds=duration)
        while datetime.now() <= end_time and keep_going.get():
            time.sleep(test_config.get("secondsBetweenCycles", 1))

    timeout_task: Future = client.submit(distributed_timeout)

    # Wait for either the test or the timeout to finish.
    # If the test finishes first, return its result; if the timeout finishes
    # first, signal the test to end and return its state.
    start = datetime.now()
    wait([run_test_task, timeout_task], return_when="FIRST_COMPLETED")
    end = datetime.now()

    LOGGER.debug("Test %s took %d seconds", test_config["name"], (end - start).seconds)

    if run_test_task.done():
        keep_going.set(False)
        return run_test_task.result()
    elif timeout_task.done():
        LOGGER.debug(timeout_task)
        LOGGER.info("Test %s timed out", test_config["name"])
        # NOTE: add timed-out status to the summary?
        keep_going.set(False)
        return run_test_task.result()
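# A hedged sketch of the cooperating side: the docstring above implies that
# run_test periodically reads the Variable named by timeout_signal_name and
# stops between steps once it becomes False. The step loop and the
# apply_step() helper below are assumptions for illustration, not the actual
# run_test implementation.
from dask.distributed import Variable

def run_test(test_config, incoming_state, hostnames, timeout_signal_name=None):
    keep_going = Variable(timeout_signal_name) if timeout_signal_name else None
    state = dict(incoming_state)
    for step in test_config.get("steps", []):          # hypothetical config shape
        if keep_going is not None and not keep_going.get():
            break                                      # timed out: stop gracefully
        state = apply_step(step, state, hostnames)     # hypothetical helper
    return state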
# Start the parsimony workers.
for workers in range(NCORE * ncpu):
    w = client.submit(
        calculate_small_parsimony,
        inq=None,
        outq=None,
        stopiter=stopiter,
        treefile=treefile,
        bootstrap_replicates=bootstrap_replicates,
        matfile=alnfile + '.h5',
        row_index=remote_index,
        iolock=lock,
        verbose=False,
    )
    fire_and_forget(w)

# Start the collector that gathers and saves finished results.
s = client.submit(
    collect_futures,
    queue=None,
    stopiter=stopiter,
    brake=brake,
    runName=runName,
    nucleotides_only=False,
)
saver_started = True
fire_and_forget(s)

for annot_index, annot_row in annotation.iterrows():
    # Indexing starts at 1 for BLAST.
    # TODO: switch to sending the coordinates and masking for the matrix.
    for j, codon in enumerate(range(annot_row.qstart - 1, annot_row.qend - 1, 3)):
        positions = []
        for col in [codon, codon + 1, codon + 2]:
            if col in informativesites:
                positions.append((col, None))
            else:
                # Just add the alignment character if it doesn't change.
                positions.append((col, align_array[0, col]))
        # Submit the codon and its column positions.
        inq.put((codon, positions))

print('done iterating')

# Wait while the brake is engaged, then signal workers to stop.
while brake == True:
    time.sleep(10)
stopiter.set(True)
print('DONE main')
_VI_ = Variable('_VI_')

file_D = os.path.join(args['dictionary'], "{}_D.txt".format(args["prefix"]))
file_z = os.path.join(args['output'], "{}_z.txt".format(args["prefix"]))

# Start the loop!
for m in range(M):
    print('M: ' + str(m))
    seed = np.random.randint(max_iterations + 1, high=4294967295)
    np.random.seed(seed)
    u_old = np.random.random(T)
    num_iterations = 0
    delta = 2 * epsilon

    while num_iterations < max_iterations and delta > epsilon:
        # On the first pass, broadcast the dimension and seed; afterwards,
        # broadcast the current vector u.
        if num_iterations > 0:
            _U_.set(list(u_old))
        else:
            _U_.set([T, seed])

        v = (
            S.map(vector_matrix)
            .flatten()
            .foldby(lambda pair: pair[0], partition_reduction, 0, combine=add)
            .compute()
        )
        r = client.gather(v)
        v = np.take(sorted(r), indices=1, axis=1)

        indices = np.sort(select_topr(v, R))
        _I_.set(list(indices))
        _VI_.set(list(v[indices]))

        u_newt = S.map(matrix_vector).compute()
        u_new = client.gather(u_newt)
        u_new = np.take(sorted(u_new), indices=1, axis=1)

        # Subtract off the mean and normalize.
        u_new -= u_new.mean()
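# A hedged sketch of the worker side: a function mapped over the bag S
# (vector_matrix here) can read the values the driver publishes through the
# _U_, _I_ and _VI_ Variables. The partition shape and the returned
# (key, value) pairs below are assumptions for illustration, not the actual
# implementation.
import numpy as np
from dask.distributed import Variable

def vector_matrix(partition):
    u = np.array(Variable('_U_').get())      # value broadcast by the driver loop
    key, block = partition                   # hypothetical (key, ndarray) partition
    return [(key, block.T.dot(u))]           # this block's contribution, reduced by foldby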