def generate(self, number: int, method: str = 'poisson',
             regenerate: bool = False) -> None:
    """Generates an ensemble of matrices and estimates standard deviation

    Perturbs the initial raw matrix using either a Gaussian or Poisson
    process, unfolds them and applies the first generation method to them.
    Uses the variation to estimate standard deviation of each step.

    Args:
        number: The number of perturbed matrices to generate.
        method: The stochastic method to use to generate the perturbations.
            Can be 'gaussian' or 'poisson'.
        regenerate: Whether to use already generated files (False)
            or generate them all anew (True).
    """
    assert self.raw is not None, "Set the raw matrix"
    assert self.unfolder is not None, "Set unfolder"
    assert self.first_generation_method is not None, \
        "Set first generation method"
    self.size = number
    self.regenerate = regenerate

    LOG.info(f"Start normalization with {self.nprocesses} cpus")
    pool = ProcessPool(nodes=self.nprocesses)
    ss = np.random.SeedSequence(self.seed)
    iterator = pool.imap(self.step, range(number), ss.spawn(number),
                         repeat(method))
    ensembles = np.array(list(tqdm(iterator, total=number)))
    pool.close()
    pool.join()
    pool.clear()

    raw_ensemble = ensembles[:, 0, :, :]
    unfolded_ensemble = ensembles[:, 1, :, :]
    firstgen_ensemble = ensembles[:, 2, :, :]

    # TODO Move this to a save step
    self.raw.save(self.path / 'raw.npy')
    # saving for firstgen is in step due to pickling
    self.firstgen = Matrix(path=self.path / 'firstgen.npy')

    # Calculate standard deviation
    raw_ensemble_std = np.std(raw_ensemble, axis=0)
    raw_std = Matrix(raw_ensemble_std, self.raw.Eg, self.raw.Ex,
                     state='std')
    raw_std.save(self.path / "raw_std.npy")

    unfolded_ensemble_std = np.std(unfolded_ensemble, axis=0)
    unfolded_std = Matrix(unfolded_ensemble_std, self.raw.Eg,
                          self.raw.Ex, state='std')
    unfolded_std.save(self.path / "unfolded_std.npy")

    firstgen_ensemble_std = np.std(firstgen_ensemble, axis=0)
    firstgen_std = Matrix(firstgen_ensemble_std, self.firstgen.Eg,
                          self.firstgen.Ex, state='std')
    firstgen_std.save(self.path / "firstgen_std.npy")

    self.std_raw = raw_std
    self.std_unfolded = unfolded_std
    self.std_firstgen = firstgen_std

    self.raw_ensemble = raw_ensemble
    self.unfolded_ensemble = unfolded_ensemble
    self.firstgen_ensemble = firstgen_ensemble
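# A minimal, self-contained sketch of the pool pattern used in generate() above:
# pathos' pool.imap zips several iterables, so each ensemble member gets its own
# index, its own spawned SeedSequence (an independent random stream), and the same
# method string via itertools.repeat. `perturb_once` is a hypothetical stand-in
# for self.step; the pathos and numpy calls themselves are the real APIs.
from itertools import repeat

import numpy as np
from pathos.multiprocessing import ProcessPool
from tqdm import tqdm


def perturb_once(i, seed_seq, method):
    rng = np.random.default_rng(seed_seq)  # independent stream per member
    base = np.arange(12.0).reshape(3, 4)   # toy "raw matrix"
    return rng.poisson(base) if method == 'poisson' else rng.normal(base, 1.0)


if __name__ == '__main__':
    number = 8
    ss = np.random.SeedSequence(42)
    pool = ProcessPool(nodes=4)
    it = pool.imap(perturb_once, range(number), ss.spawn(number), repeat('poisson'))
    ensemble = np.array(list(tqdm(it, total=number)))
    pool.close(); pool.join(); pool.clear()
    print(ensemble.std(axis=0))  # per-cell standard deviation across the ensemble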
def calculate_ldos(self, npts, emax, emin, **args):
    self.emax = emax
    self.emin = emin
    self.npts = npts
    self.phi = args['phi']
    self.x = array([[0.0 for i in range(self.npts[0])]
                    for j in range(self.npts[1])])
    self.y = array([[0.0 for i in range(self.npts[0])]
                    for j in range(self.npts[1])])
    self.z = array([[0.0 for i in range(self.npts[0])]
                    for j in range(self.npts[1])])
    self.ldos = array([[[0.0 for j in range(self.npts[0])]
                        for i in range(self.npts[1])]
                       for k in range(len(self.orbitals))])
    if 'nprocs' in args:
        self.nprocs = int(args['nprocs'])
    if 'tip_disp' in args:
        self.tip_disp = float(args['tip_disp'])
    # the list `exclude` holds the indices of atoms to exclude from LDOS integration
    self.exclude = []
    if 'exclude' in args:
        self.exclude_args = args['exclude']
        counter = 0
        for i in self.atomtypes:
            if i in args['exclude']:
                for j in range(self.atomnums[self.atomtypes.index(i)]):
                    self.exclude.append(counter)
                    counter += 1
            else:
                counter += self.atomnums[self.atomtypes.index(i)]
        print(str(len(self.exclude)) + ' atoms excluded from LDOS averaging')
    if 'unit_cell_num' in args:
        self.unit_cell_num = args['unit_cell_num']
    for i in range(-1 * self.unit_cell_num, self.unit_cell_num + 1):
        for j in range(-1 * self.unit_cell_num, self.unit_cell_num + 1):
            for k in self.coord:
                self.periodic_coord.append(k + self.lv[0] * i + self.lv[1] * j)
    self.periodic_coord = array(self.periodic_coord)

    for i in range(len(self.energies)):
        if self.energies[i] < self.emin:
            self.estart = i
        if self.energies[i] > self.emax:
            self.eend = i
            break
    if self.energies[0] > self.emin:
        self.estart = 0
        self.emin = self.energies[0]
        print('specified emin is less than minimum energy in DOSCAR. setting emin to {}'
              .format(self.emin))
    if self.energies[-1] < self.emax:
        self.eend = len(self.energies) - 1
        self.emax = self.energies[-1]
        print('specified emax exceeds maximum energy in DOSCAR. setting emax to {}'
              .format(self.emax))

    if self.phi != 0:
        self.K = array([
            tunneling_factor(self.emax, i, self.phi)
            for i in self.energies[self.estart:self.eend]
        ])
    else:
        self.K = array([1.0 for i in range(self.eend - self.estart)])

    for i in range(self.npts[1]):
        for j in range(self.npts[0]):
            pos = array([0.0, 0.0, max(self.coord[:, 2]) + self.tip_disp])
            pos += self.lv[0] * (j + 0.5) / (self.npts[0]) \
                + self.lv[1] * (i + 0.5) / (self.npts[1])
            self.x[i][j], self.y[i][j], self.z[i][j] = pos[0], pos[1], pos[2]

    start = time()
    # executes ldos integration in parallel on a ProcessPool of self.nprocs processors
    if self.nprocs > 1:
        pool = ProcessPool(self.nprocs)
        output = pool.map(
            self.integrator,
            [i for i in range(self.npts[1]) for j in range(self.npts[0])],
            [j for i in range(self.npts[1]) for j in range(self.npts[0])])
        self.ldos = sum(output)
        pool.close()
    # executes ldos integration on a single processor
    else:
        for i in range(self.npts[1]):
            for j in range(self.npts[0]):
                pos = array([self.x[i][j], self.y[i][j], self.z[i][j]])
                counter = 1
                for k in self.periodic_coord:
                    if counter == sum(self.atomnums) + 1:
                        counter = 1
                    if counter - 1 not in self.exclude:
                        posdiff = norm(pos - k)
                        sf = exp(-1.0 * posdiff * self.K * 1.0e-10)
                        for l in range(len(self.dos[counter])):
                            self.ldos[l][i][j] += sum(
                                self.dos[counter][l][self.estart:self.eend] * sf)
                    counter += 1

    print('total time to integrate {} points: {} seconds on {} processors'
          .format(self.npts[0] * self.npts[1], time() - start, self.nprocs))
def new(self):
    return tf.data.Dataset.from_generator(
        self._generator,
        output_types=('int8', 'int8', 'bool', 'float32', 'int32'),
        output_shapes=((None, *self.shapes[0]), (None, *self.shapes[0]),
                       (None, *self.shapes[1]), (None, *self.shapes[2]),
                       (None, *self.shapes[3])))


if __name__ == '__main__':
    GEN_ENDED_AT = int(input())
    GEN_ENDS_AT = int(input())

    mp.set_start_method('spawn')
    pool = ProcessPool(mp.cpu_count())

    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)
    if GEN_ENDED_AT >= 0:
        with open(f'ddrive/{GEN_ENDED_AT}.txt', 'rb') as f:
            weights = pickle.loads(lzma.decompress(base64.b85decode(f.read())))
        critic.set_weights(weights)
    critic.compile(optimizer=tf.keras.optimizers.SGD(0.0001), loss='mse')

    cg = CellGroup()

    for gen in range(GEN_ENDED_AT + 1, GEN_ENDS_AT + 1):
def __init__(self, evol_params):
    '''
    Initialize evolutionary search
    ARGS:
    evol_params: dict
        required keys -
            pop_size: int - population size,
            genotype_size: int - genotype_size,
            fitness_function: function - a user-defined function that takes a genotype
                as arg and returns updated genotype and float fitness value
            elitist_fraction: float - fraction of top performing individuals to retain
                for next generation
            mutation_variance: float - variance of the gaussian distribution used for
                mutation noise
        optional keys -
            fitness_args: list-like - optional additional arguments to pass while calling
                fitness function, list such that len(list) == 1 or len(list) == pop_size
            num_processes: int - pool size for multiprocessing.pool.Pool - defaults to os.cpu_count()
    '''
    # check for required keys
    required_keys = [
        'pop_size', 'genotype_size', 'fitness_function',
        'elitist_fraction', 'mutation_variance'
    ]
    for key in required_keys:
        if key not in evol_params.keys():
            raise Exception(
                'Argument evol_params does not contain the following required key: {}'.format(key))

    # checked for all required keys
    self.pop_size = evol_params['pop_size']
    self.genotype_size = evol_params['genotype_size']
    self.fitness_function = evol_params['fitness_function']
    self.elitist_fraction = int(
        np.ceil(evol_params['elitist_fraction'] * self.pop_size))
    self.mutation_variance = evol_params['mutation_variance']

    # validating fitness function
    assert self.fitness_function, "Invalid fitness_function"
    rand_genotype = np.random.rand(self.genotype_size)
    fitness_return = self.fitness_function(rand_genotype)
    assert len(fitness_return) == 2, \
        "Fitness function must return 2 items - updated_genotype and fitness"
    updated_genotype = fitness_return[0]
    rand_genotype_fitness = fitness_return[1]
    assert type(rand_genotype_fitness) == type(0.) or type(rand_genotype_fitness) in np.sctypes['float'], \
        "Invalid return type for second return of fitness_function. Should be float or np.dtype('np.float*')"
    assert len(updated_genotype) == self.genotype_size, \
        "Invalid length for first return type of fitness function: length should be equal to genotype_size={}".format(self.genotype_size)

    # create other required data
    self.num_processes = evol_params.get('num_processes', None)
    self.pop = np.random.rand(self.pop_size, self.genotype_size)
    self.fitness = np.zeros(self.pop_size)
    self.num_batches = int(self.pop_size / self.num_processes)
    self.num_remainder = int(self.pop_size % self.num_processes)

    # check for fitness function kwargs
    if 'fitness_args' in evol_params.keys():
        optional_args = evol_params['fitness_args']
        assert len(optional_args) == 1 or len(optional_args) == self.pop_size, \
            "fitness args should be length 1 or pop_size."
        self.optional_args = optional_args
    else:
        self.optional_args = None

    # creating the global process pool to be used across all generations
    global __search_process_pool
    __search_process_pool = ProcessPool(self.num_processes)
    time.sleep(0.5)
    return (check_monotone(meaning, B, AandB + 1, AminusB)
            and check_monotone(meaning, B, AandB, AminusB + 1))

    def check_monotone(meaning, B, AandB=0, AminusB=0, truth_found=False):
        if is_monotone[B][AandB][AminusB] is None:
            is_monotone[B][AandB][AminusB] = check_monotone_inner(
                meaning, B, AandB, AminusB, truth_found)
        return is_monotone[B][AandB][AminusB]

    monotone = True
    for B in range(args.model_size + 1):
        if not check_monotone(meaning, B):
            monotone = False
            break

    is_monotone = process_pool.map(check_monotone, meanings)
    return set(i for (i, val) in enumerate(is_monotone) if val)


with ProcessPool(nodes=args.processes) as process_pool:
    a_up = get_monotone_quantifiers('A', 'up', process_pool)
    b_up = get_monotone_quantifiers('B', 'up', process_pool)
    a_down = get_monotone_quantifiers('A', 'down', process_pool)
    b_down = get_monotone_quantifiers('B', 'down', process_pool)
    indices = a_up.union(b_up).union(a_down).union(b_down)
    file_util.dump_dill(indices, 'monotone_expression_indices.dill')
def run_pso(self, function, searchspace, target, nparticles, maxiter,
            precision, domain, verbose=True, pool_size=None):
    """
    Performs a PSO for the given function in the searchspace, looking for
    the target, which is in the output space.

    The asynchronous evaluation means the exact definition of iterations may
    be lost. To preserve some sense of this, an iteration is defined to be
    `nparticles` evaluations performed. This means that not every particle is
    updated in the history for every iteration. However, the total number of
    function evaluations (iterations * nparticles) will still be preserved
    for this definition.

    function - the function to be optimized. Its domain must include the
        searchspace and its output must be in the space of target.
    searchspace - np.array((ssdim, 2))
    target - Not used by `ImplicitTargetPSO`. `function` should include any
        necessary target data.
    nparticles - number of particles to use in the optimization
    maxiter - maximum number of iterations to the optimization routine
    precision - how close to the target to attempt to get
    domain - absolute boundaries on the trial solutions/particles
    pool_size - (int) set the ProcessingPool size explicitly.
        Defaults to 4 if not set.
    """
    if not pool_size:
        pool_size = 4

    # update attributes
    self.maxiter = maxiter
    self.precision = precision

    # search space dimensionality
    if searchspace.shape[1] != 2:
        print('WARNING! searchspace does not have dimensions (N,2).')
    ssdim = searchspace.shape[0]

    # init particle positions and velocities
    xpart = np.random.random((nparticles, ssdim))
    for ii in range(ssdim):
        # scale the uniform random dist
        xpart[:, ii] = (searchspace[ii, 1] - searchspace[ii, 0]) * xpart[:, ii] + searchspace[ii, 0]
    vpart = np.zeros(xpart.shape)

    # init particle best solution
    pbest = 1.0 * xpart
    # NOTE: Best not to assume the form of obj function input
    cpbest = np.array([self.cost(function(*xp), target) for xp in pbest])

    # init global best solutions
    im = np.argmin(cpbest)
    gbest = pbest[im]
    cgbest = cpbest[im]

    if False:
        return xpart, vpart, pbest, cpbest, gbest, cgbest

    # intermediate arrays
    # multiply by 1.0 to make copies not bind references
    xarr = 1.0 * xpart[:, :, None]
    varr = 1.0 * vpart[:, :, None]
    parr = 1.0 * pbest[:, :, None]
    cparr = 1.0 * cpbest[:, None]
    garr = 1.0 * gbest[:, None]
    cgarr = 1.0 * np.array([cgbest])

    iternum = 0
    evalnum = 0

    # Asynchronous process management
    pool = ProcessPool(pool_size)
    results = []

    # initial submission
    for fi in range(nparticles):
        # update velocity
        vpart[fi] = self.velocity(vpart[fi], xpart[fi], pbest[fi], gbest)
        # update position
        xpart[fi] = xpart[fi] + vpart[fi]
        # keeps particles inside the absolute boundaries given by `domain`
        xpart[fi] = np.maximum(xpart[fi], domain[:, 0])
        xpart[fi] = np.minimum(xpart[fi], domain[:, 1])
        # compute cost of new position
        results.append(pool.apipe(function, xpart[fi]))

    t1 = time.time()
    while (iternum <= maxiter) and (cgbest > precision):
        for i, res in enumerate(results):
            if res.ready():
                # Get result and update
                cpp = res.get()
                # update best position
                if cpp < cpbest[i]:
                    pbest[i] = xpart[i]
                    cpbest[i] = cpp
                if cpp < cgbest:
                    gbest = xpart[i]
                    cgbest = cpp
                # update velocity
                vpart[i] = self.velocity(vpart[i], xpart[i], pbest[i], gbest)
                # update position
                xpart[i] = xpart[i] + vpart[i]
                # keeps particles inside the absolute boundaries given by `domain`
                xpart[i] = np.maximum(xpart[i], domain[:, 0])
                xpart[i] = np.minimum(xpart[i], domain[:, 1])
                # Resubmit
                results[i] = pool.apipe(function, xpart[i])
                evalnum += 1

                current_iternum = evalnum // nparticles
                if (current_iternum > iternum) or (cgbest < precision):
                    xarr = np.concatenate((xarr, xpart[:, :, None]), axis=2)
                    varr = np.concatenate((varr, vpart[:, :, None]), axis=2)
                    parr = np.concatenate((parr, pbest[:, :, None]), axis=2)
                    cparr = np.concatenate((cparr, cpbest[:, None]), axis=1)
                    garr = np.concatenate((garr, gbest[:, None]), axis=1)
                    cgarr = np.append(cgarr, cgbest)
                    iternum = current_iternum

    t2 = time.time()
    if verbose:
        print('optimization took {:5.2f} seconds'.format(*[t2 - t1]))

    return xarr, varr, parr, cparr, garr, cgarr
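# A stripped-down sketch of the asynchronous pattern run_pso() relies on:
# pool.apipe() submits one evaluation and returns a handle, res.ready() polls it
# without blocking, and res.get() collects the value so the slot can be
# resubmitted. The quadratic `cost` function and the x + 0.5 "update" are
# placeholders, not part of the original optimizer.
import numpy as np
from pathos.multiprocessing import ProcessPool


def cost(x):
    return float(np.sum(x ** 2))


if __name__ == '__main__':
    pool = ProcessPool(4)
    xs = [np.random.random(3) for _ in range(8)]
    handles = [pool.apipe(cost, x) for x in xs]   # non-blocking submissions
    done = 0
    while done < 20:                              # keep 8 evaluations in flight
        for i, res in enumerate(handles):
            if res.ready():
                value = res.get()                 # collect a finished evaluation
                xs[i] = xs[i] + 0.5               # placeholder "update" step
                handles[i] = pool.apipe(cost, xs[i])
                done += 1
    pool.close(); pool.join(); pool.clear()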
def train(self, outer_n_epoch, outer_l2, outer_std, outer_learning_rate, outer_n_samples_per_ep, n_cpu=None, fix_ppo=None, **_): # Requires more than 1 MPI process. assert MPI.COMM_WORLD.Get_size() > 1 assert n_cpu is not None if fix_ppo: ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 16), 0.5)], outside_value=0.5) else: ppo_factor_schedule = PiecewiseSchedule([(0, 1.), (int(outer_n_epoch / 8), 0.)], outside_value=0.) outer_lr_scheduler = PiecewiseSchedule([(0, outer_learning_rate), (int(outer_n_epoch / 2), outer_learning_rate * 0.1)], outside_value=outer_learning_rate * 0.1) def objective(env, theta, pool_rank): agent = self.create_agent(env, pool_rank) loss_n_params = len(agent.get_loss().get_params_1d()) agent.get_loss().set_params_1d(theta[:loss_n_params]) if self._outer_evolve_policy_init: agent.pi.set_params_1d(theta[loss_n_params:]) # Agent lifetime is inner_opt_freq * inner_max_n_epoch return run_batch_rl(env, agent, inner_opt_freq=self._inner_opt_freq, inner_buffer_size=self._inner_buffer_size, inner_max_n_epoch=self._inner_max_n_epoch, pool_rank=pool_rank, ppo_factor=ppo_factor_schedule.value(epoch), epoch=None) # Initialize theta. theta = self.init_theta(self._env) num_params = len(theta) logger.log('Theta dim: {}'.format(num_params)) # Set up outer loop parameter update schedule. adam = Adam(shape=(num_params,), beta1=0., stepsize=outer_learning_rate, dtype=np.float32) # Set up intra-machine parallelization. logger.log('Using {} proceses per MPI process.'.format(n_cpu)) from pathos.multiprocessing import ProcessPool pool = ProcessPool(nodes=n_cpu) begin_time, best_test_return = time.time(), -np.inf for epoch in range(outer_n_epoch): # Anneal outer learning rate adam.stepsize = outer_lr_scheduler.value(epoch) noise = np.random.randn(outer_n_samples_per_ep // NUM_EQUAL_NOISE_VECTORS, num_params) noise = np.repeat(noise, NUM_EQUAL_NOISE_VECTORS, axis=0) theta_noise = theta[np.newaxis, :] + noise * outer_std theta_noise = theta_noise.reshape(MPI.COMM_WORLD.Get_size(), -1) # Distributes theta_noise vectors to all nodes. logger.log('Scattering all perturbed theta vectors and running inner loops ...') recvbuf = np.empty(theta_noise.shape[1], dtype='float') MPI.COMM_WORLD.Scatter(theta_noise, recvbuf, root=0) theta_noise = recvbuf.reshape(-1, num_params) # Noise vectors are scattered, run inner loop, parallelized over `pool_size` processes. 
start_time = time.time() pool_size = int(outer_n_samples_per_ep / MPI.COMM_WORLD.Get_size()) results = pool.amap(objective, [self._env] * pool_size, theta_noise, range(pool_size)).get() # Extract relevant results returns = [utils.ret_to_obj(r['ep_final_rew']) for r in results] update_time = [np.mean(r['update_time']) for r in results] env_time = [np.mean(r['env_time']) for r in results] ep_length = [np.mean(r['ep_length']) for r in results] n_ep = [len(r['ep_length']) for r in results] mean_ep_kl = [np.mean(r['ep_kl']) for r in results] final_rets = [np.mean(r['ep_return'][-3:]) for r in results] # We gather the results at node 0 recvbuf = np.empty([MPI.COMM_WORLD.Get_size(), 7 * pool_size], # 7 = number of scalars in results vector dtype='float') if MPI.COMM_WORLD.Get_rank() == 0 else None results_processed_arr = np.asarray( [returns, update_time, env_time, ep_length, n_ep, mean_ep_kl, final_rets], dtype='float').ravel() MPI.COMM_WORLD.Gather(results_processed_arr, recvbuf, root=0) # Do outer loop update calculations at node 0 if MPI.COMM_WORLD.Get_rank() == 0: end_time = time.time() logger.log( 'All inner loops completed, returns gathered ({:.2f} sec).'.format( time.time() - start_time)) results_processed_arr = recvbuf.reshape(MPI.COMM_WORLD.Get_size(), 7, pool_size) results_processed_arr = np.transpose(results_processed_arr, (0, 2, 1)).reshape(-1, 7) results_processed = [dict(returns=r[0], update_time=r[1], env_time=r[2], ep_length=r[3], n_ep=r[4], mean_ep_kl=r[5], final_rets=r[6]) for r in results_processed_arr] returns = np.asarray([r['returns'] for r in results_processed]) # ES update noise = noise[::NUM_EQUAL_NOISE_VECTORS] returns = np.mean(returns.reshape(-1, NUM_EQUAL_NOISE_VECTORS), axis=1) theta_grad = relative_ranks(returns).dot(noise) / outer_n_samples_per_ep \ - outer_l2 * theta theta -= adam.step(theta_grad) # Perform `NUM_TEST_SAMPLES` evaluation runs on root 0. if epoch % self._outer_plot_freq == 0 or epoch == outer_n_epoch - 1: start_test_time = time.time() logger.log('Performing {} test runs in parallel on node 0 ...'.format(NUM_TEST_SAMPLES)) # Evaluation run with current theta test_results = pool.amap( objective, [self._env] * NUM_TEST_SAMPLES, theta[np.newaxis, :] + np.zeros((NUM_TEST_SAMPLES, num_params)), range(NUM_TEST_SAMPLES) ).get() plotting.plot_results(epoch, test_results) test_return = np.mean([utils.ret_to_obj(r['ep_return']) for r in test_results]) if test_return > best_test_return: best_test_return = test_return # Save theta as numpy array. self.save_theta(theta) self.save_theta(theta, str(epoch)) logger.log('Test runs performed ({:.2f} sec).'.format(time.time() - start_test_time)) logger.logkv('Epoch', epoch) utils.log_misc_stats('Obj', logger, returns) logger.logkv('PPOFactor', ppo_factor_schedule.value(epoch)) logger.logkv('EpochTimeSpent(s)', end_time - start_time) logger.logkv('TotalTimeSpent(s)', end_time - begin_time) logger.logkv('BestTestObjMean', best_test_return) logger.dumpkvs()
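# The training loop above fans each perturbed theta out with pool.amap(...).get().
# A minimal sketch of that call shape, assuming a toy `objective(env, theta, rank)`:
# amap maps over the three argument sequences in lockstep and .get() blocks until
# every worker returns, which is what the scatter/gather around it relies on.
import numpy as np
from pathos.multiprocessing import ProcessPool


def objective(env, theta, pool_rank):
    # stand-in for run_batch_rl: pretend the "return" is just a dot product
    return {'ep_return': float(np.dot(theta, theta)), 'rank': pool_rank}


if __name__ == '__main__':
    pool = ProcessPool(nodes=4)
    pool_size = 8
    theta_noise = np.random.randn(pool_size, 5)
    results = pool.amap(objective,
                        ['dummy-env'] * pool_size,   # one env handle per worker
                        theta_noise,                 # one perturbed theta per worker
                        range(pool_size)).get()      # block until every run finishes
    print([r['ep_return'] for r in results])
    pool.close(); pool.join(); pool.clear()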
def ProcessPool(self, key=None, new=False, *args, **kwargs):
    process_pool = ProcessPool(*args, **kwargs)
    self.register_atexit(process_pool)
    return process_pool
def GlobalProcessPool(self, *args, **kwargs):
    if self.process_pool is None:
        self.process_pool = ProcessPool(*args, **kwargs)
    return self.process_pool
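# The two helpers above manage pool lifetime: one builds a fresh pool and registers
# it for cleanup at interpreter exit, the other lazily creates a single shared pool.
# A self-contained sketch of both ideas using atexit directly; the class name and
# cleanup details are illustrative, not taken from the original codebase.
import atexit

from pathos.multiprocessing import ProcessPool


class PoolManager:
    def __init__(self):
        self.process_pool = None

    def new_pool(self, *args, **kwargs):
        pool = ProcessPool(*args, **kwargs)

        def _shutdown(p=pool):
            # shut worker processes down when the interpreter exits
            p.close(); p.join(); p.clear()

        atexit.register(_shutdown)
        return pool

    def global_pool(self, *args, **kwargs):
        if self.process_pool is None:            # create once, reuse afterwards
            self.process_pool = ProcessPool(*args, **kwargs)
        return self.process_pool


if __name__ == '__main__':
    mgr = PoolManager()
    print(mgr.global_pool(2).map(abs, [-1, -2, 3]))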
def wrapper(*args):
    with ProcessPool() as p:
        p.map(insert_func, *args)
### check if point is within polygon
bool_list = []
for point in range(0, len(grid_jamaica)):
    bool_val = grid_jamaica.geometry.iloc[point].within(jamaica.iloc[0].geometry)
    bool_list.append(bool_val)

### extract points inside
grid_jamaica = grid_jamaica[bool_list]
grid_jamaica['country'] = 'Jamaica'
grid_jamaica.to_file('Input/grid_jamaica.gpkg', driver='GPKG')

### add some additional information
grid_jamaica['region'] = str(region)

list_years = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
for sample in list_years:
    grid_jamaica['sample_num'] = str(sample)
    print(len(grid_jamaica))

    pool = ProcessPool(nodes=cpu_count() - 2)
    output = pool.map(TC_analysis, grid_jamaica.latitude, grid_jamaica.longitude,
                      grid_jamaica.ID_point, grid_jamaica.country,
                      grid_jamaica.region, grid_jamaica.sample_num)

    output_files = pd.concat(output)
    output_files.to_csv('Output/TC_Jamaica_' + str(region) + '_' + str(sample) + '.csv',
                        index=False)
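# pathos' pool.map accepts several sequences and zips them, which is how the call
# above feeds six GeoDataFrame columns to TC_analysis in lockstep (one row per
# task). A tiny sketch of that calling convention with a hypothetical worker:
from pathos.multiprocessing import ProcessPool


def describe_point(lat, lon, point_id):
    # hypothetical per-point worker; the real TC_analysis returns a DataFrame
    return '{}: ({:.2f}, {:.2f})'.format(point_id, lat, lon)


if __name__ == '__main__':
    lats = [17.9, 18.1, 18.4]
    lons = [-76.8, -77.3, -77.9]
    ids = ['p0', 'p1', 'p2']
    pool = ProcessPool(nodes=2)
    print(pool.map(describe_point, lats, lons, ids))
    pool.close(); pool.join(); pool.clear()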
def main(argv): try: opts, args = getopt.getopt(argv, "i:t:m:c:o:p:", [ "infile=", "trainfile=", "model=", "cutoff=", "output=", "pool_num=" ]) except getopt.GetoptError: print( 'TrainingTool.py -i <infile> -t <trainfile> -m <model> -c <cutoff> -o <output_name> -p <pool_num>' ) sys.exit(2) for opt, arg in opts: if opt == '-h': print( 'TrainingTool.py -i <infile> -t <trainfile> -m <model> -c <cutoff> -o <output_name> -p <pool_num>' ) sys.exit() elif opt in ("-i", "--infile"): infile = arg elif opt in ("-t", "--trainfile"): trainfile = arg elif opt in ("-m", "--model"): model = arg elif opt in ("-c", "--cutoff"): cutoff = arg elif opt in ("-p", "--pool_num"): pool_num = arg elif opt in ("-o", "--output_name"): output_name = arg pool = ProcessPool(int(pool_num)) ##Read predict sample... ##modify the name/order of features df_predict = pd.read_csv(infile, delimiter='\t', low_memory=False) colnames = df_predict.columns.values.tolist() #df_predict=changeName(colnames,df_predict) column_num = df_predict.shape[1] df_predict.rename(columns={ colnames[0]: 'V1', colnames[1]: 'V2', colnames[2]: 'V3' }, inplace=True) df_predict = df_predict.set_index(["V1"], append=False, drop=False) chromName_list = df_predict.index.values.tolist() chromName_list_new = [] extract_out = [] for i in chromName_list: if i not in chromName_list_new: chromName_list_new.append(i) ##extract features form the output of Extractfeatures.sh setDir(output_name + "/tmp2") list_df_predict = [df_predict for row in range(len(chromName_list_new))] list_output_name = [output_name for row in range(len(chromName_list_new))] extract_out = pool.map(extractFunc, list_df_predict, chromName_list_new, list_output_name) ##merge features feature_num = 2 * column_num df_features = pd.DataFrame(columns=[feature_num]) rootdir = output_name + '/tmp2' list_path = os.listdir(rootdir) list_rootdir = [rootdir for row in range(len(list_path))] list_column_num = [column_num for row in range(len(list_path))] list_output_name = [output_name for row in range(len(list_path))] ##load the scale and model of training set df_train = pd.read_csv(trainfile, delimiter='\t', header=None) data_train = df_train.values.astype('float') X = data_train[:, 1:40] Y = data_train[:, 0] for i in range(len(Y)): if Y[i] == 0: Y[i] = math.log(Y[i] + 1) else: Y[i] = math.log(Y[i]) scaler_x = StandardScaler().fit(X) scaler_y = StandardScaler().fit(Y.reshape(-1, 1)) clf = joblib.load(model) print("Loading model success!") list_clf = [clf for row in range(len(list_path))] list_scaler_x = [scaler_x for row in range(len(list_path))] list_scaler_y = [scaler_y for row in range(len(list_path))] list_cutoff = [cutoff for row in range(len(list_path))] ##scale the features and predict setDir(output_name + "/predict_results/") pool.map(scaleAndPredict, list_rootdir, list_path, list_scaler_x, list_scaler_y, list_clf, list_column_num, list_output_name, list_cutoff)
def data_generator(annotation_lines, input_shape, anchors, nb_classes,
                   batch_size=1, augment=True, max_boxes=20, jitter=0.3,
                   img_scaling=1.2, resize_img=True, allow_rnd_shift=True,
                   color_hue=0.1, color_sat=1.5, color_val=1.5,
                   flip_horizontal=True, flip_vertical=False,
                   bbox_overlap=0.95, nb_threads=1):
    """data generator for fit_generator

    :param list(str) annotation_lines:
    :param int batch_size:
    :param ndarray anchors:
    :param int nb_classes:
    :param tuple(int,int) input_shape: CNN input size
    :param bool augment: perform augmentation
    :param int max_boxes: maximal number of training bounding boxes
    :param float jitter:
    :param float color_hue: range of change of HSV color HUE
    :param float color_sat: range of change of HSV color SAT
    :param float color_val: range of change of HSV color value
    :param float img_scaling: upper image scaling
    :param bool flip_horizontal: allow random flip image/boxes vertical
    :param bool flip_vertical: allow random flip image/boxes horizontal
    :param bool resize_img: resize image to fit fully to CNN
    :param bool allow_rnd_shift: allow shifting image not only centered crop
    :param float bbox_overlap: threshold in case cut image,
        drop all boxes with lower overlap
    :param float|int nb_threads: nb threads running in parallel
    :return:

    >>> np.random.seed(0)
    >>> path_img = os.path.join(update_path('model_data'), 'bike-car-dog.jpg')
    >>> line = path_img + ' 100,150,200,250,0 300,50,400,200,1'
    >>> anchors = get_anchors(os.path.join(update_path('model_data'), 'yolo_anchors.csv'))
    >>> gen = data_generator([line], (416, 416), anchors, 3, nb_threads=2)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    >>> gen = data_generator([line], (416, 416), anchors, 3, augment=False)
    >>> batch = next(gen)
    >>> len(batch)
    2
    >>> [b.shape for b in batch[0]]
    [(1, 416, 416, 3), (1, 13, 13, 3, 8), (1, 26, 26, 3, 8), (1, 52, 52, 3, 8)]
    """
    nb_lines = len(annotation_lines)
    circ_i = 0
    if nb_lines == 0 or batch_size <= 0:
        return None
    color_hue = abs(color_hue)
    color_sat = color_sat if color_sat > 1 else 1. / color_sat
    color_val = color_val if color_val > 1 else 1. / color_val

    nb_threads = nb_workers(nb_threads)
    pool = ProcessPool(nb_threads) if nb_threads > 1 else None
    _wrap_rand_data = partial(
        get_augmented_data,
        input_shape=input_shape,
        augment=augment,
        max_boxes=max_boxes,
        jitter=jitter,
        resize_img=resize_img,
        img_scaling=img_scaling,
        allow_rnd_shift=allow_rnd_shift,
        hue=color_hue,
        sat=color_sat,
        val=color_val,
        flip_horizontal=flip_horizontal,
        flip_vertical=flip_vertical,
        bbox_overlap=bbox_overlap,
    )

    while True:
        if circ_i < batch_size:
            # shuffle while you are starting a new cycle
            np.random.shuffle(annotation_lines)

        batch_image_data = []
        batch_box_data = []
        # create the list of lines to be loaded in batch
        annot_lines = annotation_lines[circ_i:circ_i + batch_size]
        batch_offset = (circ_i + batch_size) - nb_lines
        # check if the loaded batch has sufficient size
        if batch_offset > 0:
            annot_lines += annotation_lines[:batch_offset]

        # multiprocessing loading of batch data
        map_process = pool.imap if pool else map
        for image, box in map_process(_wrap_rand_data, annot_lines):
            batch_image_data.append(image)
            batch_box_data.append(box)

        circ_i = (circ_i + batch_size) % nb_lines

        batch_image_data = np.array(batch_image_data)
        batch_box_data = np.array(batch_box_data)
        y_true = preprocess_true_boxes(batch_box_data, input_shape, anchors, nb_classes)
        batch = [batch_image_data, *y_true], np.zeros(batch_size)
        yield batch
        gc.collect()

    if pool:
        pool.close()
        pool.join()
        pool.clear()
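# data_generator() switches between pool.imap and the builtin map depending on the
# worker count, so the same loop body serves both the parallel and the sequential
# case. A condensed sketch of that fallback, with a toy augmentation function in
# place of get_augmented_data:
from pathos.multiprocessing import ProcessPool


def fake_augment(line):
    # placeholder for get_augmented_data: pretend "augmentation" is upper-casing
    return line.upper(), len(line)


def load_batch(lines, nb_workers=1):
    pool = ProcessPool(nb_workers) if nb_workers > 1 else None
    map_process = pool.imap if pool else map      # same call shape either way
    batch = list(map_process(fake_augment, lines))
    if pool:
        pool.close(); pool.join(); pool.clear()
    return batch


if __name__ == '__main__':
    print(load_batch(['img1.jpg 1,2,3,4,0', 'img2.jpg 5,6,7,8,1'], nb_workers=2))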
import random

from pathos.multiprocessing import ProcessPool

import Generator
from Monotonicity import MonotonicityMeasurer
import matplotlib.pyplot as plt

universe = Generator.generate_simplified_models(10)
meanings = [tuple(random.choice([True, False]) for i in range(len(universe)))
            for j in range(5000)]
measurer_up = MonotonicityMeasurer(universe, 10, 'B')
measurer_down = MonotonicityMeasurer(universe, 10, 'B', down=True)

with ProcessPool(4) as process_pool:
    monotonicities_up = process_pool.map(measurer_up, meanings)
    monotonicities_down = process_pool.map(measurer_down, meanings)
    monotonicities = process_pool.map(max, monotonicities_up, monotonicities_down)

fig = plt.figure()
plt.hist(monotonicities, bins=30, range=[0, 1])
plt.show()
fig.savefig('results/random_monotone.png', bbox_inches='tight')
def iterate_mproc_map(wrap_func, iterate_vals, nb_workers=CPU_COUNT, desc='', ordered=True):
    """ create a multi-processing pool and execute a wrapped function in separate processes

    :param func wrap_func: function which will be executed in the iterations
    :param list iterate_vals: list or iterator which will be used in the iterations
    :param int nb_workers: number of jobs running in parallel,
        if -1 then use all available threads
    :param str|None desc: description for the bar, if it is set None, bar is suppressed
    :param bool ordered: whether enforce ordering in the parallelism

    Waiting reply on:

    * https://github.com/celery/billiard/issues/280
    * https://github.com/uqfoundation/pathos/issues/169

    See:

    * https://sebastianraschka.com/Articles/2014_multiprocessing.html
    * https://github.com/nipy/nipype/pull/2754
    * https://medium.com/contentsquare-engineering-blog/multithreading-vs-multiprocessing-in-python-ece023ad55a
    * http://mindcache.me/2015/08/09/python-multiprocessing-module-daemonic-processes-are-not-allowed-to-have-children.html
    * https://medium.com/@bfortuner/python-multithreading-vs-multiprocessing-73072ce5600b

    >>> list(iterate_mproc_map(np.sqrt, range(5), nb_workers=1, desc=None))  # doctest: +ELLIPSIS
    [0.0, 1.0, 1.41..., 1.73..., 2.0]
    >>> list(iterate_mproc_map(sum, [[0, 1]] * 5, nb_workers=2, ordered=False))
    [1, 1, 1, 1, 1]
    >>> list(iterate_mproc_map(max, [(2, 1)] * 5, nb_workers=2, desc=''))
    [2, 2, 2, 2, 2]
    """
    iterate_vals = list(iterate_vals)
    nb_workers = 1 if not nb_workers else int(nb_workers)
    nb_workers = CPU_COUNT if nb_workers < 0 else nb_workers

    if desc is not None:
        pbar = tqdm.tqdm(total=len(iterate_vals),
                         desc=str('%r @%i-threads' % (desc, nb_workers)))
    else:
        pbar = None

    if nb_workers > 1:
        logging.debug('perform parallel in %i threads', nb_workers)
        # Standard mproc.Pool creates daemon processes, which are not allowed to
        # spawn children of their own, so cascaded multiprocessing fails
        # https://stackoverflow.com/questions/6974695/python-process-pool-non-daemonic
        # pool = mproc.Pool(nb_workers)
        # pool = NonDaemonPool(nb_workers)
        pool = ProcessPool(nb_workers)
        # pool = Pool(nb_workers)
        mapping = pool.imap if ordered else pool.uimap
    else:
        logging.debug('perform sequential')
        pool = None
        mapping = map

    for out in mapping(wrap_func, iterate_vals):
        pbar.update() if pbar else None
        yield out

    if pool:
        pool.close()
        pool.join()
        pool.clear()

    pbar.close() if pbar else None
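# iterate_mproc_map() picks pool.imap when order matters and pool.uimap when it
# does not; uimap yields results as workers finish, so one slow item does not hold
# up the stream. A small sketch contrasting the two (the sleep is illustrative):
import time

from pathos.multiprocessing import ProcessPool


def slow_square(x):
    time.sleep(0.2 if x == 0 else 0.0)   # make the first item the slowest
    return x * x


if __name__ == '__main__':
    pool = ProcessPool(2)
    ordered = list(pool.imap(slow_square, range(4)))     # [0, 1, 4, 9], input order
    unordered = list(pool.uimap(slow_square, range(4)))  # same values, arrival order
    print(ordered, sorted(unordered))
    pool.close(); pool.join(); pool.clear()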
def test_repeatability(self): import matplotlib.pyplot as mpl import numpy as np start, end = (10, 10), (350, 250) repeats = 2 equal_paths = [] rdrs = np.linspace(-1, 1, 10) risk_sums = [] def make_path(start, end, rdr): algo = RiskGridAStar( ManhattanRiskHeuristic(self.large_diag_environment, risk_to_dist_ratio=rdr)) return algo.find_path(self.large_diag_environment, start, end) def run_params(rdr): paths = [make_path(start, end, rdr) for _ in range(repeats)] equal_paths.append(all([p == paths[0] for p in paths])) if not paths[0]: return [rdr, np.inf] risk_sum = sum([ self.large_diag_environment.grid[n[0], n[1]] for n in paths[0] ]) return [rdr, risk_sum] pool = ProcessPool(nodes=8) params = np.array(rdrs) risk_sums = pool.map(run_params, params) pool.close() # for rdr in rdrs: # paths = [make_path(start, end, rdr) for _ in range(repeats)] # equal_paths.append(all([p == paths[0] for p in paths])) # if not paths[0]: # risk_sums.append([rdr, np.inf]) # continue # risk_sum = sum([n.n for n in paths[0]]) # risk_sums.append([rdr, risk_sum]) # # fig = mpl.figure() # ax = fig.add_subplot(111) # for path in paths: # ax.plot([n.x for n in path], [n.y for n in path], color='red') # im = ax.imshow(self.large_no_diag_environment.grid) # fig.colorbar(im, ax=ax, label='Population') # ax.set_title(f'RiskA* with RDR={rdr:.4g} \n Risk sum={risk_sum:.4g}') # fig.show() risk_sums = np.array(risk_sums) rdr_fig = mpl.figure() ax = rdr_fig.add_subplot(111) ax.scatter(risk_sums[:, 0], risk_sums[:, 1]) # ax.set_xscale('log') ax.set_yscale('symlog') ax.set_xlabel('Risk-Distance Ratio') ax.set_ylabel('Path Risk sum') ax.set_title('Risk Grid A*') rdr_fig.show() self.assertTrue(all(equal_paths), 'Paths are not generated repeatably')
num_processes = 6

if __name__ == "__main__":

    def process_updates(x):
        from API_TTERMS import getconf2, query, testingKWT
        # tid = x[0]
        tid = 428
        testingKWT(tid, '144.167.35.89')

    conf = getconf2()
    q_trackers = f"select tid from trackers where userid = '*****@*****.**' or YEAR(date_created) in (2019,2020)"
    # q_trackers = f"select t.tid from trackers t left join tracker_keyword tk on t.tid = tk.tid where t.tid is null or tk.tid is null or tk.status_percentage < 100 or tk.status != 1 or tk.status_percentage is null or tk.status is null"
    tracker_result = query(conf, q_trackers)

    if parallel:
        process_pool = ProcessPool(num_processes)
        for record in tqdm(process_pool.imap(process_updates, tracker_result),
                           desc="Terms", ascii=True, file=sys.stdout,
                           total=len(tracker_result)):
            pass
        process_pool.close()
        print("Joining pool")
        process_pool.join()
        print("Clearing pool")
        process_pool.clear()
        print("Finished!")
    else:
        for x in tqdm(tracker_result, desc="Terms",
def test_repeatability(self): import matplotlib.pyplot as mpl import numpy as np from pathos.multiprocessing import ProcessPool from itertools import product start, end = (10, 10), (350, 250) repeats = 2 equal_paths = [] rdrs = np.linspace(-100, 100, 10) jgs = [0] # np.linspace(0, 5000, 2) jls = np.linspace(0, 50, 2) def make_path(start, end, rdr, jg, jl): algo = RiskJumpPointSearchAStar(ManhattanRiskHeuristic( self.large_diag_environment, risk_to_dist_ratio=rdr), jump_gap=jg, jump_limit=jl) return algo.find_path(self.large_diag_environment, start, end) def run_params(rdr, jg, jl): paths = [ make_path(start, end, rdr, jg, jl) for _ in range(repeats) ] equal_paths.append(all([p == paths[0] for p in paths])) if not paths[0]: return [rdr, np.inf, jl, jg] risk_sum = sum([ self.large_diag_environment.grid[n[0], n[1]] for n in paths[0] ]) return [rdr, risk_sum, jl, jg] pool = ProcessPool(nodes=8) pool.restart(force=True) params = np.array(list(product(rdrs, jgs, jls))) risk_sums = pool.map(run_params, params[:, 0], params[:, 1], params[:, 2]) pool.close() # risk_sums = [] # for rdr, jg, jl in product(rdrs, jgs, jls): # paths = [make_path(start, end, rdr, jg, jl) for _ in range(repeats)] # equal_paths.append(all([p == paths[0] for p in paths])) # if not paths[0]: # risk_sums.append([rdr, np.inf, jl, jg]) # continue # risk_sum = sum([n.n for n in paths[0]]) # risk_sums.append([rdr, risk_sum, jl, jg]) # # fig = mpl.figure() # ax = fig.add_subplot(111) # for path in paths: # ax.plot([n.x for n in path], [n.y for n in path], color='red') # im = ax.imshow(self.large_diag_environment.grid) # fig.colorbar(im, ax=ax, label='Population') # ax.set_title(f'Risk JPS A* with RDR={rdr:.4g}, JL={jl} \n Risk sum={risk_sum:.4g}') # fig.show() risk_sums = np.array(risk_sums) jl_fig = mpl.figure() ax = jl_fig.add_subplot(111) sc = ax.scatter(risk_sums[:, 0], risk_sums[:, 1], c=risk_sums[:, 2]) ax.set_yscale('symlog') ax.set_xlabel('Risk-Distance Ratio') ax.set_ylabel('Path Risk sum') ax.set_title('R JPS+ A* Jump Limits') jl_fig.colorbar(sc, ax=ax, label='Jump Limit') jl_fig.show() jg_fig = mpl.figure() ax = jg_fig.add_subplot(111) sc = ax.scatter(risk_sums[:, 0], risk_sums[:, 1], c=risk_sums[:, 3]) ax.set_yscale('symlog') ax.set_xlabel('Risk-Distance Ratio') ax.set_ylabel('Path Risk sum') ax.set_title('R JPS+ A* Jump Gaps') jg_fig.colorbar(sc, ax=ax, label='Jump Gap') jg_fig.show() self.assertTrue(all(equal_paths), 'Paths are not generated repeatably')
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2016 California Institute of Technology.
# Copyright (c) 2016-2022 The Uncertainty Quantification Foundation.
# License: 3-clause BSD.  The full license text is available at:
#  - https://github.com/uqfoundation/pathos/blob/master/LICENSE
"""
minimal interface to python's multiprocessing module

Notes:
    This module has been deprecated in favor of ``pathos.pools``.
"""
from pathos.multiprocessing import ProcessPool, __STATE
from pathos.threading import ThreadPool  # XXX: thread __STATE not imported
from pathos.helpers import cpu_count

mp = ProcessPool()
tp = ThreadPool()

__all__ = ['mp_map']

# backward compatibility
# FIXME: deprecated... and buggy! (fails to dill on imap/uimap)
def mp_map(function, sequence, *args, **kwds):
    '''extend python's parallel map function to multiprocessing

    Inputs:
        function  -- target function
        sequence  -- sequence to process in parallel

    Additional Inputs:
        nproc     -- number of 'local' cpus to use  [default = 'autodetect']
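# As the module note above says, this shim is deprecated in favor of pathos.pools.
# A minimal sketch of the modern equivalent of mp_map(function, sequence), using
# only the documented pathos.pools API:
from pathos.pools import ProcessPool


def double(x):
    return 2 * x


if __name__ == '__main__':
    pool = ProcessPool()            # defaults to the autodetected cpu count
    print(pool.map(double, range(5)))
    pool.close(); pool.join(); pool.clear()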
def main(): main_dir = Path( r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr' ) os.chdir(main_dir) interp_var = 'temp' ft_type = 'mag' #========================================================================== if interp_var == 'temp': # MEAN TEMPERATURE in_data_file = os.path.join(f'temperature_{ft_type}_spec_df.csv') in_vgs_file = os.path.join(r'temperature_cftns.csv') in_stns_coords_file = os.path.join(os.path.dirname(in_data_file), r'temperature_avg_coords.csv') out_dir = r'temperature_kriging' var_units = u'\u2103' # 'centigrade' var_name = 'temperature' out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc' # interpolated values # can be int, float, 'min_in'/'max_in' or None # min_var_val = 'min_in' # max_var_val = 'max_in' # min_var_val = None # max_var_val = None #========================================================================== #========================================================================== elif interp_var == 'ppt': # PRECIPITATION in_data_file = os.path.join(f'precipitation_{ft_type}_spec_df.csv') in_vgs_file = os.path.join(r'precipitation_cftns.csv') in_stns_coords_file = os.path.join(os.path.dirname(in_data_file), r'precipitation_coords.csv') out_dir = r'precipitation_kriging' var_units = 'mm' var_name = 'precipitation' out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc' # interpolated values # can be int, float, 'min_in'/'max_in' or None # min_var_val = 'min_in' # max_var_val = 'max_in' # min_var_val = None # max_var_val = None #========================================================================== else: raise ValueError(f'Invalid value for interp_var: {interp_var}!') out_krig_net_cdf_file = out_krig_net_cdf_file # assuming in_drift_raster and in_stns_coords_file and in_bounds_shp_file # have the same coordinates system # assuming in_drift_rasters_list have the same cell sizes, bounds and NDVs # basically they are copies of each other except for the drift values in_drift_rasters_list = ([ r'P:\Synchronize\IWS\QGIS_Neckar\raster\lower_de_gauss_z3_1km.tif' ]) # in_bounds_shp_file = ( # os.path.join(r'P:\Synchronize\IWS\QGIS_Neckar\raster', # r'taudem_out_spate_rockenau\watersheds.shp')) in_bounds_shp_file = (os.path.join( r'P:\Synchronize\IWS\QGIS_Neckar\raster\taudem_out_spate_rockenau\watersheds.shp' )) align_ras_file = in_drift_rasters_list[0] out_figs_dir = os.path.join(out_dir, 'krige_figs') x_coords_lab = 'X' y_coords_lab = 'Y' time_dim_lab = 'freq' nc_mode = 'w' # min_ppt_thresh = 1.0 idw_exp = 5 n_cpus = 1 buffer_dist = 20e3 sec_buffer_dist = 2e3 in_sep = str(';') ord_krige_flag = True sim_krige_flag = True edk_krige_flag = True idw_flag = True plot_figs_flag = True # ord_krige_flag = False sim_krige_flag = False edk_krige_flag = False idw_flag = False plot_figs_flag = False os.chdir(main_dir) if not os.path.exists(out_dir): os.mkdir(out_dir) if (not os.path.exists(out_figs_dir)) and plot_figs_flag: os.mkdir(out_figs_dir) # print('min_var_val:', min_var_val) # print('max_var_val:', max_var_val) print('idw_exp:', idw_exp) print('n_cpus:', n_cpus) print('nc_mode:', nc_mode) print('var_name:', var_name) print('out_dir:', out_dir) print('in_bounds_shp_file:', in_bounds_shp_file) print('out_krig_net_cdf_file:', out_krig_net_cdf_file) assert any([ord_krige_flag, sim_krige_flag, edk_krige_flag, idw_flag]) #========================================================================== # read the data frames #========================================================================== in_data_df = pd.read_csv(in_data_file, sep=in_sep, 
index_col=0, encoding='utf-8') in_vgs_df = pd.read_csv(in_vgs_file, sep=in_sep, index_col=0, encoding='utf-8') in_stns_coords_df = pd.read_csv(in_stns_coords_file, sep=in_sep, index_col=0, encoding='utf-8') all_stns = in_data_df.columns.intersection(in_stns_coords_df.index) assert all_stns.shape[0] in_data_df = in_data_df.loc[:, all_stns] in_stns_coords_df = in_stns_coords_df.loc[all_stns, :] #========================================================================== # Get stations that are around/in the bounds_shp only #========================================================================== bds_vec = ogr.Open(in_bounds_shp_file) assert bds_vec bds_lyr = bds_vec.GetLayer(0) feat_buffs_list = [] feat_sec_buffs_list = [] for feat in bds_lyr: # just to get the names of the catchments geom = feat.GetGeometryRef().Clone() assert geom feat_buffs_list.append(geom.Buffer(buffer_dist)) feat_sec_buffs_list.append(geom.Buffer(sec_buffer_dist)) bds_vec.Destroy() assert feat_buffs_list and feat_sec_buffs_list print(len(feat_buffs_list), 'polygons in the in_bounds_shp_file...') fin_stns = [] for poly in feat_buffs_list: for stn in all_stns: if stn in fin_stns: continue curr_pt = cnvt_to_pt(*in_stns_coords_df.loc[stn, ['X', 'Y']].values) if chk_cntmt(curr_pt, poly): fin_stns.append(stn) assert fin_stns print('%d stations out of %d within buffer zone of in_bounds_shp_file' % (len(fin_stns), in_stns_coords_df.shape[0])) fin_stns = np.unique(fin_stns) in_data_df = in_data_df.loc[:, fin_stns] in_stns_coords_df = in_stns_coords_df.loc[fin_stns, :] #========================================================================== # Read the DEM #========================================================================== # if edk_krige_flag: # in_drift_arr_list = [] # _rows_list = [] # _cols_list = [] # # for in_drift_raster in in_drift_rasters_list: # in_drift_ds = gdal.Open(in_drift_raster) # # assert in_drift_ds, 'GDAL cannot open %s' % in_drift_raster # # drift_rows = in_drift_ds.RasterYSize # drift_cols = in_drift_ds.RasterXSize # # drift_geotransform = in_drift_ds.GetGeoTransform() # # _drift_x_min = drift_geotransform[0] # _drift_y_max = drift_geotransform[3] # # drift_band = in_drift_ds.GetRasterBand(1) # drift_ndv = drift_band.GetNoDataValue() # # cell_width = drift_geotransform[1] # cell_height = abs(drift_geotransform[5]) # # _drift_x_max = _drift_x_min + (drift_cols * cell_width) # _drift_y_min = _drift_y_max - (drift_rows * cell_height) # # _arr = in_drift_ds.ReadAsArray() # # in_drift_arr_list.append(_arr) # _rows_list.append(_arr.shape[0]) # _cols_list.append(_arr.shape[1]) # # assert all(_ == _rows_list[0] for _ in _rows_list), ( # 'Drift raster have unequal number of rows!') # # assert all(_ == _cols_list[0] for _ in _cols_list), ( # 'Drift raster have unequal number of columns!') #========================================================================== # Read the bounding shapefile #========================================================================== # sf = shp.Reader(in_bounds_shp_file) # polys_list = [i.__geo_interface__ for i in sf.iterShapes()] ((fin_x_min, fin_x_max, fin_y_min, fin_y_max), cell_width) = get_aligned_shp_bds_and_cell_size(in_bounds_shp_file, align_ras_file) cell_height = cell_width fin_x_min -= 2 * cell_width fin_x_max += 2 * cell_width fin_y_min -= 2 * cell_height fin_y_max += 2 * cell_height # if edk_krige_flag: # assert fin_x_min > _drift_x_min # assert fin_x_max < _drift_x_max # assert fin_y_min > _drift_y_min # assert fin_y_max < _drift_y_max # # min_col = 
int(max(0, (fin_x_min - _drift_x_min) / cell_width)) # max_col = int(ceil((fin_x_max - _drift_x_min) / cell_width)) # # min_row = int(max(0, (_drift_y_max - fin_y_max) / cell_height)) # max_row = int(ceil((_drift_y_max - fin_y_min) / cell_height)) # # else: min_col = 0 max_col = int(ceil((fin_x_max - fin_x_min) / cell_width)) min_row = 0 max_row = int(ceil((fin_y_max - fin_y_min) / cell_height)) #========================================================================== # Calculate coordinates at which to krige #========================================================================== assert 0 <= min_col <= max_col, (min_col, max_col) assert 0 <= min_row <= max_row, (min_row, max_row) strt_x_coord = fin_x_min + (0.5 * cell_width) end_x_coord = strt_x_coord + ((max_col - min_col) * cell_width) strt_y_coord = fin_y_max - (0.5 * cell_height) end_y_coord = strt_y_coord - ((max_row - min_row) * cell_height) krige_x_coords = np.linspace(strt_x_coord, end_x_coord, (max_col - min_col + 1)) krige_y_coords = np.linspace(strt_y_coord, end_y_coord, (max_row - min_row + 1)) krige_x_coords_mesh, krige_y_coords_mesh = np.meshgrid( krige_x_coords, krige_y_coords) krige_coords_orig_shape = krige_x_coords_mesh.shape # if plot_figs_flag: # # xy coords for pcolormesh # pcolmesh_x_coords = np.linspace( # fin_x_min, fin_x_max, (max_col - min_col + 1)) # # pcolmesh_y_coords = np.linspace( # fin_y_max, fin_y_min, (max_row - min_row + 1)) # # krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = ( # np.meshgrid(pcolmesh_x_coords, pcolmesh_y_coords)) # # else: # krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = None, None krige_x_coords_mesh = krige_x_coords_mesh.ravel() krige_y_coords_mesh = krige_y_coords_mesh.ravel() # print('\n\n') # print('#' * 10) # # _beg_t = timeit.default_timer() # # print(krige_x_coords_mesh.shape[0], # 'cells to interpolate per step before intersection!') # fin_cntn_idxs = np.ones(krige_x_coords_mesh.shape[0], dtype=bool) # fin_cntn_idxs = np.zeros(krige_x_coords_mesh.shape[0], dtype=bool) # ogr_pts = np.vectorize(cnvt_to_pt)(krige_x_coords_mesh, krige_y_coords_mesh) # # for poly in feat_sec_buffs_list: # curr_cntn_idxs = np.vectorize(chk_cntmt)(ogr_pts, poly) # fin_cntn_idxs = fin_cntn_idxs | curr_cntn_idxs # # print(fin_cntn_idxs.sum(), # 'cells to interpolate per step after intersection!') # # _end_t = timeit.default_timer() # _tot_t = _end_t - _beg_t # # print(f'Took {_tot_t:0.4f} seconds!') # print('#' * 10) # # krige_x_coords_mesh = krige_x_coords_mesh[fin_cntn_idxs] # krige_y_coords_mesh = krige_y_coords_mesh[fin_cntn_idxs] # if edk_krige_flag: # drift_vals_list = [] # # krige_cols = np.arange(min_col, max_col + 1, dtype=int) # krige_rows = np.arange(min_row, max_row + 1, dtype=int) # # assert krige_x_coords.shape[0] == krige_cols.shape[0] # assert krige_y_coords.shape[0] == krige_rows.shape[0] # # (krige_drift_cols_mesh, # krige_drift_rows_mesh) = np.meshgrid(krige_cols, krige_rows) # # krige_drift_cols_mesh = krige_drift_cols_mesh.ravel() # krige_drift_rows_mesh = krige_drift_rows_mesh.ravel() # # krige_drift_cols_mesh = krige_drift_cols_mesh[fin_cntn_idxs] # krige_drift_rows_mesh = krige_drift_rows_mesh[fin_cntn_idxs] # # for _drift_arr in in_drift_arr_list: # _drift_vals = _drift_arr[ # krige_drift_rows_mesh, krige_drift_cols_mesh] # # drift_vals_list.append(_drift_vals) # # # drift_vals_arr = np.array(drift_vals_list, dtype=float) # # drift_df_cols = list(range(len(in_drift_rasters_list))) # in_stns_drift_df = pd.DataFrame( # index=in_stns_coords_df.index, # 
columns=drift_df_cols, # dtype=float) # # for stn in in_stns_drift_df.index: # stn_x = in_stns_coords_df.loc[stn, x_coords_lab] # stn_y = in_stns_coords_df.loc[stn, y_coords_lab] # # stn_col = int((stn_x - _drift_x_min) / cell_width) # stn_row = int((_drift_y_max - stn_y) / cell_height) # # for col, _arr in zip(drift_df_cols, in_drift_arr_list): # try: # _ = _arr[stn_row, stn_col] # if not np.isclose(drift_ndv, _): # in_stns_drift_df.loc[stn, col] = _ # # except IndexError: # pass # # in_stns_drift_df.dropna(inplace=True) #========================================================================== # Open NC #========================================================================== out_nc = nc.Dataset(os.path.join(out_dir, out_krig_net_cdf_file), mode=str(nc_mode)) if nc_mode == 'w': out_nc.set_auto_mask(False) out_nc.createDimension(x_coords_lab, krige_x_coords.shape[0]) out_nc.createDimension(y_coords_lab, krige_y_coords.shape[0]) out_nc.createDimension(time_dim_lab, in_data_df.shape[0]) x_coords_nc = out_nc.createVariable(x_coords_lab, 'd', dimensions=x_coords_lab) x_coords_nc[:] = krige_x_coords y_coords_nc = out_nc.createVariable(y_coords_lab, 'd', dimensions=y_coords_lab) y_coords_nc[:] = krige_y_coords time_nc = out_nc.createVariable(time_dim_lab, 'i8', dimensions=time_dim_lab) time_nc[:] = np.arange(in_data_df.shape[0]) else: raise RuntimeError('Not configured for this option!') time_nc = out_nc.variables[time_dim_lab] krige_y_coords = y_coords_nc[:] krige_x_coords = x_coords_nc[:] #========================================================================== # MP stuff #========================================================================== mp_cond = False if ((n_cpus > 1) and (in_data_df.shape[0] > (n_cpus + 1))): idxs = pd.np.linspace(0, in_data_df.shape[0], (n_cpus) + 1, endpoint=True, dtype=int) idxs = np.unique(idxs) print('MP idxs:', idxs) if idxs.shape[0] == 1: idxs = np.concatenate((np.array([0]), idxs)) mp_cond = True else: idxs = [0, in_data_df.shape[0]] #========================================================================== # Krige #========================================================================== if ord_krige_flag: print('\n\n') print('#' * 10) _beg_t = timeit.default_timer() print('Ordinary Kriging...') if 'OK' not in out_nc.variables: ok_nc = out_nc.createVariable('OK', 'd', dimensions=(time_dim_lab, y_coords_lab, x_coords_lab), fill_value=False) else: ok_nc = out_nc.variables['OK'] ok_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]], in_stns_coords_df, in_vgs_df.loc[ft_type][0], krige_x_coords_mesh, krige_y_coords_mesh, krige_coords_orig_shape, (idxs[i], idxs[i + 1]), fin_cntn_idxs) for i in range(n_cpus)) if mp_cond: ok_krige_flds = np.full( (in_data_df.shape[0], krige_coords_orig_shape[0], krige_coords_orig_shape[1]), np.nan, dtype=np.float32) mp_ress = [] try: mp_pool = ProcessPool(n_cpus) mp_pool.restart(True) mp_ress = list(mp_pool.uimap(ordinary_kriging, ok_vars_gen)) mp_pool.clear() except Exception as msg: mp_pool.close() mp_pool.join() print('Error in ordinary_kriging:', msg) for mp_res in mp_ress: if (len(mp_res) != 3) and (not isinstance(list)): print('\n', mp_res, '\n') continue [strt_index, end_index, sub_ok_krige_flds] = mp_res ok_krige_flds[strt_index:end_index] = sub_ok_krige_flds # free memory mp_res[2], sub_ok_krige_flds = None, None ok_nc[:] = ok_krige_flds else: [strt_index, end_index, ok_krige_flds] = ordinary_kriging(next(ok_vars_gen)) ok_nc[:] = ok_krige_flds ok_nc.units = var_units ok_nc.standard_name = var_name + ' (ordinary 
kriging)' ok_krige_flds = None _end_t = timeit.default_timer() _tot_t = _end_t - _beg_t print(f'Took {_tot_t:0.4f} seconds!') print('#' * 10) # if sim_krige_flag: # print('\n\n') # print('#' * 10) # # _beg_t = timeit.default_timer() # # print('Simple Kriging...') # if 'SK' not in out_nc.variables: # sk_nc = out_nc.createVariable( # 'SK', # 'd', # dimensions=(time_dim_lab, y_coords_lab, x_coords_lab), # fill_value=False) # # else: # sk_nc = out_nc.variables['SK'] # # sk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]], # in_stns_coords_df, # in_vgs_df.iloc[idxs[i]:idxs[i + 1]], # min_ppt_thresh, # var_name, # krige_x_coords_mesh, # krige_y_coords_mesh, # krige_coords_orig_shape, # min_var_val, # max_var_val, # (idxs[i], idxs[i + 1]), # plot_figs_flag, # krige_x_coords_plot_mesh, # krige_y_coords_plot_mesh, # var_units, # polys_list, # out_figs_dir, # fin_cntn_idxs) for i in range(n_cpus)) # # if mp_cond: # sk_krige_flds = np.full( # (in_data_df.shape[0], # krige_coords_orig_shape[0], # krige_coords_orig_shape[1]), # np.nan, # dtype=np.float32) # # mp_ress = [] # # try: # mp_pool = ProcessPool(n_cpus) # mp_pool.restart(True) # # mp_ress = list(mp_pool.uimap(simple_kriging, sk_vars_gen)) # # mp_pool.clear() # # except Exception as msg: # mp_pool.close() # mp_pool.join() # print('Error in simple_kriging:', msg) # # for mp_res in mp_ress: # if (len(mp_res) != 3) and (not isinstance(list)): # print('\n', mp_res, '\n') # continue # # [strt_index, end_index, sub_sk_krige_flds] = mp_res # sk_krige_flds[strt_index:end_index] = sub_sk_krige_flds # # # free memory # mp_res[2], sub_sk_krige_flds = None, None # # sk_nc[:] = sk_krige_flds # # else: # [strt_index, # end_index, # sk_krige_flds] = simple_kriging(next(sk_vars_gen)) # # sk_nc[:] = sk_krige_flds # # sk_nc.units = var_units # sk_nc.standard_name = var_name + ' (simple kriging)' # # sk_krige_flds = None # # _end_t = timeit.default_timer() # _tot_t = _end_t - _beg_t # # print(f'Took {_tot_t:0.4f} seconds!') # print('#' * 10) # # if edk_krige_flag: # print('\n\n') # print('#' * 10) # # _beg_t = timeit.default_timer() # # print('External Drift Kriging...') # if 'EDK' not in out_nc.variables: # edk_nc = out_nc.createVariable( # 'EDK', # 'd', # dimensions=(time_dim_lab, y_coords_lab, x_coords_lab), # fill_value=False) # # else: # edk_nc = out_nc.variables['EDK'] # # edk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]], # in_stns_drift_df, # in_stns_coords_df, # in_vgs_df.iloc[idxs[i]:idxs[i + 1]], # min_ppt_thresh, # var_name, # krige_x_coords_mesh, # krige_y_coords_mesh, # drift_vals_arr, # krige_coords_orig_shape, # drift_ndv, # min_var_val, # max_var_val, # (idxs[i], idxs[i + 1]), # plot_figs_flag, # krige_x_coords_plot_mesh, # krige_y_coords_plot_mesh, # var_units, # polys_list, # out_figs_dir, # fin_cntn_idxs) for i in range(n_cpus)) # # if mp_cond: # edk_krige_flds = np.full( # (in_data_df.shape[0], # krige_coords_orig_shape[0], # krige_coords_orig_shape[1]), # np.nan, # dtype=np.float32) # # mp_ress = [] # # try: # mp_pool = ProcessPool(n_cpus) # mp_pool.restart(True) # # mp_ress = list(mp_pool.uimap( # external_drift_kriging, edk_vars_gen)) # # mp_pool.clear() # # except Exception as msg: # mp_pool.close() # mp_pool.join() # print('Error in external_drift_kriging:', msg) # # for mp_res in mp_ress: # if (len(mp_res) != 3) and (not isinstance(list)): # print('\n', mp_res, '\n') # continue # # [strt_index, end_index, sub_edk_krige_flds] = mp_res # edk_krige_flds[strt_index:end_index] = sub_edk_krige_flds # # print('sub_min:', 
np.nanmin(sub_edk_krige_flds)) # print('sub_max:', np.nanmax(sub_edk_krige_flds)) # # # free memory # mp_res[2], sub_edk_krige_flds = None, None # # else: # [strt_index, # end_index, # edk_krige_flds] = external_drift_kriging(next(edk_vars_gen)) # # edk_nc[:] = edk_krige_flds # # edk_nc.units = var_units # edk_nc.standard_name = var_name + ' (external drift kriging)' # # edk_krige_flds = None # # _end_t = timeit.default_timer() # _tot_t = _end_t - _beg_t # # print(f'Took {_tot_t:0.4f} seconds!') # print('#' * 10) # # #========================================================================== # # IDW # #========================================================================== # if idw_flag: # print('\n\n') # print('#' * 10) # # _beg_t = timeit.default_timer() # # print('Inverse Distance Weighting...') # if 'IDW' not in out_nc.variables: # idw_nc = out_nc.createVariable( # 'IDW', # 'd', # dimensions=(time_dim_lab, y_coords_lab, x_coords_lab), # fill_value=False) # # else: # idw_nc = out_nc.variables['IDW'] # # idw_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]], # in_stns_coords_df, # min_ppt_thresh, # idw_exp, # var_name, # krige_x_coords_mesh, # krige_y_coords_mesh, # krige_coords_orig_shape, # min_var_val, # max_var_val, # (idxs[i], idxs[i + 1]), # plot_figs_flag, # krige_x_coords_plot_mesh, # krige_y_coords_plot_mesh, # var_units, # polys_list, # out_figs_dir, # fin_cntn_idxs) for i in range(n_cpus)) # # if mp_cond: # idw_flds = np.full( # (in_data_df.shape[0], # krige_coords_orig_shape[0], # krige_coords_orig_shape[1]), # np.nan, # dtype=np.float32) # # mp_ress = [] # try: # mp_pool = ProcessPool(n_cpus) # mp_pool.restart(True) # # mp_ress = list(mp_pool.uimap( # inverse_distance_wtng, idw_vars_gen)) # # mp_pool.clear() # # except Exception as msg: # mp_pool.close() # mp_pool.join() # print('Error in inverse_distance_wtng:', msg) # # for mp_res in mp_ress: # if (len(mp_res) != 3) and (not isinstance(list)): # print('\n', mp_res, '\n') # continue # # [strt_index, end_index, sub_idw_flds] = mp_res # idw_flds[strt_index:end_index] = sub_idw_flds # # # free memory # mp_res[2], sub_idw_flds = None, None # # else: # [strt_index, # end_index, # idw_flds] = inverse_distance_wtng(next(idw_vars_gen)) # # idw_nc[:] = idw_flds # # idw_nc.units = var_units # idw_nc.standard_name = ( # var_name + ' (IDW (exp=%0.3f))' % float(idw_exp)) # # idw_flds = None # # _end_t = timeit.default_timer() # _tot_t = _end_t - _beg_t # # print(f'Took {_tot_t:0.4f} seconds!') # print('#' * 10) out_nc.Author = 'Faizan IWS Uni-Stuttgart' out_nc.Source = out_nc.filepath() out_nc.close() return
def run(self, data):
    # Fan self.operations out over data with a pathos ProcessPool; pathos ships
    # callables with dill, so a bound method like self.operations can be sent
    # to the workers.
    pool = ProcessPool(nodes=self.processes)
    result = pool.map(self.operations, data)
    # Equivalent with the stdlib pool (only works if self.operations is picklable):
    # with mp.Pool(self.processes) as pool:
    #     result = pool.map(self.operations, data)
    return result
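# Hedged standalone sketch (not from the original project): pathos keeps a cache
# of pool instances keyed by their configuration, so constructing a ProcessPool
# with the same node count can hand back the cached pool. Calling clear() after
# close()/join() drops that cached instance, which keeps repeated calls like
# run() above from accumulating idle worker processes.
from pathos.multiprocessing import ProcessPool


def square(x):
    return x * x


if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    print(pool.map(square, range(5)))   # [0, 1, 4, 9, 16]
    pool.close()
    pool.join()
    pool.clear()                        # drop the cached pool instance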
# Say x-dimension corresponds to alpha, y-dimension corresponds to scatter XX, YY = np.meshgrid(alphas, scatters) ndim1, ndim2 = XX.shape # Calculate the stochastic covariance matrix at these values Niter = 40 Ntot = XX.size means = np.zeros(shape=(ndim1, ndim2, p.nbins)) covmats = np.zeros(shape=(ndim1, ndim2, p.nbins, p.nbins)) k = 1 extime = list() filename = "./Pickles/theta_{}_.p".format(logSMlim) pool = ProcessPool(ncore) for i in range(ndim1): for j in range(ndim2): start = time() alpha, scatter = XX[i, j], YY[i, j] p.dump_pickle([alpha, scatter], filename) def catfunc(i): alpha, scatter = p.load_pickle(filename) return model.abundance_match(alpha, scatter, 1)[0] catalogs = pool.map(catfunc, np.arange(Niter)) os.system("rm " + filename) print("Generated catalogs") #catalogs = model.abundance_match(alpha, scatter, Niter)
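# Hedged alternative to the pickle-file round trip above: because pathos
# serializes functions with dill, a closure that captures (alpha, scatter)
# directly can be mapped without dumping the parameters to a temporary file.
# `make_catalog` is a stand-in for model.abundance_match, not the original code.
from pathos.multiprocessing import ProcessPool


def make_catalog(alpha, scatter, seed):
    # placeholder for model.abundance_match(alpha, scatter, 1)[0]
    return {'alpha': alpha, 'scatter': scatter, 'seed': seed}


def sample_catalogs(alpha, scatter, niter, ncore=2):
    pool = ProcessPool(ncore)
    try:
        # the lambda closes over alpha and scatter; dill handles the pickling
        catfunc = lambda seed: make_catalog(alpha, scatter, seed)
        return pool.map(catfunc, range(niter))
    finally:
        pool.close()
        pool.join()
        pool.clear()


# catalogs = sample_catalogs(0.5, 0.1, niter=40)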
def samples(self, param1, emaxins, model): """ Returns a sample M of utility values """ #number of choices J = 2 * 2 * 3 #updating sample with new betas and emax simdata_ins = simdata.SimData( self.N, param1, emaxins, self.x_w, self.x_m, self.x_k, self.x_wmk, self.passign, self.nkids0, self.married0, self.agech0_a, self.agech0_b, self.d_childa, self.d_childb, self.hours_p, self.hours_f, self.wr, self.cs, self.ws, model) #save here util_list = [] income_matrix = np.zeros((self.N, self.nperiods, self.M)) consumption_matrix = np.zeros((self.N, self.nperiods, self.M)) iscost_matrix = np.zeros((self.N, self.nperiods, self.M)) cscost_matrix = np.zeros((self.N, self.nperiods, self.M)) childcare_a_matrix = np.zeros((self.N, self.nperiods, self.M)) childcare_b_matrix = np.zeros((self.N, self.nperiods, self.M)) utils_periodt = np.zeros((self.N, J, self.nperiods, self.M)) utils_c_periodt = np.zeros((self.N, J, self.nperiods, self.M)) theta_matrix_a = np.zeros((self.N, self.nperiods, self.M)) theta_matrix_b = np.zeros((self.N, self.nperiods, self.M)) wage_matrix = np.zeros((self.N, self.nperiods, self.M)) spouse_income_matrix = np.zeros((self.N, self.nperiods, self.M)) spouse_employment_matrix = np.zeros((self.N, self.nperiods, self.M)) hours_matrix = np.zeros((self.N, self.nperiods, self.M)) ssrs_t2_matrix_a = np.zeros((self.N, self.M)) ssrs_t2_matrix_b = np.zeros((self.N, self.M)) ssrs_t5_matrix_a = np.zeros((self.N, self.M)) ssrs_t5_matrix_b = np.zeros((self.N, self.M)) #Computing samples (in parallel) def sample_gen(j): np.random.seed(j + 100) return simdata_ins.fake_data(self.nperiods) pool = ProcessPool(nodes=10) dics = pool.map(sample_gen, range(self.M)) pool.close() pool.join() pool.clear() # dics = [] # for j in range(self.M): # np.random.seed(j+100) # dics.append(simdata_ins.fake_data(self.nperiods)) #Saving results for j in range(0, self.M): income_matrix[:, :, j] = dics[j]['Income'] consumption_matrix[:, :, j] = dics[j]['Consumption'] iscost_matrix[:, :, j] = dics[j]['nh_matrix'] cscost_matrix[:, :, j] = dics[j]['cs_cost_matrix'] childcare_a_matrix[:, :, j] = dics[j]['Childcare_a'] childcare_b_matrix[:, :, j] = dics[j]['Childcare_b'] theta_matrix_a[:, :, j] = dics[j]['Theta'][0] theta_matrix_b[:, :, j] = dics[j]['Theta'][1] ssrs_t2_matrix_a[:, j] = dics[j]['SSRS_t2'][0] ssrs_t2_matrix_b[:, j] = dics[j]['SSRS_t2'][1] ssrs_t5_matrix_a[:, j] = dics[j]['SSRS_t5'][0] ssrs_t5_matrix_b[:, j] = dics[j]['SSRS_t5'][1] wage_matrix[:, :, j] = dics[j]['Wage'] spouse_income_matrix[:, :, j] = dics[j]['Spouse_income'] spouse_employment_matrix[:, :, j] = dics[j]['Spouse_employment_matrix'] hours_matrix[:, :, j] = dics[j]['Hours'] for periodt in range(0, self.nperiods): utils_periodt[:, :, periodt, j] = dics[j]['Uti_values_dic'][periodt] utils_c_periodt[:, :, periodt, j] = dics[j]['Uti_values_c_dic'][periodt] return { 'utils_periodt': utils_periodt, 'utils_c_periodt': utils_c_periodt, 'income_matrix': income_matrix, 'theta_matrix_a': theta_matrix_a, 'theta_matrix_b': theta_matrix_b, 'ssrs_t2_matrix_a': ssrs_t2_matrix_a, 'ssrs_t2_matrix_b': ssrs_t2_matrix_b, 'ssrs_t5_matrix_a': ssrs_t5_matrix_a, 'ssrs_t5_matrix_b': ssrs_t5_matrix_b, 'childcare_a_matrix': childcare_a_matrix, 'childcare_b_matrix': childcare_b_matrix, 'wage_matrix': wage_matrix, 'consumption_matrix': consumption_matrix, 'spouse_income_matrix': spouse_income_matrix, 'spouse_employment_matrix': spouse_employment_matrix, 'hours_matrix': hours_matrix, 'cscost_matrix': cscost_matrix, 'iscost_matrix': iscost_matrix }
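# Hedged sketch of the per-replication seeding used in samples(): every Monte
# Carlo draw j gets its own seed, so the results are reproducible and do not
# depend on how the pool schedules tasks. `simulate_once` is a placeholder for
# simdata_ins.fake_data, and it uses a local numpy Generator instead of the
# global np.random state.
import numpy as np
from pathos.multiprocessing import ProcessPool


def simulate_once(seed, nperiods=9):
    rng = np.random.default_rng(seed)        # local generator, no global state
    return {'Income': rng.normal(size=nperiods)}


def run_replications(n_reps, nperiods=9, nodes=4):
    pool = ProcessPool(nodes=nodes)
    try:
        dics = pool.map(lambda j: simulate_once(100 + j, nperiods), range(n_reps))
    finally:
        pool.close()
        pool.join()
        pool.clear()
    # stack the per-replication draws into one (nperiods, n_reps) array
    return np.column_stack([d['Income'] for d in dics])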
def recursive(self): """ Recursively computes a series of interpolating instances Generates a dictionary with the emax instances There is a sequence of Emax for each child age (1-10) """ def emax_gen(j): for t in range(j, 0, -1): if t == j: #last period emax_bigt_ins = self.emax_bigt(j) emax_dic = {'emax' + str(t): emax_bigt_ins[0]} elif t == j - 1: #at T-1 emax_t1_ins = self.emax_t(t, j, emax_bigt_ins[0]) emax_dic['emax' + str(t)] = emax_t1_ins[0] else: emax_t1_ins = self.emax_t(t, j, emax_t1_ins[0]) emax_dic['emax' + str(t)] = emax_t1_ins[0] return [emax_dic] pool = ProcessPool(nodes=10) #7: old child (11 years old) solves for 7 emax #19: young child (0 years old) solves for 18 emax list_emax = pool.map(emax_gen, range(8, 18)) pool.close() pool.join() pool.clear() """ list_emax = [] for j in range(7,19): print ('Im in emax j ', j) for t in range(j,0,-1): print ('In period t ', t) if t==j:#last period emax_bigt_ins=self.emax_bigt(j) emax_dic={'emax'+str(t): emax_bigt_ins[0]} #emax_values={'emax'+str(t): emax_bigt_ins[1]} elif t==j-1: #at T-1 emax_t1_ins=self.emax_t(t,j,emax_bigt_ins[0]) emax_dic['emax'+str(t)]=emax_t1_ins[0] #emax_values['emax'+str(t)]=emax_t1_ins[1] else: emax_t1_ins=self.emax_t(t,j,emax_t1_ins[0]) emax_dic['emax'+str(t)]=emax_t1_ins[0] #emax_values['emax'+str(t)]=emax_t1_ins[1] list_emax.append([emax_dic]) """ return list_emax
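# Minimal sketch (assumed names and payoffs) of the pattern in recursive():
# each terminal horizon T is solved in its own worker, and the backward
# induction over t = T..1 happens entirely inside that worker, so no state has
# to be shared between processes.
from pathos.multiprocessing import ProcessPool


def solve_horizon(T):
    value = 0.0
    emax = {}
    for t in range(T, 0, -1):
        value = 1.0 + 0.95 * value          # stand-in for the Emax recursion
        emax['emax' + str(t)] = value
    return [emax]


pool = ProcessPool(nodes=4)
try:
    list_emax = pool.map(solve_horizon, range(8, 18))
finally:
    pool.close()
    pool.join()
    pool.clear()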
def fit(self, X, y, trials=5, indicators=indicators, ranges=ranges, tune_series=tune_series, tune_params=tune_params, spearman=True, weights=None, early_stop=50): self.fitted = [] X.columns = X.columns.str.lower() # columns must be lower case pool = ProcessPool(nodes=self.n_jobs) for low, high in ranges: if low <= 1: raise ValueError("Range low must be > 1") if high >= len(X): raise ValueError( f"Range high:{high} must be > length of X:{len(X)}") for ind in indicators: idx = 0 if ":" in ind: idx = int(ind.split(":")[1]) ind = ind.split(":")[0] fn = f"{ind}(" if ind[0:3] == "tta": usage = eval(f"{ind}.__doc__").split(")")[0].split("(")[1] params = re.sub('[^0-9a-zA-Z_\s]', '', usage).split() else: sig = inspect.signature(eval(ind)) params = sig.parameters.values() for param in params: param = re.split(':|=', str(param))[0].strip() if param == "open_": param = "open" if param == "real": fn += f"X.close, " elif param == "ohlc": fn += f"X, " elif param == "ohlcv": fn += f"X, " elif param in tune_series: fn += f"X.{param}, " elif param in tune_params: fn += f"{param}=trial.suggest_int('{param}', {low}, {high}), " fn += ")" self.fitted.append( pool.apipe( Optimize(function=fn, n_trials=trials, spearman=spearman).fit, X, y, idx=idx, verbose=self.verbose, weights=weights, early_stop=early_stop, )) self.fitted = [fit.get() for fit in self.fitted] # Get results of jobs
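# Hedged sketch of the apipe pattern used in fit(): each candidate job is
# submitted asynchronously with pool.apipe (pathos' apply_async analogue), the
# handles are collected, and .get() blocks for the results once everything has
# been submitted. `tune_indicator` is a stand-in for the Optimize(...).fit call.
from pathos.multiprocessing import ProcessPool


def tune_indicator(name, low, high):
    # placeholder objective: pretend the midpoint of the range is "best"
    return name, (low + high) // 2


pool = ProcessPool(nodes=4)
try:
    jobs = [pool.apipe(tune_indicator, name, 2, 30)
            for name in ('rsi', 'ema', 'atr')]
    results = [job.get() for job in jobs]   # blocks until every job finishes
finally:
    pool.close()
    pool.join()
    pool.clear()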
def improve_circuit(circuit, subcircuit_size=5, connected=True): print('Trying to improve a circuit of size', len(circuit.gates), flush=True) circuit_graph = circuit.construct_graph() # total, current, time = correct_subcircuit_count(circuit, subcircuit_size, connected=connected), 0, 0 # print(f'\nEnumerating subcircuits of size {subcircuit_size} (total={total})...') def worker(graph): if connected and not nx.is_weakly_connected(graph): return None subcircuit = tuple(graph.nodes) # start = timer() subcircuit_inputs, subcircuit_outputs = get_inputs_and_outputs( circuit, circuit_graph, subcircuit) if len(subcircuit_outputs) == subcircuit_size: return None # current += 1 # print(f'\n{subcircuit_size}: {current}/{total} ({100 * current // total}%) ', end='', flush=True) random.shuffle(subcircuit_inputs) sub_in_tt, sub_out_tt = make_truth_tables(circuit, subcircuit_inputs, subcircuit_outputs) improved_circuit = find_circuit(subcircuit_inputs, subcircuit_size - 1, sub_in_tt, sub_out_tt) if isinstance(improved_circuit, Circuit): replaced_graph = circuit.replace_subgraph(improved_circuit, subcircuit, subcircuit_outputs) if nx.is_directed_acyclic_graph(replaced_graph): print('\nCircuit Improved!\n', end='', flush=True) improved_full_circuit = Circuit.make_circuit( replaced_graph, circuit.input_labels, make_improved_circuit_outputs(circuit.outputs, subcircuit_outputs, improved_circuit.outputs)) return fix_labels(improved_full_circuit), 1 # stop = timer() # time += stop - start # remaining = time / current * (total - current) # print(f' | curr: {int(stop - start)} sec | rem: {int(remaining)} sec ({round(remaining / 60, 1)} min)', end='', # flush=True) return None all_subgraphs = ( circuit_graph.subgraph(selected_nodes) for selected_nodes in combinations(circuit.gates, subcircuit_size)) all_correct_subgraphs = filter( lambda gr: (not connected) or (connected and nx.is_weakly_connected(gr)), all_subgraphs) total = correct_subcircuit_count(circuit, subcircuit_size, connected=True) print("start multiprocessing") with ProcessPool() as pool: res_list = list( tqdm.tqdm(pool.imap(worker, all_correct_subgraphs), total=total)) # res_list = [worker(gr) for gr in all_correct_subgraphs] res = next((item for item in res_list if item is not None), None) if res is not None: return res return circuit, 0
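# Hedged sketch of the imap-plus-tqdm pattern above: pool.imap consumes a lazy
# generator of subproblems, tqdm needs the total passed explicitly because a
# generator has no len(), and the first non-None result wins. `try_improve` is a
# placeholder for the real subcircuit worker.
import tqdm
from pathos.multiprocessing import ProcessPool


def try_improve(k):
    # placeholder: an "improvement" is found only for k divisible by 97
    return ('improved', k) if k % 97 == 0 else None


candidates = (k for k in range(1, 500))     # lazy stream of subproblems
total = 499

with ProcessPool() as pool:
    results = list(tqdm.tqdm(pool.imap(try_improve, candidates), total=total))

best = next((r for r in results if r is not None), None)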
import glob
import imageio
import scipy.misc
import pickle
import math
from pathos.multiprocessing import ProcessPool
import h5py
# needed below for np.* and tf.app.flags
import numpy as np
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_integer("epoch", 200, "Epoch to train [200]")
flags.DEFINE_float("learning_rate", 0.0005, "Learning rate [0.0005]")
flags.DEFINE_float("beta1", 0.9, "First momentum term of adam [0.9]")
flags.DEFINE_float("beta2", 0.999, "Second momentum term of adam [0.999]")
flags.DEFINE_integer("batch_size", 64, "batch size used in training [64]")
flags.DEFINE_integer("param_size", 15, "parameter size [15]")
pool = ProcessPool()
FLAGS = flags.FLAGS


def load_real_images():
    f = h5py.File("../RenderGAN-tensorflow/data/beestags/real_tags.hdf5", "r")
    raw = f["rois"]
    return raw


def load():
    """load data and labels in parallel"""
    load_helper_im = lambda i: np.reshape(
        np.array(
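# Hedged note on the module-level `pool = ProcessPool()` above: on platforms
# that spawn rather than fork worker processes (e.g. Windows), code executed at
# import time is re-run when the main module is re-imported in each worker. A
# common guard, sketched here with an assumed helper name, is to build the pool
# inside main():
from pathos.multiprocessing import ProcessPool


def load_one(i):
    return i  # placeholder for the per-image loading lambda


def main():
    pool = ProcessPool()            # defaults to the number of available CPUs
    try:
        images = pool.map(load_one, range(64))
    finally:
        pool.close()
        pool.join()
        pool.clear()
    return images


if __name__ == '__main__':
    main()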
def get_square_blender_dir(self, out_dir, n_total_frames, use_key_frames=True): if not isdir(out_dir): os.makedirs(out_dir) n_key_frames = 4 with open(join(out_dir, 'last_frame.txt'), 'w') as f: f.write('{} {}\n'.format(n_key_frames, n_total_frames)) if use_key_frames: key_frame_vt = self.get_key_frame_vt(range(n_key_frames)) else: camera_pos_list = [] total_length = (2 + 2 * np.sqrt(2)) cam_path = join(self.sample_dir, 'camera.txt') cameras = np.loadtxt(cam_path) for i in range(n_total_frames): t = i / n_total_frames * total_length if t < 1: seg_t = t camera_pos = (1 - seg_t) * cameras[0] + seg_t * cameras[1] w = np.array([1 - seg_t, seg_t, 0, 0]) elif t < 1 + np.sqrt(2): seg_t = (t - 1) / np.sqrt(2) camera_pos = (1 - seg_t) * cameras[1] + seg_t * cameras[2] w = np.array([ seg_t * (1 - seg_t), (1 - seg_t) * (1 - seg_t), seg_t * seg_t, (1 - seg_t) * seg_t ]) elif t < 2 + np.sqrt(2): seg_t = t - (1 + np.sqrt(2)) camera_pos = (1 - seg_t) * cameras[2] + seg_t * cameras[3] w = np.array([0, 0, 1 - seg_t, seg_t]) else: seg_t = (t - (2 + np.sqrt(2))) / np.sqrt(2) camera_pos = (1 - seg_t) * cameras[3] + seg_t * cameras[0] w = np.array([ seg_t * seg_t, (1 - seg_t) * seg_t, seg_t * (1 - seg_t), (1 - seg_t) * (1 - seg_t) ]) if use_key_frames: vt = sum([key_frame_vt[i] * w[i] for i in range(4)]) out_path = join(out_dir, 'frame_{}.obj'.format(i)) print('write to', out_path) write_obj(Obj(v=self.pd_v, f=self.fcs, vt=vt), out_path) else: camera_pos_list.append(camera_pos) camera_transform = self.blender_pos(camera_pos), np.array( [1, 0, 0, 0]) out_path = join(out_dir, 'cam_{}.txt'.format(i)) # print('write to',out_path) self.write_camera_transform(out_path, camera_transform) if not use_key_frames: n_threads = 30 def f(frame_i): self.get_fill_obj(camera_pos_list[frame_i], postfix=str(frame_i), frame=frame_i, check=False) pool = ProcessPool(nodes=n_threads) pool.map(f, range(n_total_frames))
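# Hedged sketch of the closure-over-self pattern used above: the locally defined
# f captures both self and camera_pos_list, and pathos (via dill) can serialize
# that closure, so each frame is handled in its own worker. The class and method
# names here are placeholders, not the original code.
from pathos.multiprocessing import ProcessPool


class FrameWriter:
    def __init__(self, positions):
        self.positions = positions

    def write_frame(self, frame_i, pos):
        return frame_i, tuple(pos)          # stand-in for get_fill_obj(...)

    def write_all(self, n_threads=4):
        def f(frame_i):
            # closure over self and self.positions, shipped to the workers
            return self.write_frame(frame_i, self.positions[frame_i])

        pool = ProcessPool(nodes=n_threads)
        try:
            return pool.map(f, range(len(self.positions)))
        finally:
            pool.close()
            pool.join()
            pool.clear()


# frames = FrameWriter([(0, 0, 1), (1, 0, 1)]).write_all()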
alpha_12 = np.zeros(boot_n) alpha_13 = np.zeros(boot_n) alpha_14 = np.zeros(boot_n) alpha_15 = np.zeros(boot_n) beta_0 = np.zeros(boot_n) beta_1 = np.zeros(boot_n) beta_2 = np.zeros(boot_n) beta_3 = np.zeros(boot_n) beta_4 = np.zeros(boot_n) gamma_0 = np.zeros(boot_n) gamma_1 = np.zeros(boot_n) gamma_2 = np.zeros(boot_n) start_time = time.time() pool = ProcessPool(nodes=18) dics = pool.map(simulation, range(boot_n)) pool.close() pool.join() pool.clear() time_opt = time.time() - start_time print('Done in') print("--- %s seconds ---" % (time_opt)) #saving results for j in range(boot_n): alpha_00[j] = dics[j][0] alpha_01[j] = dics[j][1]
def graph_sampling(graph: FSN, strategy: Optional[str] = "MetaDiff",
                   n_jobs: Optional[int] = 4,
                   use_cache: Optional[bool] = True, **kwargs) -> List[List[Union[str, int]]]:
    """
    Sample sequences of nodes from the FSN with the chosen walking strategy

    Parameters
    ----------
    graph : FSN object
        Graph to be processed
    strategy : str, default is 'MetaDiff'
        Walking strategy to be used
    n_jobs : int, default is 4
        Number of workers to be created in the parallel pool
    use_cache : bool, default is True
        Whether to reuse previously cached files

    Returns
    -------
    Sampled sequences of BP nodes
    """
    set_new_config(**kwargs)
    local_logger = logging.getLogger(f"{__name__}")
    if use_cache and os.path.isfile(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl"):
        local_logger.info("Loading sequences from cache... wait...")
        try:
            with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "rb") as file:
                res = pickle.load(file)
            local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
            local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
            return res
        except FileNotFoundError:
            local_logger.info("File not found... Recalculate \n")
        except Exception as e:
            local_logger.error(f"Unexpected error: {e}")
    local_logger.info("Sampling sequences... wait...")
    max_processes = max(n_jobs, os.cpu_count())
    global walk
    if strategy in strategy_to_class.keys():
        walk = strategy_to_class[strategy](G=graph, walk_length=CONFIG.WALKS_LENGTH,
                                           direction=CONFIG.DIRECTION,
                                           pressure=CONFIG.PRESSURE,
                                           allow_back=CONFIG.ALLOW_BACK)
    else:
        raise KeyError(
            f"The given strategy {strategy} is unknown. "
            f"The following ones are implemented: {strategy_to_class.keys()}")
    sampling_pool = ProcessPool(nodes=max_processes)
    local_logger.info(f"Created a Pool with {max_processes} processes")
    # required to restart the pool so the workers pick up the updated CONFIG
    sampling_pool.terminate()
    sampling_pool.restart()
    BPs = graph.get_BPs()
    n_BPs = len(BPs)
    sampled = list()
    try:
        with tqdm(total=n_BPs) as pbar:
            for i, res in enumerate(sampling_pool.uimap(wrappedWalk, BPs)):
                sampled.append(res)
                pbar.update()
    except KeyboardInterrupt:
        print('Got ^C while pool mapping, terminating the pool')
        sampling_pool.terminate()
    res = list(itertools.chain(*sampled))
    sampling_pool.terminate()
    sampling_pool.restart()
    local_logger.info("Caching sampled sequences!")
    if use_cache:
        with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "wb") as file:
            pickle.dump(res, file)
    local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
    local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
    return res
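# Hedged sketch of the terminate()/restart() idiom from graph_sampling(): pathos
# caches pool instances, so restarting forces fresh worker processes which, on
# fork-based start methods (the Linux default), inherit module-level state that
# was changed after the pool was first created. CONFIG and describe() below are
# stand-ins, not the original project's objects.
from pathos.multiprocessing import ProcessPool

CONFIG = {'walk_length': 10}


def describe(i):
    return i, CONFIG['walk_length']


if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    CONFIG['walk_length'] = 80          # changed after the cached pool exists
    pool.terminate()
    pool.restart()                      # fresh workers see the updated state (under fork)
    results = []
    for res in pool.uimap(describe, range(4)):
        results.append(res)
    pool.terminate()
    pool.clear()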