def setup_pool(self, n=None):
    """Create the worker pool and store it on ``self.pool``.

    Args:
        n: number of worker processes. When ``None`` the value stored under
            ``self['config', 'num_workers']`` is used instead.

    A worker count of 1 disables multiprocessing and ``self.pool`` is set to
    ``None``. For any other count the pool class comes from
    ``torch.multiprocessing`` when ``self['config', 'backend']`` equals
    ``'torch'`` (case-insensitive), and from the standard ``multiprocessing``
    module otherwise.
    """
    workers = self['config', 'num_workers'] if n is None else n
    if workers == 1:
        self.pool = None
        return

    # The pool class is imported inside the branch, so torch is only imported
    # when that backend is selected.
    if self['config', 'backend'].lower() == 'torch':
        logging.info('Using torch multiprocessing')
        from torch.multiprocessing import Pool
    else:
        logging.info('Using usual multiprocessing')
        from multiprocessing import Pool
    self.pool = Pool(workers)
def batch_clique_graph(batch, classes_dataframe, processes=None):
    """Build a clique-graph ``Data`` object from a batch.

    Every batch element is connected to every batch element (including
    itself), and each edge carries the value ``wordvector_distance`` returns
    for the pair of class labels at its two ends.

    Assumes data and labels are the first two entries of ``batch``.

    Args:
        batch: sequence whose first two items are the data ``x`` and the
            labels ``y``; any further items are ignored.
        classes_dataframe: object indexed with ``"distances"``; that entry is
            converted to a tensor and passed to every
            ``wordvector_distance`` call.
        processes: number of pool workers used to compute the edge
            attributes. ``None`` lets ``Pool`` pick its default.
            (Default value = None)

    Returns:
        ``Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)``.
    """
    x, y, *_ = batch  # anything after data and labels is dropped
    num_nodes = x.shape[0]
    node_ids = torch.arange(num_nodes)

    # Row 0: each index repeated num_nodes times; row 1: the whole index
    # range tiled num_nodes times. Together they enumerate all ordered pairs.
    sources = node_ids.repeat_interleave(num_nodes)
    targets = node_ids.repeat(num_nodes)
    edge_index = torch.stack([sources, targets])

    with Pool(processes=processes) as pool:
        num_labels = y.shape[0]
        label_pairs = torch.stack(
            [y.repeat_interleave(num_labels), y.repeat(num_labels)])
        # One row per edge: (label of source, label of target).
        edge_classes = label_pairs.t().contiguous()
        distances = torch.tensor(classes_dataframe["distances"])
        edge_weights = pool.starmap(
            wordvector_distance, zip(edge_classes, repeat(distances)))
        edge_attr = torch.stack(edge_weights)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
def optimize_parallel_cpu(
        self,
        train_function,
        nb_trials,
        nb_workers=4,
):
    """
    Runs optimization across n cpus

    :param train_function: callable paired with every generated trial; each
        ``(trial_namespace, train_function)`` tuple is handed to
        ``optimize_parallel_cpu_private`` in a worker.
    :param nb_trials: forwarded to ``strategies.generate_trials``.
    :param nb_workers: pool size, used only when ``self.pool`` has not been
        created yet.
    :return: list with one result per trial, in trial order.
    """
    flat_params = self.__flatten_params(self.opt_args)
    self.trials = strategies.generate_trials(
        strategy=self.strategy,
        flat_params=flat_params,
        nb_trials=nb_trials
    )

    jobs = []
    for trial in self.trials:
        jobs.append((self.__namespace_from_trial(trial), train_function))
    self.trials = jobs

    # The pool is created lazily and kept on the instance for later calls.
    workers = self.pool
    if workers is None:
        workers = self.pool = Pool(processes=nb_workers)

    return workers.map(optimize_parallel_cpu_private, self.trials)
def thread_fn(self, data, fn, num_threads):
    """Run ``fn`` over the work items from ``self.gen_args`` and flatten the results.

    Args:
        data: input forwarded to ``self.gen_args``.
        fn: picklable callable applied to each work item in a worker process;
            it must return an iterable.
        num_threads: forwarded to ``self.gen_args`` and used as the number of
            pool processes.

    Returns:
        A single list holding the items of every per-item result, in the
        order the work items were generated.
    """
    work_items = self.gen_args(data, num_threads)
    with Pool(processes=num_threads) as workers:
        # imap yields results in submission order, so the flattening is stable.
        return [item
                for partial_result in workers.imap(fn, work_items)
                for item in partial_result]
def setup_featurizer(self, dataset, total_vars, classes, processes=20):
    """Store the featurizer inputs, start a worker pool and call ``specific_setup``.

    Args:
        dataset: stored as ``self.ds``.
        total_vars: stored as ``self.total_vars``.
        classes: stored as ``self.classes``.
        processes: number of worker processes for ``self.pool``.
    """
    self.ds, self.total_vars, self.classes = dataset, total_vars, classes
    self.pool = Pool(processes)
    # The flag is set before specific_setup() runs, so that call already sees it.
    self.setup_done = True
    self.specific_setup()
def train_one_round(sites: Sequence[Site], num_epochs: int) -> Dict[str, float]:
    """Train all sites in parallel for one round and merge their metrics.

    A pool with one process per site runs ``train_site(site,
    num_epochs=num_epochs)`` for every site; the per-site results are then
    passed, together with the sites, to ``synchronize_metrics``.

    Args:
        sites: the sites to train.
        num_epochs: forwarded to ``train_site`` as a keyword argument.

    Returns:
        Whatever ``synchronize_metrics`` returns.
    """
    with Pool(processes=len(sites)) as workers:
        per_site_metrics = workers.map(
            partial(train_site, num_epochs=num_epochs), sites)
    return synchronize_metrics(sites, per_site_metrics)
def policy_iteration(args):
    """Alternate parallel data collection and agent training.

    The agent is evaluated once up front. Each of the
    ``args.total_iterations`` rounds then:

    1. builds per-process arguments with ``gen_args``,
    2. runs ``thread_thunk`` over them in a fresh pool of
       ``args.NUM_OF_PROCESSES`` workers and appends everything returned to a
       bounded replay memory (``args.MAX_MEMORY_SIZE`` entries),
    3. trains the agent, evaluates it and updates its epsilon.

    The first round sizes its episode count from ``args.observation_data``
    and trains on the whole memory with a second argument of ``2``; later
    rounds size it from ``args.step_per_iteration`` and train on the newest
    ``args.step_per_iteration`` samples only.
    """
    set_start_method('spawn')
    agent = Agent(args)
    eval_policy(agent, 500)
    replay_memory = deque(maxlen=args.MAX_MEMORY_SIZE)
    num_episode_per_thread = (
        args.step_per_iteration // args.NUM_OF_PROCESSES
        // (GAMEOVER_ROUND - CUT))

    progress = tqdm(range(args.total_iterations), desc='Iteration', position=0)
    for iteration in progress:
        first_round = iteration == 0
        if first_round:
            episodes = (args.observation_data // args.NUM_OF_PROCESSES
                        // (GAMEOVER_ROUND - CUT))
        else:
            episodes = num_episode_per_thread
        thread_args = gen_args(agent, episodes, args.NUM_OF_PROCESSES)

        with Pool(processes=args.NUM_OF_PROCESSES) as pool:
            for collected in pool.imap(thread_thunk, thread_args):
                replay_memory.extend(collected)

        train_data = list(replay_memory)
        if first_round:
            progress.write(
                "num of training data: {}".format(len(train_data)))
            agent.train(train_data, 2)
        else:
            progress.write(
                "num of training data: {}".format(args.step_per_iteration))
            agent.train(train_data[-args.step_per_iteration:])

        eval_policy(agent, 500)
        agent.update_epsilon()
def run_synthetic_confounded_cdm_experiment():
    """Run the synthetic confounded CDM parameter sweep and save all results.

    The Cartesian product of the settings below is shuffled and mapped over
    ``synthetic_confounded_cdm_experiment_helper`` in a pool of ``N_THREADS``
    workers. Results are keyed by the argument tuple the helper returns and
    written with ``torch.save`` to
    ``{RESULTS_DIR}/synthetic_counfounded_cdm_results.pt``.
    """
    print('Running run_synthetic_confounded_cdm_experiment()...')

    # One entry per swept dimension; the order fixes the layout of each tuple.
    grid = (
        [Logit, CDM, MultinomialLogit, MultinomialCDM],  # models
        [0.01],                                          # learning rates
        [True, False],                                   # use_ipws
        [10000],                                         # samples
        [2],                                             # embedding dims
        list(range(16)),                                 # seeds
        [1],                                             # context strengths
        np.linspace(0, 8, 21),                           # confounding strengths
    )
    params = list(itertools.product(*grid))
    random.shuffle(params)

    results = dict()
    with Pool(N_THREADS) as pool:
        outcomes = pool.imap_unordered(
            synthetic_confounded_cdm_experiment_helper, params)
        for outcome in tqdm(outcomes, total=len(params)):
            (args, state_dict, train_loss, test_loss, test_mrr,
             num_params, mcar_loss, mcar_mrr) = outcome
            results[args] = (state_dict, train_loss, test_loss, test_mrr,
                             num_params, mcar_loss, mcar_mrr)

    fname = f'{RESULTS_DIR}/synthetic_counfounded_cdm_results.pt'
    with open(fname, 'wb') as f:
        torch.save(results, f)
def __init__(self, path, seqNames, nProcessLoader=50, MAX_SIZE_LOADED=4000000000):
    """
    Args:
        - path (string): path to the training dataset
        - seqNames (list): sequences to load, given as pairs whose second
                           item is the sequence path relative to ``path``
        - nProcessLoader (int): number of processes to call when loading the
                                data from the disk
        - MAX_SIZE_LOADED (int): target maximal size of the floating array
                                 containing all loaded data.
    """
    root = Path(path)
    self.dbPath = root
    # Only the second element of each pair is used; it is resolved against root.
    self.seqNames = [root / name for _, name in seqNames]
    self.nProcessLoader = nProcessLoader
    self.MAX_SIZE_LOADED = MAX_SIZE_LOADED

    self.reload_pool = Pool(nProcessLoader)
    self.prepare()

    # Start from an empty buffer, then request two packs back to back.
    self.data = []
    self.loadNextPack(first=True)
    self.loadNextPack()
def _report_rouge(self, predictions, references):
    """Score predictions against references in parallel and report the means.

    Each ``(prediction, reference)`` pair is passed to ``_multi_rg`` in a
    pool of 24 workers. Pairs for which ``_multi_rg`` returns ``None`` are
    skipped.

    Args:
        predictions: iterable of predictions.
        references: iterable of references; ``references[i]`` is paired with
            the i-th prediction.

    Returns:
        Tuple ``(r1, r2, rl)``: the means of the first, second and third
        value returned by ``_multi_rg`` over all scored pairs.

    Side effects:
        Appends one tab-separated line to ``self.args.log_folds`` when that
        path is non-empty, and logs the three scores plus the data and model
        paths.
    """
    predictions = list(predictions)
    references = list(references)
    a_lst = [(p, references[i]) for i, p in enumerate(predictions)]

    rouge_scores = {"r1": [], "r2": [], "rl": []}
    # The context manager terminates the workers if scoring raises; before,
    # an exception skipped close()/join() and left 24 processes behind.
    with Pool(24) as pool:
        for d in tqdm(pool.imap(_multi_rg, a_lst), total=len(a_lst)):
            if d is not None:
                rouge_scores["r1"].append(d[0])
                rouge_scores["r2"].append(d[1])
                rouge_scores["rl"].append(d[2])
        pool.close()
        pool.join()

    r1 = np.mean(rouge_scores["r1"])
    r2 = np.mean(rouge_scores["r2"])
    rl = np.mean(rouge_scores["rl"])

    if len(self.args.log_folds) > 0:
        # NOTE(review): the file receives score / 100 while the log lines
        # below print score * 100 - confirm which scale is intended.
        with open(self.args.log_folds, mode='a') as f:
            f.write("{:.4f}\t{:.4f}\t{:.4f}".format(r1 / 100, r2 / 100, rl / 100))
            f.write('\n')

    logger.info("Metric\tScore\t95% CI")
    logger.info("ROUGE-1\t{:.2f}\t({:.2f},{:.2f})".format(r1 * 100, 0, 0))
    logger.info("ROUGE-2\t{:.2f}\t({:.2f},{:.2f})".format(r2 * 100, 0, 0))
    logger.info("ROUGE-L\t{:.2f}\t({:.2f},{:.2f})".format(rl * 100, 0, 0))
    logger.info("Data path: %s" % self.args.bert_data_path)
    logger.info("Model path: %s" % self.args.model_path)
    return r1, r2, rl
def load_multi_process(self, load_fn: Callable, path: Sequence) -> List:
    """
    Helper function to load dataset with multiple processes

    Args:
        load_fn: function to load a single sample
        path: a sequence of paths which should be loaded

    Returns:
        list: loaded data, in the same order as ``path``

    The pool size is ``self._num_workers``, or ``cpu_count()`` when that is
    ``None``. When ``self._verbosity`` is truthy a progress bar is advanced
    by one for every finished sample.
    """
    _processes = cpu_count() if self._num_workers is None else self._num_workers

    pbar = None
    callback = None
    if self._verbosity:
        pbar = tqdm(total=len(path), unit='samples', desc="Loading Samples")

        def callback(*a):
            pbar.update(1)

    try:
        with Pool(processes=_processes) as pool:
            # Submit everything first, then collect, so samples load concurrently.
            jobs = [
                load_async(pool, load_fn, p, callback=callback) for p in path
            ]
            _data = [j.get() for j in jobs]
    finally:
        # The bar used to be left open; close it even when a job fails.
        if pbar is not None:
            pbar.close()
    return _data
def test(n=10000):
    """Time ``n`` sequential ``GNvp_RopLop`` evaluations on random inputs.

    For each of ``n`` random ``(input, vector)`` pairs of size 10, the input
    is pushed through ``three_sin``, an MSE loss against zeros is formed, and
    ``GNvp_RopLop(loss, output, input, vector)`` is called. The elapsed
    wall-clock time of the whole loop is printed.

    Args:
        n: number of random pairs to generate and evaluate.
    """
    import time

    n_cpus = mp.cpu_count()
    print("num cpus: ", n_cpus)
    # A Pool(n_cpus) used to be created here, but its only use was commented
    # out and it was never closed, so every call leaked n_cpus processes.

    data = [(np.random.random(size=10), np.random.random(size=10))
            for _ in range(n)]

    s2 = time.time()
    for inp_val, vec_val in data:
        inp = Variable(torch.FloatTensor([inp_val]), requires_grad=True)
        v = Variable(torch.FloatTensor([vec_val]), requires_grad=False)
        z = three_sin(inp)
        l = F.mse_loss(z, torch.zeros_like(z))
        # Only the call's running time matters here; the result is discarded.
        GNvp_RopLop(l, z, inp, v)
    e2 = time.time()
    print("Time 2: ", (e2 - s2))
def async_build_examples(
        self, data_type: str,
        dials: List[Tuple[str, dict]]) -> Tuple[list, list]:
    """Use multiprocessing to process raw dialogue data.

    ``dials`` is cut into ``self.config['num_processes']`` consecutive slices
    and ``self.iter_dials(slice, data_type, pos_examples, neg_examples, i)``
    runs once per slice in a worker process. The two example lists are
    manager-backed lists, so every worker can append to them.

    Args:
        data_type: train, dev or test
        dials: raw dialogues data

    Returns:
        ``(neg_examples, pos_examples)`` as plain lists collected from all
        processes - note the order: negatives first.

    Raises:
        Any exception raised by ``self.iter_dials`` in a worker. Previously
        the async results were discarded, so a failed slice silently
        contributed no examples.
    """
    num_processes = self.config['num_processes']
    # Ceiling division: the last slices may be shorter or empty.
    dials4single_process = (len(dials) - 1) // num_processes + 1
    print(f'Single process have {dials4single_process} dials ...')

    # A single manager, shut down on exit, replaces two manager processes
    # that were started and never stopped.
    with Manager() as manager:
        neg_examples = manager.list()
        pos_examples = manager.list()

        with Pool(num_processes) as pool:
            jobs = [
                pool.apply_async(
                    func=self.iter_dials,
                    args=(dials[dials4single_process * i:
                                dials4single_process * (i + 1)],
                          data_type, pos_examples, neg_examples, i))
                for i in range(num_processes)
            ]
            pool.close()
            pool.join()

        for job in jobs:
            job.get()  # re-raises the worker's exception, if there was one

        # Copy out of the proxies before the manager goes away.
        return list(neg_examples), list(pos_examples)
def _greedy_bayes_multiprocess(self, encoded_df, k=1):
    """Construct a Bayesian Network (BN) using greedy algorithm.

    A random column is chosen as root. Then, until every column has been
    added, all candidate tasks ``(child, V, num_parents, split, dataset)``
    are scored by ``bayes_worker`` in parallel and the candidate with the
    largest mutual information is appended to the network.

    Args:
        encoded_df: input dataframe; it is cast to ``str`` before use.
        k: maximum number of parents per attribute.

    Returns:
        List of the selected parent-pair entries, in the order they were
        added. The first element of each entry is the attribute added.
    """
    dataset = encoded_df.astype(str, copy=False)
    root_attribute = choice(dataset.columns)
    V = [root_attribute]  # attributes already placed in the network
    rest_attributes = set(dataset.columns)
    rest_attributes.remove(root_attribute)
    bayesian_net = []
    if not rest_attributes:
        return bayesian_net

    # One pool for the whole search; it used to be torn down and respawned
    # on every greedy step.
    with Pool(processes=PROCESSES) as pool:
        while rest_attributes:
            num_parents = min(len(V), k)
            tasks = [(child, V, num_parents, split, dataset)
                     for child, split in product(
                         rest_attributes, range(len(V) - num_parents + 1))]
            res_list = pool.map(bayes_worker, tasks)

            parents_pair_list = []
            mutual_info_list = []
            for res in res_list:
                parents_pair_list.extend(res[0])
                mutual_info_list.extend(res[1])

            # First candidate with maximal mutual information wins.
            idx = mutual_info_list.index(max(mutual_info_list))
            bayesian_net.append(parents_pair_list[idx])
            adding_attribute = parents_pair_list[idx][0]
            V.append(adding_attribute)
            rest_attributes.remove(adding_attribute)

    return bayesian_net
def optimize_trials_parallel_gpu(
        self,
        train_function,
        nb_trials,
        trials,
        gpu_ids,
        nb_workers=4,
):
    """
    Runs optimization across gpus with cuda drivers

    :param train_function: callable paired with every trial; each
        ``(trial, train_function)`` tuple is handed to
        ``optimize_parallel_gpu_private`` in a worker.
    :param nb_trials: accepted but not used by this method.
    :param trials: the trials to run.
    :param gpu_ids: List of strings like: ['0', '1, 3']
    :param nb_workers: pool size, used only when ``self.pool`` has not been
        created yet.
    :return: list with one result per trial, in trial order.
    """
    self.trials = [(trial, train_function) for trial in trials]

    workers = self.pool
    if workers is None:
        # Queue of gpu ids handed to every worker through the pool
        # initializer, so each process can pull out a gpu id, run its task
        # and put it back when done.
        gpu_q = Queue()
        for gpu_id in gpu_ids:
            gpu_q.put(gpu_id)

        workers = self.pool = Pool(
            processes=nb_workers, initializer=init, initargs=(gpu_q,))

    return workers.map(optimize_parallel_gpu_private, self.trials)
def optimize_parallel_gpu(
        self,
        train_function,
        gpu_ids,
        max_nb_trials=None,
):
    """
    Runs optimization across gpus with cuda drivers

    :param train_function: callable paired with every generated trial; each
        ``(trial_namespace, train_function)`` tuple is handed to
        ``optimize_parallel_gpu_private`` in a worker.
    :param max_nb_trials: forwarded to ``strategies.generate_trials`` as
        ``nb_trials``.
    :param gpu_ids: List of strings like: ['0', '1, 3']
    :return: list with one result per trial, in trial order.
    """
    flat_params = self.__flatten_params(self.opt_args)
    self.trials = strategies.generate_trials(
        strategy=self.strategy,
        flat_params=flat_params,
        nb_trials=max_nb_trials,
    )

    jobs = []
    for trial in self.trials:
        jobs.append((self.__namespace_from_trial(trial), train_function))
    self.trials = jobs

    workers = self.pool
    if workers is None:
        # Queue of gpu ids handed to every worker through the pool
        # initializer, so each process can pull out a gpu id, run its task
        # and put it back when done.
        gpu_q = Queue()
        for gpu_id in gpu_ids:
            gpu_q.put(gpu_id)

        # One worker per gpu id entry.
        nb_workers = len(gpu_ids)
        workers = self.pool = Pool(
            processes=nb_workers, initializer=init, initargs=(gpu_q,))

    return workers.map(optimize_parallel_gpu_private, self.trials)
def multiprocess(self, *args, **kwargs):
    """Call ``self.f`` once per element of the reference argument, in a pool.

    The reference argument is ``args[self.reference]`` when
    ``self.reference > -1``; otherwise it is the first positional argument
    for which ``isIterable`` is true. Its elements are distributed over the
    calls; every other positional argument is passed unchanged to each call
    (tensors are moved to shared memory first).

    ``self.n_jobs`` is updated in place: when unset it becomes the larger of
    the reference length and ``self.count``, and it is then set to the
    reference length whenever ``self.count`` exceeds that length.

    Args:
        *args: positional arguments for ``self.f``.
        **kwargs: accepted but not forwarded to ``self.f``.

    Returns:
        List of ``self.f`` results, one per element of the reference
        argument, in order.

    Raises:
        Parallel_ReferenceError: if no reference argument can be determined.
    """
    ref_index = None
    if self.reference > -1:
        ref_index = self.reference
        length = len(args[self.reference])
    else:
        for i, x in enumerate(args):
            if isIterable(x):
                ref_index = i
                length = len(x)
                break

    # This check has to precede any use of `length`: it used to come after
    # the n_jobs computation, which failed with UnboundLocalError first, so
    # the intended error could never be raised.
    if ref_index is None:
        raise Parallel_ReferenceError()

    if self.n_jobs is None:
        self.n_jobs = length if length > self.count else self.count
    self.n_jobs = length if self.count > length else self.n_jobs

    toCall = []
    for i, x in enumerate(args):
        if i == ref_index:
            toCall.append(x)
            continue
        if isTensor(x):
            x.share_memory_()
        # Repeat the argument so zip() pairs it with every reference element.
        toCall.append([x] * length)

    with Pool(processes=self.n_jobs) as pool:
        output = pool.starmap(self.f, zip(*toCall))
    return output
def main(data_dir: Path, save_dir: Path, encoder_path: Path, seg_len: int, n_workers: int):
    """Convert every speaker's audio files to mels, embed them and write metadata.

    For each entry ``spk`` of ``data_dir``:

    * every file in it is converted with ``process_file`` in a worker pool
      (results that are ``None`` are dropped),
    * each mel is saved to ``save_dir/uttrs/<random hex>.pt``,
    * ``embed_uttrs`` is run on the speaker's mels and the result is saved to
      ``save_dir/embed/<speaker directory name>.pt``.

    Finally ``save_dir/metadata.json`` maps each speaker directory name to
    its embedding path and utterance paths (both relative to ``save_dir``).

    Args:
        data_dir: directory with one sub-directory per speaker.
        save_dir: existing output directory.
        encoder_path: TorchScript encoder loaded with ``torch.jit.load``.
        seg_len: forwarded to ``embed_uttrs``.
        n_workers: number of pool processes.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    set_start_method("spawn")
    encoder = torch.jit.load(encoder_path).eval().to(device)
    meta_data = defaultdict(list)
    (save_dir / "uttrs").mkdir(exist_ok=True)
    (save_dir / "embed").mkdir(exist_ok=True)
    spk_dirs = data_dir.iterdir()
    wav2mel = Wav2Mel()
    file2mel = partial(process_file, wav2mel=wav2mel)

    # One pool for all speakers; with the "spawn" start method, recreating it
    # per speaker re-launches every worker interpreter each time.
    with Pool(n_workers) as p:
        for spk in tqdm(spk_dirs):
            wav_files = list(spk.iterdir())
            mels = p.map(file2mel, wav_files)
            mels = [mel.to(device) for mel in mels if mel is not None]
            embed = embed_uttrs(encoder, mels, seg_len)

            rnd_paths = [f"uttrs/{uuid4().hex}.pt" for _ in mels]
            for rel_path, mel in zip(rnd_paths, mels):
                torch.save(mel.cpu(), save_dir / rel_path)

            # Use the directory *name*: the full Path would point into a
            # nested folder that does not exist under embed/, and Path
            # objects are not valid JSON keys (json.dump raises TypeError).
            emb_path = f"embed/{spk.name}.pt"
            torch.save(embed.cpu(), save_dir / emb_path)
            meta_data[spk.name] = {"embed": emb_path, "uttrs": rnd_paths}

    # Close the file explicitly so the metadata is flushed to disk.
    with (save_dir / "metadata.json").open(mode="w") as f:
        json.dump(meta_data, f)
def main():
    """Extract embeddings for every feature split with the newest checkpoint.

    Command-line arguments:
        -modelType: model selector; ``3`` builds ``simpleTDNN``, anything
            else builds ``xvecTDNN``.
        -numSpkrs: number of output labels for the model.
        modelDirectory: directory with model checkpoints; the most recently
            changed file (by ctime) is loaded.
        featDir: directory containing ``split<N>`` sub-directories.
        embeddingDir: output directory (created when missing).

    One process per split runs ``par_core_extractXvectors``; afterwards all
    ``xvector.<i>.scp`` files are concatenated into ``xvector.scp``. Exits
    with status 1 when no ``split<N>`` directory can be determined.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-modelType', default=4, type=int,
                        help='Refer train_utils.py ')
    parser.add_argument('-numSpkrs', default=7323, type=int,
                        help='Number of output labels for model')
    parser.add_argument('modelDirectory',
                        help='Directory containing the model checkpoints')
    parser.add_argument(
        'featDir', help='Directory containing features ready for extraction')
    parser.add_argument('embeddingDir', help='Output directory')
    args = parser.parse_args()

    modelFile = max(glob.glob(args.modelDirectory + '/*'),
                    key=os.path.getctime)

    # Load model definition
    if args.modelType == 3:
        net = simpleTDNN(args.numSpkrs, p_dropout=0)
    else:
        net = xvecTDNN(args.numSpkrs, p_dropout=0)

    checkpoint = torch.load(modelFile, map_location=torch.device('cuda'))
    new_state_dict = OrderedDict()
    for k, v in checkpoint['model_state_dict'].items():
        # Strip a leading 'module.' from the key so it matches the bare model.
        new_state_dict[k[7:] if k.startswith('module.') else k] = v

    # load trained weights
    net.load_state_dict(new_state_dict)
    net = net.cuda()
    net.eval()

    # Parallel Processing
    try:
        nSplits = int(
            sorted(glob.glob(args.featDir + '/split*'),
                   key=getSplitNum)[-1].split('/')[-1].lstrip('split'))
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit.
        print('Cannot find %s/splitN directory' % args.featDir)
        sys.exit(1)

    if not os.path.isdir(args.embeddingDir):
        os.makedirs(args.embeddingDir)

    nProcs = nSplits
    L = [('%s/split%d/%d/feats.scp' % (args.featDir, nSplits, i),
          '%s/xvector.%d.ark' % (args.embeddingDir, i),
          '%s/xvector.%d.scp' % (args.embeddingDir, i), net, 'fc1')
         for i in range(1, nSplits + 1)]
    # The context manager terminates the workers even if extraction raises.
    with Pool(processes=nProcs) as pool2:
        pool2.starmap(par_core_extractXvectors, L)

    # Concatenate the per-split scp files in Python instead of a shell `cat`:
    # the shell command broke on output paths with spaces or metacharacters.
    part_pattern = os.path.join(glob.escape(args.embeddingDir), 'xvector.*.scp')
    with open('%s/xvector.scp' % args.embeddingDir, 'wb') as merged:
        for part in sorted(glob.glob(part_pattern)):
            with open(part, 'rb') as part_file:
                merged.write(part_file.read())
def test_load_async(self):
    """``load_async`` yields the loaded value and fires the callback exactly once."""
    on_loaded = Mock()
    with Pool(processes=1) as workers:
        handle = load_async(workers, lambda x: x, 0, callback=on_loaded)
        loaded = handle.get()
        self.assertEqual(loaded, 0)
        on_loaded.assert_called_once()
def setup_class(self):
    """Setup the metric class.

    Spawns the pool of ``NUM_PROCESSES`` workers used for metric testing and
    calls ``setup_ddp(rank, pool_size)`` once for every rank on that pool.
    """
    world_size = NUM_PROCESSES
    self.poolSize = world_size
    workers = Pool(processes=world_size)
    self.pool = workers
    rank_args = [(rank, world_size) for rank in range(world_size)]
    workers.starmap(setup_ddp, rank_args)
def run_torch_parallel(args, experiment):
    """Run every trial of ``experiment`` through ``run_thread`` in a spawned pool.

    Args:
        args: object providing ``algorithm``, ``env`` and ``num_processes``.
        experiment: object providing ``trials`` (the number of trials); it is
            also passed to every ``run_thread`` call.
    """
    multiprocessing.set_start_method('spawn')
    # One parameter tuple per trial index.
    thread_params = [(args.algorithm, args.env, experiment, trial)
                     for trial in range(experiment.trials)]
    with Pool(args.num_processes) as workers:
        workers.map(run_thread, thread_params)
def meta_ars(env_name, policy, meta_epochs, meta_seed, n_seeds=4, n_top_seeds=1, n_workers=4, mean_lookback=10, ars_epochs=10, env_config=None, step_size=.02, n_delta=32, n_top=16, exp_noise=0.03):
    """Repeatedly run ``ars`` from several random seeds and keep the best policies.

    ``policy``'s parameters are zeroed in place, then copied ``n_top_seeds``
    times to form the initial set of parents. In every meta epoch each
    parent is paired with ``n_seeds // n_top_seeds`` fresh random seeds,
    ``ars`` is run on all pairs in parallel, each run is scored by the mean
    of its last ``mean_lookback`` rewards, and the ``n_top_seeds`` best
    resulting policies are appended to the parent list.

    Args:
        env_name, ars_epochs, env_config, step_size, n_delta, n_top,
        exp_noise, n_workers: forwarded positionally to ``ars``.
        policy: torch module; its parameters are overwritten with zeros.
        meta_epochs: number of meta epochs.
        meta_seed: seed for NumPy's global RNG, which draws the child seeds.
        n_seeds: pool size, and number of children for the initial parents.
        n_top_seeds: number of policies kept per epoch.
        mean_lookback: number of trailing rewards averaged for the score.

    Returns:
        ``(top_policies, reward_log)``; ``reward_log`` holds the best score
        of each epoch.
    """
    n_children = n_seeds // n_top_seeds
    np.random.seed(meta_seed)

    # Start from an all-zero parameter vector.
    W = torch.nn.utils.parameters_to_vector(policy.parameters())
    W = torch.zeros_like(W)
    torch.nn.utils.vector_to_parameters(W, policy.parameters())

    ars_partial = partial(ars, env_name, ars_epochs, env_config, step_size,
                          n_delta, n_top, exp_noise, n_workers)
    reward_log = []
    top_policies = [copy.deepcopy(policy) for _ in range(n_top_seeds)]

    # The pool was never closed before; the context manager releases the
    # workers on return and on error.
    with Pool(processes=n_seeds) as pool:
        for epoch in range(meta_epochs):
            # Index the 1-element array explicitly: converting it directly
            # with int() is deprecated in recent NumPy.
            pols_and_seeds = [
                (pol, int(np.random.randint(0, 2**32 - 1, 1)[0]))
                for pol in top_policies
                for _ in range(n_children)
            ]
            results = pool.starmap(ars_partial, pols_and_seeds)

            p_list = []
            r_list = []
            for trained_policy, rews in results:
                p_list.append(trained_policy)
                r_list.append(torch.stack(rews[-mean_lookback:]).mean())

            top_idx = sorted(range(len(r_list)), key=lambda k: r_list[k],
                             reverse=True)[:n_top_seeds]
            # NOTE(review): top_policies is appended to and never reset, so
            # the parent set (and the number of runs per epoch) grows every
            # epoch - confirm whether it should be replaced instead.
            for i in top_idx:
                top_policies.append(p_list[i])
            reward_log.append(max(r_list))

    return top_policies, reward_log
def step(self, items_seq, items_ori, items_batch=None, boxes_batch=None):
    """Advance the batch by one level and return the per-sample rewards.

    When both ``items_batch`` and ``boxes_batch`` are given, they replace the
    stored batch and the per-batch bookkeeping is reset. The items selected
    by ``items_seq`` / ``items_ori`` are then compared against the boxes;
    items that do not fit any box in every dimension are removed from their
    sequence, and samples whose sequence becomes empty are dropped.
    ``self.target_func`` runs on the remaining samples in a pool of 10
    workers, each result is passed to ``self.dict_update``, and
    ``self.batch_indx`` / ``self.current_level`` are updated.

    Args:
        items_seq: per-sample item index sequences.
            # assumes shape (BATCH_SIZE, ITEMS_SEQ_LN) - TODO confirm
        items_ori: tensor indexed by (sample, item) to pick the third index
            into ``self.items_batch``.
        items_batch: optional new items tensor.
        boxes_batch: optional new boxes tensor.

    Returns:
        List with ``result[-1] * result[-2]`` for every worker result.
    """
    # `is not None` instead of `!= None` combined with `&`: comparing an
    # array-like to None with `!=` can broadcast element-wise.
    if items_batch is not None and boxes_batch is not None:
        self.items_batch = items_batch
        self.boxes_batch = boxes_batch
        self.batch_indx = list(range(self.BATCH_SIZE))
        # Was a bare `BATCH_SIZE`; every other use in this method reads the
        # instance attribute.
        self.expected_items_n = [self.ITEMS_SEQ_LN] * self.BATCH_SIZE
        self.all_outs = {i: [] for i in range(self.BATCH_SIZE)}
        self.current_level = 0
        self.items_batch_alligned = None
        self.boxes_batch_alligned = None

    items_seq_ = torch.LongTensor(items_seq).transpose(1, 0).expand(
        self.INPUT_SIZE, self.ITEMS_SEQ_LN, self.BATCH_SIZE).transpose(2, 0)
    items_ori_ = items_ori[torch.arange(self.BATCH_SIZE).expand(
        self.ITEMS_SEQ_LN, self.BATCH_SIZE).transpose(1, 0),
        torch.LongTensor(items_seq).
        expand(self.BATCH_SIZE, self.ITEMS_SEQ_LN)]
    self.items_batch_alligned = self.items_batch[[
        self.base_indx_items, items_seq_, items_ori_
    ]]

    # Box size minus item size; an item is valid when all entries along the
    # last axis are non-negative for at least one box.
    lookup_sm = self.boxes_batch.expand(
        self.ITEMS_SEQ_LN, self.BATCH_SIZE, self.ITEMS_SEQ_LN,
        self.INPUT_SIZE).transpose(
            1, 0) - self.items_batch_alligned.unsqueeze(2)
    validities = (lookup_sm >= 0).all(3).any(2).tolist()

    new_seq = [[item for item, valid in zip(seq, flags) if valid]
               for seq, flags in zip(items_seq, validities)]
    self.batch_indx = [i for i in self.batch_indx if new_seq[i]]
    items_seq = [seq for seq in new_seq if seq]

    zp = list(
        zip(self.batch_indx, self.items_batch[self.batch_indx],
            self.boxes_batch[self.batch_indx], items_seq,
            items_ori[self.batch_indx]))

    # The context manager releases the workers even if target_func raises.
    with Pool(10) as p:
        out = p.map(self.target_func, zp)

    # Worker results arrive pickled; they come from our own worker processes,
    # not from external input.
    out = [pickle.loads(i) for i in out]
    for result in out:
        self.dict_update(result)

    self.batch_indx = [i[0] for i in out]
    self.current_level += 1
    all_rewards = [i[-1] * i[-2] for i in out]
    return all_rewards
def perform_grid_search(self):
    """Run ``self.run_experiment`` over the whole grid and print the best results.

    Every entry of ``self.generate_grid()`` is evaluated in a pool of 10
    workers with a progress bar. The results are sorted ascending by their
    first element and the last 20 (the largest) are printed.
    """
    grid = self.generate_grid()
    with Pool(10) as workers:
        progress = tqdm(workers.imap(self.run_experiment, grid),
                        total=len(grid))
        outcomes = list(progress)

    print("BEST RESULTS")
    outcomes.sort(key=lambda t: t[0])
    for entry in outcomes[-20:]:
        print(entry)
def play_n_episodes(self, n):
    """Plays n episodes in parallel using the fixed policy and returns the data.

    ``self`` is called once per episode index in a pool of ``n`` processes.
    Each call's result is indexed as ``[0]`` states, ``[1]`` actions and
    ``[2]`` rewards.

    Returns:
        Tuple of three lists (states, actions, rewards), each with one entry
        per episode.
    """
    with closing(Pool(processes=n)) as workers:
        episodes = workers.map(self, range(n))
        workers.terminate()

    states, actions, rewards = (
        [episode[field] for episode in episodes] for field in range(3))
    return states, actions, rewards
def run_main():
    """Play every deck against every deck in parallel and print the results.

    Reads the module-level ``args``:

    * ``args.decklists``: comma-separated deck ids; all known ids when unset.
    * ``args.model_name``: model used by player 1 and by the NN opponents.
    * ``args.opponent``: ``"Greedy"`` or ``"MCTS"`` select an NN-based
      opponent when a model name is given; unset selects ``AggroPolicy``.
    * ``args.iteration``: forwarded to every battle; defaults to 10.

    Each ordered deck pair becomes one ``multi_battle`` task, run in a pool
    of 3 processes. The first two elements of every task result are collected
    into a dict that is printed at the end.
    """
    if args.decklists is None:
        deck_lists = list(deck_id_2_name.keys())
    else:
        deck_lists = list(map(int, args.decklists.split(",")))
        assert all(key in deck_id_2_name for key in deck_lists)
    if deck_lists == [0, 1, 4, 5, 10, 12]:
        deck_lists = [0, 1, 4, 12, 5, 10]
    mylogger.info("deck_lists:{}".format(deck_lists))

    # The decks are loaded up front although only the ids are passed on.
    D = []
    for deck_index, deck_id in enumerate(deck_lists):
        mylogger.info("{}(deck_id:{}):{}".format(
            deck_index, deck_id, key_2_tsv_name[deck_id]))
        deck = tsv_to_deck(key_2_tsv_name[deck_id][0])
        deck.set_leader_class(key_2_tsv_name[deck_id][1])
        D.append(deck)

    list_range = range(len(deck_lists))
    Player1 = Player(
        9, True,
        policy=New_Dual_NN_Non_Rollout_OM_ISMCTSPolicy(model_name=args.model_name),
        mulligan=Min_cost_mulligan_policy())

    # Every combination now yields a Player2. Before, some combinations of
    # opponent / model_name left it unassigned and crashed at
    # `Player2.name` below.
    has_model = args.model_name is not None
    if args.opponent is None:
        opponent_policy = AggroPolicy()
    elif has_model and args.opponent == "Greedy":
        opponent_policy = NN_GreedyPolicy(model_name=args.model_name)
    elif has_model and args.opponent == "MCTS":
        opponent_policy = New_Dual_NN_Non_Rollout_OM_ISMCTSPolicy(
            model_name=args.model_name)
    else:
        opponent_policy = Opponent_Modeling_MCTSPolicy()
    Player2 = Player(9, True, policy=opponent_policy,
                     mulligan=Min_cost_mulligan_policy())

    Player1.name = "Alice"
    Player2.name = "Bob"
    iteration = int(args.iteration) if args.iteration is not None else 10
    deck_list_len = len(deck_lists)
    iter_data = [(deck_list_len * i + j, Player1, Player2, (i, j),
                  (deck_lists[i], deck_lists[j]), iteration)
                 for i, j in itertools.product(list_range, list_range)]

    # The context manager terminates the workers even if a battle raises.
    with Pool(3) as pool:
        result = pool.map(multi_battle, iter_data)

    Results = {data[0]: data[1] for data in result}
    print(Results)
def compute_image_statistics(image_list, image_open_fn, single_process=False):
    """Given a list of images (paths to files), return the per channel mean and stdev.
    Also returns a dictionary mapping filename to image size

    Only paths ending in .jpg, .png, .bmp, .pgm or .tiff (case-sensitive) are
    used. Per-chunk statistics come from ``_get_image_stats``; its means and
    variances are summed over chunks and divided by the number of images.

    Parameters
    ----------
    image_list: list
        List of str of filepaths to images that can be opened by PIL

    image_open_fn: function
        Function to open image files in image_list, i.e. PIL.Image.open

    single_process: bool
        If it's True, it gets image stats in single process for debugging. Defaults to False.

    Returns
    -------
    global_mean: list
        Channel wise mean of images over images in given list

    global_stdev: list
        Channel wise standard deviation of images in given list

    all_image_sizes: dict
        Dict mapping from filenames to image sizes (C, H, W)

    Raises
    ------
    ValueError
        If no path in ``image_list`` has a supported extension. Previously
        this ended in a ZeroDivisionError (multi-process) or NaN statistics
        (single process).
    """
    valid_extensions = (".jpg", ".png", ".bmp", ".pgm", ".tiff")
    image_list = [path for path in image_list if path.endswith(valid_extensions)]
    if not image_list:
        raise ValueError(
            "image_list contains no files with a supported extension "
            "{}".format(valid_extensions))

    if single_process:
        image_stats_per_process = [
            _get_image_stats([image], idx, len(image_list), image_open_fn)
            for idx, image in enumerate(image_list)
        ]
    else:
        # Never start more workers than there are images.
        num_processes = min(cpu_count(), len(image_list))
        chunk_size = len(image_list) // num_processes
        with Pool(num_processes) as p:
            image_stats_per_process = p.starmap(
                _get_image_stats,
                [(image_list[i:i + chunk_size], i, num_processes, image_open_fn)
                 for i in range(0, len(image_list), chunk_size)]
            )

    # Three accumulators per statistic: the code is fixed to 3 channels.
    global_mean = np.array([0., 0., 0.])
    global_var = np.array([0., 0., 0.])
    all_image_sizes = {}
    for means, variances, image_sizes in image_stats_per_process:
        global_mean += means
        global_var += variances
        all_image_sizes.update(image_sizes)

    global_mean /= len(image_list)
    global_stdev = np.sqrt(global_var / len(image_list))
    return list(global_mean), list(global_stdev), all_image_sizes
def test_v2(self, ):
    """Map ``self.test`` over four placeholder inputs with a two-process pool."""
    data = ['*****@*****.**'] * 4
    # single process
    with Pool(processes=2) as workers:  # Parallelizing over 2 GPUs
        workers.map(self.test, data)
def train_instances(self):
    """
    Let all instances play against each other in parallel fashion.

    ``self.train_instance_pair`` is called once per instance index
    (``0 .. self.instances_num - 1``) in a pool with the default number of
    processes.

    :return: checkpoints produced by all workers, in instance order
    """
    instance_ids = [index for index in range(self.instances_num)]
    self.logger.info("Train {} instances.".format(len(instance_ids)))
    with Pool() as workers:
        return workers.map(self.train_instance_pair, instance_ids)