def __init__(self, num_threads: int):
    if not isinstance(num_threads, int):
        raise pyrado.TypeErr(given=num_threads, expected_type=int)
    if num_threads < 1:
        raise pyrado.ValueErr(given=num_threads, ge_constraint="1")
    self._num_threads = num_threads

    if not ENABLE_SINGLE_WORKER_OPTIMIZATION or num_threads > 1:
        # Create workers
        self._workers = [_WorkerInfo(i + 1) for i in range(self._num_threads)]
        self._manager = mp.Manager()
        self._G = GlobalNamespace()
def run_games_for_agent(self, agent_number, agent_class):
    """Runs a set of games for a given agent, saving the results in self.results"""
    agent_results = []
    agent_name = agent_class.agent_name
    agent_group = self.agent_to_agent_group[agent_name]
    agent_config = copy.deepcopy(self.config)
    if self.environment_has_changeable_goals(agent_config.environment) \
            and self.agent_cant_handle_changeable_goals_without_flattening(agent_name):
        print("Flattening changeable-goal environment for agent {}".format(agent_name))
        agent_config.environment = FlattenDictWrapper(
            agent_config.environment, dict_keys=["observation", "desired_goal"])
    if self.config.randomise_random_seed:
        agent_config.seed = random.randint(0, 2**32 - 2)
    agent_config.hyperparameters = agent_config.hyperparameters[agent_group]
    print("AGENT NAME: {}".format(agent_name))

    manager = mp.Manager()
    return_q = manager.Queue()
    agent = agent_class(agent_config)
    self.environment_name = agent.environment_title
    jobs = []
    for i in range(self.config.runs_per_agent):
        p = mp.Process(target=agent.run_n_episodes, args=(return_q,))
        jobs.append(p)
        p.start()
    for proc in jobs:
        proc.join()

    # Drain the results queue once all runs have finished
    for game_scores, rolling_scores, time_taken in iter(return_q.get, None):
        agent_results.append([game_scores, rolling_scores, len(rolling_scores),
                              -1 * max(rolling_scores), time_taken])
        if return_q.empty():
            break
    self.results[agent_name] = agent_results
def parallelize_sessions(self, global_nets=None):
    mp_dict = mp.Manager().dict()
    workers = []
    spec = deepcopy(self.spec)
    for _s in range(spec['meta']['max_session']):
        spec_util.tick(spec, 'session')
        w = mp.Process(target=mp_run_session, args=(spec, global_nets, mp_dict))
        w.start()
        workers.append(w)
    for w in workers:
        w.join()
    session_metrics_list = [mp_dict[idx] for idx in sorted(mp_dict.keys())]
    return session_metrics_list
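# Hedged sketch (assumed, not from the source) of the worker that
# parallelize_sessions expects: `mp_run_session` runs one session and stores
# its metrics in the managed dict under the session index, so the parent can
# sort and collect them after joining. `Session` is a placeholder name here.
def mp_run_session(spec, global_nets, mp_dict):
    session = Session(spec, global_nets)
    metrics = session.run()
    mp_dict[spec['meta']['session']] = metrics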
def spawn_train(cfg):
    # NOTE: callers may need mp.set_start_method("spawn") for CUDA + DDP
    manager = mp.Manager()
    return_dict = manager.dict()
    jobs = []
    for i in range(cfg.world_size):
        p = mp.Process(target=train_ddp, args=(i, cfg, return_dict))
        jobs.append(p)
        p.start()
    for proc in jobs:
        proc.join()
    return return_dict
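# Hedged sketch (assumed, not from the source) of the per-rank entry point
# spawn_train launches: each rank trains and reports its result through the
# managed dict, keyed by rank so the parent can aggregate after join().
def train_ddp(rank, cfg, return_dict):
    # dist.init_process_group(...), model setup and the training loop go here
    final_loss = 0.0  # stand-in for the real training loop's result
    return_dict[rank] = final_loss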
def parallel_predict(nets, data, data_dir, configs):
    processes = []
    manager = mp.Manager()
    pred_probs = manager.dict()  # one entry per view, filled by the workers
    for view in range(len(nets)):
        p = mp.Process(target=wrap_predict,
                       args=(nets[view], data, data_dir, configs[view], view, pred_probs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    return pred_probs.values()
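# Hedged sketch (assumed, not from the source) of the worker contract used by
# parallel_predict (and parallel_test below): each process writes its view's
# class probabilities into the managed dict under its own view index.
def wrap_predict(net, data, data_dir, config, view, pred_probs):
    pred_probs[view] = net.predict(data)  # assumes the net exposes predict()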
def connect(self, jov):
    '''--------------------------------
    connect maze and jovian:
    1. shared_cue_dict := {cue_name: cue_pos}
    2. shared_cue_height := {cue_name: cue_height}
    3. coord transformations
    --------------------------------
    '''
    self.jov = jov
    mgr = multiprocessing.Manager()
    self.shared_cue_dict = mgr.dict()
    self.jov.set_trigger(self.shared_cue_dict)
    self.jov.shared_cue_height = self.cues_height
    self.jov._to_maze_coord = self._to_maze_coord
    self.jov._to_jovian_coord = self._to_jovian_coord
    self.is_jovian_connected = True
def main(stage, pipeline, dataset):
    if pipeline == 'literal':
        experiment = LiteralExperimentConfig(dataset, **kwargs)
    else:
        experiment = HybridExperimentConfig(dataset, **kwargs)
    set_seed(experiment)
    describe_devices()
    print(experiment.describe())
    world_size = experiment.num_gpu or torch.cuda.device_count()
    with mp.Manager() as manager:
        shared = manager.dict()
        lock = manager.Lock()
        mp.spawn(process,
                 args=(world_size, experiment, stage, pipeline, shared, lock),
                 nprocs=world_size,
                 join=True)
def propagate(nnf, feat_A, feat_AP, feat_B, feat_BP, patch_size, iters=2, rand_search_radius=200):
    print("\tpatch_size:{}; num_iters:{}; rand_search_radius:{}".format(
        patch_size, iters, rand_search_radius))
    nnd = np.zeros(nnf.shape[:2])
    A_size = feat_A.shape[:2]
    B_size = feat_B.shape[:2]

    # Initial patch distances for the current nearest-neighbour field
    for ay in range(A_size[0]):
        for ax in range(A_size[1]):
            by, bx = nnf[ay, ax]
            nnd[ay, ax] = cal_dist(ay, ax, by, bx, feat_A, feat_AP,
                                   feat_B, feat_BP, A_size, B_size, patch_size)

    manager = mp.Manager()
    q = manager.Queue(A_size[1] * A_size[0])
    cpus = min(mp.cpu_count(), A_size[0] // 20 + 1)
    for i in range(iters):
        p = Pool(cpus)
        ay_start = 0
        while ay_start < A_size[0]:
            ax_start = 0
            while ax_start < A_size[1]:
                p.apply_async(pixelmatch,
                              args=(q, ax_start, ay_start, cpus, nnf, nnd,
                                    A_size, B_size, feat_A, feat_AP, feat_B,
                                    feat_BP, patch_size, rand_search_radius))
                ax_start += A_size[1] // cpus + 1
            ay_start += A_size[0] // cpus + 1
        p.close()
        p.join()

        # Fold the workers' best matches back into the field
        while not q.empty():
            ax, ay, xbest, ybest, dbest = q.get()
            nnf[ay, ax] = np.array([ybest, xbest])
            nnd[ay, ax] = dbest

    return nnf, nnd
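# Hedged sketch (assumed, not from the source) of the result contract between
# propagate() and the pixelmatch workers: each worker scans its tile of image A
# and enqueues one (ax, ay, xbest, ybest, dbest) tuple per pixel; the search
# step itself is elided.
def pixelmatch(q, ax_start, ay_start, cpus, nnf, nnd, A_size, B_size,
               feat_A, feat_AP, feat_B, feat_BP, patch_size, rand_search_radius):
    for ay in range(ay_start, min(ay_start + A_size[0] // cpus + 1, A_size[0])):
        for ax in range(ax_start, min(ax_start + A_size[1] // cpus + 1, A_size[1])):
            ybest, xbest = nnf[ay, ax]
            dbest = nnd[ay, ax]
            # ... propagation and random search would update xbest/ybest/dbest ...
            q.put((ax, ay, xbest, ybest, dbest))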
def decode_process(argv, qdecoder):
    pool = mp.Pool(argv.threads)
    manager = mp.Manager()
    write_mutex = manager.Value('i', 1)
    while True:
        item = qdecoder.get(timeout=200)
        try:
            qdecoder_size = qdecoder.qsize()
            print('\n current qdecoder size: ', qdecoder_size)
        except NotImplementedError:
            # qsize() is unavailable on some platforms (e.g. macOS)
            pass
        if item is None:
            print('decoder, qdecoder is None')
            pool.close()
            pool.join()
            return
        pool.apply_async(func=writer, args=(argv, item, write_mutex))
def main():
    print('starting')
    m = mp.Manager()
    lock = m.Lock()
    processes = []
    for rank in range(4):
        p = mp.Process(target=runner, args=(lock, rank))
        p.start()
        processes.append(p)
        print('started')
    print('waiting for processes to finish')
    for p in processes:
        p.join()
        print('join')
    print('done')
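# Hedged sketch (assumed, not from the source) of the runner main() launches:
# the managed Lock serializes the critical section across the four processes.
def runner(lock, rank):
    with lock:
        print('process {} holds the lock'.format(rank))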
def main(args):
    # logger
    logger = logging.getLogger()

    # data prep
    loader_aug = data.loaders.DataLoaderWrapper(args)

    # get model, optimizer, loss
    cae = model.cae.ConvolutionalAutoEncoder(loader_aug.img_shape,
                                             args.embedding_size, args.dropout)
    cae.load_state_dict(torch.load(args.model))
    logger.info("Trainable model parameters: %d" %
                sum(p.numel() for p in cae.parameters() if p.requires_grad))

    global_step = 0
    manager = multiprocessing.Manager()
    queue = manager.Queue()
    consumer = multiprocessing.Process(target=writetolmdb,
                                       args=(args.output, queue, len(loader_aug.ds)),
                                       name="Reporting")
    if args.cuda:
        cae.cuda()
    try:
        consumer.start()
        c_process = psutil.Process(consumer.pid)
        this_process = psutil.Process()
        with torch.autograd.detect_anomaly():
            for b_i, batch in enumerate(tqdm(loader_aug, leave=False)):
                global_step += 1
                if args.cuda:
                    batch = batch.cuda()
                embedding = cae.encoder(batch)
                queue.put(embedding.cpu())
    finally:
        # Sentinel tells the consumer to stop, then wait for it to flush
        queue.put(None)
        consumer.join()
def _create_mapper_rollouts(self, ans_cfg):
    V = ans_cfg.MAPPER.map_size
    imH, imW = ans_cfg.image_scale_hw
    mapper_observation_space = {
        "rgb_at_t": spaces.Box(
            low=0.0, high=255.0, shape=(imH, imW, 3), dtype=np.float32
        ),
        "depth_at_t": spaces.Box(
            low=0.0, high=255.0, shape=(imH, imW, 1), dtype=np.float32
        ),
        "ego_map_gt_at_t": spaces.Box(
            low=0.0, high=1.0, shape=(V, V, 2), dtype=np.float32
        ),
        "ego_map_gt_dilation_at_t": spaces.Box(
            low=0.0, high=1.0, shape=(V, V, 2), dtype=np.float32
        ),
        "pose_at_t": spaces.Box(
            low=-100000.0, high=100000.0, shape=(3,), dtype=np.float32
        ),
        "pose_gt_at_t": spaces.Box(
            low=-100000.0, high=100000.0, shape=(3,), dtype=np.float32
        ),
        "ego_map_gt_anticipated_at_t": self.envs.observation_spaces[0].spaces[
            "ego_map_gt_anticipated"
        ],
    }
    mapper_observation_space = spaces.Dict(mapper_observation_space)

    # Multiprocessing manager
    mapper_manager = mp.Manager()
    mapper_device = self.device
    if ans_cfg.MAPPER.use_data_parallel and len(ans_cfg.MAPPER.gpu_ids) > 0:
        mapper_device = ans_cfg.MAPPER.gpu_ids[0]

    mapper_rollouts = MapLargeRolloutStorageMP(
        ans_cfg.MAPPER.replay_size,
        mapper_observation_space,
        mapper_device,
        mapper_manager,
    )
    return mapper_rollouts
def run_job(logger, opt, output_dir, train):
    device_id = allocate_device()
    opt_override = {'device': device_id}

    def merge(a, b):
        d = {}
        d.update(a)
        d.update(b)
        return d

    opt = merge(opt, opt_override)
    logger.info('new job: job_id={}, device_id={}'.format(opt['job_id'], opt['device']))
    try:
        logger.info("spawning process: job_id={}, device_id={}".format(
            opt['job_id'], opt['device']))
        try:
            output_dir_thread = os.path.join(output_dir, str(opt['job_id']))
            os.makedirs(output_dir_thread, exist_ok=True)
            run_job_lock.acquire()
            manager = multiprocessing.Manager()
            return_dict = manager.dict()
            p = multiprocessing.Process(target=train,
                                        args=(opt, output_dir, output_dir_thread, return_dict))
            p.start()
        finally:
            run_job_lock.release()
        p.join()
        logger.info('finished process: job_id={}, device_id={}'.format(
            opt['job_id'], opt['device']))
        if 'stats' not in return_dict:
            raise ValueError('train() did not populate return_dict with stats')
        return return_dict['stats']
    finally:
        free_device(device_id)
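# Hedged sketch (assumed, not from the source) of the train() callable run_job
# expects: it must populate return_dict['stats'], since run_job raises if the
# key is missing after the child process exits. The stats payload is a placeholder.
def train(opt, output_dir, output_dir_thread, return_dict):
    # ... training loop writing checkpoints under output_dir_thread ...
    return_dict['stats'] = {'job_id': opt['job_id'], 'final_loss': 0.0}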
def __init__(self, game, nnet, args, multiprocessing=False):
    self.game = game
    self.nnet = nnet  # queue/pipe connection, not nnet itself
    self.args = args
    self.multiprocessing = multiprocessing
    if multiprocessing:
        self.queue = mp.Manager().Queue()

    self.Qsa = {}  # stores Q values for s,a (as defined in the paper)
    self.Nsa = {}  # stores #times edge s,a was visited
    self.Ns = {}   # stores #times board s was visited
    self.Ps = {}   # stores initial policy (returned by neural net)
    self.Es = {}   # stores game.getGameEnded results for board s
    self.Vs = {}   # stores game.getValidMoves for board s
def run(C=0.1, d_startRound_arr=[(0, 0), (25, 0)], rounds=250,
        client_epochs=5, batch_size=64, num_proc=2):
    m = int(max(round(C * K), 1))
    M = mp.Manager()
    dl = M.list()
    lock = M.Lock()
    s_lock = M.Lock()
    s_idx = M.Value("i", 0, lock=True)
    run_kws = dict(dl=dl, lock=lock, s_idx=s_idx, s_lock=s_lock,
                   D_r=d_startRound_arr, R=rounds, E=client_epochs,
                   B=batch_size, C=C)
    print("{}|processes: {}, C: {}, m: {}, dir: {}".format(
        time.ctime(), num_proc, C, m, result_dir))
    print(run_kws)
    for k in range(len(nodes_df)):
        dl.append(None)
    processes = []
    for rank in range(num_proc):
        p = mp.Process(target=init_processes, args=(rank, num_proc, process, run_kws))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
def train(self):
    for rnd in range(self.rounds):
        np.random.shuffle(self.nets_pool)
        pool = mp.Pool(self.num_per_rnd)
        self.q = mp.Manager().Queue()
        dict_new = self.global_agent.model.state_dict()
        if self.estimate_weights_in_center and rnd % self.interval == 0:
            w_d = self.global_agent.estimate_weights(self.policy)
        else:
            w_d = None
        for net in self.nets_pool[:self.num_per_rnd]:
            net.model.load_state_dict(dict_new)
            net.set_lr(self.global_agent.lr)
            pool.apply_async(train_local_mp,
                             (net, self.local_epochs, rnd, self.q, self.policy, w_d))
        pool.close()
        pool.join()
        self.update_global(rnd)
def parallel_test(nets, test_data, configs):
    processes = []
    manager = mp.Manager()
    pred_probs = manager.dict()
    gt_y = test_data[1]
    for view, net in enumerate(nets):
        p = mp.Process(target=test_net,
                       args=(net, test_data, configs[view], view, pred_probs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    pred_probs = pred_probs.values()
    pred_y1 = np.argmax(pred_probs[0], axis=1)
    pred_y2 = np.argmax(pred_probs[1], axis=1)
    pred_y = np.argmax(sum(pred_probs), axis=1)
    print('view 1: %0.4f; view 2: %0.4f; fuse: %0.4f\n' %
          (np.mean(pred_y1 == gt_y), np.mean(pred_y2 == gt_y), np.mean(pred_y == gt_y)))
def __init__(self, n_readers=1, read_fn=_default_read_fn, n_retries=3):
    self._read_queue = mp.Queue()
    self._out_queue = mp.Queue()
    self._manager = mp.Manager()
    self._buf = self._manager.dict()
    self._read_fn = read_fn
    self._read_workers = [
        mp.Process(
            target=self.__read_worker,
            args=(self._read_queue, self._out_queue),
            daemon=True,
        )
        for _ in range(n_readers)
    ]
    if platform.system() != "Windows":
        for w in self._read_workers:
            w.start()
    self.n_retries = n_retries
def evaluation(model, name, adjacency, neproc, vectors=None, cuda=False, verbose=False):
    t_start = timeit.default_timer()
    adjacency = list(adjacency.items())
    chunk = int(len(adjacency) / neproc + 1)
    if vectors is not None:
        with torch.no_grad():
            vectors = Variable(torch.from_numpy(vectors).float())
            if cuda:
                vectors = vectors.cuda()
            embeds = model.module.embed(vectors)
    else:
        embeds = model.module.embed()
    queue = mp.Manager().Queue()
    processes = []
    for rank in range(neproc):
        if "sips" in name:
            p = mp.Process(target=eval_sips_thread,
                           args=(adjacency[rank * chunk:(rank + 1) * chunk],
                                 model, embeds, queue, rank == 0 and verbose))
        else:
            p = mp.Process(target=eval_thread,
                           args=(adjacency[rank * chunk:(rank + 1) * chunk],
                                 model, embeds, queue, rank == 0 and verbose))
        p.start()
        processes.append(p)
    ranks = list()
    ap_scores = list()
    for i in range(neproc):
        _ranks, _ap_scores = queue.get()
        ranks += _ranks
        ap_scores += _ap_scores
    return np.mean(ranks), np.mean(ap_scores), timeit.default_timer() - t_start
def main(args):
    assert args.render or not args.gif, 'If you want to display a gif, you must set render to true'
    if args.load is False and os.path.isfile('./model/breakout.pt'):
        while True:
            load = input('Are you sure you want to erase the previous training? (y/n) ')
            if load.lower() in ('y', 'yes', '1'):
                break
            elif load.lower() in ('n', 'no', '0'):
                import sys
                sys.exit()
    # create shared variables between all the processes
    manager = mp.Manager()
    # used to send the results of the net
    common_dict = manager.dict()
    # a queue of batches to be fed to the training net
    mem_queue = manager.Queue(1500 * mp.cpu_count())
    # a queue of operations pending
    process_queue = manager.Queue(mp.cpu_count() - 1)
    with mp.Pool() as pool:
        try:
            workers: int = pool._processes
            print(f"Running pool with {workers//2} workers")
            pool.apply_async(gpu_thread,
                             (args.load, mem_queue, process_queue, common_dict, [0, 1]))
            if args.render:
                pool.apply_async(cpu_thread,
                                 (2 if args.gif else 1, mem_queue, process_queue,
                                  common_dict, [2, 3]))
            for i in range(2 * (1 + args.render), workers, 2):
                pool.apply_async(cpu_thread,
                                 (0, mem_queue, process_queue, common_dict, [i, i + 1]))
            # Wait for children to finish
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            # Pool.join() requires close() or terminate() to have been called first
            pool.terminate()
            pool.join()
def __init__(self, config):
    self.seed = config.seed
    self.config = config
    self.num_workers = config.num_workers
    if config.agent['name'] == 'ppo2':
        Transition = PPO_Transition
    elif config.agent['name'] in ('cppo', 'cppo2'):
        Transition = CPPO_Transition
    elif config.agent['name'] == 'safe_sac':
        Transition = Safe_TD_Transition
    else:
        Transition = TD_Transition
    if config.sampler_gpu_index == -1:
        self.device = torch.device('cpu')
    else:
        self.device = (torch.device('cuda', index=config.sampler_gpu_index)
                       if torch.cuda.is_available() else torch.device('cpu'))
    self.remotes, self.work_remotes = zip(*[mp.Pipe() for _ in range(self.num_workers)])
    self.manager = mp.Manager()
    # Queue through which the sampler receives messages
    self.recv_queue = self.manager.Queue(self.num_workers)
    # Lock guarding the sampler's receive queue
    self.recv_lock = self.manager.Lock()
    if is_on_policy(self.config.agent['name']):
        self.buffer = Memory(Transition=Transition)
    else:
        self.buffer = ReplayBuffer(size=config.agent['buffer_size'], Transition=Transition)
    self.workers = [EnvWorker(id, remote, self.recv_queue, self.recv_lock,
                              self.config, Transition)
                    for (id, remote) in zip(range(self.num_workers), self.work_remotes)]
    for worker in self.workers:
        worker.start()
    self.sample_iter = 0
    self.result_dict = {}
def eval_strategy_network(self, steps):
    print("\nEvaluating strategy network after {} steps".format(steps))
    self.strategy_network._network = self.strategy_network._network.cpu()
    self.strategy_network._device = torch.device("cpu")
    for p in self.strategy_network._network.parameters():
        assert p.device == torch.device("cpu")

    manager = mp.Manager()
    save_lock = manager.Lock()

    t0 = time.time()
    exploits = []
    strategies = {0: self.strategy_network, 1: self.strategy_network}
    for k in range(self.opt.NUM_TRAVERSALS_EVAL):
        sb_player_idx = k % 2
        round_state = create_new_round(sb_player_idx)
        precomputed_ev = make_precomputed_ev(round_state)
        info = traverse(round_state, make_actions, make_infoset, 0, sb_player_idx,
                        strategies, None, None, 0, precomputed_ev)
        exploits.append(info.exploitability.sum())
    elapsed = time.time() - t0
    print("Time for {} eval traversals {} sec".format(self.opt.NUM_TRAVERSALS_EVAL, elapsed))

    mbb_per_game = 1e3 * torch.Tensor(exploits) / (2.0 * Constants.SMALL_BLIND_AMOUNT)
    mean_mbb_per_game = mbb_per_game.mean()
    stdev_mbb_per_game = mbb_per_game.std()

    writer = self.writers["train"]
    writer.add_scalar("strt_exploit_mbbg_mean", mean_mbb_per_game, steps)
    writer.add_scalar("strt_exploit_mbbg_stdev", stdev_mbb_per_game, steps)
    writer.close()
    print("===> [EVAL] [STRATEGY] Exploitability | mean={} mbb/g | stdev={} | (steps={})"
          .format(mean_mbb_per_game, stdev_mbb_per_game, steps))
def execute_split_frame_search(T, fixed_frames, parallel, *args):
    procs, proc_limit = [], 10
    if parallel:
        blocks_i = mp.Manager().dict()
    else:
        blocks_i = {}
    for t in range(T):
        fixed_frames_t = copy.deepcopy(fixed_frames)
        if parallel:
            p = mp.Process(target=search_across_frame,
                           args=(t, blocks_i, fixed_frames_t, *args))
            p.start()
            procs.append(p)
            # -- wait and reset proc queue --
            if len(procs) == proc_limit:
                finish_procs(procs, proc_limit)
                procs = []
        else:
            search_across_frame(t, blocks_i, fixed_frames_t, *args)
    finish_procs(procs, proc_limit)
    blocks_i = [blocks_i[str(t)] for t in range(T)]
    return blocks_i
def train(solved_score, population_size, elite_size, num_proc, log_video_rate):
    setup_logger()
    manager = mp.Manager()
    work_queue = manager.Queue()
    results_queue = manager.Queue()

    # Random search for the 1st generation
    start_time = time.time()
    env = create_environment()
    population = create_population(env, population_size)
    print(population[0])
    elite, top_scores = get_top_performers_from_random_population(env, population, elite_size)
    elapsed_time = time.time() - start_time
    log_generation_stats(1, top_scores, elapsed_time)

    # 2nd -> inf generation: mutate top performers (classic GA)
    ma_reward = 0
    spawn_processes(num_proc, work_fn=mutate_and_evaluate_task,
                    args=(elite, work_queue, results_queue))
    for generation in count(start=2, step=1):
        start_time = time.time()
        spawn_mutation_work(work_queue, elite_size, population_size)
        evaluated_population = collect_results(results_queue, size=population_size)
        top_scores = get_top_performers(evaluated_population, elite, elite_size)
        elapsed_time = time.time() - start_time
        if generation % log_video_rate == 0:
            record_evaluation_video(elite[0], env)
        log_generation_stats(generation, top_scores, elapsed_time)
        ma_reward = 0.7 * ma_reward + 0.3 * top_scores.mean()
        if ma_reward >= solved_score:
            print(f"Solved in {generation} generations")
            kill_processes(work_queue, num_proc)
            break
def run_distributed(create_env_fn, log_dir, Actor, Learner, num_actors, configs):
    mp.freeze_support()
    shared_kwargs = {
        'shared_memory': mp.Queue(100),
        'shared_weights': mp.Manager().dict(),
    }
    learner_kwargs = dict(
        env=create_env_fn(),
        log_dir=log_dir,
        Learner=Learner,
        **configs['common'],
        **configs['learner'],
        **shared_kwargs,
    )
    processes = [mp.Process(target=run_learner, kwargs=learner_kwargs)]
    for actor_id in range(num_actors):
        actor_kwargs = dict(
            env=create_env_fn(),
            log_dir=log_dir,
            Actor=Actor,
            actor_id=actor_id,
            num_actors=num_actors,
            **configs['common'],
            **configs['actor'],
            **shared_kwargs,
        )
        processes.append(mp.Process(target=run_actor, kwargs=actor_kwargs))
    for p in processes:
        p.start()
    for p in processes:
        p.join()
def run(self, num_neighbors, num_layers, num_workers):
    offset = self.start_idx
    batch_size = min(self.end_idx - self.start_idx + 1, self.batch_size)

    ###########################################
    # create a managed dict to share data between processes
    manager = mp.Manager()
    queue = manager.dict()

    ###########################################
    # multi-process to create graph
    process = GraphGenProcessor(queue, self.num_nodes, offset, batch_size,
                                self.graph, self.interactions, self.neg_nodes,
                                self.times, num_neighbors, num_layers, num_workers)
    process.start()
    while True:
        process.join()
        data = queue['data']
        offset += batch_size
        if offset >= self.end_idx:
            offset = self.start_idx
        # next batch
        batch_size = min(self.end_idx - self.start_idx + 1, self.batch_size)
        queue = manager.dict()
        process = GraphGenProcessor(queue, self.num_nodes, offset, batch_size,
                                    self.graph, self.interactions, self.neg_nodes,
                                    self.times, num_neighbors, num_layers, num_workers)
        ###########################################
        # start running next timestep process
        process.start()
        yield data
def collect_train_samples_parallel(epoch, max_steps, objects, num_workers=10):
    """
    Purpose: collect rollouts for max_steps steps using num_workers workers
    Return: stats_collector
    """
    num_steps_per_worker = max_steps // num_workers
    num_residual_steps = max_steps - num_steps_per_worker * num_workers
    queue = mp.Manager().Queue()
    workers = []
    for i in range(num_workers):
        # The first worker absorbs the residual steps so the totals add up
        worker_steps = num_steps_per_worker + num_residual_steps if i == 0 else num_steps_per_worker
        worker_kwargs = dict(epoch=epoch, max_steps=worker_steps,
                             objects=objects, pid=i + 1, queue=queue)
        workers.append(mp.Process(target=collect_train_samples_serial, kwargs=worker_kwargs))
    for j, worker in enumerate(workers):
        worker.start()
    start = time.time()
    master_stats_collector = objects['stats_collector_builder']()
    for j, worker in enumerate(workers):
        worker_pid, worker_stats_data = queue.get()
        master_stats_collector.extend(worker_stats_data)
    end = time.time()
    objects['printer']('Time to extend master_stats_collector: {}'.format(end - start))
    for j, worker in enumerate(workers):
        worker.join()
    assert master_stats_collector.get_total_steps() == max_steps
    return master_stats_collector
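# Hedged sketch (assumed, not from the source) of the worker contract used
# above: each worker rolls out `max_steps` steps, then puts (pid, stats_data)
# on the queue so the parent can extend the master collector before joining.
# get_data() is a hypothetical accessor for the collector's payload.
def collect_train_samples_serial(epoch, max_steps, objects, pid, queue):
    stats_collector = objects['stats_collector_builder']()
    # ... collect rollouts until stats_collector.get_total_steps() == max_steps ...
    queue.put((pid, stats_collector.get_data()))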
def main(episodes, agent, num_processes):
    running_reward_array = []
    mp.set_start_method('spawn')
    for episode in range(episodes):
        successful_runs = 0
        master_reward, reward, running_reward = 0, 0, 0
        processes = []
        queue = mp.Manager().Queue()
        for proc in range(num_processes):
            p = mp.Process(target=run_episode, args=(queue, agent))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        while not queue.empty():
            try:
                fake_out = queue.get()
            except MemoryError as e:
                print(e)
                fake_out = [-13, None]  # sentinel marking a failed rollout
            if fake_out[0] != -13:
                master_reward += fake_out[0]
                running_reward_array.append(fake_out[0])
                agent.replay_buffer.extend(fake_out[1])
                successful_runs += 1
        if successful_runs > 0:
            reward = master_reward / float(successful_runs)
            agent.end_episode(reward, num_processes)
        running_reward = sum(running_reward_array[-100:]) / float(min(100.0, len(running_reward_array)))
        if episode % 50 == 0:
            print(f'Episode {episode} Last Reward: {reward} Average Reward: {running_reward}')
            print(f"Running {num_processes} concurrent simulations per episode")
        if episode % 500 == 0:
            agent.save('../models/' + str(episode) + 'th')
    return running_reward_array
def __init__(self, q, batch_size: int, num_workers: int, transform):
    """
    Args:
        q: A thread-safe queue. It should be a multiprocessing.Manager().Queue()
            or torch.multiprocessing.Manager().Queue().
        batch_size (int): the maximum size of a batch.
        num_workers (int): the number of worker processes.
        transform: a function that receives a string (msg) and returns any object.
    """
    assert isinstance(q, mp.managers.BaseProxy) or isinstance(q, tmp.managers.BaseProxy)
    assert batch_size > 0
    assert num_workers > 0
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.m = tmp.Manager()
    self.source = q
    self.sink = self.m.Queue(maxsize=batch_size * 3)
    self.pool = tmp.Pool(num_workers)
    for i in range(num_workers):
        r = self.pool.apply_async(self._worker_loop, (self.source, self.sink, transform))
def main(episodes, agent, num_processes, ENV_NAME):
    running_reward_array = []
    for episode in range(episodes):
        master_reward = 0
        reward, running_reward = 0, 0
        processes = []
        q = mp.Manager().Queue()
        for proc in range(num_processes):
            p = mp.Process(target=run_episode, args=(q, agent, ENV_NAME))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        while not q.empty():
            fake_out = q.get()
            master_reward += fake_out[0]
            running_reward_array.append(fake_out[0])
            agent.replay_buffer.extend(fake_out[1])
        # the parent also plays one episode in-process
        tuple_out = run_episode(None, agent, ENV_NAME)
        master_reward += tuple_out[0]
        running_reward_array.append(tuple_out[0])
        agent.replay_buffer.extend(tuple_out[1])
        reward = master_reward / float(num_processes + 1)
        agent.end_episode(reward, num_processes)
        running_reward = sum(running_reward_array[-100:]) / float(min(100.0, len(running_reward_array)))
        print(episode)
        if episode % 50 == 0:
            print(f'Episode {episode} Last Reward: {reward} Average Reward: {running_reward}')
        if episode % 500 == 0:
            agent.save('../models/' + str(episode) + 'th')
    return running_reward_array
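# Hedged sketch (assumed, not from the source) of the run_episode target used
# by the main above: it plays one episode and delivers (total_reward,
# transitions) either through the queue or, when q is None, as a return value,
# matching both call sites in the function.
def run_episode(q, agent, ENV_NAME):
    total_reward, transitions = 0.0, []
    # ... step the environment with the agent's policy, accumulating both ...
    result = (total_reward, transitions)
    if q is None:
        return result
    q.put(result)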