def init_workers(self): """ Initialize all types of workers and start their worker processes. """ actor_queues = [MpQueue(2 * 1000 * 1000) for _ in range(self.cfg.num_workers)] policy_worker_queues = dict() for policy_id in range(self.cfg.num_policies): policy_worker_queues[policy_id] = [] for i in range(self.cfg.policy_workers_per_policy): policy_worker_queues[policy_id].append(TorchJoinableQueue()) log.info('Initializing learners...') policy_locks = [multiprocessing.Lock() for _ in range(self.cfg.num_policies)] resume_experience_collection_cv = [multiprocessing.Condition() for _ in range(self.cfg.num_policies)] learner_idx = 0 for policy_id in range(self.cfg.num_policies): learner_worker = LearnerWorker( learner_idx, policy_id, self.cfg, self.obs_space, self.action_space, self.report_queue, policy_worker_queues[policy_id], self.traj_buffers, policy_locks[policy_id], resume_experience_collection_cv[policy_id], ) learner_worker.start_process() learner_worker.init() self.learner_workers[policy_id] = learner_worker learner_idx += 1 log.info('Initializing policy workers...') for policy_id in range(self.cfg.num_policies): self.policy_workers[policy_id] = [] policy_queue = MpQueue() self.policy_queues[policy_id] = policy_queue for i in range(self.cfg.policy_workers_per_policy): policy_worker = PolicyWorker( i, policy_id, self.cfg, self.obs_space, self.action_space, self.traj_buffers, policy_queue, actor_queues, self.report_queue, policy_worker_queues[policy_id][i], policy_locks[policy_id], resume_experience_collection_cv[policy_id], ) self.policy_workers[policy_id].append(policy_worker) policy_worker.start_process() log.info('Initializing actors...') # We support actor worker initialization in groups, which can be useful for some envs that # e.g. crash when too many environments are being initialized in parallel. # Currently the limit is not used since it is not required for any envs supported out of the box, # so we parallelize initialization as hard as we can. # If this is required for your environment, perhaps a better solution would be to use global locks, # like FileLock (see doom_gym.py) self.actor_workers = [] max_parallel_init = int(1e9) # might be useful to limit this for some envs worker_indices = list(range(self.cfg.num_workers)) for i in range(0, self.cfg.num_workers, max_parallel_init): workers = self.init_subset(worker_indices[i:i + max_parallel_init], actor_queues) self.actor_workers.extend(workers)
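
# ---------------------------------------------------------------------------
# Illustration (not part of the original source): the per-policy Lock and
# Condition created above suggest a pause/resume protocol between a learner
# and its policy workers. This standalone sketch shows that pattern with the
# standard library only; `experience_collector`, `stop_collection`, and the
# learner-side logic are hypothetical, and the real protocol may differ.
import multiprocessing
import time


def experience_collector(cv, stop_collection):
    for step in range(5):
        with cv:
            # Block while the learner asks us to pause experience collection.
            while stop_collection.value:
                cv.wait()
        print('collecting experience, step', step)
        time.sleep(0.1)


if __name__ == '__main__':
    resume_cv = multiprocessing.Condition()
    stop_collection = multiprocessing.Value('b', False)

    collector = multiprocessing.Process(target=experience_collector, args=(resume_cv, stop_collection))
    collector.start()

    # "Learner" side: pause collection for a moment (e.g. while catching up on
    # gradient updates), then wake every worker waiting on the condition.
    with resume_cv:
        stop_collection.value = True
    time.sleep(0.5)
    with resume_cv:
        stop_collection.value = False
        resume_cv.notify_all()

    collector.join()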