def make_aggregators_and_optimizer(workers, config):
    """Build the AsyncSamplesOptimizer, wiring in pre-created aggregators.

    Args:
        workers: Worker collection; ``add_workers`` is called on it with
            ``config["num_workers"]`` and it is then handed to the optimizer.
        config: Mapping holding the optimizer settings read below, plus an
            ``"optimizer"`` mapping of extra keyword arguments.

    Returns:
        The constructed AsyncSamplesOptimizer.
    """
    num_aggregators = config["num_aggregation_workers"]

    # Aggregator actors are created before the rollout workers are added so
    # that they get placement preference (co-location).
    pre_created = (
        TreeAggregator.precreate_aggregators(num_aggregators)
        if num_aggregators > 0 else None)

    workers.add_workers(config["num_workers"])

    optimizer = AsyncSamplesOptimizer(
        workers,
        lr=config["lr"],
        num_gpus=config["num_gpus"],
        rollout_fragment_length=config["rollout_fragment_length"],
        train_batch_size=config["train_batch_size"],
        replay_buffer_num_slots=config["replay_buffer_num_slots"],
        replay_proportion=config["replay_proportion"],
        num_data_loader_buffers=config["num_data_loader_buffers"],
        max_sample_requests_in_flight_per_worker=config[
            "max_sample_requests_in_flight_per_worker"],
        broadcast_interval=config["broadcast_interval"],
        num_sgd_iter=config["num_sgd_iter"],
        minibatch_buffer_size=config["minibatch_buffer_size"],
        num_aggregation_workers=num_aggregators,
        learner_queue_size=config["learner_queue_size"],
        learner_queue_timeout=config["learner_queue_timeout"],
        **config["optimizer"])

    if pre_created:
        # Hand the aggregators created up front over to the optimizer.
        optimizer.aggregator.init(pre_created)

    return optimizer
def testMultiTierAggregation(self):
    """Run the async optimizer with one pre-created tree aggregator."""
    local_ev, remote_evs = self._make_evs()
    worker_set = WorkerSet._from_existing(local_ev, remote_evs)

    precreated = TreeAggregator.precreate_aggregators(1)
    opt = AsyncSamplesOptimizer(worker_set, num_aggregation_workers=1)
    opt.aggregator.init(precreated)

    # NOTE(review): the two 1000 arguments are presumably sample/train step
    # targets for the _wait_for helper -- confirm against its definition.
    self._wait_for(opt, 1000, 1000)
def testMultiTierAggregationBadConf(self):
    """Assigning four pre-created aggregators must raise ValueError."""
    local_ev, remote_evs = self._make_evs()
    worker_set = WorkerSet._from_existing(local_ev, remote_evs)

    precreated = TreeAggregator.precreate_aggregators(4)
    opt = AsyncSamplesOptimizer(worker_set, num_aggregation_workers=4)

    # NOTE(review): presumably four aggregators is more than the fixture's
    # remote workers can support -- confirm against _make_evs.
    with self.assertRaises(ValueError):
        opt.aggregator.init(precreated)
def testMultiTierAggregationBadConf(self):
    """Expect ValueError when initialising with four aggregators."""
    local_ev, remote_evs = self._make_evs()
    precreated = TreeAggregator.precreate_aggregators(4)

    optimizer_conf = {"num_aggregation_workers": 4}
    opt = AsyncSamplesOptimizer(local_ev, remote_evs, optimizer_conf)

    # NOTE(review): presumably the evaluators from _make_evs are too few for
    # four aggregators -- confirm against the fixture.
    with self.assertRaises(ValueError):
        opt.aggregator.init(precreated)
def testMultiTierAggregation(self):
    """Exercise the optimizer with a single pre-created aggregator."""
    local_ev, remote_evs = self._make_evs()
    precreated = TreeAggregator.precreate_aggregators(1)

    optimizer_conf = {"num_aggregation_workers": 1}
    opt = AsyncSamplesOptimizer(local_ev, remote_evs, optimizer_conf)
    opt.aggregator.init(precreated)

    # NOTE(review): meaning of the two 1000 arguments depends on the
    # _wait_for helper, which is not shown here -- confirm.
    self._wait_for(opt, 1000, 1000)
def _init(self, config, env_creator):
    """Create the evaluators, optional aggregators and the async optimizer.

    Sets ``self.local_evaluator``, ``self.remote_evaluators`` and
    ``self.optimizer``.

    Args:
        config: Configuration mapping. Keys listed in
            ``OPTIMIZER_SHARED_CONFIGS`` are copied into
            ``config["optimizer"]`` unless already present there.
        env_creator: Environment factory passed to the remote evaluators.

    Raises:
        DeprecationWarning: If ``config["entropy_coeff"]`` is negative.
    """
    # Validate first: rejecting a bad config before any evaluator,
    # aggregator or optimizer is constructed means nothing is left behind
    # half-initialised when the error is raised.
    if config["entropy_coeff"] < 0:
        raise DeprecationWarning("entropy_coeff must be >= 0")

    # Propagate shared top-level settings into the optimizer config without
    # overriding values that were set there explicitly.
    optimizer_config = config["optimizer"]
    for k in OPTIMIZER_SHARED_CONFIGS:
        if k not in optimizer_config:
            optimizer_config[k] = config[k]

    policy_cls = self._get_policy_graph()
    # NOTE(review): self.env_creator / self.config are presumably the same
    # objects as the env_creator / config arguments -- confirm with caller.
    self.local_evaluator = self.make_local_evaluator(
        self.env_creator, policy_cls)

    num_aggregators = self.config["num_aggregation_workers"]
    aggregators = None
    if num_aggregators > 0:
        # Create co-located aggregator actors first for placement pref
        aggregators = TreeAggregator.precreate_aggregators(num_aggregators)

    self.remote_evaluators = self.make_remote_evaluators(
        env_creator, policy_cls, config["num_workers"])
    self.optimizer = AsyncSamplesOptimizer(
        self.local_evaluator, self.remote_evaluators, optimizer_config)

    if num_aggregators > 0:
        # Assign the pre-created aggregators to the optimizer
        self.optimizer.aggregator.init(aggregators)