def testMultiTierAggregation(self):
    local, remotes = self._make_evs()
    workers = WorkerSet._from_existing(local, remotes)
    aggregators = TreeAggregator.precreate_aggregators(1)
    optimizer = AsyncSamplesOptimizer(workers, num_aggregation_workers=1)
    optimizer.aggregator.init(aggregators)
    self._wait_for(optimizer, 1000, 1000)
def make_aggregators_and_optimizer(workers, config):
    if config["num_aggregation_workers"] > 0:
        # Create co-located aggregator actors first for placement pref
        aggregators = TreeAggregator.precreate_aggregators(
            config["num_aggregation_workers"])
    else:
        aggregators = None

    workers.add_workers(config["num_workers"])

    optimizer = AsyncSamplesOptimizer(
        workers,
        lr=config["lr"],
        num_gpus=config["num_gpus"],
        rollout_fragment_length=config["rollout_fragment_length"],
        train_batch_size=config["train_batch_size"],
        replay_buffer_num_slots=config["replay_buffer_num_slots"],
        replay_proportion=config["replay_proportion"],
        num_data_loader_buffers=config["num_data_loader_buffers"],
        max_sample_requests_in_flight_per_worker=config[
            "max_sample_requests_in_flight_per_worker"],
        broadcast_interval=config["broadcast_interval"],
        num_sgd_iter=config["num_sgd_iter"],
        minibatch_buffer_size=config["minibatch_buffer_size"],
        num_aggregation_workers=config["num_aggregation_workers"],
        learner_queue_size=config["learner_queue_size"],
        learner_queue_timeout=config["learner_queue_timeout"],
        **config["optimizer"])

    if aggregators:
        # Assign the pre-created aggregators to the optimizer
        optimizer.aggregator.init(aggregators)
    return optimizer
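# Illustrative sketch, not the RLlib API: the helper above works in two phases
# because the aggregation actors must exist before the rollout workers are
# added (so they can get placement preference), while the optimizer can only
# be pointed at them afterwards via aggregator.init(). Below is a minimal,
# self-contained model of that two-phase wiring; every name is hypothetical.


class SketchAggregator(object):
    """Stands in for a pre-created aggregation actor."""

    def __init__(self, index):
        self.index = index


class SketchOptimizer(object):
    """Stands in for an optimizer that is handed pre-created aggregators."""

    def __init__(self, num_aggregation_workers):
        self.num_aggregation_workers = num_aggregation_workers
        self.aggregators = None

    def init_aggregators(self, aggregators):
        # The sketch only checks that the number of pre-created actors matches
        # what was configured; the real optimizer does its own validation in
        # init() and raises ValueError on a bad configuration.
        if len(aggregators) != self.num_aggregation_workers:
            raise ValueError("got {} aggregators, expected {}".format(
                len(aggregators), self.num_aggregation_workers))
        self.aggregators = aggregators


def sketch_build(num_aggregation_workers):
    # Phase 1: create the aggregators first.
    aggregators = [
        SketchAggregator(i) for i in range(num_aggregation_workers)
    ]
    # Phase 2: build the optimizer, then hand it the pre-created actors.
    optimizer = SketchOptimizer(num_aggregation_workers)
    optimizer.init_aggregators(aggregators)
    return optimizer


assert len(sketch_build(2).aggregators) == 2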
def testMultiTierAggregationBadConf(self):
    local, remotes = self._make_evs()
    workers = WorkerSet._from_existing(local, remotes)
    aggregators = TreeAggregator.precreate_aggregators(4)
    optimizer = AsyncSamplesOptimizer(workers, num_aggregation_workers=4)
    self.assertRaises(ValueError,
                      lambda: optimizer.aggregator.init(aggregators))
def testMultiTierAggregationBadConf(self):
    local, remotes = self._make_evs()
    aggregators = TreeAggregator.precreate_aggregators(4)
    optimizer = AsyncSamplesOptimizer(local, remotes,
                                      {"num_aggregation_workers": 4})
    self.assertRaises(ValueError,
                      lambda: optimizer.aggregator.init(aggregators))
def testMultiTierAggregation(self):
    local, remotes = self._make_evs()
    aggregators = TreeAggregator.precreate_aggregators(1)
    optimizer = AsyncSamplesOptimizer(local, remotes, {
        "num_aggregation_workers": 1,
    })
    optimizer.aggregator.init(aggregators)
    self._wait_for(optimizer, 1000, 1000)
def _init(self, config, env_creator):
    for k in OPTIMIZER_SHARED_CONFIGS:
        if k not in config["optimizer"]:
            config["optimizer"][k] = config[k]
    policy_cls = self._get_policy_graph()
    self.local_evaluator = self.make_local_evaluator(
        self.env_creator, policy_cls)

    if self.config["num_aggregation_workers"] > 0:
        # Create co-located aggregator actors first for placement pref
        aggregators = TreeAggregator.precreate_aggregators(
            self.config["num_aggregation_workers"])

    self.remote_evaluators = self.make_remote_evaluators(
        env_creator, policy_cls, config["num_workers"])
    self.optimizer = AsyncSamplesOptimizer(self.local_evaluator,
                                           self.remote_evaluators,
                                           config["optimizer"])
    if config["entropy_coeff"] < 0:
        raise DeprecationWarning("entropy_coeff must be >= 0")

    if self.config["num_aggregation_workers"] > 0:
        # Assign the pre-created aggregators to the optimizer
        self.optimizer.aggregator.init(aggregators)
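# Illustrative, self-contained sketch of the shared-config merge at the top of
# _init(): keys listed in OPTIMIZER_SHARED_CONFIGS are copied from the
# top-level config into config["optimizer"] only when the optimizer config
# does not already set them. The key names and helper below are hypothetical.


def sketch_merge_shared(config, shared_keys=("train_batch_size",
                                             "sample_batch_size")):
    for k in shared_keys:
        if k not in config["optimizer"]:
            config["optimizer"][k] = config[k]
    return config


# An explicit optimizer-level value wins; missing keys fall back to the
# top-level setting.
assert sketch_merge_shared({
    "train_batch_size": 500,
    "sample_batch_size": 50,
    "optimizer": {"train_batch_size": 1000},
})["optimizer"] == {"train_batch_size": 1000, "sample_batch_size": 50}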
def __init__(self,
             workers,
             train_batch_size=500,
             sample_batch_size=50,
             num_envs_per_worker=1,
             num_gpus=0,
             lr=0.0005,
             replay_buffer_num_slots=0,
             replay_proportion=0.0,
             num_data_loader_buffers=1,
             max_sample_requests_in_flight_per_worker=2,
             broadcast_interval=1,
             num_sgd_iter=1,
             minibatch_buffer_size=1,
             learner_queue_size=16,
             num_aggregation_workers=0,
             _fake_gpus=False):
    PolicyOptimizer.__init__(self, workers)

    self._stats_start_time = time.time()
    self._last_stats_time = {}
    self._last_stats_sum = {}

    if num_gpus > 1 or num_data_loader_buffers > 1:
        logger.info(
            "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                num_gpus, num_data_loader_buffers))
        if num_data_loader_buffers < minibatch_buffer_size:
            raise ValueError(
                "In multi-gpu mode you must have at least as many "
                "parallel data loader buffers as minibatch buffers: "
                "{} vs {}".format(num_data_loader_buffers,
                                  minibatch_buffer_size))
        self.learner = TFMultiGPULearner(
            self.workers.local_worker(),
            lr=lr,
            num_gpus=num_gpus,
            train_batch_size=train_batch_size,
            num_data_loader_buffers=num_data_loader_buffers,
            minibatch_buffer_size=minibatch_buffer_size,
            num_sgd_iter=num_sgd_iter,
            learner_queue_size=learner_queue_size,
            _fake_gpus=_fake_gpus)
    else:
        self.learner = LearnerThread(self.workers.local_worker(),
                                     minibatch_buffer_size, num_sgd_iter,
                                     learner_queue_size)
    self.learner.start()

    # Stats
    self._optimizer_step_timer = TimerStat()
    self._stats_start_time = time.time()
    self._last_stats_time = {}

    # Fan sample batches in through a tree of aggregation actors when
    # configured; otherwise aggregate directly in this process.
    if num_aggregation_workers > 0:
        self.aggregator = TreeAggregator(
            workers,
            num_aggregation_workers,
            replay_proportion=replay_proportion,
            max_sample_requests_in_flight_per_worker=(
                max_sample_requests_in_flight_per_worker),
            replay_buffer_num_slots=replay_buffer_num_slots,
            train_batch_size=train_batch_size,
            sample_batch_size=sample_batch_size,
            broadcast_interval=broadcast_interval)
    else:
        self.aggregator = SimpleAggregator(
            workers,
            replay_proportion=replay_proportion,
            max_sample_requests_in_flight_per_worker=(
                max_sample_requests_in_flight_per_worker),
            replay_buffer_num_slots=replay_buffer_num_slots,
            train_batch_size=train_batch_size,
            sample_batch_size=sample_batch_size,
            broadcast_interval=broadcast_interval)
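# Illustrative sketch, with hypothetical names: when num_aggregation_workers
# is greater than zero, the constructor above routes sample batches through a
# tree of aggregation actors instead of aggregating everything in this
# process. The sketch below only models the fan-in idea of spreading rollout
# workers across aggregation workers; it is not the TreeAggregator
# implementation.


def sketch_assign_workers_to_aggregators(worker_ids, num_aggregation_workers):
    # Round-robin the rollout workers across the aggregation workers so each
    # aggregator handles a roughly equal share of the incoming batches.
    groups = [[] for _ in range(num_aggregation_workers)]
    for i, worker_id in enumerate(worker_ids):
        groups[i % num_aggregation_workers].append(worker_id)
    return groups


# Example: eight rollout workers funneled through two aggregation workers.
assert sketch_assign_workers_to_aggregators(list(range(8)), 2) == [
    [0, 2, 4, 6],
    [1, 3, 5, 7],
]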