Example #1
    def testMultiTierAggregation(self):
        local, remotes = self._make_evs()
        workers = WorkerSet._from_existing(local, remotes)
        aggregators = TreeAggregator.precreate_aggregators(1)
        optimizer = AsyncSamplesOptimizer(workers, num_aggregation_workers=1)
        optimizer.aggregator.init(aggregators)
        self._wait_for(optimizer, 1000, 1000)
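
The three steps above are the order the other examples follow as well: pre-create the aggregation actors first (Example #2 notes this is done for placement preference), construct AsyncSamplesOptimizer with a matching num_aggregation_workers, then hand the pre-created actors to optimizer.aggregator.init(). Example #2 shows the same flow together with the rest of the optimizer configuration.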
Example #2
def make_aggregators_and_optimizer(workers, config):
    if config["num_aggregation_workers"] > 0:
        # Create co-located aggregator actors first for placement pref
        aggregators = TreeAggregator.precreate_aggregators(
            config["num_aggregation_workers"])
    else:
        aggregators = None
    workers.add_workers(config["num_workers"])

    optimizer = AsyncSamplesOptimizer(
        workers,
        lr=config["lr"],
        num_gpus=config["num_gpus"],
        rollout_fragment_length=config["rollout_fragment_length"],
        train_batch_size=config["train_batch_size"],
        replay_buffer_num_slots=config["replay_buffer_num_slots"],
        replay_proportion=config["replay_proportion"],
        num_data_loader_buffers=config["num_data_loader_buffers"],
        max_sample_requests_in_flight_per_worker=config[
            "max_sample_requests_in_flight_per_worker"],
        broadcast_interval=config["broadcast_interval"],
        num_sgd_iter=config["num_sgd_iter"],
        minibatch_buffer_size=config["minibatch_buffer_size"],
        num_aggregation_workers=config["num_aggregation_workers"],
        learner_queue_size=config["learner_queue_size"],
        learner_queue_timeout=config["learner_queue_timeout"],
        **config["optimizer"])

    if aggregators:
        # Assign the pre-created aggregators to the optimizer
        optimizer.aggregator.init(aggregators)
    return optimizer
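
A minimal usage sketch for the helper above (not part of the original source). Most of the values mirror the AsyncSamplesOptimizer defaults shown in Example #6; rollout_fragment_length and learner_queue_timeout do not appear there, so those two values are assumptions, and `workers` is assumed to be an already-constructed WorkerSet with no remote workers yet.

config = {
    "num_workers": 8,
    "num_aggregation_workers": 2,
    "lr": 0.0005,
    "num_gpus": 0,
    "rollout_fragment_length": 50,
    "train_batch_size": 500,
    "replay_buffer_num_slots": 0,
    "replay_proportion": 0.0,
    "num_data_loader_buffers": 1,
    "max_sample_requests_in_flight_per_worker": 2,
    "broadcast_interval": 1,
    "num_sgd_iter": 1,
    "minibatch_buffer_size": 1,
    "learner_queue_size": 16,
    "learner_queue_timeout": 300,
    "optimizer": {},  # extra kwargs forwarded to AsyncSamplesOptimizer
}
optimizer = make_aggregators_and_optimizer(workers, config)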
Example #3
    def testMultiTierAggregationBadConf(self):
        local, remotes = self._make_evs()
        workers = WorkerSet._from_existing(local, remotes)
        aggregators = TreeAggregator.precreate_aggregators(4)
        optimizer = AsyncSamplesOptimizer(workers, num_aggregation_workers=4)
        self.assertRaises(ValueError,
                          lambda: optimizer.aggregator.init(aggregators))
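
Here four aggregation workers are requested, but _make_evs() presumably supplies fewer remote evaluators than that, so optimizer.aggregator.init() cannot distribute the rollout workers across the aggregators and raises ValueError.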
Example #4
    def testMultiTierAggregationBadConf(self):
        local, remotes = self._make_evs()
        aggregators = TreeAggregator.precreate_aggregators(4)
        optimizer = AsyncSamplesOptimizer(local, remotes,
                                          {"num_aggregation_workers": 4})
        self.assertRaises(ValueError,
                          lambda: optimizer.aggregator.init(aggregators))
Example #5
    def testMultiTierAggregation(self):
        local, remotes = self._make_evs()
        aggregators = TreeAggregator.precreate_aggregators(1)
        optimizer = AsyncSamplesOptimizer(local, remotes, {
            "num_aggregation_workers": 1,
        })
        optimizer.aggregator.init(aggregators)
        self._wait_for(optimizer, 1000, 1000)
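
Examples #4 and #5 are the same two tests written against an older AsyncSamplesOptimizer signature, which takes the local evaluator, the remote evaluators, and a config dict rather than a WorkerSet plus keyword arguments (compare Examples #1 and #3).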
Example #6
    def _init(self, config, env_creator):
        for k in OPTIMIZER_SHARED_CONFIGS:
            if k not in config["optimizer"]:
                config["optimizer"][k] = config[k]
        policy_cls = self._get_policy_graph()
        self.local_evaluator = self.make_local_evaluator(
            self.env_creator, policy_cls)

        if self.config["num_aggregation_workers"] > 0:
            # Create co-located aggregator actors first for placement pref
            aggregators = TreeAggregator.precreate_aggregators(
                self.config["num_aggregation_workers"])

        self.remote_evaluators = self.make_remote_evaluators(
            env_creator, policy_cls, config["num_workers"])
        self.optimizer = AsyncSamplesOptimizer(self.local_evaluator,
                                               self.remote_evaluators,
                                               config["optimizer"])
        if config["entropy_coeff"] < 0:
            raise DeprecationWarning("entropy_coeff must be >= 0")

        if self.config["num_aggregation_workers"] > 0:
            # Assign the pre-created aggregators to the optimizer
            self.optimizer.aggregator.init(aggregators)
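
    # What follows is the optimizer constructor itself (evidently
    # AsyncSamplesOptimizer.__init__, a separate class from the trainer
    # _init above), showing how the learner and aggregator are selected.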
    def __init__(self,
                 workers,
                 train_batch_size=500,
                 sample_batch_size=50,
                 num_envs_per_worker=1,
                 num_gpus=0,
                 lr=0.0005,
                 replay_buffer_num_slots=0,
                 replay_proportion=0.0,
                 num_data_loader_buffers=1,
                 max_sample_requests_in_flight_per_worker=2,
                 broadcast_interval=1,
                 num_sgd_iter=1,
                 minibatch_buffer_size=1,
                 learner_queue_size=16,
                 num_aggregation_workers=0,
                 _fake_gpus=False):
        PolicyOptimizer.__init__(self, workers)

        self._stats_start_time = time.time()
        self._last_stats_time = {}
        self._last_stats_sum = {}

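        # Use the multi-GPU learner path when multiple GPUs or parallel data
        # loader buffers are configured; otherwise run a single learner thread
        # against the local worker.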
        if num_gpus > 1 or num_data_loader_buffers > 1:
            logger.info(
                "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                    num_gpus, num_data_loader_buffers))
            if num_data_loader_buffers < minibatch_buffer_size:
                raise ValueError(
                    "In multi-gpu mode you must have at least as many "
                    "parallel data loader buffers as minibatch buffers: "
                    "{} vs {}".format(num_data_loader_buffers,
                                      minibatch_buffer_size))
            self.learner = TFMultiGPULearner(
                self.workers.local_worker(),
                lr=lr,
                num_gpus=num_gpus,
                train_batch_size=train_batch_size,
                num_data_loader_buffers=num_data_loader_buffers,
                minibatch_buffer_size=minibatch_buffer_size,
                num_sgd_iter=num_sgd_iter,
                learner_queue_size=learner_queue_size,
                _fake_gpus=_fake_gpus)
        else:
            self.learner = LearnerThread(self.workers.local_worker(),
                                         minibatch_buffer_size, num_sgd_iter,
                                         learner_queue_size)
        self.learner.start()

        # Stats
        self._optimizer_step_timer = TimerStat()
        self._stats_start_time = time.time()
        self._last_stats_time = {}

        if num_aggregation_workers > 0:
            self.aggregator = TreeAggregator(
                workers,
                num_aggregation_workers,
                replay_proportion=replay_proportion,
                max_sample_requests_in_flight_per_worker=(
                    max_sample_requests_in_flight_per_worker),
                replay_buffer_num_slots=replay_buffer_num_slots,
                train_batch_size=train_batch_size,
                sample_batch_size=sample_batch_size,
                broadcast_interval=broadcast_interval)
        else:
            self.aggregator = SimpleAggregator(
                workers,
                replay_proportion=replay_proportion,
                max_sample_requests_in_flight_per_worker=(
                    max_sample_requests_in_flight_per_worker),
                replay_buffer_num_slots=replay_buffer_num_slots,
                train_batch_size=train_batch_size,
                sample_batch_size=sample_batch_size,
                broadcast_interval=broadcast_interval)
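
For context, a minimal end-to-end sketch that reaches this code path through the high-level trainer config instead of constructing the optimizer by hand. The ImpalaTrainer class name, import path, and CartPole-v0 environment are assumptions about the RLlib version these snippets come from; num_workers and num_aggregation_workers are the config keys shown in Example #6.

import ray
from ray.rllib.agents.impala import ImpalaTrainer

ray.init()
trainer = ImpalaTrainer(
    env="CartPole-v0",
    config={
        "num_workers": 4,
        # Fan rollouts in through intermediate aggregation actors rather
        # than sending every sample batch directly to the learner.
        "num_aggregation_workers": 2,
    })
print(trainer.train())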