def make_learner_thread(local_worker, config):
    """Build the learner thread appropriate for the given config.

    Multi-GPU training (or multiple parallel data loader buffers) gets a
    TFMultiGPULearner; the simple single-device case gets a plain
    LearnerThread. Both are returned un-started.

    Arguments:
        local_worker: process-local rollout worker the learner trains on.
        config (dict): trainer config providing GPU/loader/SGD settings.

    Returns:
        A TFMultiGPULearner or LearnerThread instance.

    Raises:
        ValueError: if fewer data loader buffers than minibatch buffers
            are configured in multi-GPU mode.
    """
    # Hoist the config lookups used on both paths.
    num_gpus = config["num_gpus"]
    num_loaders = config["num_data_loader_buffers"]
    buffer_size = config["minibatch_buffer_size"]

    if num_gpus > 1 or num_loaders > 1:
        logger.info(
            "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                num_gpus, num_loaders))
        # Each minibatch buffer drains one loader buffer, so there must be
        # at least as many loaders as minibatch buffers.
        if num_loaders < buffer_size:
            raise ValueError(
                "In multi-gpu mode you must have at least as many "
                "parallel data loader buffers as minibatch buffers: "
                "{} vs {}".format(num_loaders, buffer_size))
        return TFMultiGPULearner(
            local_worker,
            num_gpus=num_gpus,
            lr=config["lr"],
            train_batch_size=config["train_batch_size"],
            num_data_loader_buffers=num_loaders,
            minibatch_buffer_size=buffer_size,
            num_sgd_iter=config["num_sgd_iter"],
            learner_queue_size=config["learner_queue_size"],
            learner_queue_timeout=config["learner_queue_timeout"])

    return LearnerThread(
        local_worker,
        minibatch_buffer_size=buffer_size,
        num_sgd_iter=config["num_sgd_iter"],
        learner_queue_size=config["learner_queue_size"],
        learner_queue_timeout=config["learner_queue_timeout"])
def __init__(self,
             local_evaluator,
             num_gpus=1,
             lr=0.0005,
             train_batch_size=500,
             num_data_loader_buffers=1,
             minibatch_buffer_size=1,
             num_sgd_iter=1,
             learner_queue_size=16,
             num_data_load_threads=16,
             _fake_gpus=False):
    """Initialize a multi-GPU learner thread.

    Arguments:
        local_evaluator: process-local evaluator holding the policy to
            train; only the single default policy is supported.
        num_gpus (int): number of devices to spread SGD over; 0 falls
            back to a single CPU device.
        lr (float): learning rate for the Adam optimizer created below.
        train_batch_size (int): size of train batches; must be divisible
            by the number of devices.
        num_data_loader_buffers (int): number of parallel optimizer
            buffers data is loaded into; each one adds a full graph copy.
        minibatch_buffer_size (int): max number of train batches held in
            the minibatching buffer.
        num_sgd_iter (int): number of SGD passes per train batch.
        learner_queue_size (int): max size of the inbound batch queue.
        num_data_load_threads (int): number of loader threads started to
            fill device memory in parallel.
        _fake_gpus (bool): use CPU devices in place of GPUs (testing).
    """
    # Multi-GPU requires TensorFlow to function.
    import tensorflow as tf

    LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
                           num_sgd_iter, learner_queue_size)
    self.lr = lr
    self.train_batch_size = train_batch_size
    # Pick the device list: single CPU, fake (CPU-backed) "GPUs", or
    # real GPUs.
    if not num_gpus:
        self.devices = ["/cpu:0"]
    elif _fake_gpus:
        self.devices = ["/cpu:{}".format(i) for i in range(num_gpus)]
    else:
        self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
    logger.info("TFMultiGPULearner devices {}".format(self.devices))
    # The train batch is split evenly across devices, so it must divide.
    assert self.train_batch_size % len(self.devices) == 0
    assert self.train_batch_size >= len(self.devices), "batch too small"

    if set(self.local_evaluator.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")
    self.policy = self.local_evaluator.policy_map[DEFAULT_POLICY_ID]

    # per-GPU graph copies created below must share vars with the policy
    # reuse is set to AUTO_REUSE because Adam nodes are created after
    # all of the device copies are created.
    self.par_opt = []
    with self.local_evaluator.tf_sess.graph.as_default():
        with self.local_evaluator.tf_sess.as_default():
            with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                # Recurrent policies also feed state inputs and sequence
                # lengths through the parallel optimizer.
                if self.policy._state_inputs:
                    rnn_inputs = self.policy._state_inputs + [
                        self.policy._seq_lens
                    ]
                else:
                    rnn_inputs = []
                adam = tf.train.AdamOptimizer(self.lr)
                # One parallel optimizer (with per-device graph copies)
                # per data loader buffer.
                for _ in range(num_data_loader_buffers):
                    self.par_opt.append(
                        LocalSyncParallelOptimizer(
                            adam,
                            self.devices,
                            [v for _, v in self.policy._loss_inputs],
                            rnn_inputs,
                            999999,  # it will get rounded down
                            self.policy.copy))

            self.sess = self.local_evaluator.tf_sess
            self.sess.run(tf.global_variables_initializer())

    # Optimizers cycle between the idle queue (awaiting data) and the
    # ready queue (loaded, awaiting SGD).
    self.idle_optimizers = queue.Queue()
    self.ready_optimizers = queue.Queue()
    for opt in self.par_opt:
        self.idle_optimizers.put(opt)
    # NOTE(review): only the last loader thread handle is kept on
    # self.loader_thread; the earlier threads keep running but are not
    # referenced afterwards — confirm this is intentional.
    for i in range(num_data_load_threads):
        self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
        self.loader_thread.start()

    self.minibatch_buffer = MinibatchBuffer(
        self.ready_optimizers, minibatch_buffer_size, num_sgd_iter)
def __init__(self,
             workers,
             train_batch_size=500,
             sample_batch_size=50,
             num_envs_per_worker=1,
             num_gpus=0,
             lr=0.0005,
             replay_buffer_num_slots=0,
             replay_proportion=0.0,
             num_data_loader_buffers=1,
             max_sample_requests_in_flight_per_worker=2,
             broadcast_interval=1,
             num_sgd_iter=1,
             minibatch_buffer_size=1,
             learner_queue_size=16,
             num_aggregation_workers=0,
             _fake_gpus=False):
    """Initialize the async-samples optimizer.

    Builds the learner thread (multi-GPU variant when more than one GPU
    or loader buffer is configured), starts it, and sets up the sample
    aggregator (tree-based when aggregation workers are requested).

    Arguments:
        workers: the WorkerSet providing local and remote workers.
        train_batch_size (int): size of train batches for SGD.
        sample_batch_size (int): rollout fragment size per worker.
        num_envs_per_worker (int): unused here; accepted for config
            compatibility.
        num_gpus (int): number of GPUs for data-parallel SGD.
        lr (float): learning rate.
        replay_buffer_num_slots (int): replay buffer capacity in batches.
        replay_proportion (float): ratio of replayed to new samples.
        num_data_loader_buffers (int): parallel GPU loader buffers.
        max_sample_requests_in_flight_per_worker (int): sampling
            pipelining depth per remote worker.
        broadcast_interval (int): max train steps between weight
            broadcasts.
        num_sgd_iter (int): SGD passes per train batch.
        minibatch_buffer_size (int): max train batches buffered for SGD.
        learner_queue_size (int): max size of inbound learner queue.
        num_aggregation_workers (int): if > 0, use a TreeAggregator with
            this many intermediate aggregation actors.
        _fake_gpus (bool): use CPUs in place of GPUs (testing only).

    Raises:
        ValueError: if multi-GPU mode is configured with fewer data
            loader buffers than minibatch buffers.
    """
    PolicyOptimizer.__init__(self, workers)

    # Throughput stats are rates over wall-clock time since these marks.
    self._stats_start_time = time.time()
    self._last_stats_time = {}
    self._last_stats_sum = {}

    if num_gpus > 1 or num_data_loader_buffers > 1:
        logger.info(
            "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                num_gpus, num_data_loader_buffers))
        # Each minibatch buffer drains a loader buffer, so loaders must
        # be at least as numerous.
        if num_data_loader_buffers < minibatch_buffer_size:
            raise ValueError(
                "In multi-gpu mode you must have at least as many "
                "parallel data loader buffers as minibatch buffers: "
                "{} vs {}".format(num_data_loader_buffers,
                                  minibatch_buffer_size))
        self.learner = TFMultiGPULearner(
            self.workers.local_worker(),
            lr=lr,
            num_gpus=num_gpus,
            train_batch_size=train_batch_size,
            num_data_loader_buffers=num_data_loader_buffers,
            minibatch_buffer_size=minibatch_buffer_size,
            num_sgd_iter=num_sgd_iter,
            learner_queue_size=learner_queue_size,
            _fake_gpus=_fake_gpus)
    else:
        self.learner = LearnerThread(self.workers.local_worker(),
                                     minibatch_buffer_size, num_sgd_iter,
                                     learner_queue_size)
    self.learner.start()

    # Stats
    # Fix: the original re-assigned _stats_start_time and
    # _last_stats_time here a second time; the duplicate initialization
    # has been removed (they are set once above, before the learner is
    # constructed).
    self._optimizer_step_timer = TimerStat()

    if num_aggregation_workers > 0:
        self.aggregator = TreeAggregator(
            workers,
            num_aggregation_workers,
            replay_proportion=replay_proportion,
            max_sample_requests_in_flight_per_worker=(
                max_sample_requests_in_flight_per_worker),
            replay_buffer_num_slots=replay_buffer_num_slots,
            train_batch_size=train_batch_size,
            sample_batch_size=sample_batch_size,
            broadcast_interval=broadcast_interval)
    else:
        self.aggregator = SimpleAggregator(
            workers,
            replay_proportion=replay_proportion,
            max_sample_requests_in_flight_per_worker=(
                max_sample_requests_in_flight_per_worker),
            replay_buffer_num_slots=replay_buffer_num_slots,
            train_batch_size=train_batch_size,
            sample_batch_size=sample_batch_size,
            broadcast_interval=broadcast_interval)
class AsyncSamplesOptimizer(PolicyOptimizer):
    """Main event loop of the IMPALA architecture.

    This class coordinates the data transfers between the learner thread
    and remote workers (IMPALA actors).
    """

    def __init__(self,
                 workers,
                 train_batch_size=500,
                 sample_batch_size=50,
                 num_envs_per_worker=1,
                 num_gpus=0,
                 lr=0.0005,
                 replay_buffer_num_slots=0,
                 replay_proportion=0.0,
                 num_data_loader_buffers=1,
                 max_sample_requests_in_flight_per_worker=2,
                 broadcast_interval=1,
                 num_sgd_iter=1,
                 minibatch_buffer_size=1,
                 learner_queue_size=16,
                 num_aggregation_workers=0,
                 _fake_gpus=False):
        """Set up the learner thread and sample aggregator.

        Raises:
            ValueError: if multi-GPU mode is configured with fewer data
                loader buffers than minibatch buffers.
        """
        PolicyOptimizer.__init__(self, workers)

        # Throughput stats are rates over wall time since these marks.
        self._stats_start_time = time.time()
        self._last_stats_time = {}
        self._last_stats_sum = {}

        if num_gpus > 1 or num_data_loader_buffers > 1:
            logger.info(
                "Enabling multi-GPU mode, {} GPUs, {} parallel loaders".format(
                    num_gpus, num_data_loader_buffers))
            # Each minibatch buffer drains a loader buffer, so loaders
            # must be at least as numerous.
            if num_data_loader_buffers < minibatch_buffer_size:
                raise ValueError(
                    "In multi-gpu mode you must have at least as many "
                    "parallel data loader buffers as minibatch buffers: "
                    "{} vs {}".format(num_data_loader_buffers,
                                      minibatch_buffer_size))
            self.learner = TFMultiGPULearner(
                self.workers.local_worker(),
                lr=lr,
                num_gpus=num_gpus,
                train_batch_size=train_batch_size,
                num_data_loader_buffers=num_data_loader_buffers,
                minibatch_buffer_size=minibatch_buffer_size,
                num_sgd_iter=num_sgd_iter,
                learner_queue_size=learner_queue_size,
                _fake_gpus=_fake_gpus)
        else:
            self.learner = LearnerThread(self.workers.local_worker(),
                                         minibatch_buffer_size, num_sgd_iter,
                                         learner_queue_size)
        self.learner.start()

        # Stats
        # Fix: the original re-assigned _stats_start_time and
        # _last_stats_time here a second time; the duplicate
        # initialization has been removed.
        self._optimizer_step_timer = TimerStat()

        if num_aggregation_workers > 0:
            self.aggregator = TreeAggregator(
                workers,
                num_aggregation_workers,
                replay_proportion=replay_proportion,
                max_sample_requests_in_flight_per_worker=(
                    max_sample_requests_in_flight_per_worker),
                replay_buffer_num_slots=replay_buffer_num_slots,
                train_batch_size=train_batch_size,
                sample_batch_size=sample_batch_size,
                broadcast_interval=broadcast_interval)
        else:
            self.aggregator = SimpleAggregator(
                workers,
                replay_proportion=replay_proportion,
                max_sample_requests_in_flight_per_worker=(
                    max_sample_requests_in_flight_per_worker),
                replay_buffer_num_slots=replay_buffer_num_slots,
                train_batch_size=train_batch_size,
                sample_batch_size=sample_batch_size,
                broadcast_interval=broadcast_interval)

    def add_stat_val(self, key, val):
        """Accumulate `val` into the running sum for stat `key`.

        First sighting of a key starts its rate window at the optimizer
        start time.
        """
        if key not in self._last_stats_sum:
            self._last_stats_sum[key] = 0
            self._last_stats_time[key] = self._stats_start_time
        self._last_stats_sum[key] += val

    def get_mean_stats_and_reset(self):
        """Return per-key rates (sum / elapsed seconds) and reset sums.

        Each key's window restarts at the current time.
        """
        now = time.time()
        mean_stats = {
            key: round(val / (now - self._last_stats_time[key]), 3)
            for key, val in self._last_stats_sum.items()
        }
        for key in self._last_stats_sum.keys():
            self._last_stats_sum[key] = 0
            self._last_stats_time[key] = time.time()
        return mean_stats

    @override(PolicyOptimizer)
    def step(self):
        """Run one round of sample collection and training dispatch."""
        if len(self.workers.remote_workers()) == 0:
            raise ValueError("Config num_workers=0 means training will hang!")
        assert self.learner.is_alive()
        with self._optimizer_step_timer:
            sample_timesteps, train_timesteps = self._step()

        if sample_timesteps > 0:
            self.add_stat_val("sample_throughput", sample_timesteps)
        if train_timesteps > 0:
            self.add_stat_val("train_throughput", train_timesteps)

        self.num_steps_sampled += sample_timesteps
        self.num_steps_trained += train_timesteps

    @override(PolicyOptimizer)
    def stop(self):
        # Signal the learner thread loop to exit; it is not joined here.
        self.learner.stopped = True

    @override(PolicyOptimizer)
    def reset(self, remote_workers):
        self.workers.reset(remote_workers)
        self.aggregator.reset(remote_workers)

    @override(PolicyOptimizer)
    def stats(self):
        """Return aggregator, throughput, timing, and learner stats."""

        def timer_to_ms(timer):
            return round(1000 * timer.mean, 3)

        stats = self.aggregator.stats()
        stats.update(self.get_mean_stats_and_reset())
        stats["timing_breakdown"] = {
            "optimizer_step_time_ms": timer_to_ms(self._optimizer_step_timer),
            "learner_grad_time_ms": timer_to_ms(self.learner.grad_timer),
            "learner_load_time_ms": timer_to_ms(self.learner.load_timer),
            "learner_load_wait_time_ms": timer_to_ms(
                self.learner.load_wait_timer),
            "learner_dequeue_time_ms": timer_to_ms(self.learner.queue_timer),
        }
        stats["learner_queue"] = self.learner.learner_queue_size.stats()
        if self.learner.stats:
            stats["learner"] = self.learner.stats
        return dict(PolicyOptimizer.stats(self), **stats)

    def _step(self):
        """Feed new train batches to the learner and drain its output.

        Returns:
            (sample_timesteps, train_timesteps) counted this round.
        """
        sample_timesteps, train_timesteps = 0, 0

        for train_batch in self.aggregator.iter_train_batches():
            sample_timesteps += train_batch.count
            self.learner.inqueue.put(train_batch)
            # Broadcast fresh weights to workers once the learner has
            # actually updated them and the aggregator is due.
            if (self.learner.weights_updated
                    and self.aggregator.should_broadcast()):
                self.aggregator.broadcast_new_weights()

        # Drain completed train batch counts without blocking.
        while not self.learner.outqueue.empty():
            count = self.learner.outqueue.get()
            train_timesteps += count

        return sample_timesteps, train_timesteps
def __init__(self,
             local_worker,
             num_gpus=1,
             lr=0.0005,
             train_batch_size=500,
             num_data_loader_buffers=1,
             minibatch_buffer_size=1,
             num_sgd_iter=1,
             learner_queue_size=16,
             learner_queue_timeout=300,
             num_data_load_threads=16,
             _fake_gpus=False):
    """Initialize a multi-gpu learner thread.

    Arguments:
        local_worker (RolloutWorker): process local rollout worker holding
            policies this thread will call learn_on_batch() on
        num_gpus (int): number of GPUs to use for data-parallel SGD
        lr (float): learning rate
        train_batch_size (int): size of batches to learn on
        num_data_loader_buffers (int): number of buffers to load data into
            in parallel. Each buffer is of size of train_batch_size and
            increases GPU memory usage proportionally.
        minibatch_buffer_size (int): max number of train batches to store
            in the minibatching buffer
        num_sgd_iter (int): number of passes to learn on per train batch
        learner_queue_size (int): max size of queue of inbound
            train batches to this thread
        learner_queue_timeout (int): raise an exception if the queue has
            been empty for this long in seconds
        num_data_load_threads (int): number of threads to use to load
            data into GPU memory in parallel
        _fake_gpus (bool): use CPU devices in place of GPUs (testing)
    """
    LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                           num_sgd_iter, learner_queue_size,
                           learner_queue_timeout)
    self.lr = lr
    self.train_batch_size = train_batch_size
    # Pick the device list: single CPU, fake (CPU-backed) "GPUs", or
    # real GPUs. ceil() allows fractional num_gpus values.
    if not num_gpus:
        self.devices = ["/cpu:0"]
    elif _fake_gpus:
        self.devices = [
            "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
        ]
    else:
        self.devices = [
            "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
        ]
    logger.info("TFMultiGPULearner devices {}".format(self.devices))
    # The train batch is split evenly across devices, so it must divide.
    assert self.train_batch_size % len(self.devices) == 0
    assert self.train_batch_size >= len(self.devices), "batch too small"

    if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")
    self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

    # per-GPU graph copies created below must share vars with the policy
    # reuse is set to AUTO_REUSE because Adam nodes are created after
    # all of the device copies are created.
    self.par_opt = []
    with self.local_worker.tf_sess.graph.as_default():
        with self.local_worker.tf_sess.as_default():
            with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                # Recurrent policies also feed state inputs and sequence
                # lengths through the parallel optimizer.
                if self.policy._state_inputs:
                    rnn_inputs = self.policy._state_inputs + [
                        self.policy._seq_lens
                    ]
                else:
                    rnn_inputs = []
                adam = tf.train.AdamOptimizer(self.lr)
                # One parallel optimizer (with per-device graph copies)
                # per data loader buffer.
                for _ in range(num_data_loader_buffers):
                    self.par_opt.append(
                        LocalSyncParallelOptimizer(
                            adam,
                            self.devices,
                            [v for _, v in self.policy._loss_inputs],
                            rnn_inputs,
                            999999,  # it will get rounded down
                            self.policy.copy))

            self.sess = self.local_worker.tf_sess
            self.sess.run(tf.global_variables_initializer())

    # Optimizers cycle between the idle queue (awaiting data) and the
    # ready queue (loaded, awaiting SGD).
    self.idle_optimizers = queue.Queue()
    self.ready_optimizers = queue.Queue()
    for opt in self.par_opt:
        self.idle_optimizers.put(opt)
    # NOTE(review): only the last loader thread handle is kept on
    # self.loader_thread; the earlier threads keep running but are not
    # referenced afterwards — confirm this is intentional.
    for i in range(num_data_load_threads):
        self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
        self.loader_thread.start()

    self.minibatch_buffer = MinibatchBuffer(self.ready_optimizers,
                                            minibatch_buffer_size,
                                            learner_queue_timeout,
                                            num_sgd_iter)