def __init__(self,
             *,
             workers: WorkerSet,
             sgd_minibatch_size: int,
             num_sgd_iter: int,
             num_gpus: int,
             shuffle_sequences: bool,
             policies: List[PolicyID] = frozenset([]),
             _fake_gpus: bool = False,
             framework: str = "tf"):
    """Build one LocalSyncParallelOptimizer per policy to train.

    Args:
        workers: Worker set; its local worker supplies the TF session
            and the policy objects.
        sgd_minibatch_size: Requested total minibatch size. It is
            rounded down to a multiple of the number of devices.
        num_sgd_iter: Stored on the instance; not otherwise used here.
        num_gpus: Number of device towers (fractional values are
            rounded up). A falsy value selects a single CPU device.
        shuffle_sequences: Stored on the instance; not otherwise used
            here.
        policies: IDs of the policies to build optimizers for. When
            empty, the local worker's `policies_to_train` is used.
        _fake_gpus: If true, every tower is placed on "/cpu:0".
        framework: Stored on the instance; not otherwise used here.
    """
    self.workers = workers
    self.policies = policies or workers.local_worker().policies_to_train
    self.num_sgd_iter = num_sgd_iter
    self.sgd_minibatch_size = sgd_minibatch_size
    self.shuffle_sequences = shuffle_sequences
    self.framework = framework

    # Work out which devices the towers live on. Without any GPUs we
    # fall back to a single simulated (CPU) tower.
    simulate_on_cpu = bool(_fake_gpus or not num_gpus)
    tower_count = int(math.ceil(num_gpus if num_gpus else 1))
    device_kind = "cpu" if simulate_on_cpu else "gpu"
    self.devices = [
        "/{}:{}".format(device_kind, 0 if simulate_on_cpu else tower)
        for tower in range(tower_count)
    ]
    num_devices = len(self.devices)

    # Total batch size over all towers, rounded down so that it splits
    # evenly across them.
    self.batch_size = int(sgd_minibatch_size / num_devices) * num_devices
    assert self.batch_size % num_devices == 0
    assert self.batch_size >= num_devices, "batch size too small"
    # Share of the total batch handled by each tower.
    self.per_device_batch_size = int(self.batch_size / num_devices)

    local_worker = self.workers.local_worker()
    session = local_worker.tf_sess

    # The per-device graph copies created below must share variables
    # with the policy. AUTO_REUSE is required because the Adam nodes are
    # only created after all of the device copies exist.
    self.optimizers = {}
    with session.graph.as_default(), session.as_default():
        for policy_id in self.policies:
            policy = local_worker.get_policy(policy_id)
            with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
                state_inputs = policy._state_inputs
                rnn_inputs = ((state_inputs + [policy._seq_lens])
                              if state_inputs else [])
                loss_inputs = list(policy._loss_input_dict_no_rnn.values())
                self.optimizers[policy_id] = LocalSyncParallelOptimizer(
                    policy._optimizer,
                    self.devices,
                    loss_inputs,
                    rnn_inputs,
                    self.per_device_batch_size,
                    policy.copy,
                )

        self.sess = session
        self.sess.run(tf1.global_variables_initializer())
def __init__(self,
             workers: WorkerSet,
             sgd_minibatch_size: int,
             num_sgd_iter: int,
             num_gpus: int,
             rollout_fragment_length: int,
             num_envs_per_worker: int,
             train_batch_size: int,
             shuffle_sequences: bool,
             policies: List[PolicyID] = frozenset([]),
             _fake_gpus: bool = False):
    """Create a LocalSyncParallelOptimizer for each policy to train.

    Args:
        workers: Worker set; its local worker provides the TF session
            and the policies.
        sgd_minibatch_size: Requested total minibatch size; rounded
            down to a multiple of the device count.
        num_sgd_iter: Stored on the instance; not otherwise used here.
        num_gpus: Number of device towers (fractional values are
            rounded up). A falsy value means one CPU device.
        rollout_fragment_length: Accepted but not used in this method.
        num_envs_per_worker: Accepted but not used in this method.
        train_batch_size: Accepted but not used in this method.
        shuffle_sequences: Stored on the instance; not otherwise used
            here.
        policies: IDs of the policies to optimize. When empty, the
            local worker's `policies_to_train` is used instead.
        _fake_gpus: If true, towers are placed on "/cpu:<i>" devices
            instead of "/gpu:<i>".
    """
    self.workers = workers
    self.policies = policies or workers.local_worker().policies_to_train
    self.num_sgd_iter = num_sgd_iter
    self.sgd_minibatch_size = sgd_minibatch_size
    self.shuffle_sequences = shuffle_sequences

    # Decide which devices to build towers on; no GPUs means a single
    # CPU tower.
    if not num_gpus:
        on_cpu, requested = True, 1
    else:
        on_cpu, requested = _fake_gpus, num_gpus
    kind = "cpu" if on_cpu else "gpu"
    self.devices = []
    for index in range(int(math.ceil(requested))):
        self.devices.append("/{}:{}".format(kind, index))
    device_count = len(self.devices)

    # Round the total batch down so that it divides evenly over devices.
    self.batch_size = int(sgd_minibatch_size / device_count) * device_count
    assert self.batch_size % device_count == 0
    assert self.batch_size >= device_count, "batch size too small"
    self.per_device_batch_size = int(self.batch_size / device_count)

    worker = self.workers.local_worker()
    tf_session = worker.tf_sess

    # The per-device graph copies created below must share variables
    # with the policy. AUTO_REUSE is required because the Adam nodes are
    # only created after all of the device copies exist.
    self.optimizers = {}
    with tf_session.graph.as_default(), tf_session.as_default():
        for policy_id in self.policies:
            policy = worker.get_policy(policy_id)
            with tf.variable_scope(policy_id, reuse=tf.AUTO_REUSE):
                rnn_inputs = []
                if policy._state_inputs:
                    rnn_inputs = policy._state_inputs + [policy._seq_lens]
                # `_loss_inputs` is iterated as pairs; only the second
                # item of each pair is handed to the optimizer.
                placeholders = [
                    tensor for _name, tensor in policy._loss_inputs
                ]
                self.optimizers[policy_id] = LocalSyncParallelOptimizer(
                    policy._optimizer,
                    self.devices,
                    placeholders,
                    rnn_inputs,
                    self.per_device_batch_size,
                    policy.copy,
                )

        self.sess = tf_session
        self.sess.run(tf.global_variables_initializer())
def add_optimizer(self, policy_id):
    """Create and register a parallel optimizer for `policy_id`.

    Looks the policy up on the local worker, builds a
    LocalSyncParallelOptimizer for it inside a variable scope named
    after the policy ID (with AUTO_REUSE), and stores the result in
    `self.optimizers[policy_id]`.

    Args:
        policy_id: ID of the policy to build the optimizer for.
    """
    policy = self.workers.local_worker().get_policy(policy_id)
    with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
        # Recurrent policies additionally feed their state inputs and
        # the sequence lengths; others feed no RNN inputs at all.
        state_inputs = policy._state_inputs
        rnn_inputs = ((state_inputs + [policy._seq_lens])
                      if state_inputs else [])
        loss_inputs = list(policy._loss_input_dict_no_rnn.values())
        optimizer = LocalSyncParallelOptimizer(
            policy._optimizer,
            self.devices,
            loss_inputs,
            rnn_inputs,
            self.per_device_batch_size,
            policy.copy,
        )
        self.optimizers[policy_id] = optimizer
def __init__(self,
             local_worker,
             num_gpus=1,
             lr=0.0005,
             train_batch_size=500,
             num_data_loader_buffers=1,
             minibatch_buffer_size=1,
             num_sgd_iter=1,
             learner_queue_size=16,
             learner_queue_timeout=300,
             num_data_load_threads=16,
             _fake_gpus=False):
    """Initialize a multi-gpu learner thread.

    Arguments:
        local_worker (RolloutWorker): process local rollout worker
            holding policies this thread will call learn_on_batch() on
        num_gpus (int): number of GPUs to use for data-parallel SGD;
            a falsy value selects the single device "/cpu:0"
        lr (float): learning rate passed to the Adam optimizer
        train_batch_size (int): size of batches to learn on; must be a
            multiple of the number of devices
        num_data_loader_buffers (int): number of buffers to load data
            into in parallel. Each buffer is of size of
            train_batch_size and increases GPU memory usage
            proportionally.
        minibatch_buffer_size (int): max number of train batches to
            store in the minibatching buffer
        num_sgd_iter (int): number of passes to learn on per train
            batch
        learner_queue_size (int): max size of queue of inbound train
            batches to this thread
        learner_queue_timeout (int): timeout forwarded to the base
            LearnerThread and to the MinibatchBuffer (units are not
            visible here; presumably seconds)
        num_data_load_threads (int): number of threads to use to load
            data into GPU memory in parallel
        _fake_gpus (bool): if true (and num_gpus is truthy), towers are
            placed on "/cpu:<i>" devices instead of "/gpu:<i>"

    Raises:
        NotImplementedError: if the local worker's policy map contains
            anything other than exactly DEFAULT_POLICY_ID.
    """
    LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                           num_sgd_iter, learner_queue_size,
                           learner_queue_timeout)
    self.lr = lr
    self.train_batch_size = train_batch_size

    # Pick the devices the towers are placed on.
    if not num_gpus:
        self.devices = ["/cpu:0"]
    else:
        device_template = "/cpu:{}" if _fake_gpus else "/gpu:{}"
        self.devices = [
            device_template.format(index)
            for index in range(int(math.ceil(num_gpus)))
        ]
    logger.info("TFMultiGPULearner devices {}".format(self.devices))

    device_count = len(self.devices)
    assert self.train_batch_size % device_count == 0
    assert self.train_batch_size >= device_count, "batch too small"

    worker = self.local_worker
    if set(worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")
    self.policy = worker.policy_map[DEFAULT_POLICY_ID]
    policy = self.policy

    # The per-device graph copies created below must share variables
    # with the policy. AUTO_REUSE is required because the Adam nodes are
    # only created after all of the device copies exist.
    self.par_opt = []
    session = worker.tf_sess
    with session.graph.as_default(), session.as_default():
        with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
            state_inputs = policy._state_inputs
            rnn_inputs = ((state_inputs + [policy._seq_lens])
                          if state_inputs else [])
            adam = tf.train.AdamOptimizer(self.lr)
            # One optimizer per data-loader buffer; each one gets its
            # own freshly built list of loss input placeholders.
            self.par_opt.extend(
                LocalSyncParallelOptimizer(
                    adam,
                    self.devices,
                    [tensor for _name, tensor in policy._loss_inputs],
                    rnn_inputs,
                    999999,  # it will get rounded down
                    policy.copy,
                ) for _buffer in range(num_data_loader_buffers))

        self.sess = session
        self.sess.run(tf.global_variables_initializer())

    # All optimizers start out idle; loader threads move them to the
    # ready queue.
    self.idle_optimizers = queue.Queue()
    self.ready_optimizers = queue.Queue()
    for optimizer in self.par_opt:
        self.idle_optimizers.put(optimizer)

    # NOTE: `self.loader_thread` is reassigned on every iteration, so
    # after the loop it only references the last thread started. Only
    # the first thread is created with share_stats=True.
    for thread_index in range(num_data_load_threads):
        loader = _LoaderThread(self, share_stats=(thread_index == 0))
        self.loader_thread = loader
        loader.start()

    self.minibatch_buffer = MinibatchBuffer(
        self.ready_optimizers,
        minibatch_buffer_size,
        learner_queue_timeout,
        num_sgd_iter,
    )