def prepare_gan(self):
    # Place the discriminator on the second GPU when two are available,
    # otherwise share the first GPU with the policy.
    if self.args.num_gpu == 2:
        worker_device = f"/job:worker/task:{self.task}/gpu:1"
    else:
        worker_device = f"/job:worker/task:{self.task}/gpu:0"

    self.replay = replay.ReplayBuffer(self.args, self.input_shape)
    self.replay_dequeue = \
        self.replay_queue.dequeue_many(self.args.discrim_batch_size)
    self.replay_thread = rl_utils.ReplayThread(
        self.replay, self.replay_dequeue)

    with tf.device(worker_device):
        self.discrim_step = tf.get_variable(
            "discrim_step", [], tf.int32,
            initializer=tf.constant_initializer(0, dtype=tf.int32),
            trainable=False)

        self.discrim = models.Discriminator(
            self.args, self.input_shape)

        # `grads` was never defined in the original; the discriminator loss
        # is assumed to be exposed as `self.discrim.loss`.
        grads = tf.gradients(self.discrim.loss, self.discrim.var_list)
        grads_and_vars = list(zip(grads, self.discrim.var_list))

        # Advance the discriminator's own step counter. The original
        # incremented self.policy_step by tf.shape(pi.x)[0], which looks like
        # a copy-paste slip from the policy optimizer; `self.discrim.x` is
        # assumed to be the discriminator's input placeholder.
        discrim_inc_step = self.discrim_step.assign_add(
            tf.shape(self.discrim.x)[0])

        # Reuses the policy learning rate, as in the original; a separate
        # discriminator learning rate may be intended.
        discrim_opt = tf.train.AdamOptimizer(self.args.policy_lr)
        self.discrim_optim = tf.group(
            discrim_opt.apply_gradients(grads_and_vars), discrim_inc_step)
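# Sketch (assumption, not the project's actual rl_utils.ReplayThread): the thread
# built above is taken to be a small background worker that repeatedly runs the
# dequeue op and pushes each batch into the replay buffer so the discriminator can
# later sample from it. ReplayBuffer.add() and start_thread() are hypothetical names.
import threading

class _ReplayThreadSketch(threading.Thread):
    def __init__(self, replay_buffer, dequeue_op):
        super().__init__(daemon=True)
        self.replay_buffer = replay_buffer
        self.dequeue_op = dequeue_op
        self.sess = None

    def start_thread(self, sess):
        # The session is only available after graph construction, so it is
        # handed in here before the thread starts.
        self.sess = sess
        self.start()

    def run(self):
        # Keep draining batches produced by the actor into the replay buffer.
        while True:
            batch = self.sess.run(self.dequeue_op)
            self.replay_buffer.add(batch)  # hypothetical ReplayBuffer.add()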
def prepare_gan(self):
    self.replay = replay.ReplayBuffer(self.args, self.input_shape)
    self.replay_dequeue = \
        self.replay_queue.dequeue_many(self.args.disc_batch_size)
    self.replay_thread = rl_utils.ReplayThread(
        self.replay, self.replay_dequeue)

    self.local_disc = models.Discriminator(
        self.args, self.input_shape, "local")
    self.disc_sync = ut.tf.get_sync_op(
        self.global_disc.var_list, self.local_disc.var_list)
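# Sketch (assumption about what ut.tf.get_sync_op does): in distributed A3C-style
# training the sync op usually copies the global replica's weights into the local
# replica before each update; a minimal version of that pattern looks like this.
import tensorflow as tf

def _get_sync_op_sketch(global_vars, local_vars):
    # Pair variables positionally (both lists must be in the same order) and
    # assign each global variable's value to its local counterpart.
    return tf.group(*[local_v.assign(global_v)
                      for global_v, local_v in zip(global_vars, local_vars)])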