Example #1
    def prepare_gan(self):
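        # Place the discriminator on the second GPU when two are available,
        # otherwise fall back to the first one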
        if self.args.num_gpu == 2:
            worker_device = f"/job:worker/task:{self.task}/gpu:1"
        else:
            worker_device = f"/job:worker/task:{self.task}/gpu:0"

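        # Replay buffer that feeds discriminator mini-batches through a queue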
        self.replay = replay.ReplayBuffer(self.args, self.input_shape)
        self.replay_dequeue = \
                self.replay_queue.dequeue_many(self.args.discrim_batch_size)

        self.replay_thread = rl_utils.ReplayThread(
                self.replay, self.replay_dequeue)

        with tf.device(worker_device):
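            # Non-trainable counter tracking how many samples the
            # discriminator has been trained on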
            self.discrim_step = tf.get_variable(
                    "discrim_step", [], tf.int32,
                    initializer=tf.constant_initializer(0, dtype=tf.int32),
                    trainable=False)

            self.discrim = models.Discriminator(
                    self.args, self.input_shape)

            # Gradients of the discriminator loss w.r.t. its variables
            # (assumes the Discriminator model exposes a `loss` tensor)
            grads = tf.gradients(self.discrim.loss, self.discrim.var_list)
            grads_and_vars = list(zip(grads, self.discrim.var_list))
            # Advance the discriminator step counter by one mini-batch of samples
            discrim_inc_step = self.discrim_step.assign_add(
                    self.args.discrim_batch_size)

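            # Adam update for the discriminator; gradient application and the
            # step-counter increment are grouped into a single training op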
            discrim_opt = tf.train.AdamOptimizer(self.args.policy_lr)
            self.discrim_optim = tf.group(
                    discrim_opt.apply_gradients(grads_and_vars),
                    discrim_inc_step)
Example #2
    def prepare_gan(self):
        self.replay = replay.ReplayBuffer(self.args, self.input_shape)
        self.replay_dequeue = \
                self.replay_queue.dequeue_many(self.args.disc_batch_size)

        self.replay_thread = rl_utils.ReplayThread(
                self.replay, self.replay_dequeue)

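        # Worker-local copy of the discriminator network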
        self.local_disc = models.Discriminator(
                self.args, self.input_shape, "local")

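        # Op that syncs this worker's local discriminator variables with the
        # shared global discriminator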
        self.disc_sync = ut.tf.get_sync_op(
                self.global_disc.var_list,
                self.local_disc.var_list)