def build_model(self):
    all_d_grads = []
    all_g_grads = []
    config = self.config
    d_opt = tf.train.AdamOptimizer(config.discriminator_learning_rate,
                                   beta1=config.beta1)
    g_opt = tf.train.AdamOptimizer(config.generator_learning_rate,
                                   beta1=config.beta1)

    with tf.variable_scope(tf.get_variable_scope()):
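        # Build one model tower per device; the first tower creates the variables
        # and every later tower shares them via reuse_variables() below.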
        for idx, device in enumerate(self.devices):
            with tf.device("/%s" % device):
                with tf.name_scope("device_%s" % idx):
                    with variables_on_gpu0():
                        build_model_single_gpu(self, idx)
                        d_grads = d_opt.compute_gradients(self.d_losses[-1],
                                                          var_list=self.d_vars)
                        g_grads = g_opt.compute_gradients(self.g_losses[-1],
                                                          var_list=self.g_vars)
                        all_d_grads.append(d_grads)
                        all_g_grads.append(g_grads)
                        tf.get_variable_scope().reuse_variables()
    avg_d_grads = avg_grads(all_d_grads)
    avg_g_grads = avg_grads(all_g_grads)
    self.d_optim = d_opt.apply_gradients(avg_d_grads)
    self.g_optim = g_opt.apply_gradients(avg_g_grads)
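
The avg_grads helper used above (and in the later examples) is not defined on this page. Below is a minimal sketch of the conventional TF1 tower-gradient averaging function it presumably corresponds to; the body is an assumption, not the project's actual implementation:

import tensorflow as tf

def avg_grads(tower_grads):
    """Average the (gradient, variable) lists returned by compute_gradients on each tower."""
    averaged = []
    # tower_grads holds one list of (grad, var) pairs per tower; zip(*...) regroups
    # them so each iteration sees every tower's gradient for a single variable.
    for grads_and_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grads_and_vars], axis=0)
        grad = tf.reduce_mean(grads, axis=0)
        # Variables are shared across towers, so keep the one from the first tower.
        averaged.append((grad, grads_and_vars[0][1]))
    return averaged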
Example #2
def build_model(self):
    all_d_grads = []
    all_g_grads = []
    config = self.config
    d_opt = tf.train.AdamOptimizer(config.discriminator_learning_rate, beta1=config.beta1)
    g_opt = tf.train.AdamOptimizer(config.generator_learning_rate, beta1=config.beta1)

    for idx, device in enumerate(self.devices):
        with tf.device("/%s" % device):
            with tf.name_scope("device_%s" % idx):
                with variables_on_gpu0():
                    build_model_single_gpu(self, idx)
                    d_grads = d_opt.compute_gradients(self.d_losses[-1], var_list=self.d_vars)
                    g_grads = g_opt.compute_gradients(self.g_losses[-1], var_list=self.g_vars)
                    all_d_grads.append(d_grads)
                    all_g_grads.append(g_grads)
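                    # Note: with no enclosing variable_scope (compare the first
                    # example), this switches `reuse` on for the top-level scope
                    # for the rest of graph construction.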
                    tf.get_variable_scope().reuse_variables()
    avg_d_grads = avg_grads(all_d_grads)
    avg_g_grads = avg_grads(all_g_grads)
    self.d_optim = d_opt.apply_gradients(avg_d_grads)
    self.g_optim = g_opt.apply_gradients(avg_g_grads)
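
Both variants above build every tower inside a variables_on_gpu0() context manager, which is also not shown here. A minimal sketch of what such a helper usually does, assuming it pins variable creation to /gpu:0 by temporarily wrapping tf.get_variable (the actual helper in the source repository may differ):

from contextlib import contextmanager
import tensorflow as tf

@contextmanager
def variables_on_gpu0():
    # Variables created inside the block land on /gpu:0, while the surrounding
    # tf.device('/gpu:i') still places each tower's compute ops on its own GPU.
    original_get_variable = tf.get_variable
    def get_variable_on_gpu0(*args, **kwargs):
        with tf.device('/gpu:0'):
            return original_get_variable(*args, **kwargs)
    tf.get_variable = get_variable_on_gpu0
    try:
        yield
    finally:
        tf.get_variable = original_get_variable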
Example #3
    def build_model(self):
        config = self.config
        self.d_opt = tf.train.AdamOptimizer(FLAGS.discriminator_learning_rate,
                                            beta1=FLAGS.beta1)
        self.g_opt = tf.train.AdamOptimizer(FLAGS.generator_learning_rate,
                                            beta1=FLAGS.beta1)

        with tf.variable_scope('model') as model_scope:
            if config.num_towers > 1:
                all_d_grads = []
                all_g_grads = []
                for idx, device in enumerate(self.devices):
                    with tf.device('/%s' % device):
                        with tf.name_scope('device_%s' % idx):
                            with ops.variables_on_gpu0():
                                self.build_model_single_gpu(
                                    gpu_idx=idx,
                                    batch_size=config.batch_size,
                                    num_towers=config.num_towers)
                                d_grads = self.d_opt.compute_gradients(
                                    self.d_losses[-1], var_list=self.d_vars)
                                g_grads = self.g_opt.compute_gradients(
                                    self.g_losses[-1], var_list=self.g_vars)
                                all_d_grads.append(d_grads)
                                all_g_grads.append(g_grads)
                                model_scope.reuse_variables()
                d_grads = ops.avg_grads(all_d_grads)
                g_grads = ops.avg_grads(all_g_grads)
            else:
                self.build_model_single_gpu(batch_size=config.batch_size,
                                            num_towers=config.num_towers)
                d_grads = self.d_opt.compute_gradients(self.d_losses[-1],
                                                       var_list=self.d_vars)
                g_grads = self.g_opt.compute_gradients(self.g_losses[-1],
                                                       var_list=self.g_vars)

        # Separate non-trainable step counters for discriminator and generator updates.
        d_step = tf.get_variable('d_step', initializer=0, trainable=False)
        self.d_optim = self.d_opt.apply_gradients(d_grads, global_step=d_step)
        g_step = tf.get_variable('g_step', initializer=0, trainable=False)
        self.g_optim = self.g_opt.apply_gradients(g_grads, global_step=g_step)
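
Example #3 shares the discriminator and generator weights across towers through model_scope.reuse_variables(). A small, self-contained illustration of that pattern, with a made-up tower function and shapes (not taken from the example above):

import tensorflow as tf

def tower(x):
    # The first call creates 'model/w'; calls made after reuse_variables()
    # return the existing variable instead of creating a new one.
    w = tf.get_variable('w', shape=[4, 4], initializer=tf.glorot_uniform_initializer())
    return tf.matmul(x, w)

with tf.variable_scope('model') as model_scope:
    outputs = []
    for idx in range(2):
        with tf.name_scope('device_%s' % idx):
            outputs.append(tower(tf.random_normal([8, 4])))
            model_scope.reuse_variables()  # towers after the first share 'model/w'

print([v.name for v in tf.trainable_variables()])  # -> ['model/w:0']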
Example #4
    def build_model(self):
        """Builds a model."""
        config = self.config
        # If ps_tasks is zero, the local device is used. When using multiple
        # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
        # across the different devices.
        current_step = tf.cast(self.global_step, tf.float32)
        # g_ratio = (1.0 + 2e-5 * tf.maximum((current_step - 100000.0), 0.0))
        # g_ratio = tf.minimum(g_ratio, 4.0)
        self.d_learning_rate = FLAGS.discriminator_learning_rate
        self.g_learning_rate = FLAGS.generator_learning_rate
        # self.g_learning_rate = FLAGS.generator_learning_rate / (1.0 + 2e-5 * tf.cast(self.global_step, tf.float32))
        # self.g_learning_rate = FLAGS.generator_learning_rate / g_ratio
        with tf.device(tf.train.replica_device_setter(config.ps_tasks)):
            self.d_opt = tf.train.AdamOptimizer(self.d_learning_rate,
                                                beta1=FLAGS.beta1)
            self.g_opt = tf.train.AdamOptimizer(self.g_learning_rate,
                                                beta1=FLAGS.beta1)
            if config.sync_replicas and config.num_workers > 1:
                self.d_opt = tf.train.SyncReplicasOptimizer(
                    opt=self.d_opt,
                    replicas_to_aggregate=config.replicas_to_aggregate)
                self.g_opt = tf.train.SyncReplicasOptimizer(
                    opt=self.g_opt,
                    replicas_to_aggregate=config.replicas_to_aggregate)

        if config.num_towers > 1:
            all_d_grads = []
            all_g_grads = []
            for idx, device in enumerate(self.devices):
                with tf.device('/%s' % device):
                    with tf.name_scope('device_%s' % idx):
                        with ops.variables_on_gpu0():
                            self.build_model_single_gpu(
                                gpu_idx=idx,
                                batch_size=config.batch_size,
                                num_towers=config.num_towers)
                            d_grads = self.d_opt.compute_gradients(
                                self.d_losses[-1], var_list=self.d_vars)
                            g_grads = self.g_opt.compute_gradients(
                                self.g_losses[-1], var_list=self.g_vars)
                            all_d_grads.append(d_grads)
                            all_g_grads.append(g_grads)
            d_grads = ops.avg_grads(all_d_grads)
            g_grads = ops.avg_grads(all_g_grads)
        else:
            with tf.device(tf.train.replica_device_setter(config.ps_tasks)):
                # TODO(olganw): reusing virtual batchnorm doesn't work in the multi-
                # replica case.
                self.build_model_single_gpu(batch_size=config.batch_size,
                                            num_towers=config.num_towers)
                d_grads = self.d_opt.compute_gradients(self.d_losses[-1],
                                                       var_list=self.d_vars)
                g_grads = self.g_opt.compute_gradients(self.g_losses[-1],
                                                       var_list=self.g_vars)
        with tf.device(tf.train.replica_device_setter(config.ps_tasks)):
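            # Moving-average update ops (e.g. batch norm statistics); they are
            # attached as control dependencies of the optimizer steps below.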
            update_moving_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            print('update_moving_ops', update_moving_ops)
            if config.sync_replicas:
                with tf.control_dependencies(update_moving_ops):
                    d_step = tf.get_variable('d_step',
                                             initializer=0,
                                             trainable=False)
                    self.d_optim = self.d_opt.apply_gradients(
                        d_grads, global_step=d_step)
                    g_step = tf.get_variable('g_step',
                                             initializer=0,
                                             trainable=False)
                    self.g_optim = self.g_opt.apply_gradients(
                        g_grads, global_step=g_step)
            else:
                # Don't create any additional counters, and don't update the global step
                with tf.control_dependencies(update_moving_ops):
                    self.d_optim = self.d_opt.apply_gradients(d_grads)
                    self.g_optim = self.g_opt.apply_gradients(g_grads)