Example #1
import tensorflow as tf


def get_optimizer(learning_rate, hparams):
    """Get the tf.train.Optimizer for this optimizer string.

    Args:
      learning_rate: The learning_rate tensor.
      hparams: tf.contrib.training.HParams object with the optimizer and
        momentum values.

    Returns:
      optimizer: The tf.train.Optimizer based on the optimizer string.
    """
    return {
        'rmsprop':
            tf.train.RMSPropOptimizer(
                learning_rate,
                decay=0.95,
                momentum=hparams.momentum,
                epsilon=1e-4),
        'adam':
            tf.train.AdamOptimizer(
                learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8),
        'adagrad':
            tf.train.AdagradOptimizer(
                learning_rate, initial_accumulator_value=1.0),
        'mom':
            tf.train.MomentumOptimizer(learning_rate, momentum=hparams.momentum),
        'sgd':
            tf.train.GradientDescentOptimizer(learning_rate),
    }.get(hparams.optimizer)
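
A minimal usage sketch for the function above, assuming TensorFlow 1.x with tf.contrib available; the HParams values, the toy loss and the 0.001 learning rate are illustrative assumptions, not taken from the original project:

hparams = tf.contrib.training.HParams(optimizer='rmsprop', momentum=0.9)

weights = tf.Variable([1.0, 2.0])
loss = tf.reduce_mean(tf.square(weights))  # toy loss, for illustration only
optimizer = get_optimizer(tf.constant(0.001), hparams)
if optimizer is None:
    # dict.get() returns None when the optimizer string is not recognized.
    raise ValueError('unknown optimizer: %s' % hparams.optimizer)
train_op = optimizer.minimize(loss)
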
Example #2

    def __init__(self, hps, net, output_layer, experiment_proto, input_paths):
        inputs, outputs = data.input_pipeline(input_paths,
                                              experiment_proto,
                                              hps.mbsz,
                                              hps=hps,
                                              num_threads=8)
        with tf.name_scope('neural_net'):
            logits = net.fprop(inputs, mode='train')
        with tf.name_scope('output_layer'):
            loss_per_target = output_layer.average_loss_per_target(
                logits, outputs, include_array=hps.train_on_array)
            # Mean over targets, ignoring NaN entries.
            loss = utils.reduce_nanmean(loss_per_target)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        if hps.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(hps.learn_rate, hps.momentum)
        elif hps.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(hps.learn_rate)
        else:
            raise ValueError('invalid optimizer: %s' % hps.optimizer)
        # Differentiate the loss only w.r.t. the network and output-layer
        # parameters, then apply the update; train_op evaluates to the loss
        # once the optimizer step has run.
        grads = optimizer.compute_gradients(loss,
                                            net.params + output_layer.params)
        opt_op = optimizer.apply_gradients(grads, global_step=self.global_step)
        self.train_op = tf.with_dependencies([opt_op], loss)

        contrib_deprecated.scalar_summary('loss/mean', loss)
        for target in loss_per_target.axes['target'].labels:
            contrib_deprecated.scalar_summary(
                'loss/' + six.ensure_str(target),
                lt.select(loss_per_target, {'target': target}))
        with tf.name_scope('summarize_grads'):
            slim.learning.add_gradients_summaries(grads)

        tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, self.global_step)
        tf.add_to_collection('train_op', self.train_op)
        tf.add_to_collection('loss', loss)

        self.mbsz = hps.mbsz
        # The log Poisson loss implemented in TensorFlow may sometimes be negative.
        if hps.loss_name in (output_layers.LOSS_POISSON_LOSS,
                             output_layers.LOSS_ZERO_TRUNCATED_POISSON_LOSS):
            self.min_cost = -float('inf')
            self.min_is_inclusive = False
        else:
            self.min_cost = 0
            self.min_is_inclusive = True
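
A hedged sketch of how the training graph built in this constructor might be driven in a TF 1.x session. The class name TrainModel, the step count and the logging interval are hypothetical, and hps, net, output_layer, experiment_proto and input_paths are assumed to be constructed elsewhere:

model = TrainModel(hps, net, output_layer, experiment_proto, input_paths)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # data.input_pipeline is queue-based, so the queue runners must be started.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(1000):  # illustrative number of training steps
            # train_op evaluates to the loss once the optimizer update has run.
            cost, step = sess.run([model.train_op, model.global_step])
            if step % 100 == 0:
                print('step %d: loss = %r' % (step, cost))
    finally:
        coord.request_stop()
        coord.join(threads)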