def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    """Add one fully connected layer to the graph and return its output.

    The layer computes `activation_function(inputs @ W + b)` and records
    histogram summaries for the weights, the biases and the outputs under
    the tag prefix 'layer<n_layer>'.

    Args:
        inputs: tensor that is matrix-multiplied with the
            [in_size, out_size] weight matrix.
        in_size: number of rows of the weight matrix.
        out_size: number of columns of the weight matrix (and of the bias).
        n_layer: identifier interpolated into the summary tag prefix.
        activation_function: optional callable applied to the affine
            output; when None the affine output is returned unchanged.

    Returns:
        The output tensor of the layer.
    """
    layer_name = 'layer%s' % n_layer
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]),
                                  name='W')
            tf.summary.histogram(layer_name + '/weights', Weights)

        with tf.name_scope('biases'):
            # Biases start slightly positive (0.1) rather than at zero.
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
            tf.summary.histogram(layer_name + '/biases', biases)

        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)

        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)

        # Use the same tf.summary API as the weight/bias histograms above
        # instead of a bare `histogram_summary` name.
        tf.summary.histogram(layer_name + '/outputs', outputs)

    return outputs
Example #2
0
def add_gradients_summaries(grads_and_vars):
    """Create histogram summaries for every gradient and its global norm.

    Pairs whose gradient is None produce no summary; an info message naming
    the variable is logged for them instead.

    Args:
        grads_and_vars: A list of (gradient, variable) pairs (tuples).

    Returns:
        The list of created summaries, two per pair that has a gradient.
    """
    created = []
    for gradient, variable in grads_and_vars:
        if gradient is None:
            logging.info('Var %s has no gradient', variable.op.name)
            continue

        # IndexedSlices wrap the actual values; summarize those directly.
        values = (gradient.values
                  if isinstance(gradient, ops.IndexedSlices) else gradient)

        gradient_summary = logging_ops.histogram_summary(
            variable.op.name + ':gradient', values)
        created.append(gradient_summary)

        norm_summary = logging_ops.histogram_summary(
            variable.op.name + ':gradient_norm',
            clip_ops.global_norm([values]))
        created.append(norm_summary)

    return created
Example #3
0
def logistic_regression(X, y, class_weight=None, init_mean=None,
                        init_stddev=1.0):
    """Creates logistic regression TensorFlow subgraph.

    Args:
        X: tensor or placeholder for input features,
           shape should be [batch_size, n_features].
        y: tensor or placeholder for target,
           shape should be [batch_size, n_classes].
        class_weight: tensor, [n_classes], where for each class
                      it has weight of the class. If None, the default
                      graph is searched for a tensor named `class_weight:0`.
                      If that is not found either, None is passed on to the
                      softmax classifier.
        init_mean: the mean value to use for initialization.
        init_stddev: the standard deviation to use for initialization.

    Returns:
        Predictions and loss tensors.

    Side effects:
        The variables logistic_regression/weights and
        logistic_regression/bias are created as follows.  If init_mean is
        not None, then initialization will be done using a random normal
        initializer with the given init_mean and init_stddev.  (These may be
        set to 0.0 each if a zero initialization is desirable for convex use
        cases.)  If init_mean is None, no initializer is passed, so the
        default of `get_variable` applies.  Histogram summaries are added
        for X, y, the weights and the bias.
    """
    with vs.variable_scope('logistic_regression'):
        logging_ops.histogram_summary('logistic_regression.X', X)
        logging_ops.histogram_summary('logistic_regression.y', y)
        # Set up the requested initialization.
        if init_mean is None:
            weights = vs.get_variable('weights',
                                      [X.get_shape()[1], y.get_shape()[-1]])
            bias = vs.get_variable('bias',
                                   [y.get_shape()[-1]])
        else:
            weights = vs.get_variable('weights',
                                      [X.get_shape()[1], y.get_shape()[-1]],
                                      initializer=init_ops.random_normal_initializer(
                                          init_mean, init_stddev))
            bias = vs.get_variable('bias',
                                   [y.get_shape()[-1]],
                                   initializer=init_ops.random_normal_initializer(
                                       init_mean, init_stddev))
        logging_ops.histogram_summary('logistic_regression.weights', weights)
        logging_ops.histogram_summary('logistic_regression.bias', bias)
        # If no class weight provided, try to retrieve one from pre-defined
        # tensor name in the graph.  Compare against None explicitly: taking
        # the truth value of a tensor (or a multi-element array) raises
        # instead of telling us whether a weight was supplied.
        if class_weight is None:
            try:
                class_weight = ops.get_default_graph().get_tensor_by_name(
                    'class_weight:0')
            except KeyError:
                # No such tensor in the graph: proceed without class weights.
                pass

        return losses_ops.softmax_classifier(X, y, weights, bias,
                                             class_weight=class_weight)
Example #4
0
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
    """Builds the TensorFlow subgraph for a linear regression model.

    Args:
        x: tensor or placeholder holding the input features.
        y: tensor or placeholder holding the regression target.
        init_mean: mean of the random normal initializer; None means no
            explicit initializer is passed to `get_variable`.
        init_stddev: standard deviation of the random normal initializer.

    Returns:
        Predictions and loss tensors.

    Side effects:
        Creates the variables `weights` and `bias` inside the
        'linear_regression' variable scope, using the dtype of `x`, and
        adds histogram summaries for x, y, the weights and the bias.
    """
    with vs.variable_scope('linear_regression'):
        scope = vs.get_variable_scope().name
        logging_ops.histogram_summary('%s.x' % scope, x)
        logging_ops.histogram_summary('%s.y' % scope, y)

        dtype = x.dtype.base_dtype
        target_shape = y.get_shape()
        # A rank-1 target means a single output column.
        n_outputs = 1 if len(target_shape) == 1 else target_shape[1]

        def _variable_kwargs():
            # Keyword arguments shared by both variables; a fresh
            # initializer is built per call when one was requested.
            kwargs = {}
            if init_mean is not None:
                kwargs['initializer'] = init_ops.random_normal_initializer(
                    init_mean, init_stddev, dtype=dtype)
            kwargs['dtype'] = dtype
            return kwargs

        weights = vs.get_variable('weights',
                                  [x.get_shape()[1], n_outputs],
                                  **_variable_kwargs())
        bias = vs.get_variable('bias', [n_outputs], **_variable_kwargs())

        logging_ops.histogram_summary('%s.weights' % scope, weights)
        logging_ops.histogram_summary('%s.bias' % scope, bias)
        return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
Example #5
0
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
  """Constructs a linear regression subgraph and returns its outputs.

  Args:
    x: tensor or placeholder for input features.
    y: tensor or placeholder for target.
    init_mean: mean for the random normal initializer, or None to pass no
      explicit initializer to `get_variable`.
    init_stddev: standard deviation for the random normal initializer.

  Returns:
    Predictions and loss tensors.

  Side effects:
    The variables `weights` and `bias` are created in the
    'linear_regression' variable scope with the dtype of `x`, and histogram
    summaries are added for x, y, the weights and the bias.
  """
  with vs.variable_scope('linear_regression'):
    prefix = vs.get_variable_scope().name
    logging_ops.histogram_summary('%s.x' % prefix, x)
    logging_ops.histogram_summary('%s.y' % prefix, y)
    dtype = x.dtype.base_dtype

    y_dims = y.get_shape()
    # Rank-1 targets are treated as a single output column.
    n_out = y_dims[1] if len(y_dims) != 1 else 1

    if init_mean is not None:
      # Explicit random normal initialization was requested.
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], n_out],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
      bias = vs.get_variable(
          'bias', [n_out],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
    else:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], n_out], dtype=dtype)
      bias = vs.get_variable('bias', [n_out], dtype=dtype)

    logging_ops.histogram_summary('%s.weights' % prefix, weights)
    logging_ops.histogram_summary('%s.bias' % prefix, bias)
    return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
Example #6
0
def add_gradients_summaries(grads_and_vars):
  """Builds gradient and gradient-norm histogram summaries.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The list of created summaries. Pairs with a None gradient contribute
    nothing and are only reported through an info log message.
  """
  result = []
  for grad, var in grads_and_vars:
    if grad is None:
      logging.info('Var %s has no gradient', var.op.name)
      continue

    # For IndexedSlices only the underlying values are summarized.
    tensor = grad.values if isinstance(grad, ops.IndexedSlices) else grad

    gradient_hist = logging_ops.histogram_summary(
        var.op.name + ':gradient', tensor)
    norm_hist = logging_ops.histogram_summary(
        var.op.name + ':gradient_norm', clip_ops.global_norm([tensor]))
    result.extend([gradient_hist, norm_hist])

  return result
def _add_hidden_layer_summary(value, tag):
    """Add zero-fraction and activation summaries for a hidden layer.

    Args:
        value: tensor whose zero fraction and histogram are summarized.
        tag: prefix used for both summary tags.
    """
    sparsity_tag = "%s/fraction_of_zero_values" % tag
    sparsity = nn.zero_fraction(value)
    logging_ops.scalar_summary(sparsity_tag, sparsity)

    activation_tag = "%s/activation" % tag
    logging_ops.histogram_summary(activation_tag, value)
Example #8
0
    def _setup_training(self):
        """Sets up graph, model and trainer.

        Builds a fresh graph and, inside it, creates in this order: the
        global step, the input/output tensors from the data feeder, an
        optional `class_weight` constant, histogram summaries for float
        inputs/outputs, the model (via `self.model_fn`), the training op,
        the merged summaries, the variable initializer and the saver.
        Finally it lets the monitor build its validation feed dict, opens a
        session and runs the initializer.

        The results are stored on `self` (`_graph`, `_global_step`, `_inp`,
        `_out`, `_model_predictions`, `_model_loss`, `_train`, `_summaries`,
        `_initializers`, `_saver`, `_session`). Nothing is returned.

        NOTE: the statement order matters. The summaries are merged, the
        initializer is built and the saver is created only after all
        graph-building steps above them, as the inline comments indicate.
        """
        # Create config if not given.
        if self._config is None:
            self._config = RunConfig(verbose=self.verbose)
        # Create new graph.
        self._graph = ops.Graph()
        # Flag stored in a graph collection under the key "IS_TRAINING".
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            random_seed.set_random_seed(self._config.tf_random_seed)
            self._global_step = variables.Variable(
                0, name="global_step", trainable=False)

            # Setting up inputs and outputs.
            self._inp, self._out = self._data_feeder.input_builder()

            # If class weights are provided, add them to the graph.
            # Different loss functions can use this tensor by name.
            if self.class_weight:
                self._class_weight_node = constant_op.constant(
                    self.class_weight, name='class_weight')

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            learning_rate = self.learning_rate
            optimizer = self.optimizer
            # A callable learning rate is evaluated with the global step;
            # a callable optimizer is evaluated with the resulting rate.
            if callable(learning_rate):
                learning_rate = learning_rate(self._global_step)
            if callable(optimizer):
                optimizer = optimizer(learning_rate)
            self._train = optimizers.optimize_loss(self._model_loss, self._global_step,
                learning_rate=learning_rate,
                optimizer=optimizer, clip_gradients=self.clip_gradients)

            # Update ops during training, e.g. batch_norm_ops
            self._train = control_flow_ops.group(self._train, *ops.get_collection('update_ops'))

            # Merge all summaries into single tensor.
            self._summaries = logging_ops.merge_all_summaries()
 
            # Build the initializer via `initialize_all_variables`.
            self._initializers = variables.initialize_all_variables()

            # Create model's saver capturing all the nodes created up until now.
            self._saver = train.Saver(
                max_to_keep=self._config.keep_checkpoint_max,
                keep_checkpoint_every_n_hours=self._config.keep_checkpoint_every_n_hours)

            # Enable monitor to create validation data dict with appropriate tf placeholders
            self._monitor.create_val_feed_dict(self._inp, self._out)

            # Create session to run model with.
            self._session = session.Session(self._config.tf_master, config=self._config.tf_config)

            # Run parameter initializers.
            self._session.run(self._initializers)
Example #9
0
 def add_histogram_summary(self, v, name):
     """Add a histogram summary of `v` tagged "<self.name>:<name>"."""
     summary_tag = "{}:{}".format(self.name, name)
     logging_ops.histogram_summary(summary_tag, v)
Example #10
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or 0-dimensional Tensor, magnitude of update per
                   each training step.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of tf.Optimizer that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be instantiation of tf.Optimizer
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float or `None`, clips gradients by this value.
    moving_average_decay: Deprecated. float or None, takes into account previous
                          loss to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: tf.train.exponential_decay.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set only the loss and the learning rate will be reported (the
               learning rate only when `learning_rate_decay_fn` is given). The
               complete list is in OPTIMIZER_SUMMARIES.

  Returns:
    Training op.

  Raises:
    ValueError: if `learning_rate` is neither a float nor a 0-dimensional
                Tensor, or if optimizer is wrong type or an unknown name.
  """
  with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
    # Update ops take UPDATE_OPS collection if not provided.
    if update_ops is None:
      update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    # Make sure update ops are run before computing loss.  The collection
    # was de-duplicated through a set above; hand a list to
    # `with_dependencies`, which expects a list-like of dependencies.
    if update_ops:
      loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Moving average of the loss with decay.
    # TODO(b/30439864): moving_average_decay should be removed.
    if moving_average_decay is not None:
      logging.warn("'moving_average_decay' is deprecated. Please use "
                   "tensorboard's builtin averaging instead.")
      # Generate moving averages of the loss.
      loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                     name="avg")
      loss_averages_op = loss_averages.apply([loss])
      logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
      loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

    # Learning rate variable, with possible decay.
    if (isinstance(learning_rate, ops.Tensor)
        and learning_rate.get_shape().ndims == 0):
      lr = learning_rate
    elif isinstance(learning_rate, float):
      lr = vs.get_variable(
          "learning_rate", [], trainable=False,
          initializer=init_ops.constant_initializer(learning_rate))
    else:
      raise ValueError("Learning rate should be 0d Tensor or float. "
                       "Got %s of type %s" % (
                           str(learning_rate), str(type(learning_rate))))
    if summaries is None:
      summaries = ["loss", "learning_rate"]
    if learning_rate_decay_fn is not None:
      lr = learning_rate_decay_fn(lr, global_step)
      if "learning_rate" in summaries:
        logging_ops.scalar_summary("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [%s], you provided %s."
            % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
      opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif isinstance(optimizer, type) and issubclass(optimizer,
                                                    optimizer_.Optimizer):
      opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
      opt = optimizer
    else:
      raise ValueError("Unrecognized optimizer: should be string, "
                       "subclass of Optimizer or instance of "
                       "subclass of Optimizer. Got %s." % str(optimizer))

    # All trainable variables, if specific variables are not specified.
    if variables is None:
      variables = vars_.trainable_variables()

    # Compute gradients.
    gradients = opt.compute_gradients(loss, variables)

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
      gradients = _add_scaled_noise_to_gradients(
          gradients, gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
      gradients = _multiply_gradients(gradients, gradient_multipliers)

    # Optionally clip gradients by global norm.
    if clip_gradients is not None:
      gradients = _clip_gradients_by_norm(gradients, clip_gradients)

    # Add scalar summary for loss.
    if "loss" in summaries:
      logging_ops.scalar_summary("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
      # IndexedSlices carry their data in `.values`.
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient

      if grad_values is not None:
        if "gradients" in summaries:
          logging_ops.histogram_summary(variable.name + "/gradients",
                                        grad_values)
        if "gradient_norm" in summaries:
          logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                        clip_ops.global_norm([grad_values]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(gradients,
                                       global_step=global_step,
                                       name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
Example #11
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False):
    """Given loss and parameters for optimizer, returns a training op.

    Various ways of passing optimizers, include:

    - string, name of the optimizer like 'SGD', 'Adam', see
        OPTIMIZER_CLS_NAMES for full list.
        E.g. `optimize_loss(..., optimizer='Adam')`.
    - function, takes learning rate `Tensor` as argument and must return
        `Optimizer` instance. E.g. `optimize_loss(...,
        optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
      Alternatively, if `learning_rate` is `None`, the function takes no
      arguments. E.g. `optimize_loss(..., learning_rate=None,
        optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
    - class, subclass of `Optimizer` that takes only one required argument -
        learning rate, such as AdamOptimizer, AdagradOptimizer.
        E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`.
    - object, instance of subclass of `Optimizer`.
        E.g., `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter for each update.
      learning_rate: float, 0-dimensional Tensor or `None`, magnitude of
                     update per each training step. `None` is only accepted
                     together with an optimizer instance or a zero-argument
                     optimizer function.
      optimizer: string, class, function or optimizer instance, used as
                 trainer.
                 string should be name of optimizer, like 'SGD',
                   'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES
                   constant.
                 class should be sub-class of `tf.Optimizer` that implements
                   `compute_gradients` and `apply_gradients` functions.
                 optimizer instance should be instantiation of `tf.Optimizer`
                   sub-class and have `compute_gradients` and
                   `apply_gradients` functions.
      gradient_noise_scale: float or None, adds 0-mean normal noise scaled by
                            this value.
      gradient_multipliers: dict of variables or variable names to floats.
                            If present, gradients for specified
                            variables will be multiplied by given constant.
      clip_gradients: float or `None`, clips gradients by this value.
      learning_rate_decay_fn: function, takes `learning_rate` and
                              `global_step` `Tensor`s, returns `Tensor`.
                              Can be used to implement any learning rate
                              decay functions.
                              For example: `tf.train.exponential_decay`.
                              Ignored when `learning_rate` is `None`.
      update_ops: list of update `Operation`s to execute at each step. If
                  `None`, uses elements of UPDATE_OPS collection. The order
                  of execution between `update_ops` and `loss` is
                  non-deterministic.
      variables: list of variables to optimize or
                 `None` to use all trainable variables.
      name: The name for this operation is used to scope operations and
            summaries.
      summaries: List of internal quantities to visualize on tensorboard. If
                 not set only the loss and the learning rate will be
                 reported (the learning rate summary is only emitted when a
                 decay function is applied). The complete list is in
                 OPTIMIZER_SUMMARIES.
      colocate_gradients_with_ops: If True, try colocating gradients with the
                                   corresponding op.

    Returns:
      Training op.

    Raises:
      ValueError: if `learning_rate` is neither `None`, a float nor a
                  0-dimensional Tensor; if `learning_rate` is `None` while
                  the optimizer is given as a string or a class; if the
                  optimizer name is unknown; if an optimizer function does
                  not return an `Optimizer`; or if optimizer is wrong type.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are run before computing loss.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Learning rate variable, with possible decay.
        # `lr` stays None when no learning rate was supplied.
        lr = None
        if learning_rate is not None:
            if (isinstance(learning_rate, ops.Tensor)
                    and learning_rate.get_shape().ndims == 0):
                lr = learning_rate
            elif isinstance(learning_rate, float):
                lr = vs.get_variable(
                    "learning_rate", [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate"]
        # Decay (and the learning rate summary) only apply when both a
        # learning rate and a decay function were given.
        if learning_rate is not None and learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            # Checked after the class/instance branches, so this handles
            # plain functions: one argument (lr) or none at all.
            if learning_rate is not None:
                opt = optimizer(lr)
            else:
                opt = optimizer()
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(
            loss,
            variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)

        # Optionally clip gradients by global norm.
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        # Add scalar summary for loss.
        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            # IndexedSlices carry their data in `.values`.
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                if "gradients" in summaries:
                    logging_ops.histogram_summary(variable.name + "/gradients",
                                                  grad_values)
                if "gradient_norm" in summaries:
                    logging_ops.histogram_summary(
                        variable.name + "/gradient_norm",
                        clip_ops.global_norm([grad_values]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

        return train_tensor
Example #12
0
def optimize_loss(
    loss,
    global_step,
    learning_rate,
    optimizer,
    gradient_noise_scale=None,
    gradient_multipliers=None,
    clip_gradients=None,
    learning_rate_decay_fn=None,
    update_ops=None,
    variables=None,
    name=None,
    summaries=None,
):
    """Build a training op that minimizes `loss` with the requested optimizer.

    The `optimizer` argument may be any of:
      - a string naming an entry of OPTIMIZER_CLS_NAMES, e.g.
        `optimize_loss(..., optimizer='Adam')`;
      - a subclass of `Optimizer`, constructed here as
        `optimizer(learning_rate=lr)`, e.g.
        `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`;
      - an instance of an `Optimizer` subclass, used as is, e.g.
        `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`;
      - any other callable returning an `Optimizer` instance. It receives the
        learning rate tensor, or no arguments when `learning_rate` is `None`,
        e.g. `optimize_loss(..., learning_rate=None,
        optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter passed to `apply_gradients` and to
        `learning_rate_decay_fn`.
      learning_rate: float, 0-d Tensor or `None`. A float is stored in a
        non-trainable variable named "learning_rate".
      optimizer: string, class, instance or callable, as described above.
      gradient_noise_scale: float or `None`; when set, the gradients are passed
        through `_add_scaled_noise_to_gradients` with this value.
      gradient_multipliers: dict or `None`; when set, the gradients are passed
        through `_multiply_gradients` with this mapping.
      clip_gradients: float or `None`; when set, the gradients are passed
        through `_clip_gradients_by_norm` with this value.
      learning_rate_decay_fn: function taking the learning rate and
        `global_step` and returning the decayed learning rate, for example
        `tf.train.exponential_decay`. Ignored when `learning_rate` is `None`.
      update_ops: iterable of ops to run before the loss is evaluated. If
        `None`, the UPDATE_OPS graph collection is used.
      variables: list of variables to optimize, or `None` to use all
        trainable variables.
      name: name of the variable scope wrapping everything created here
        (default scope name is "OptimizeLoss").
      summaries: list of summary kinds to emit; the code checks for "loss",
        "learning_rate", "gradients" and "gradient_norm". Defaults to
        ["loss", "learning_rate"]. The learning-rate summary is only written
        when a decay function is applied. See OPTIMIZER_SUMMARIES.

    Returns:
      The loss tensor, with a control dependency on the gradient update op.

    Raises:
      ValueError: if `learning_rate` is neither `None`, a float nor a 0-d
        Tensor; if a learning rate is required but missing; or if `optimizer`
        is not one of the supported kinds.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Default to whatever the graph registered under UPDATE_OPS.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        if update_ops:
            # Evaluating the loss now forces the update ops to run first.
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Turn the user-supplied learning rate into something an optimizer
        # accepts; it stays None when no learning rate was given.
        lr = None
        if learning_rate is not None:
            is_scalar_tensor = (
                isinstance(learning_rate, ops.Tensor)
                and learning_rate.get_shape().ndims == 0
            )
            if is_scalar_tensor:
                lr = learning_rate
            elif isinstance(learning_rate, float):
                lr = vs.get_variable(
                    "learning_rate",
                    [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate),
                )
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s"
                    % (str(learning_rate), str(type(learning_rate)))
                )

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        # Decay only applies when there is a learning rate to decay.
        apply_decay = (
            learning_rate is not None and learning_rate_decay_fn is not None
        )
        if apply_decay:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Resolve `optimizer` into an Optimizer instance.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer
                )
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s."
                    % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)
                )
            optimizer_cls = OPTIMIZER_CLS_NAMES[optimizer]
            opt = optimizer_cls(learning_rate=lr)
        elif isinstance(optimizer, type) and issubclass(
            optimizer, optimizer_.Optimizer
        ):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer
                )
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            # Factory function: with or without the learning rate argument.
            opt = optimizer() if learning_rate is None else optimizer(lr)
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt)
                )
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer)
            )

        # Optimize every trainable variable unless told otherwise.
        if variables is None:
            variables = vars_.trainable_variables()

        gradients = opt.compute_gradients(loss, variables)

        # Optional gradient post-processing: noise, per-variable scaling,
        # then clipping, in that order.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(
                gradients, gradient_noise_scale
            )
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Per-variable gradient summaries; variables without a gradient are
        # skipped.
        for gradient, variable in gradients:
            grad_values = (
                gradient.values
                if isinstance(gradient, ops.IndexedSlices)
                else gradient
            )
            if grad_values is None:
                continue
            if "gradients" in summaries:
                logging_ops.histogram_summary(
                    variable.name + "/gradients", grad_values
                )
            if "gradient_norm" in summaries:
                norm_tag = variable.name + "/gradient_norm"
                logging_ops.histogram_summary(
                    norm_tag, clip_ops.global_norm([grad_values])
                )

        grad_updates = opt.apply_gradients(
            gradients, global_step=global_step, name="train"
        )

        # Fetching the returned tensor triggers the gradient update.
        return control_flow_ops.with_dependencies([grad_updates], loss)
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or 0-d Tensor, magnitude of update per each training
                   step. A float is stored in a non-trainable variable named
                   "learning_rate".
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of tf.Optimizer that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be an instance of a tf.Optimizer
                 sub-class and have `compute_gradients` and `apply_gradients`
                 functions.
    clip_gradients: float or None, clips gradients by global norm to this
                    value.
    moving_average_decay: float or None. When not None, an exponential moving
                          average of the loss is maintained with this decay
                          and reported as the "loss/mean" summary.
    learning_rate_decay_fn: function, takes learning_rate and global_step
                            Tensors, returns Tensor. Can be used to implement
                            any learning rate decay functions.
                            For example: tf.train.exponential_decay.
    variables: list of variables to optimize, or None to use all trainable
               variables.

  Returns:
    The loss tensor, passed through `check_numerics` and carrying a control
    dependency on the gradient update op.

  Raises:
    ValueError: if `learning_rate` is neither a 0-d Tensor nor a float, or if
      `optimizer` is an unknown name or of an unsupported type.
  """
  # Moving average of the loss with decay.
  if moving_average_decay is not None:
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    # Evaluating the loss also refreshes its moving average.
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Learning rate variable, with possible decay.
  # `ndims` is None for a shape of unknown rank, so such tensors fall through
  # to the explicit error below instead of failing inside `len(shape)`.
  if (isinstance(learning_rate, ops.Tensor) and
      learning_rate.get_shape().ndims == 0):
    lr = learning_rate
  elif isinstance(learning_rate, float):
    lr = vs.get_variable("learning_rate",
                         [],
                         trainable=False,
                         initializer=init_ops.constant_initializer(learning_rate))
  else:
    raise ValueError("Learning rate should be 0d Tensor or float. Got %s" %
        str(learning_rate))
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)

  # Create optimizer, given specified parameters.
  if isinstance(optimizer, six.string_types):
    if optimizer not in OPTIMIZER_CLS_NAMES:
      raise ValueError("Optimizer name should be one of [%s], you provided %s."
                       % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
    opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
  elif isinstance(optimizer, type) and issubclass(optimizer,
                                                  optimizer_.Optimizer):
    opt = optimizer(learning_rate=lr)
  elif isinstance(optimizer, optimizer_.Optimizer):
    opt = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string, "
                     "subclass of Optimizer or instance of "
                     "subclass of Optimizer. Got %s." % str(optimizer))

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients and clip them if provided.
  gradients = opt.compute_gradients(loss, variables)
  if clip_gradients is not None:
    # clip_by_global_norm works on the bare gradients, so split the
    # (gradient, variable) pairs and re-pair them afterwards.
    gradients, variables = zip(*gradients)
    clipped_gradients, _ = clip_ops.clip_by_global_norm(gradients,
                                                        clip_gradients)
    gradients = list(zip(clipped_gradients, variables))

  # Add scalar summary for loss.
  logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    # Variables without a gradient get no summaries at all.
    if grad_values is not None:
      logging_ops.histogram_summary(variable.name, variable)
      logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
      logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                    clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")
  # Make sure total_loss is valid.
  final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

  return train_tensor
def _add_hidden_layer_summary(value, tag):
  """Emit the summaries describing one hidden layer's output.

  Writes a scalar summary of `nn.zero_fraction(value)` and a histogram summary
  of `value`, both tagged under the `tag` prefix.
  """
  zero_fraction_tag = "%s/fraction_of_zero_values" % tag
  logging_ops.scalar_summary(zero_fraction_tag, nn.zero_fraction(value))

  activation_tag = "%s/activation" % tag
  logging_ops.histogram_summary(activation_tag, value)
Example #15
0
def logistic_regression(X,
                        y,
                        class_weight=None,
                        init_mean=None,
                        init_stddev=1.0):
    """Creates logistic regression TensorFlow subgraph.

    Args:
        X: tensor or placeholder for input features,
           shape should be [batch_size, n_features].
        y: tensor or placeholder for target,
           shape should be [batch_size, n_classes].
        class_weight: tensor, [n_classes], where for each class
                      it has weight of the class. If `None`, the graph is
                      searched for a tensor named `class_weight:0`; if that
                      does not exist either, `None` is forwarded to
                      `softmax_classifier`.
        init_mean: the mean value to use for initialization.
        init_stddev: the standard deviation to use for initialization.

    Returns:
        The result of `softmax_classifier` (predictions and loss tensors).

    Side effects:
        Creates the variables `weights` and `bias` inside the
        'logistic_regression' variable scope and adds histogram summaries for
        X, y, weights and bias. If init_mean is not None, both variables use a
        random normal initializer with the given init_mean and init_stddev.
        (These may be set to 0.0 each if a zero initialization is desirable
        for convex use cases.) If init_mean is None, no initializer is passed
        and the `get_variable` default applies.
    """
    with vs.variable_scope('logistic_regression'):
        logging_ops.histogram_summary('logistic_regression.X', X)
        logging_ops.histogram_summary('logistic_regression.y', y)

        n_features = X.get_shape()[1]
        n_classes = y.get_shape()[-1]

        # Set up the requested initialization.
        if init_mean is None:
            weights = vs.get_variable('weights', [n_features, n_classes])
            bias = vs.get_variable('bias', [n_classes])
        else:
            weights = vs.get_variable(
                'weights',
                [n_features, n_classes],
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))
            bias = vs.get_variable(
                'bias', [n_classes],
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))
        logging_ops.histogram_summary('logistic_regression.weights', weights)
        logging_ops.histogram_summary('logistic_regression.bias', bias)

        # If no class weight provided, try to retrieve one from pre-defined
        # tensor name in the graph. The check is an identity test against
        # None: taking the truth value of a tensor (or a multi-element array)
        # raises, so `not class_weight` would break for tensor weights.
        if class_weight is None:
            try:
                class_weight = ops.get_default_graph().get_tensor_by_name(
                    'class_weight:0')
            except KeyError:
                # No such tensor in the graph: proceed without class weights.
                pass

        return softmax_classifier(X,
                                  y,
                                  weights,
                                  bias,
                                  class_weight=class_weight)
 def testHistogramSummary(self):
     """histogram_summary on a constant produces a HistogramSummary op."""
     with self.cached_session():
         value = constant_op.constant(3)
         summary = logging_ops.histogram_summary('tag', value)
         op_type = summary.op.type
         self.assertEqual(op_type, u'HistogramSummary')
Example #17
0
    def _setup_training(self):
        """Sets up graph, model and trainer.

        Builds a fresh graph containing the global step, the input/output
        tensors from the data feeder, the model produced by `self.model_fn`,
        the training op from `optimizers.optimize_loss`, the variable
        initializer and a saver; then opens a session and runs the
        initializer. Results are stored on `self` (`_graph`, `_global_step`,
        `_inp`, `_out`, `_model_predictions`, `_model_loss`, `_summaries`,
        `_train`, `_initializers`, `_saver`, `_session`).
        """
        # Create config if not given.
        if self._config is None:
            self._config = RunConfig(verbose=self.verbose)
        # Create new graph.
        self._graph = ops.Graph()
        # Flag stored in a graph collection so other code can look it up.
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            random_seed.set_random_seed(self._config.tf_random_seed)
            self._global_step = variables.Variable(0,
                                                   name="global_step",
                                                   trainable=False)

            # Setting up inputs and outputs.
            self._inp, self._out = self._data_feeder.input_builder()

            # If class weights are provided, add them to the graph.
            # Different loss functions can use this tensor by name.
            if self.class_weight:
                self._class_weight_node = constant_op.constant(
                    self.class_weight, name='class_weight')

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                logging_ops.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Set up a single operator to merge all the summaries
            # NOTE(review): this runs before optimize_loss is called below, so
            # any summaries that call creates would presumably not be part of
            # self._summaries -- confirm this is intended.
            self._summaries = logging_ops.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            learning_rate = self.learning_rate
            optimizer = self.optimizer
            # A callable learning rate is evaluated with the global step.
            if callable(learning_rate):
                learning_rate = learning_rate(self._global_step)
            # A callable optimizer is invoked with the resolved learning rate;
            # note that classes are callable too, so this also covers them.
            if callable(optimizer):
                optimizer = optimizer(learning_rate)
            self._train = optimizers.optimize_loss(
                self._model_loss,
                self._global_step,
                learning_rate=learning_rate,
                optimizer=optimizer,
                clip_gradients=self.clip_gradients)

            # Update ops during training, e.g. batch_norm_ops
            # The train op is grouped with everything in the 'update_ops'
            # collection, so running self._train runs those ops as well.
            self._train = control_flow_ops.group(
                self._train, *ops.get_collection('update_ops'))

            # Get all initializers for all trainable variables.
            self._initializers = variables.initialize_all_variables()

            # Create model's saver capturing all the nodes created up until now.
            self._saver = train.Saver(
                max_to_keep=self._config.keep_checkpoint_max,
                keep_checkpoint_every_n_hours=self._config.
                keep_checkpoint_every_n_hours)

            # Enable monitor to create validation data dict with appropriate tf placeholders
            self._monitor.create_val_feed_dict(self._inp, self._out)

            # Create session to run model with.
            self._session = session.Session(self._config.tf_master,
                                            config=self._config.tf_config)

            # Run parameter initializers.
            self._session.run(self._initializers)
Example #18
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: initial learning rate; it is handed to
                   `constant_initializer` to seed a non-trainable
                   "learning_rate" variable.
    optimizer: string or function, used as optimizer for training. A string is
               looked up in OPTIMIZER_CLS_NAMES; a callable is invoked as
               `optimizer(learning_rate=lr)` and must return the optimizer.
    clip_gradients: float or None, clips gradients by global norm to this
                    value.
    moving_average_decay: float or None. When not None, an exponential moving
                          average of the loss is maintained with this decay
                          and reported as the "loss/mean" summary.
    learning_rate_decay_fn: function, takes learning_rate and global_step
                            Tensors, returns Tensor. Can be used to implement
                            any learning rate decay functions.
                            For example: tf.train.exponential_decay.
    variables: list of variables to optimize, or None to use all trainable
               variables.

  Returns:
    The loss tensor, passed through `check_numerics` and carrying a control
    dependency on the gradient update op.

  Raises:
    ValueError: if optimizer is neither a string nor a callable.
    KeyError: presumably, if optimizer is a string missing from
      OPTIMIZER_CLS_NAMES (the lookup is a plain subscript).
  """
  # Moving average of the loss with decay.
  if moving_average_decay is not None:
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    # Evaluating the loss also refreshes its moving average.
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Convert optimizer into the optimizer class.
  if isinstance(optimizer, str):
    opt_cls = OPTIMIZER_CLS_NAMES[optimizer]
  elif callable(optimizer):
    opt_cls = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string or function.")

  # Learning rate variable, with possible decay.
  lr = vs.get_variable("learning_rate",
                       [],
                       trainable=False,
                       initializer=init_ops.constant_initializer(learning_rate))
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)

  # Create optimizer.
  opt = opt_cls(learning_rate=lr)

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients and clip them if provided.
  gradients = opt.compute_gradients(loss, variables)
  if clip_gradients is not None:
    # compute_gradients yields (gradient, variable) pairs, while
    # clip_by_global_norm expects the bare gradients: split, clip, re-pair.
    raw_gradients, gradient_variables = zip(*gradients)
    clipped_gradients, _ = clip_ops.clip_by_global_norm(raw_gradients,
                                                        clip_gradients)
    # Materialize the pairs: they are iterated by the summary loop below and
    # again by apply_gradients, which a one-shot zip iterator cannot serve.
    gradients = list(zip(clipped_gradients, gradient_variables))

  # Add scalar summary for loss.
  logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    # A variable that does not influence the loss has a None gradient, which
    # cannot be summarized; skip it.
    if grad_values is not None:
      logging_ops.histogram_summary(variable.name, variable)
      logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
      logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                    clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")
  # Make sure total_loss is valid.
  final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

  return train_tensor
    def __init__(self, params, infer=False):
        """Build the recurrent model graph, its loss and its training op.

        Args:
            params: dict read for the keys 'nlayer', 'n_hidden', 'n_output',
                'seq_length', 'n_input' and 'batch_size'.
            infer: not referenced anywhere in this constructor.

        Attributes set on the instance: `is_training`, `output_keep_prob`,
        `cell`, `input_data`, `input_zero`, `target_data`, `initial_state`,
        `final_output`, `cost`, `final_state`, `lr` and `train_op`.
        """

        # Placeholders fed at run time.
        self.is_training = tf.placeholder(tf.bool)
        # Only referenced by the commented-out DropoutWrapper line below.
        self.output_keep_prob = tf.placeholder(tf.float32)

        num_layers = params['nlayer']
        rnn_size = params['n_hidden']
        # Global-norm threshold passed to tf.clip_by_global_norm below.
        grad_clip = 10

        cell_fn = tf.nn.rnn_cell.BasicLSTMCell
        cell = cell_fn(rnn_size)  #RNN size
        # The list repeats the same cell object num_layers times.
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
        # cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = self.output_keep_prob)
        self.cell = cell

        NOUT = params['n_output']  # end_of_stroke + prob + 2*(mu + sig) + corr
        self.input_data = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        # Same shape as input_data; multiplied into the noise below
        # (presumably a mask selecting where noise is applied -- TODO confirm).
        self.input_zero = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        self.target_data = tf.placeholder(
            tf.float32,
            [params["batch_size"] * params["seq_length"], params["n_output"]])
        self.initial_state = cell.zero_state(batch_size=params['batch_size'],
                                             dtype=tf.float32)

        # Zero-mean normal noise, multiplied by input_zero, added to the
        # input and passed through relu.
        ran_noise = tf.random_normal(shape=[
            params["batch_size"], params['seq_length'], params['n_input']
        ],
                                     mean=0,
                                     stddev=0.00008)
        ran_noise = tf.mul(ran_noise, self.input_zero)
        tmp_input = tf.nn.relu(self.input_data + ran_noise)
        # Choose between the noisy and the raw input based on is_training.
        # NOTE(review): this rebinds self.input_data from the placeholder to
        # the select output -- verify callers that feed self.input_data.
        self.input_data = tf.select(self.is_training, tmp_input,
                                    self.input_data)

        # Unroll the cell over the sequence, one time step per iteration;
        # variables are reused after the first step.
        outputs = []
        state = self.initial_state
        with tf.variable_scope("rnnlm"):
            for time_step in range(params['seq_length']):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(self.input_data[:, time_step, :],
                                            state)
                outputs.append(cell_output)
        # Concatenate the per-step outputs along axis 1 and flatten them to
        # rows of width n_hidden.
        rnn_output = tf.reshape(tf.concat(1, outputs),
                                [-1, params['n_hidden']])

        with tf.variable_scope('rnnlm'):
            output_w1 = tf.get_variable("output_w1", [rnn_size, rnn_size])
            output_b1 = tf.get_variable("output_b1", [rnn_size])

            output_w2 = tf.get_variable("output_w2", [rnn_size, NOUT])
            # NOTE(review): the variable is named "output_b3" although it is
            # bound to output_b2 -- renaming would change the variable name in
            # the graph, so confirm before touching it.
            output_b2 = tf.get_variable("output_b3", [NOUT])

        # Two stacked affine layers; no activation is applied in between.
        hidden_1 = tf.add(tf.matmul(rnn_output, output_w1), output_b1)
        self.final_output = tf.add(tf.matmul(hidden_1, output_w2), output_b2)

        # L2 loss between the prediction and the target.
        tmp = self.final_output - self.target_data
        loss = tf.nn.l2_loss(tmp)
        self.cost = tf.reduce_mean(loss)
        self.final_state = state
        # The summary records `loss`, not `self.cost`.
        tf.scalar_summary('losses/total_loss', loss)

        # Non-trainable learning rate initialized to 0.0 (presumably assigned
        # by the training loop before use -- TODO confirm).
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        # Histogram summaries for each clipped gradient and its norm.
        # NOTE(review): grad.op is accessed unconditionally, which assumes no
        # entry of grads is None -- confirm every trainable variable
        # contributes to self.cost.
        for grad in grads:
            # if isinstance(grad, ops.grads):
            #   grad_values = grad.values
            # else:
            #   grad_values = grad
            grad_values = grad
            logging_ops.histogram_summary(grad.op.name + ':gradient',
                                          grad_values)
            logging_ops.histogram_summary(grad.op.name + ':gradient_norm',
                                          clip_ops.global_norm([grad_values]))
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #20
0
 def testHistogramSummary(self):
   """histogram_summary on a constant produces a HistogramSummary op."""
   with self.cached_session():
     value = constant_op.constant(3)
     summary = logging_ops.histogram_summary('tag', value)
     op_type = summary.op.type
     self.assertEqual(op_type, u'HistogramSummary')
Example #21
0
def create_train_op(
    total_loss,
    optimizer,
    global_step=None,
    update_ops=None,
    variables_to_train=None,
    clip_gradient_norm=0,
    summarize_gradients=False,
    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
    aggregation_method=None,
    colocate_gradients_with_ops=False):
  """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `None`, then `variables.get_or_create_global_step()` is used.
    update_ops: an optional list of updates to execute before the loss is
      computed. Only the ops passed here are used; when `None` or empty, no
      update ops are attached (this function does not consult any graph
      collection for them).
    variables_to_train: an optional list of variables to train. If None, it will
      default to all tf.trainable_variables(). Otherwise every entry is
      asserted to be one of the trainable variables.
    clip_gradient_norm: If greater than 0 then the gradients would be clipped
      by it.
    summarize_gradients: Whether or not add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.

  Returns:
    A `Tensor` that when evaluated, computes the gradients and returns the total
      loss value.

  Raises:
    AssertionError: if `variables_to_train` contains a non-trainable variable
      or the resulting list of variables is empty.
  """
  if global_step is None:
    global_step = variables.get_or_create_global_step()

  update_ops = set(update_ops or [])

  # Make sure update_ops are computed before total_loss.
  if update_ops:
    # A single no-op that depends on every update op acts as a barrier the
    # loss can depend on.
    with control_flow_ops.control_dependencies(update_ops):
      barrier = control_flow_ops.no_op(name='update_barrier')
    total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

  if variables_to_train is None:
    # Default to tf.trainable_variables()
    variables_to_train = tf_variables.trainable_variables()
  else:
    # Make sure that variables_to_train are in tf.trainable_variables().
    # The collection is fetched once rather than once per checked variable.
    trainable = tf_variables.trainable_variables()
    for v in variables_to_train:
      assert v in trainable

  assert variables_to_train

  # Create the gradients. Note that apply_gradients adds the gradient
  # computation to the current graph.
  grads = optimizer.compute_gradients(
      total_loss, variables_to_train, gate_gradients=gate_gradients,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)

  # Clip gradients.
  if clip_gradient_norm > 0:
    grads = clip_gradient_norms(grads, clip_gradient_norm)

  # Summarize gradients.
  if summarize_gradients:
    for grad, var in grads:
      if grad is not None:
        # Sparse gradients are summarized through their values tensor.
        if isinstance(grad, ops.IndexedSlices):
          grad_values = grad.values
        else:
          grad_values = grad
        logging_ops.histogram_summary(var.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(var.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
      else:
        logging.info('Var %s has no gradient', var.op.name)

  # Create gradient updates.
  grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

  # Make sure total_loss is valid.
  total_loss = array_ops.check_numerics(total_loss, 'LossTensor is inf or nan')

  # Ensure the train_tensor computes grad_updates.
  return control_flow_ops.with_dependencies([grad_updates], total_loss)
 def _add_hidden_layer_summary(self, value, tag):
     """Emit the summaries describing one hidden layer's output.

     Writes a scalar summary of `nn.zero_fraction(value)` and a histogram
     summary of `value`, both tagged under the `tag` prefix.
     """
     # TODO(zakaria): Move this code to tf.learn and add test.
     zero_fraction_tag = "%s:fraction_of_zero_values" % tag
     logging_ops.scalar_summary(zero_fraction_tag, nn.zero_fraction(value))

     activation_tag = "%s:activation" % tag
     logging_ops.histogram_summary(activation_tag, value)
Example #23
0
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
    """Build the op that performs one optimizer step on `loss`.

    Args:
        loss: Tensor to minimize (documented by the original author as
            0-dimensional).
        global_step: Tensor step counter. It is handed to
            `learning_rate_decay_fn` and to `apply_gradients`.
        learning_rate: Python float or 0-d Tensor. A float is stored in a
            non-trainable variable named "learning_rate".
        optimizer: One of
            * a string key of OPTIMIZER_CLS_NAMES (the original docs cite
              'SGD', 'Adam' and 'Adagrad' as examples),
            * an Optimizer subclass, instantiated with `learning_rate=`,
            * an Optimizer instance, used unchanged.
        gradient_noise_scale: float or None. When set, the gradients are
            passed through `_add_scaled_noise_to_gradients`.
        gradient_multipliers: dict or None. When set, the gradients are
            passed through `_multiply_gradients`.
        clip_gradients: float or None. When set, the gradients are passed
            through `_clip_gradients_by_norm`.
        moving_average_decay: Deprecated. float or None. When set, an
            exponential moving average of the loss is maintained and
            reported under the "loss/mean" summary.
        learning_rate_decay_fn: Callable taking `(learning_rate,
            global_step)` and returning the decayed rate Tensor, or None.
        update_ops: Operations that must run before the loss is evaluated.
            `None` means the contents of the UPDATE_OPS graph collection.
        variables: Variables to optimize, or `None` for all trainable
            variables.
        name: Scope name for the created ops; defaults to "OptimizeLoss".
        summaries: Names of the summaries to emit. The names checked here
            are "loss", "learning_rate", "gradients" and "gradient_norm".
            Defaults to ["loss", "learning_rate"]. The learning-rate
            summary is only written when `learning_rate_decay_fn` is given.

    Returns:
        The loss tensor, wrapped so that evaluating it also runs the
        gradient update.

    Raises:
        ValueError: if `learning_rate` is neither a float nor a 0-d Tensor,
            or if `optimizer` is not one of the accepted forms.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Fall back to the graph-wide UPDATE_OPS collection.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Force the update ops to run before the loss is computed.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Deprecated: exponential moving average of the loss.
        # TODO(b/30439864): moving_average_decay should be removed.
        if moving_average_decay is not None:
            logging.warn("'moving_average_decay' is deprecated. Please use "
                         "tensorboard's builtin averaging instead.")
            averager = train.ExponentialMovingAverage(moving_average_decay,
                                                      name="avg")
            apply_average = averager.apply([loss])
            logging_ops.scalar_summary("loss/mean", averager.average(loss))
            loss = control_flow_ops.with_dependencies([apply_average], loss)

        # Resolve the learning rate to a tensor or variable.
        is_scalar_tensor = (isinstance(learning_rate, ops.Tensor)
                            and learning_rate.get_shape().ndims == 0)
        if is_scalar_tensor:
            rate = learning_rate
        elif isinstance(learning_rate, float):
            rate = vs.get_variable(
                "learning_rate", [],
                trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        # Apply the decay schedule; the rate is only summarized here.
        if learning_rate_decay_fn is not None:
            rate = learning_rate_decay_fn(rate, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", rate)

        # Turn the `optimizer` argument into an optimizer instance.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            optimizer_cls = OPTIMIZER_CLS_NAMES[optimizer]
            trainer = optimizer_cls(learning_rate=rate)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            trainer = optimizer(learning_rate=rate)
        elif isinstance(optimizer, optimizer_.Optimizer):
            trainer = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # Default to every trainable variable.
        if variables is None:
            variables = vars_.trainable_variables()

        # Gradient pipeline: compute -> noise -> multipliers -> clipping.
        grads_and_vars = trainer.compute_gradients(loss, variables)
        if gradient_noise_scale is not None:
            grads_and_vars = _add_scaled_noise_to_gradients(
                grads_and_vars, gradient_noise_scale)
        if gradient_multipliers is not None:
            grads_and_vars = _multiply_gradients(grads_and_vars,
                                                 gradient_multipliers)
        if clip_gradients is not None:
            grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                     clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Per-variable histograms of the gradients and of their norms.
        for grad, var in grads_and_vars:
            # For IndexedSlices, summarize the underlying values tensor.
            values = (grad.values
                      if isinstance(grad, ops.IndexedSlices) else grad)
            if values is None:
                continue
            if "gradients" in summaries:
                logging_ops.histogram_summary(var.name + "/gradients", values)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(
                    var.name + "/gradient_norm",
                    clip_ops.global_norm([values]))

        # Apply the updates and tie them to the returned loss tensor.
        apply_op = trainer.apply_gradients(grads_and_vars,
                                           global_step=global_step,
                                           name="train")
        return control_flow_ops.with_dependencies([apply_op], loss)
Example #24
0
 def _add_hidden_layer_summary(self, value, tag):
     """Record a zero-fraction scalar and an activation histogram.

     Args:
       value: Output of the hidden layer being summarized.
       tag: Prefix for the two summary tags.
     """
     # TODO(zakaria): Move this code to tf.learn and add test.
     scalar_tag = "%s:fraction_of_zero_values" % tag
     zero_share = nn.zero_fraction(value)
     logging_ops.scalar_summary(scalar_tag, zero_share)

     histogram_tag = "%s:activation" % tag
     logging_ops.histogram_summary(histogram_tag, value)
Example #25
0
    def __init__(self, params, infer=False):
        """Build the stacked-LSTM regression graph and its training op.

        Args:
            params: dict of hyper-parameters. Keys read here: 'nlayer',
                'n_hidden', 'n_input', 'n_output', 'seq_length',
                'batch_size' and 'noise_std'.
            infer: Not read anywhere in this constructor; kept so callers
                that pass it keep working.

        Attributes set:
            is_training, output_keep_prob, input_data, input_zero,
            repeat_data, target_data: graph inputs.
            cell, initial_state, final_state: the stacked RNN cell and its
                state tensors.
            final_output: linear projection of the RNN outputs.
            cost: L2 loss over the rows selected by `repeat_data`.
            lr: non-trainable learning-rate variable (initialised to 0.0).
            total_parameters: number of scalar trainable parameters.
            train_op: Adam update applying the clipped gradients.
        """
        # Run-time switches: training/eval selector for the input noise and
        # the keep probability shared by all dropout wrappers.
        self.is_training = tf.placeholder(tf.bool)
        self.output_keep_prob = tf.placeholder(tf.float32)

        num_layers = params['nlayer']
        rnn_size = params['n_hidden']
        grad_clip = 10  # threshold handed to tf.clip_by_global_norm

        # One LSTM cell per layer, each with dropout applied to its output.
        cell_lst = []
        for _ in range(num_layers):
            cell = tf.nn.rnn_cell.LSTMCell(
                rnn_size,
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False),
                forget_bias=1.0)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.output_keep_prob)
            cell_lst.append(cell)

        cell = tf.nn.rnn_cell.MultiRNNCell(cell_lst)
        self.cell = cell

        # Width of the output projection.
        # NOTE(review): the original comment here read
        # "end_of_stroke + prob + 2*(mu + sig) + corr"; confirm it still
        # describes params['n_output'] for this model.
        NOUT = params['n_output']
        self.input_data = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        # Declared but not consumed by any op built in this constructor.
        self.input_zero = tf.placeholder(
            dtype=tf.float32,
            shape=[None, params['seq_length'], params['n_input']])
        self.repeat_data = tf.placeholder(dtype=tf.int32,
                                          shape=[None, params['seq_length']])
        self.target_data = tf.placeholder(
            tf.float32, [None, params["seq_length"], params["n_output"]])
        self.initial_state = cell.zero_state(batch_size=params['batch_size'],
                                             dtype=tf.float32)

        # Gaussian input noise, built only when noise_std > 0 and selected
        # only while is_training is true. The noisy input goes through a
        # ReLU before being used.
        if params["noise_std"] > 0.0:
            ran_noise = tf.random_normal(shape=[
                params["batch_size"], params['seq_length'], params['n_input']
            ],
                                         mean=0,
                                         stddev=params['noise_std'])
            tmp_input = tf.nn.relu(self.input_data + ran_noise)
            # NOTE(review): this rebinds self.input_data from the
            # placeholder to the select output. If callers feed
            # self.input_data through feed_dict, the fed value would
            # replace this tensor and bypass the noise - confirm intent.
            self.input_data = tf.select(self.is_training, tmp_input,
                                        self.input_data)

        # Unroll the RNN by hand over the sequence axis, sharing variables
        # after the first step.
        outputs = []
        state = self.initial_state
        with tf.variable_scope("rnnlm"):
            for time_step in range(params['seq_length']):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(self.input_data[:, time_step, :],
                                            state)
                outputs.append(cell_output)
        # Stack the per-step outputs, swap the first two axes and flatten
        # to rows of width n_hidden (presumably batch-major rows after the
        # transpose - TODO confirm).
        rnn_output = tf.reshape(tf.transpose(tf.pack(outputs), [1, 0, 2]),
                                [-1, params['n_hidden']])

        # Linear output layer.
        with tf.variable_scope('rnnlm'):
            output_w1 = tf.get_variable(
                "output_w1", [rnn_size, NOUT],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b1 = tf.get_variable("output_b1", [NOUT])

        self.final_output = tf.add(tf.matmul(rnn_output, output_w1), output_b1)

        # Keep only the rows whose repeat_data entry is non-zero, then take
        # the L2 loss of the prediction error on those rows.
        flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
        where_flt = tf.not_equal(flt, 0)
        indices = tf.where(where_flt)
        tmp = self.final_output - tf.reshape(self.target_data,
                                             [-1, params["n_output"]])
        tmp = tf.gather(tmp, tf.squeeze(indices, [1]))
        loss = tf.nn.l2_loss(tmp)
        self.cost = tf.reduce_mean(loss)
        self.final_state = state
        tf.scalar_summary('losses/total_loss', loss)

        # Learning rate starts at 0.0; presumably assigned by the training
        # loop before the first step - verify against the caller.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()

        # Count scalar parameters: product of each variable's dimensions.
        total_parameters = 0
        for variable in tvars:
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        self.total_parameters = total_parameters

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        for grad in grads:
            # tf.gradients yields None for variables the cost does not
            # depend on, and clip_by_global_norm passes None through.
            # Skip those entries instead of failing on `grad.op`.
            if grad is None:
                continue
            logging_ops.histogram_summary(grad.op.name + ':gradient', grad)
            logging_ops.histogram_summary(grad.op.name + ':gradient_norm',
                                          clip_ops.global_norm([grad]))
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))