def testWithTensorDependencies(self):
    with self.test_session():
      v = tf.Variable(0.0)
      c1 = tf.constant(10)
      c2 = tf.constant(20)

      # c1_with_init_v depends on the init op for v
      c1_with_init_v = control_flow_ops.with_dependencies(
          name="c1_with_init_v",
          output_tensor=c1,
          dependencies=[v.initializer])
      # c2_with_c1 depends on the value of c1_with_init_v
      c2_with_c1_dep = control_flow_ops.with_dependencies(
          name="c2_with_c1_dep",
          output_tensor=c2,
          dependencies=[c1_with_init_v])

      # Fetching v directly will result in an uninitialized error
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        v.eval()

      # Get the value of 'c2_with_c1_dep', which should cause 'v'
      # to be initialized.
      self.assertAllEqual(20, c2_with_c1_dep.eval())

      # Ensure that 'v' is initialized
      self.assertAllClose(0.0, v.eval())
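
The pattern exercised above can also be written with the public tf.control_dependencies context manager, which is equivalent in effect to control_flow_ops.with_dependencies. A minimal sketch, assuming a TF 1.x graph-mode session (hypothetical names):

import tensorflow as tf

v = tf.Variable(0.0)
c = tf.constant(10)
with tf.control_dependencies([v.initializer]):
    # Evaluating c_with_init also runs v.initializer as a side effect.
    c_with_init = tf.identity(c)

with tf.Session() as sess:
    print(sess.run(c_with_init))  # 10; v is now initialized
    print(sess.run(v))            # 0.0
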
    def testIndexedSlices(self):
        for v1_first in [True, False]:
            with self.test_session():
                v1 = tf.Variable(np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32))
                v1_at_1 = tf.IndexedSlices(
                    control_flow_ops.with_dependencies([v1.initializer], v1.ref()), tf.constant([1])
                )

                v2 = tf.Variable(np.array([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]]).astype(np.float32))
                v2_at_1 = tf.IndexedSlices(
                    control_flow_ops.with_dependencies([v2.initializer], v2.ref()), tf.constant([1])
                )

                st1, st2 = control_flow_ops.tuple([v1_at_1, v2_at_1])
                g1 = tf.gather(st1.values, st1.indices)
                g2 = tf.gather(st2.values, st2.indices)

                # v1 is not initialized.
                with self.assertRaisesOpError("Attempting to use uninitialized value"):
                    v1.eval()

                # v2 is not initialized.
                with self.assertRaisesOpError("Attempting to use uninitialized value"):
                    v2.eval()

                if v1_first:
                    # Getting g1 initializes v2.
                    self.assertAllClose([[10.0, 11.0]], g1.eval())
                    self.assertAllClose([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]], v2.eval())
                else:
                    # Getting g2 initializes v1.
                    self.assertAllClose([[10.1, 11.1]], g2.eval())
                    self.assertAllClose([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]], v1.eval())
  def _check_shapes_dynamic(self, operator, v, diag):
    """Return (v, diag) with Assert dependencies, which check shape."""
    checks = []
    with ops.op_scope([operator, v, diag], 'check_shapes'):
      s_v = array_ops.shape(v)
      r_op = operator.rank()
      r_v = array_ops.rank(v)
      if diag is not None:
        s_d = array_ops.shape(diag)
        r_d = array_ops.rank(diag)

      # Check tensor rank.
      checks.append(check_ops.assert_rank(v, r_op))
      if diag is not None:
        checks.append(check_ops.assert_rank(diag, r_op - 1))

      # Check batch shape
      checks.append(check_ops.assert_equal(
          operator.batch_shape(), array_ops.slice(s_v, [0], [r_v - 2])))
      if diag is not None:
        checks.append(check_ops.assert_equal(
            operator.batch_shape(), array_ops.slice(s_d, [0], [r_d - 1])))

      # Check event shape
      checks.append(check_ops.assert_equal(
          operator.vector_space_dimension(), array_ops.gather(s_v, r_v - 2)))
      if diag is not None:
        checks.append(check_ops.assert_equal(
            array_ops.gather(s_v, r_v - 1), array_ops.gather(s_d, r_d - 1)))

      v = control_flow_ops.with_dependencies(checks, v)
      if diag is not None:
        diag = control_flow_ops.with_dependencies(checks, diag)
      return v, diag
  def testTensors(self):
    for v1_first in [True, False]:
      with self.test_session():
        v1 = tf.Variable([1.0])
        add1 = tf.add(
            control_flow_ops.with_dependencies([v1.initializer], v1.ref()),
            2.0)
        v2 = tf.Variable([10.0])
        add2 = tf.add(
            control_flow_ops.with_dependencies([v2.initializer], v2.ref()),
            20.0)
        t1, _, t2 = control_flow_ops.tuple([add1, None, add2])

        # v1 is not initialized.
        with self.assertRaisesOpError("Attempting to use uninitialized value"):
          v1.eval()

        # v2 is not initialized.
        with self.assertRaisesOpError("Attempting to use uninitialized value"):
          v2.eval()

        if v1_first:
          # Getting t1 initializes v2.
          self.assertAllClose([3.0], t1.eval())
          self.assertAllClose([10.0], v2.eval())
        else:
          # Getting t2 initializes v1.
          self.assertAllClose([30.0], t2.eval())
          self.assertAllClose([1.0], v1.eval())
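
For reference, control_flow_ops.tuple returns tensors that each carry control dependencies on all of the input tensors, which is why fetching t1 alone above also forces v2's initializer to run. A minimal sketch with hypothetical names, assuming TF 1.x graph mode:

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

a = tf.constant(1.0) + tf.constant(2.0)
b = tf.constant(3.0) * tf.constant(4.0)
ta, tb = control_flow_ops.tuple([a, b])
# ta and tb evaluate to the same values as a and b, but each output waits
# for the ops producing both a and b, so fetching ta alone also computes b.
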
Example #5
File: util.py  Project: TalkingData/edward
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
        x n where each row represents the mean of a n-dimensional
        Gaussian.
    scale_one : tf.Tensor
        A tensor of same shape as ``loc_one``, representing the
        standard deviation.
    loc_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        mean of another Gaussian.
    scale_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        standard deviation of another Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, outputs the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, outputs the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    loc_one = tf.cast(loc_one, tf.float32)
    scale_one = tf.cast(scale_one, tf.float32)

    if loc_two == 0.0 and scale_two == 1.0:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
              1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        loc_two = tf.cast(loc_two, tf.float32)
        scale_two = tf.cast(scale_two, tf.float32)
        out = tf.square(scale_one/scale_two) + \
              tf.square((loc_two - loc_one)/scale_two) - \
              1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1: # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else: # matrix
        return 0.5 * tf.reduce_sum(out, 1)
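
For reference, the expression assembled above is the standard closed form for the KL divergence between diagonal Gaussians, summed over dimensions:

$\mathrm{KL}\big(\mathcal{N}(\mu_1, \mathrm{diag}(\sigma_1^2)) \,\|\, \mathcal{N}(\mu_2, \mathrm{diag}(\sigma_2^2))\big)
    = \frac{1}{2} \sum_{d=1}^{n} \Big[ \frac{\sigma_{1,d}^2}{\sigma_{2,d}^2}
    + \frac{(\mu_{2,d} - \mu_{1,d})^2}{\sigma_{2,d}^2}
    - 1 + 2 \log \sigma_{2,d} - 2 \log \sigma_{1,d} \Big]$

with loc = $\mu$ and scale = $\sigma$; the default branch is the special case $\mu_2 = 0$, $\sigma_2 = 1$, where the $\log \sigma_2$ term vanishes.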
Example #6
def _verify_input(tensor_list, labels, probs_list):
  """Verify that batched inputs are well-formed."""
  checked_probs_list = []
  for probs in probs_list:
    # Since number of classes shouldn't change at runtime, probabilities shape
    # should be fully defined.
    probs.get_shape().assert_is_fully_defined()

    # Probabilities must be 1D.
    probs.get_shape().assert_has_rank(1)

    # Probabilities must be nonnegative and sum to one.
    tol = 1e-6
    prob_sum = math_ops.reduce_sum(probs)
    checked_probs = control_flow_ops.with_dependencies([
        check_ops.assert_non_negative(probs),
        check_ops.assert_less(prob_sum, 1.0 + tol),
        check_ops.assert_less(1.0 - tol, prob_sum)
    ], probs)
    checked_probs_list.append(checked_probs)

  # All probabilities should be the same length.
  prob_length = checked_probs_list[0].get_shape().num_elements()
  for checked_prob in checked_probs_list:
    if checked_prob.get_shape().num_elements() != prob_length:
      raise ValueError('Probability parameters must have the same length.')

  # Labels tensor should only have batch dimension.
  labels.get_shape().assert_has_rank(1)

  for tensor in tensor_list:
    # Data tensor should have a batch dimension.
    shape = tensor.get_shape().with_rank_at_least(1)

    # Data and label batch dimensions must be compatible.
    tensor_shape.dimension_at_index(shape, 0).assert_is_compatible_with(
        labels.get_shape()[0])

  # Data and labels must have the same, strictly positive batch size. Since we
  # can't assume we know the batch size at graph creation, add runtime checks.
  labels_batch_size = array_ops.shape(labels)[0]
  lbl_assert = check_ops.assert_positive(labels_batch_size)

  # Make each tensor depend on its own checks.
  labels = control_flow_ops.with_dependencies([lbl_assert], labels)
  tensor_list = [
      control_flow_ops.with_dependencies([
          lbl_assert,
          check_ops.assert_equal(array_ops.shape(x)[0], labels_batch_size)
      ], x) for x in tensor_list
  ]

  # Label's classes must be integers 0 <= x < num_classes.
  labels = control_flow_ops.with_dependencies([
      check_ops.assert_integer(labels), check_ops.assert_non_negative(labels),
      check_ops.assert_less(labels, math_ops.cast(prob_length, labels.dtype))
  ], labels)

  return tensor_list, labels, checked_probs_list
Example #7
 def _maybe_attach_assertion(x):
   if not validate_args:
     return x
   if assert_positive:
     return control_flow_ops.with_dependencies([
         tf.assert_positive(x, message="diagonal part must be positive"),
     ], x)
   return control_flow_ops.with_dependencies([
       tf.assert_none_equal(
           x, tf.zeros([], x.dtype), message="diagonal part must be non-zero")
   ], x)
Example #8
def rbf(X, X2=None, lengthscale=1.0, variance=1.0):
  """Radial basis function kernel, also known as the squared
  exponential or exponentiated quadratic. It is defined as

  $k(x, x') = \sigma^2 \exp\Big(
      -\\frac{1}{2} \sum_{d=1}^D \\frac{1}{\ell_d^2} (x_d - x'_d)^2 \Big)$

  for output variance $\sigma^2$ and lengthscale $\ell^2$.

  The kernel is evaluated over all pairs of rows, `k(X[i, ], X2[j, ])`.
  If `X2` is not specified, then it evaluates over all pairs
  of rows in `X`, `k(X[i, ], X[j, ])`. The output is a matrix
  where each entry (i, j) is the kernel over the ith and jth rows.

  Args:
    X: tf.Tensor.
      N x D matrix of N data points each with D features.
    X2: tf.Tensor.
      N x D matrix of N data points each with D features.
    lengthscale: tf.Tensor.
      Lengthscale parameter, a positive scalar or D-dimensional vector.
    variance: tf.Tensor.
      Output variance parameter, a positive scalar.

  #### Examples

  ```python
  X = tf.random_normal([100, 5])
  K = ed.rbf(X)
  assert K.shape == (100, 100)
  ```
  """
  lengthscale = tf.convert_to_tensor(lengthscale)
  variance = tf.convert_to_tensor(variance)
  dependencies = [tf.assert_positive(lengthscale),
                  tf.assert_positive(variance)]
  lengthscale = control_flow_ops.with_dependencies(dependencies, lengthscale)
  variance = control_flow_ops.with_dependencies(dependencies, variance)

  X = tf.convert_to_tensor(X)
  X = X / lengthscale
  Xs = tf.reduce_sum(tf.square(X), 1)
  if X2 is None:
    X2 = X
    X2s = Xs
  else:
    X2 = tf.convert_to_tensor(X2)
    X2 = X2 / lengthscale
    X2s = tf.reduce_sum(tf.square(X2), 1)

  square = tf.reshape(Xs, [-1, 1]) + tf.reshape(X2s, [1, -1]) - \
      2 * tf.matmul(X, X2, transpose_b=True)
  output = variance * tf.exp(-square / 2)
  return output
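
For reference, the reshape/matmul line computes all pairwise squared distances through the expansion $\|x - x'\|^2 = \|x\|^2 + \|x'\|^2 - 2\, x^\top x'$ applied to the lengthscale-scaled rows, so that entry (i, j) of the output is $\sigma^2 \exp(-\|X_i/\ell - X2_j/\ell\|^2 / 2)$, matching the kernel definition in the docstring.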
Example #9
File: util.py  Project: leezqcst/edward
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of a n-dimensional Gaussian
    scale_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of a n-dimensional Gaussian
    loc_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of a n-dimensional Gaussian
    scale_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of a n-dimensional Gaussian

    Returns
    -------
    tf.Tensor
        for scalar or vector inputs, outputs the scalar
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        for matrix inputs, outputs the vector
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)

    if loc_two == 0.0 and scale_two == 1.0:
        return 0.5 * tf.reduce_sum(
            tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one))
    else:
        return 0.5 * tf.reduce_sum(
            tf.square(scale_one/scale_two) + \
            tf.square((loc_two - loc_one)/scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one), 1)
Example #10
  def _check_domain_range_possibly_add_asserts(self):
    """Static check of init arg `num_rows`, possibly add asserts."""
    # Possibly add asserts.
    if self._assert_proper_shapes:
      self._num_rows = control_flow_ops.with_dependencies([
          check_ops.assert_rank(
              self._num_rows,
              0,
              message="Argument num_rows must be a 0-D Tensor."),
          check_ops.assert_non_negative(
              self._num_rows,
              message="Argument num_rows must be non-negative."),
      ], self._num_rows)
      self._num_columns = control_flow_ops.with_dependencies([
          check_ops.assert_rank(
              self._num_columns,
              0,
              message="Argument num_columns must be a 0-D Tensor."),
          check_ops.assert_non_negative(
              self._num_columns,
              message="Argument num_columns must be non-negative."),
      ], self._num_columns)

    # Static checks.
    if not self._num_rows.dtype.is_integer:
      raise TypeError("Argument num_rows must be integer type.  Found:"
                      " %s" % self._num_rows)

    if not self._num_columns.dtype.is_integer:
      raise TypeError("Argument num_columns must be integer type.  Found:"
                      " %s" % self._num_columns)

    num_rows_static = self._num_rows_static
    num_columns_static = self._num_columns_static

    if num_rows_static is not None:
      if num_rows_static.ndim != 0:
        raise ValueError("Argument num_rows must be a 0-D Tensor.  Found:"
                         " %s" % num_rows_static)

      if num_rows_static < 0:
        raise ValueError("Argument num_rows must be non-negative.  Found:"
                         " %s" % num_rows_static)
    if num_columns_static is not None:
      if num_columns_static.ndim != 0:
        raise ValueError("Argument num_columns must be a 0-D Tensor.  Found:"
                         " %s" % num_columns_static)

      if num_columns_static < 0:
        raise ValueError("Argument num_columns must be non-negative.  Found:"
                         " %s" % num_columns_static)
Example #11
    def setUpClass(cls):
        cls._dump_root = tempfile.mkdtemp()

        cls._is_gpu_available = test.is_gpu_available()
        if cls._is_gpu_available:
            cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
        else:
            cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

        with session.Session() as sess:
            x_init_val = np.array([5.0, 3.0])
            x_init = constant_op.constant(x_init_val, shape=[2])
            x = variables.Variable(x_init, name="control_deps/x")

            y = math_ops.add(x, x, name="control_deps/y")
            y = control_flow_ops.with_dependencies([x], y, name="control_deps/ctrl_dep_y")

            z = math_ops.mul(x, y, name="control_deps/z")

            z = control_flow_ops.with_dependencies([x, y], z, name="control_deps/ctrl_dep_z")

            x.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(
                run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls="file://%s" % cls._dump_root
            )

            # Invoke Session.run().
            run_metadata = config_pb2.RunMetadata()
            sess.run(z, options=run_options, run_metadata=run_metadata)

        debug_dump = debug_data.DebugDumpDir(cls._dump_root, partition_graphs=run_metadata.partition_graphs)

        # Construct the analyzer.
        analyzer = analyzer_cli.DebugAnalyzer(debug_dump)

        # Construct the handler registry.
        cls._registry = debugger_cli_common.CommandHandlerRegistry()

        # Register command handlers.
        cls._registry.register_command_handler(
            "node_info", analyzer.node_info, analyzer.get_help("node_info"), prefix_aliases=["ni"]
        )
        cls._registry.register_command_handler(
            "list_inputs", analyzer.list_inputs, analyzer.get_help("list_inputs"), prefix_aliases=["li"]
        )
        cls._registry.register_command_handler(
            "list_outputs", analyzer.list_outputs, analyzer.get_help("list_outputs"), prefix_aliases=["lo"]
        )
Example #12
  def _initialize_variables(self, data, initial_means=None):
    """Initializes variables.

    Args:
      data: a list of Tensors with data, each row is a new example.
      initial_means: a Tensor with a matrix of means.
    """
    first_shard = data[0]
    # Initialize means: num_classes X 1 X dimensions.
    if initial_means is not None:
      means = array_ops.expand_dims(initial_means, 1)
    else:
      # Sample data randomly
      means = array_ops.expand_dims(
          _init_clusters_random(data, self._num_classes, self._random_seed), 1)

    # Initialize covariances.
    if self._covariance_type == FULL_COVARIANCE:
      cov = _covariance(first_shard, False) + self._min_var
      # A matrix per class, num_classes X dimensions X dimensions
      covs = array_ops.tile(
          array_ops.expand_dims(cov, 0), [self._num_classes, 1, 1])
    elif self._covariance_type == DIAG_COVARIANCE:
      cov = _covariance(first_shard, True) + self._min_var
      # A diagonal per row, num_classes X dimensions.
      covs = array_ops.tile(
          array_ops.expand_dims(array_ops.diag_part(cov), 0),
          [self._num_classes, 1])

    with ops.colocate_with(self._cluster_centers_initialized):
      initialized = control_flow_ops.with_dependencies(
          [means, covs],
          array_ops.identity(self._cluster_centers_initialized))
    self._init_ops = []
    with ops.colocate_with(self._means):
      init_means = state_ops.assign(self._means, means, validate_shape=False)
      init_means = control_flow_ops.with_dependencies(
          [init_means],
          state_ops.assign(self._cluster_centers_initialized, True))
      self._init_ops.append(control_flow_ops.cond(initialized,
                                                  control_flow_ops.no_op,
                                                  lambda: init_means).op)
    with ops.colocate_with(self._covs):
      init_covs = state_ops.assign(self._covs, covs, validate_shape=False)
      init_covs = control_flow_ops.with_dependencies(
          [init_covs],
          state_ops.assign(self._cluster_centers_initialized, True))
      self._init_ops.append(control_flow_ops.cond(initialized,
                                                  control_flow_ops.no_op,
                                                  lambda: init_covs).op)
Example #13
def _verify_input(data, labels, probs_list):
  """Verify that batched inputs are well-formed."""
  checked_probs_list = []
  for probs in probs_list:
    # Probabilities must be able to be converted to non-object numpy array.
    np_probs = np.asarray(probs)
    if np_probs.dtype == np.dtype('object'):
      raise ValueError('Probabilities must be able to be converted to a numpy '
                       'array.')
    checked_probs_list.append(np_probs)

    # Probabilities must sum to one.
    # TODO(joelshor): Investigate whether logits should be passed instead of
    # probs.
    if not np.isclose(np.sum(probs), 1.0):
      raise ValueError('Probabilities must sum to one.')

  # All probabilities should be the same length.
  if not np.array_equiv([probs.shape for probs in checked_probs_list],
                        checked_probs_list[0].shape):
    raise ValueError('Probability parameters must have the same length.')

  # Labels tensor should only have batch dimension.
  labels.get_shape().assert_has_rank(1)

  # Data tensor should have a batch dimension.
  data_shape = data.get_shape().with_rank_at_least(1)

  # Data and label batch dimensions must be compatible.
  data_shape[0].assert_is_compatible_with(labels.get_shape()[0])

  # Data and labels must have the same, strictly positive batch size. Since we
  # can't assume we know the batch size at graph creation, add runtime checks.
  data_batch_size = array_ops.shape(data)[0]
  labels_batch_size = array_ops.shape(labels)[0]

  data = control_flow_ops.with_dependencies(
      [check_ops.assert_positive(data_batch_size),
       check_ops.assert_equal(data_batch_size, labels_batch_size)],
      data)

  # Label's classes must be integers 0 <= x < num_classes.
  labels = control_flow_ops.with_dependencies(
      [check_ops.assert_integer(labels),
       check_ops.assert_non_negative(labels),
       check_ops.assert_less(labels, math_ops.cast(len(probs), labels.dtype))],
      labels)

  return data, labels, checked_probs_list
Example #14
 def _maybe_attach_assertion(x):
   if not validate_args:
     return x
   if assert_positive:
     return control_flow_ops.with_dependencies([
         check_ops.assert_positive(
             array_ops.matrix_diag_part(x),
             message="diagonal part must be positive"),
     ], x)
   return control_flow_ops.with_dependencies([
       check_ops.assert_none_equal(
           array_ops.matrix_diag_part(x),
           array_ops.zeros([], x.dtype),
           message="diagonal part must be non-zero"),
   ], x)
Example #15
 def _maybe_attach_assertion(x):
   if not validate_args:
     return x
   if assert_positive:
     return control_flow_ops.with_dependencies([
         check_ops.assert_positive(
             x, message="diagonal part must be positive"),
     ], x)
   # TODO(b/35157376): Use `assert_none_equal` once it exists.
   return control_flow_ops.with_dependencies([
       check_ops.assert_greater(
           math_ops.abs(x),
           array_ops.zeros([], x.dtype),
           message="diagonal part must be non-zero"),
   ], x)
Example #16
  def _check_alpha(self, alpha):
   alpha = ops.convert_to_tensor(alpha, name='alpha')
   if not self.strict:
     return alpha
   return control_flow_ops.with_dependencies(
       [check_ops.assert_rank_at_least(alpha, 1),
        check_ops.assert_positive(alpha)], alpha)
Example #17
 def _check_diag(self, diag):
   """Verify that `diag` is positive."""
   diag = ops.convert_to_tensor(diag, name="diag")
   if not self.verify_pd:
     return diag
   deps = [check_ops.assert_positive(diag)]
   return control_flow_ops.with_dependencies(deps, diag)
Example #18
  def log_prob(self, x, name="log_prob"):
    """Log prob of observations in `x` under these Gamma distribution(s).

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `alpha` and `beta`.
      name: The name to give this op.

    Returns:
      log_prob: tensor of dtype `dtype`, the log-PDFs of `x`.

    Raises:
      TypeError: if `x` and `alpha` are different dtypes.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._alpha, self._beta, x], name):
        alpha = self._alpha
        beta = self._beta
        x = ops.convert_to_tensor(x)
        x = control_flow_ops.with_dependencies(
            [check_ops.assert_positive(x)] if self.strict else [],
            x)
        contrib_tensor_util.assert_same_float_dtype(tensors=[x,],
                                                    dtype=self.dtype)

        return (alpha * math_ops.log(beta) + (alpha - 1) * math_ops.log(x) -
                beta * x - math_ops.lgamma(self._alpha))
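
For reference, the returned expression is the Gamma log density with rate parameter $\beta$:

$\log p(x \mid \alpha, \beta) = \alpha \log \beta + (\alpha - 1) \log x - \beta x - \log \Gamma(\alpha)$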
Example #19
  def mode(self, name="mode"):
    """Mode of each batch member.

    The mode of a gamma distribution is `(alpha - 1) / beta` when `alpha > 1`,
    and `NaN` otherwise.  If `self.strict_statistics` is `True`, an exception
    will be raised rather than returning `NaN`.

    Args:
      name:  A name to give this op.

    Returns:
      The mode for every batch member, a `Tensor` with same `dtype` as self.
    """
    alpha = self._alpha
    beta = self._beta
    with ops.name_scope(self.name):
      with ops.op_scope([alpha, beta], name):
        mode_if_defined = (alpha - 1.0) / beta
        if self.strict_statistics:
          one = ops.convert_to_tensor(1.0, dtype=self.dtype)
          return control_flow_ops.with_dependencies(
              [check_ops.assert_less(one, alpha)], mode_if_defined)
        else:
          alpha_ge_1 = alpha >= 1.0
          nan = np.nan * self._ones()
          return math_ops.select(alpha_ge_1, mode_if_defined, nan)
Example #20
def maybe_check_quadrature_param(param, name, validate_args):
  """Helper which checks validity of `loc` and `scale` init args."""
  with ops.name_scope(name="check_" + name, values=[param]):
    assertions = []
    if param.shape.ndims is not None:
      if param.shape.ndims == 0:
        raise ValueError("Mixing params must be a (batch of) vector; "
                         "{}.rank={} is not at least one.".format(
                             name, param.shape.ndims))
    elif validate_args:
      assertions.append(check_ops.assert_rank_at_least(
          param, 1,
          message=("Mixing params must be a (batch of) vector; "
                   "{}.rank is not at least one.".format(
                       name))))

    # TODO(jvdillon): Remove once we support k-mixtures.
    if param.shape.with_rank_at_least(1)[-1] is not None:
      if param.shape[-1].value != 1:
        raise NotImplementedError("Currently only bimixtures are supported; "
                                  "{}.shape[-1]={} is not 1.".format(
                                      name, param.shape[-1].value))
    elif validate_args:
      assertions.append(check_ops.assert_equal(
          array_ops.shape(param)[-1], 1,
          message=("Currently only bimixtures are supported; "
                   "{}.shape[-1] is not 1.".format(name))))

    if assertions:
      return control_flow_ops.with_dependencies(assertions, param)
    return param
Example #21
  def mode(self, name="mode"):
    """Mode of the distribution.

    Note that the mode for the Dirichlet distribution is only defined
    when `alpha > 1`. This returns the mode when `alpha > 1`,
    and NaN otherwise. If `self.allow_nan_stats` is `False`, an exception
    will be raised rather than returning `NaN`.

    Args:
      name: The name for this op.

    Returns:
      Mode of the Dirichlet distribution.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._alpha, self._alpha_0], name):
        one = constant_op.constant(1, self.dtype)
        mode = (self._alpha - 1)/ (
            array_ops.expand_dims(self._alpha_0, -1) - math_ops.cast(
                self.event_shape()[0], self.dtype))

        if self.allow_nan_stats:
          return math_ops.select(
              math_ops.greater(self._alpha, 1),
              mode,
              (constant_op.constant(float("NaN"), dtype=self.dtype) *
               array_ops.ones_like(self._alpha, dtype=self.dtype)))
        else:
          return control_flow_ops.with_dependencies([
              check_ops.assert_less(
                  one, self._alpha,
                  message="mode not defined for components of alpha <= 1")
          ], mode)
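
For reference, the value computed above is the standard Dirichlet mode

$\mathrm{mode}_i = \frac{\alpha_i - 1}{\alpha_0 - K}, \qquad \alpha_0 = \sum_i \alpha_i,$

where $K$ is the event dimension (`self.event_shape()[0]`); it is only defined when every $\alpha_i > 1$, which is what the assertion checks.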
Example #22
 def testShape(self):
   with ops.Graph().as_default():
     tensor = tf.constant([1.0, 2.0])
     self.assertEquals([2], tensor.get_shape())
     self.assertEquals([2],
                       control_flow_ops.with_dependencies(
                           [tf.constant(1.0)], tensor).get_shape())
Example #23
 def _maybe_assert_valid_x(self, x):
   if not self.validate_args:
     return x
   is_valid = check_ops.assert_non_negative(
       x,
       message="Forward transformation input must be at least {}.".format(0))
   return control_flow_ops.with_dependencies([is_valid], x)
Example #24
 def testComputeMovingVars(self):
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = ops.batch_norm(images, decay=0.1)
     update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
     with tf.control_dependencies(update_ops):
       barrier = tf.no_op(name='gradient_barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
     # Initialize all variables
     sess.run(tf.global_variables_initializer())
     moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
     moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     for _ in range(10):
       sess.run([output])
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # After 10 updates with decay 0.1 moving_mean == expected_mean and
     # moving_variance == expected_var.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
Example #25
 def testReuseVars(self):
   height, width = 3, 3
   with self.test_session() as sess:
     image_shape = (10, height, width, 3)
     image_values = np.random.rand(*image_shape)
     expected_mean = np.mean(image_values, axis=(0, 1, 2))
     expected_var = np.var(image_values, axis=(0, 1, 2))
     images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
     output = ops.batch_norm(images, decay=0.1, is_training=False)
     update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
     with tf.control_dependencies(update_ops):
       barrier = tf.no_op(name='gradient_barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
     # Initialize all variables
     sess.run(tf.global_variables_initializer())
     moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
     moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
     mean, variance = sess.run([moving_mean, moving_variance])
     # After initialization moving_mean == 0 and moving_variance == 1.
     self.assertAllClose(mean, [0] * 3)
     self.assertAllClose(variance, [1] * 3)
     # Simulate assignment from saver restore.
     init_assigns = [tf.assign(moving_mean, expected_mean),
                     tf.assign(moving_variance, expected_var)]
     sess.run(init_assigns)
     for _ in range(10):
       sess.run([output], {images: np.random.rand(*image_shape)})
     mean = moving_mean.eval()
     variance = moving_variance.eval()
     # Although we feed different images, the moving_mean and moving_variance
     # shouldn't change.
     self.assertAllClose(mean, expected_mean)
     self.assertAllClose(variance, expected_var)
Example #26
File: util.py  Project: TalkingData/edward
def cumprod(xs):
    """Cumulative product of a tensor along its outer dimension.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        A 1-D or higher tensor.

    Returns
    -------
    tf.Tensor
        A tensor with `cumprod` applied along its outer dimension.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(xs, msg='')]
    xs = control_flow_ops.with_dependencies(dependencies, xs)
    xs = tf.cast(xs, dtype=tf.float32)

    values = tf.unpack(xs)
    out = []
    prev = tf.ones_like(values[0])
    for val in values:
        s = prev * val
        out.append(s)
        prev = s

    result = tf.pack(out)
    return result
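
A quick usage sketch, assuming the same TF 0.x-era API used above (tf.unpack/tf.pack); newer TensorFlow versions expose tf.cumprod directly:

import tensorflow as tf

with tf.Session() as sess:
    print(sess.run(cumprod(tf.constant([1.0, 2.0, 3.0]))))  # [1. 2. 6.]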
Example #27
  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
    """Instantiates the `AbsoluteValue` bijector.

    Args:
      event_ndims: Python scalar indicating the number of dimensions associated
        with a particular draw from the distribution.  Currently only zero is
        supported.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.
      name: Python `str` name given to ops managed by this object.

    Raises:
      ValueError:  If `event_ndims` is not zero.
    """
    self._graph_parents = []
    self._name = name

    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
    event_ndims_const = tensor_util.constant_value(event_ndims)
    if event_ndims_const is not None and event_ndims_const not in (0,):
      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
    else:
      if validate_args:
        event_ndims = control_flow_ops.with_dependencies(
            [check_ops.assert_equal(
                event_ndims, 0, message="event_ndims was not 0")],
            event_ndims)

    with self._name_scope("init"):
      super(AbsoluteValue, self).__init__(
          event_ndims=event_ndims,
          validate_args=validate_args,
          name=name)
Example #28
 def _entropy(self):
   probs = self._probs
   if self.validate_args:
     probs = control_flow_ops.with_dependencies(
         [check_ops.assert_less(
             probs,
             constant_op.constant(1., probs.dtype),
             message="Entropy is undefined when logits = inf or probs = 1.")],
         probs)
   # Claim: entropy(p) = softplus(s)/p - s
   # where s=logits and p=probs.
   #
   # Proof:
   #
   # entropy(p)
   # := -[(1-p)log(1-p) + plog(p)]/p
   # = -[log(1-p) + plog(p/(1-p))]/p
   # = -[-softplus(s) + ps]/p
   # = softplus(s)/p - s
   #
   # since,
   # log[1-sigmoid(s)]
   # = log[1/(1+exp(s)]
   # = -log[1+exp(s)]
   # = -softplus(s)
   #
   # using the fact that,
   # 1-sigmoid(s) = sigmoid(-s) = 1/(1+exp(s))
   return nn.softplus(self.logits) / probs - self.logits
Example #29
  def _check_batch_shape_possibly_add_asserts(self):
    """Static check of init arg `batch_shape`, possibly add asserts."""
    if self._batch_shape_arg is None:
      return

    # Possibly add asserts
    if self._assert_proper_shapes:
      self._batch_shape_arg = control_flow_ops.with_dependencies(
          [
              check_ops.assert_rank(
                  self._batch_shape_arg,
                  1,
                  message="Argument batch_shape must be a 1-D Tensor."),
              check_ops.assert_non_negative(
                  self._batch_shape_arg,
                  message="Argument batch_shape must be non-negative."),
          ],
          self._batch_shape_arg)

    # Static checks
    if not self._batch_shape_arg.dtype.is_integer:
      raise TypeError("Argument batch_shape must be integer type.  Found:"
                      " %s" % self._batch_shape_arg)

    if self._batch_shape_static is None:
      return  # Cannot do any other static checks.

    if self._batch_shape_static.ndim != 1:
      raise ValueError("Argument batch_shape must be a 1-D Tensor.  Found:"
                       " %s" % self._batch_shape_static)

    if np.any(self._batch_shape_static < 0):
      raise ValueError("Argument batch_shape must be non-negative.  Found:"
                       "%s" % self._batch_shape_static)
Example #30
File: util.py  Project: TalkingData/edward
def log_sum_exp(input_tensor, reduction_indices=None, keep_dims=False):
    """Compute the ``log_sum_exp`` of elements in a tensor, taking
    the sum across axes given by ``reduction_indices``.

    Parameters
    ----------
    input_tensor : tf.Tensor
        The tensor to reduce. Should have numeric type.
    reduction_indices : int or list of int, optional
        The dimensions to reduce. If `None` (the default), reduces all
        dimensions.
    keep_dims : bool, optional
        If true, retains reduced dimensions with length 1.

    Returns
    -------
    tf.Tensor
        The reduced tensor.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(input_tensor, msg='')]
    input_tensor = control_flow_ops.with_dependencies(dependencies, input_tensor)
    input_tensor = tf.cast(input_tensor, dtype=tf.float32)

    x_max = tf.reduce_max(input_tensor, reduction_indices, keep_dims=True)
    return tf.squeeze(x_max) + tf.log(tf.reduce_sum(
        tf.exp(input_tensor - x_max), reduction_indices, keep_dims))
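
For reference, the implementation relies on the usual max-shift identity, which keeps the exponentials from overflowing:

$\log \sum_i \exp(x_i) = \max_j x_j + \log \sum_i \exp\big(x_i - \max_j x_j\big)$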
Example #31
  def __init__(self,
               df,
               scale_operator,
               cholesky_input_output_matrices=False,
               validate_args=False,
               allow_nan_stats=True,
               name=None):
    """Construct Wishart distributions.

    Args:
      df: `float` or `double` tensor, the degrees of freedom of the
        distribution(s). `df` must be greater than or equal to `k`.
      scale_operator: `float` or `double` instance of `LinearOperator`.
      cholesky_input_output_matrices: Python `bool`. Any function whose input
        or output is a matrix assumes the input is Cholesky-factored and
        returns a Cholesky-factored matrix. For example, `log_prob` takes a
        Cholesky as input and `sample_n` returns a Cholesky when
        `cholesky_input_output_matrices=True`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if scale is not floating-type
      TypeError: if scale.dtype != df.dtype
      ValueError: if df < k, where scale operator event shape is
        `(k, k)`
    """
    parameters = dict(locals())
    self._cholesky_input_output_matrices = cholesky_input_output_matrices
    with ops.name_scope(name) as name:
      with ops.name_scope("init", values=[df, scale_operator]):
        if not scale_operator.dtype.is_floating:
          raise TypeError(
              "scale_operator.dtype=%s is not a floating-point type" %
              scale_operator.dtype)
        if not scale_operator.is_square:
          print(scale_operator.to_dense().eval())
          raise ValueError("scale_operator must be square.")

        self._scale_operator = scale_operator
        self._df = ops.convert_to_tensor(
            df,
            dtype=scale_operator.dtype,
            name="df")
        contrib_tensor_util.assert_same_float_dtype(
            (self._df, self._scale_operator))
        if (self._scale_operator.shape.ndims is None or
            self._scale_operator.shape.dims[-1].value is None):
          self._dimension = math_ops.cast(
              self._scale_operator.domain_dimension_tensor(),
              dtype=self._scale_operator.dtype, name="dimension")
        else:
          self._dimension = ops.convert_to_tensor(
              self._scale_operator.shape.dims[-1].value,
              dtype=self._scale_operator.dtype, name="dimension")
        df_val = tensor_util.constant_value(self._df)
        dim_val = tensor_util.constant_value(self._dimension)
        if df_val is not None and dim_val is not None:
          df_val = np.asarray(df_val)
          if not df_val.shape:
            df_val = [df_val]
          if any(df_val < dim_val):
            raise ValueError(
                "Degrees of freedom (df = %s) cannot be less than "
                "dimension of scale matrix (scale.dimension = %s)"
                % (df_val, dim_val))
        elif validate_args:
          assertions = check_ops.assert_less_equal(
              self._dimension, self._df,
              message=("Degrees of freedom (df = %s) cannot be "
                       "less than dimension of scale matrix "
                       "(scale.dimension = %s)" %
                       (self._dimension, self._df)))
          self._df = control_flow_ops.with_dependencies(
              [assertions], self._df)
    super(_WishartLinearOperator, self).__init__(
        dtype=self._scale_operator.dtype,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
        parameters=parameters,
        graph_parents=([self._df, self._dimension] +
                       self._scale_operator.graph_parents),
        name=name)
Example #32
def create_clones(batch_queue):        
    with tf.device('/cpu:0'):
        global_step = slim.create_global_step()
        learning_rate = tf.constant(FLAGS.learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=FLAGS.momentum, name='Momentum')
        
    # place clones
    seglink_loss = 0  # for summary only
    gradients = []
    for clone_idx, gpu in enumerate(config.gpus):
        do_summary = clone_idx == 0 # only summary on the first clone
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):  # the variables have been created in config.init_config
            with tf.name_scope(config.clone_scopes[clone_idx]) as clone_scope:
                with tf.device(gpu) as clone_device:
                    b_image, b_seg_label, b_seg_loc, b_link_label = batch_queue.dequeue()
                    net = seglink_symbol.SegLinkNet(inputs = b_image, data_format = config.data_format)
                    
                    # build seglink loss
                    net.build_loss(seg_labels = b_seg_label, 
                                   seg_offsets = b_seg_loc, 
                                   link_labels = b_link_label,
                                   do_summary = do_summary)
                    
                    
                    # gather seglink losses
                    losses = tf.get_collection(tf.GraphKeys.LOSSES, clone_scope)
                    assert len(losses) ==  3  # 3 is the number of seglink losses: seg_cls, seg_loc, link_cls
                    total_clone_loss = tf.add_n(losses) / config.num_clones
                    seglink_loss = seglink_loss + total_clone_loss

                    # gather regularization loss and add to clone_0 only
                    if clone_idx == 0:
                        regularization_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                        total_clone_loss = total_clone_loss + regularization_loss
                    
                    # compute clone gradients
                    clone_gradients = optimizer.compute_gradients(total_clone_loss)# all variables will be updated.
                    gradients.append(clone_gradients)
                    
    tf.summary.scalar('seglink_loss', seglink_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)
    
    # add all gradients together
    # note that the gradients do not need to be averaged, because the average operation has been done on loss.
    averaged_gradients = sum_gradients(gradients)
    
    update_op = optimizer.apply_gradients(averaged_gradients, global_step=global_step)
    
    train_ops = [update_op]
    
    # moving average
    if FLAGS.using_moving_average:
        tf.logging.info('using moving average in training, \
        with decay = %f'%(FLAGS.moving_average_decay))
        ema = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        ema_op = ema.apply(tf.trainable_variables())
        with tf.control_dependencies([update_op]):# ema after updating
            train_ops.append(tf.group(ema_op))
            
    train_op = control_flow_ops.with_dependencies(train_ops, seglink_loss, name='train_op')
    return train_op
Example #33
             args.learning_rate,
             global_step,
             args.learning_rate_decay_steps,
             args.learning_rate_decay,
             staircase=True,
             name='exponential_decay_learning_rate')
         optimizer = tf.train.AdamOptimizer(learning_rate)
     variables_to_train = tf.trainable_variables()
     total_loss, clones_gradients = model_deploy.optimize_clones(
         clones, optimizer, var_list=variables_to_train)
     grad_updates = optimizer.apply_gradients(clones_gradients,
                                              global_step=global_step)
     update_ops.append(grad_updates)
     update_op = tf.group(*update_ops)
     train_tensor = control_flow_ops.with_dependencies([update_op],
                                                       total_loss,
                                                       name='train_op')
 with tf.name_scope('summaries'):
     end_points = clones[0].outputs
     for end_point in end_points:
         x = end_points[end_point]
         summaries.add(tf.histogram_summary('activations/' + end_point, x))
         summaries.add(
             tf.scalar_summary('sparsity/' + end_point,
                               tf.nn.zero_fraction(x)))
     for variable in slim.get_model_variables():
         summaries.add(tf.histogram_summary(variable.op.name, variable))
     summaries.add(
         tf.scalar_summary('learning_rate',
                           learning_rate,
                           name='learning_rate'))
 def _log_prob(self, x):
     x = control_flow_ops.with_dependencies(
         [check_ops.assert_positive(x)] if self.validate_args else [], x)
     return (self.alpha * math_ops.log(self.beta) -
             math_ops.lgamma(self.alpha) -
             (self.alpha + 1.) * math_ops.log(x) - self.beta / x)
 def _cdf(self, x):
     x = control_flow_ops.with_dependencies(
         [check_ops.assert_positive(x)] if self.validate_args else [], x)
     # Note that igammac returns the upper regularized incomplete gamma
     # function Q(a, x), which is what we want for the CDF.
     return math_ops.igammac(self.alpha, self.beta / x)
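
For reference, these two methods implement the inverse-Gamma log density and CDF:

$\log p(x) = \alpha \log \beta - \log \Gamma(\alpha) - (\alpha + 1) \log x - \beta / x,
    \qquad F(x) = Q(\alpha, \beta / x),$

where $Q$ is the upper regularized incomplete gamma function computed by math_ops.igammac.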
Example #36
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[],
                               name='keep_prob')  # dropout keep probability
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 64, 64, 1],
                            name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device(tf.test.gpu_device_name()):
        # network: conv2d->max_pool2d->conv2d->max_pool2d->conv2d->max_pool2d->conv2d->conv2d->
        # max_pool2d->fully_connected->fully_connected
        # Default arguments for slim.conv2d and slim.fully_connected: batch_norm
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images,
                                  64, [3, 3],
                                  1,
                                  padding='SAME',
                                  scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1,
                                  128, [3, 3],
                                  padding='SAME',
                                  scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2,
                                  256, [3, 3],
                                  padding='SAME',
                                  scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob),
                                       1000,
                                       activation_fn=tf.nn.relu,
                                       scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                          FLAGS.charset_size,
                                          activation_fn=None,
                                          scope='fc2')
        # Labels are not one-hot encoded, so use sparse_softmax_cross_entropy_with_logits
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels))
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss,
                                                 optimizer,
                                                 global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        # Plot loss and accuracy curves via summaries
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        # Return the top-k predictions and their probabilities, plus the top-k accuracy
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities,
                                                                 k=top_k)
        accuracy_in_top_k = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {
        'images': images,
        'labels': labels,
        'keep_prob': keep_prob,
        'top_k': top_k,
        'global_step': global_step,
        'train_op': train_op,
        'loss': loss,
        'is_training': is_training,
        'accuracy': accuracy,
        'accuracy_top_k': accuracy_in_top_k,
        'merged_summary_op': merged_summary_op,
        'predicted_distribution': probabilities,
        'predicted_index_top_k': predicted_index_top_k,
        'predicted_val_top_k': predicted_val_top_k
    }
Example #37
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        ######################
        # Config model_deploy#
        ######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir,
                                              splits_to_sizes={
                                                  'train':
                                                  FLAGS.num_train_samples,
                                                  'validation':
                                                  FLAGS.num_validation_samples,
                                                  'test':
                                                  FLAGS.num_test_samples
                                              })

        ####################
        # Select the network #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label, bbox] = provider.get(['image', 'label', 'bbox'])
            label -= FLAGS.labels_offset
            bbx = tf.reshape(bbox, [1, 1, 4])

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image,
                                           train_image_size,
                                           train_image_size,
                                           bbox=bbx)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weight=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate',
                                  learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # Compute the clone losses and gradients; optimize_clones() returns the
        # total loss and the gradients to apply.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(
            tf.scalar_summary('total_loss', total_loss, name='total_loss'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
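
The core of the script above is the final with_dependencies call that turns the grouped update ops plus the loss into a single train tensor. A minimal, self-contained sketch of that pattern (the toy variable, loss and optimizer below are illustrative, not part of the original script):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

# Toy model standing in for the real network.
x = tf.Variable(1.0)
loss = tf.square(x)
optimizer = tf.train.GradientDescentOptimizer(0.1)
grad_updates = optimizer.apply_gradients(optimizer.compute_gradients(loss))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # e.g. batch-norm updates
update_ops.append(grad_updates)
update_op = tf.group(*update_ops)

# Fetching train_tensor runs every update op first, then yields the loss value.
train_tensor = control_flow_ops.with_dependencies([update_op], loss,
                                                  name='train_op')
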
Example #38
    def __init__(self,
                 df,
                 scale=None,
                 scale_tril=None,
                 input_output_cholesky=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Wishart"):
        """Construct Wishart distributions.

    Args:
      df: `float` or `double` `Tensor`. Degrees of freedom; must be greater
        than or equal to the dimension of the scale matrix.
      scale: `float` or `double` `Tensor`. The symmetric positive definite
        scale matrix of the distribution. Exactly one of `scale` and
        `scale_tril` must be passed.
      scale_tril: `float` or `double` `Tensor`. The Cholesky factorization
        of the symmetric positive definite scale matrix of the distribution.
        Exactly one of `scale` and `scale_tril` must be passed.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    Raises:
      ValueError: if zero or both of `scale` and `scale_tril` are passed in.
    """
        parameters = dict(locals())

        with tf.name_scope(name, values=[scale, scale_tril]) as name:
            with tf.name_scope("init", values=[scale, scale_tril]):
                if (scale is None) == (scale_tril is None):
                    raise ValueError(
                        "Must pass scale or scale_tril, but not both.")

                if scale is not None:
                    scale = tf.convert_to_tensor(scale)
                    if validate_args:
                        scale = distribution_util.assert_symmetric(scale)
                    scale_tril = tf.cholesky(scale)
                else:  # scale_tril is not None
                    scale_tril = tf.convert_to_tensor(scale_tril)
                    if validate_args:
                        scale_tril = control_flow_ops.with_dependencies([
                            tf.assert_positive(
                                tf.matrix_diag_part(scale_tril),
                                message="scale_tril must be positive definite"
                            ),
                            tf.assert_equal(
                                tf.shape(scale_tril)[-1],
                                tf.shape(scale_tril)[-2],
                                message="scale_tril must be square")
                        ], scale_tril)

            super(Wishart, self).__init__(
                df=df,
                scale_operator=tf.linalg.LinearOperatorLowerTriangular(
                    tril=scale_tril,
                    is_non_singular=True,
                    is_positive_definite=True,
                    is_square=True),
                input_output_cholesky=input_output_cholesky,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=name)
        self._parameters = parameters
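
A minimal sketch of the validate_args idiom used in this constructor: with_dependencies re-emits scale_tril with the assertions as control inputs, so the checks only run when the tensor is actually evaluated (the example values below are illustrative):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

scale_tril = tf.constant([[1.0, 0.0], [0.5, 2.0]])
checked_tril = control_flow_ops.with_dependencies([
    tf.assert_positive(tf.matrix_diag_part(scale_tril),
                       message="scale_tril must be positive definite"),
    tf.assert_equal(tf.shape(scale_tril)[-1], tf.shape(scale_tril)[-2],
                    message="scale_tril must be square"),
], scale_tril)

with tf.Session() as sess:
    sess.run(checked_tril)  # passes; a non-positive diagonal would raise here
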
Example #39
    def __init__(self,
                 df,
                 scale_operator,
                 input_output_cholesky=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name=None):
        """Construct Wishart distributions.

    Args:
      df: `float` or `double` tensor, the degrees of freedom of the
        distribution(s). `df` must be greater than or equal to `k`.
      scale_operator: `float` or `double` instance of `LinearOperator`.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if `scale_operator` is not of a floating-point type.
      TypeError: if `scale_operator.dtype != df.dtype`.
      ValueError: if `df < k`, where the scale operator's event shape is
        `(k, k)`.
    """
        parameters = dict(locals())
        self._input_output_cholesky = input_output_cholesky
        with tf.name_scope(name) as name:
            with tf.name_scope("init", values=[df, scale_operator]):
                if not scale_operator.dtype.is_floating:
                    raise TypeError(
                        "scale_operator.dtype=%s is not a floating-point type"
                        % scale_operator.dtype)
                if not scale_operator.is_square:
                    raise ValueError("scale_operator must be square.")

                self._scale_operator = scale_operator
                self._df = tf.convert_to_tensor(df,
                                                dtype=scale_operator.dtype,
                                                name="df")
                contrib_tensor_util.assert_same_float_dtype(
                    (self._df, self._scale_operator))
                if (self._scale_operator.shape.ndims is None
                        or self._scale_operator.shape[-1].value is None):
                    self._dimension = tf.cast(
                        self._scale_operator.domain_dimension_tensor(),
                        dtype=self._scale_operator.dtype,
                        name="dimension")
                else:
                    self._dimension = tf.convert_to_tensor(
                        self._scale_operator.shape[-1].value,
                        dtype=self._scale_operator.dtype,
                        name="dimension")
                df_val = tensor_util.constant_value(self._df)
                dim_val = tensor_util.constant_value(self._dimension)
                if df_val is not None and dim_val is not None:
                    df_val = np.asarray(df_val)
                    if not df_val.shape:
                        df_val = [df_val]
                    if any(df_val < dim_val):
                        raise ValueError(
                            "Degrees of freedom (df = %s) cannot be less than "
                            "dimension of scale matrix (scale.dimension = %s)"
                            % (df_val, dim_val))
                elif validate_args:
                    assertions = tf.assert_less_equal(
                        self._dimension,
                        self._df,
                        message=("Degrees of freedom (df = %s) cannot be "
                                 "less than dimension of scale matrix "
                                 "(scale.dimension = %s)" %
                                 (self._df, self._dimension)))
                    self._df = control_flow_ops.with_dependencies([assertions],
                                                                  self._df)
        super(_WishartLinearOperator, self).__init__(
            dtype=self._scale_operator.dtype,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            reparameterization_type=tf.distributions.FULLY_REPARAMETERIZED,
            parameters=parameters,
            graph_parents=([self._df, self._dimension] +
                           self._scale_operator.graph_parents),
            name=name)
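
The constructor above validates df in two tiers: statically, when both values are known at graph-construction time, otherwise with a runtime assertion attached via with_dependencies. A hedged sketch of that idiom with made-up inputs:

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import control_flow_ops

df = tf.placeholder_with_default(5.0, shape=[])  # hypothetical degrees of freedom
dimension = tf.constant(3.0)                     # hypothetical scale dimension

df_val = tensor_util.constant_value(df)
dim_val = tensor_util.constant_value(dimension)
if df_val is not None and dim_val is not None:
    # Both values known while building the graph: fail early in Python.
    if np.any(np.asarray(df_val) < dim_val):
        raise ValueError("df must be >= dimension of the scale matrix")
else:
    # Otherwise defer the check to run time via a control dependency.
    assertion = tf.assert_less_equal(dimension, df,
                                     message="df must be >= dimension")
    df = control_flow_ops.with_dependencies([assertion], df)
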
Example #40
def squeeze(input: ragged_tensor.Ragged, axis=None, name=None):  # pylint: disable=redefined-builtin
    """Ragged compatible squeeze.

  If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

  If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
  where `N` is the number of elements in the squeezed dimensions.

  Args:
    input: A potentially ragged tensor. The input to squeeze.
    axis: An optional list of ints. Defaults to `None`. If the `input` is
      ragged, it only squeezes the dimensions listed. It fails if `input` is
      ragged and axis is []. If `input` is not ragged it calls tf.squeeze. Note
      that it is an error to squeeze a dimension that is not 1. It must be in
      the range of [-rank(input), rank(input)).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor. Contains the same data as input,
    but has one or more dimensions of size 1 removed.
  """
    with ops.name_scope(name, 'RaggedSqueeze', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        if isinstance(input, ops.Tensor):
            return array_ops.squeeze(input, axis, name)

        if axis is None:
            raise ValueError('Ragged.squeeze must have an axis argument.')
        if isinstance(axis, int):
            axis = [axis]
        elif ((not isinstance(axis, (list, tuple)))
              or (not all(isinstance(d, int) for d in axis))):
            raise TypeError('Axis must be a list or tuple of integers.')

        dense_dims = []
        ragged_dims = []
        # Normalize all the dims in axis to be positive
        axis = [
            array_ops.get_positive_axis(d, input.shape.ndims, 'axis[%d]' % i,
                                        'rank(input)')
            for i, d in enumerate(axis)
        ]
        for dim in axis:
            if dim > input.ragged_rank:
                dense_dims.append(dim - input.ragged_rank)
            else:
                ragged_dims.append(dim)

        # Make sure the specified ragged dimensions are squeezable.
        assertion_list = []
        scalar_tensor_one = constant_op.constant(1,
                                                 dtype=input.row_splits.dtype)
        for i, r in enumerate(input.nested_row_lengths()):
            if i + 1 in ragged_dims:
                assertion_list.append(
                    control_flow_ops.Assert(
                        math_ops.reduce_all(
                            math_ops.equal(r, scalar_tensor_one)),
                        [
                            'the given axis (axis = %d) is not squeezable!' %
                            (i + 1)
                        ]))
        if 0 in ragged_dims:
            scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
            assertion_list.append(
                control_flow_ops.Assert(
                    math_ops.equal(array_ops.size(input.row_splits),
                                   scalar_tensor_two),
                    ['the given axis (axis = 0) is not squeezable!']))

        # At this point the ragged dimensions are known to be squeezable; the
        # assertions above will fail at run time otherwise.
        squeezed_rt = control_flow_ops.with_dependencies(
            assertion_list, input.flat_values)

        if dense_dims:
            # Gives error if the dense dimension is not squeezable.
            squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

        remaining_row_splits = []
        for i, row_split in enumerate(input.nested_row_splits):
            # Each row_splits tensor corresponds to dimension #(i + 1).
            if (i + 1) not in ragged_dims:
                remaining_row_splits.append(row_split)
        # Take care of the first row if it is to be squeezed.
        if remaining_row_splits and 0 in ragged_dims:
            remaining_row_splits.pop(0)

        squeezed_rt = ragged_tensor.RaggedTensor.from_nested_row_splits(
            squeezed_rt, remaining_row_splits)

        # Corner case: when removing all the ragged dimensions and the output is
        # a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
        if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
            squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

        return squeezed_rt
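
Assuming a TensorFlow version where tf.squeeze dispatches to this ragged implementation (it can also be called directly as squeeze(rt, axis=...)), usage might look like:

import tensorflow as tf

rt = tf.ragged.constant([[[1, 2], [3]]])  # shape [1, None, None]
squeezed = tf.squeeze(rt, axis=[0])       # drops the leading size-1 dimension
# squeezed is a RaggedTensor equal to [[1, 2], [3]]
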
Example #41
def detection_model(features, labels, mode, params):
    num_classes = params['num_classes']
    initial_weights_path = params.get('initial_weights_path', '')
    log_dir = params['log_dir']
    collect_priors_summary = params['collect_priors_summary']

    data_format = params.get('data_format', 'NHWC')
    depth_multiplier = params.get('depth_multiplier', 1.0)
    priors_rule = params.get('priors_rule', 'caffe')
    custom_priors = params.get('priors', [])
    learning_rate = params.get('learning_rate', 0.01)
    steps_per_epoch = params.get('steps_per_epoch', 1)
    mobilenet_version = params.get('mobilenet_version', 'v2')
    weight_regularization = params.get('weight_regularization', 4e-5)
    optimizer_func = params.get(
        'optimizer', lambda learning_rate: tf.train.AdagradOptimizer(
            learning_rate=learning_rate))

    # Override default FileWriter. Don't store the graph definition.
    # pylint: disable=protected-access
    tf.summary.FileWriterCache._cache[log_dir] = tf.summary.FileWriter(
        log_dir, graph=None)

    if callable(learning_rate):
        learning_rate = learning_rate()

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    ssd = MobileNetSSD(
        input_tensor=features,
        num_classes=num_classes,
        depth_multiplier=depth_multiplier,
        is_training=is_training,
        data_format=data_format,
        priors_rule=priors_rule,
        priors=custom_priors,
        mobilenet_version=mobilenet_version,
        weight_regularization=weight_regularization)  # 1. Build model

    if mode == tf.estimator.ModeKeys.PREDICT:
        decoded_predictions = ssd.detection_output(
            use_plain_caffe_format=False)
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=decoded_predictions)

    assert mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)
    targets = ssd.create_targets(labels)  # 2. Build GT from annotation

    if collect_priors_summary:
        with tf.name_scope('summary/'):
            assigned_priors = create_tensors_and_streaming_ops_for_assigned_priors(
                targets, ssd.priors_info, num_classes)
            detailed_assigned_priors = get_detailed_assigned_priors_summary_tf(
                assigned_priors, ssd.priors_info)

    loss_func = MultiboxLoss(neg_pos_ratio=3.0)  # 3. Build loss-object

    eval_iteration = tf.get_variable('eval_iteration',
                                     initializer=0,
                                     dtype=tf.int32,
                                     trainable=False)
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_print_steps = steps_per_epoch // 50
        eval_print_steps = 1 if eval_print_steps == 0 else eval_print_steps

        every_eval_print_steps = tf.equal(
            tf.mod(eval_iteration + 1, eval_print_steps), 0)
        eval_iteration = tf.assign(eval_iteration, eval_iteration + 1)
        targets = with_dependencies([eval_iteration], targets)

        loss = loss_func.eval_summary(targets, ssd.predictions)
        loss = tf.cond(
            every_eval_print_steps, lambda: tf.Print(loss, [
                tf.round(100 * eval_iteration / steps_per_epoch), loss
            ], '[%][loss]: '), lambda: loss)

        eval_metric_ops = {}
        for key, val in loss_func.eval_tensors.items():
            eval_metric_ops['loss_function/' + key] = tf.metrics.mean(val)

        if collect_priors_summary:
            # We only need the update ops here.
            for key, metric_ops in assigned_priors.items():
                eval_metric_ops[key] = metric_ops

            for key, assigned_priors_tensor in detailed_assigned_priors.items():
                eval_metric_ops['prior_histogram/' + key] = (
                    assigned_priors_tensor, tf.no_op())

        decoded_predictions = ssd.detection_output(
            use_plain_caffe_format=False)
        eval_metric_ops['predictions'] = tf.contrib.metrics.streaming_concat(
            decoded_predictions)

        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    if initial_weights_path:
        tf.logging.info('Initialize from: ' + initial_weights_path)
        ssd.load_weights(initial_weights_path)

    bboxes = ssd._decode_boxes(ssd.predictions['locs'],
                               priors=ssd.priors[0, 0],
                               variance=ssd.priors[0, 1])
    loss = loss_func.loss(targets, ssd.predictions,
                          bboxes)  # 4. Compute loss with NMS

    if collect_priors_summary:
        with tf.name_scope('summary/'):
            loss = with_dependencies(
                [operation for key, (_, operation) in assigned_priors.items()],
                loss)

        for name, assigned_priors_tensor in detailed_assigned_priors.items():
            tf.summary.scalar(name, tf.reduce_sum(assigned_priors_tensor))

        py_func_ops = []
        priors_dir = os.path.join(log_dir, 'priors')

        with tf.name_scope('write_histogram'):
            every_epoch = tf.equal(
                tf.mod(tf.train.get_global_step() + 1, steps_per_epoch), 0)
            for name, (group, _) in assigned_priors.items():

                def write_hist2d():
                    # pylint: disable=cell-var-from-loop
                    return tf.py_func(write_histogram_2d_tf, [
                        group,
                        pickle.dumps(ssd.priors_info), name,
                        tf.train.get_global_step(), priors_dir
                    ], tf.bool)

                write_hist2d_once_per_epoch = tf.cond(every_epoch,
                                                      write_hist2d, tf.no_op)
                py_func_ops.append(write_hist2d_once_per_epoch)

            loss = with_dependencies(py_func_ops, loss)

    optimizer = optimizer_func(learning_rate)
    tf.summary.scalar('learning_rate', learning_rate)

    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    regularization_loss = tf.add_n(
        regularization_losses, name='loss_function/regularization_losses_sum')
    total_loss = tf.add(loss,
                        regularization_loss,
                        name='loss_function/total_loss')

    tf.summary.scalar('loss_function/regularization_loss', regularization_loss)

    with tf.variable_scope('train_loop'):
        train_op = optimizer.minimize(total_loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          train_op=train_op)
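
In the eval branch above, with_dependencies is used purely for its side effect: every fetch of the loss also bumps eval_iteration. A stripped-down sketch of that pattern (the names and the constant loss are illustrative):

import tensorflow as tf
from tensorflow.python.ops.control_flow_ops import with_dependencies

counter = tf.get_variable('eval_iteration', initializer=0,
                          dtype=tf.int32, trainable=False)
increment = tf.assign(counter, counter + 1)

loss = tf.constant(0.5)  # stand-in for the real loss tensor
# Every fetch of `loss` now also increments the counter.
loss = with_dependencies([increment], loss)
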
Example #42
    def _setup_model_loss(self, update_ops=None, num_classes=6):
        self.learning_rate_d = tf.placeholder(tf.float32,
                                              shape=[],
                                              name="learning_rate_placeholder")
        self.learning_rate_g = tf.placeholder(tf.float32,
                                              shape=[],
                                              name="learning_rate_placeholder")

        d_optimizer = self._optimizer(self.learning_rate_d,
                                      optname=self.cnf.get(
                                          'optname', 'momentum'),
                                      **self.cnf.get('opt_kwargs',
                                                     {'decay': 0.9}))
        g_optimizer = self._optimizer(self.learning_rate_g,
                                      optname=self.cnf.get(
                                          'optname', 'momentum'),
                                      **self.cnf.get('opt_kwargs',
                                                     {'decay': 0.9}))
        # Get images and labels for ImageNet and split the batch across GPUs.
        assert self.cnf['batch_size_train'] % self.cnf.get(
            'num_gpus', 1) == 0, (
                'Batch size must be divisible by number of GPUs')

        self.inputs = tf.placeholder(tf.float32,
                                     shape=(None, self.model.image_size[0],
                                            self.model.image_size[0], 3),
                                     name="input")
        self.labels = tf.placeholder(tf.int32, shape=(None, ))

        self._tower_loss_semi_supervised(self.inputs,
                                         self.labels,
                                         num_classes=num_classes)

        # global_update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        global_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops is None:
            update_ops = global_update_ops
        else:
            update_ops = set(update_ops)
        # Make sure update_ops are computed before total_loss.
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name='update_barrier')
                self.d_losses[-1] = control_flow_ops.with_dependencies(
                    [barrier], self.d_losses[-1])
                self.g_losses[-1] = control_flow_ops.with_dependencies(
                    [barrier], self.g_losses[-1])
                self.d_loss_real = control_flow_ops.with_dependencies(
                    [barrier], self.d_loss_real)
                self.d_loss_fake = control_flow_ops.with_dependencies(
                    [barrier], self.d_loss_fake)
                self.d_loss_class = control_flow_ops.with_dependencies(
                    [barrier], self.d_loss_class)
        t_vars = self._get_vars_semi_supervised()
        if self.clip_by_global_norm:
            self.capped_d_grads = self._clip_grad_global_norms(
                t_vars['d_vars'],
                self.d_losses[-1],
                d_optimizer,
                gradient_noise_scale=0.0)
            self.capped_g_grads = self._clip_grad_global_norms(
                t_vars['g_vars'],
                self.g_losses[-1],
                g_optimizer,
                gradient_noise_scale=0.0)
        else:
            self.capped_d_grads = self._clip_grad_norms(
                d_optimizer.compute_gradients(self.d_losses[-1],
                                              t_vars['d_vars']))
            self.capped_g_grads = self._clip_grad_norms(
                g_optimizer.compute_gradients(self.g_losses[-1],
                                              t_vars['g_vars']))
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        if self.gradient_multipliers is not None:
            with tf.name_scope('multiply_grads'):
                self.capped_d_grads = self._multiply_gradients(
                    self.capped_d_grads, self.gradient_multipliers)
        apply_d_gradient_op = d_optimizer.apply_gradients(
            self.capped_d_grads, global_step=global_step)
        apply_g_gradient_op = g_optimizer.apply_gradients(
            self.capped_g_grads, global_step=global_step)
        self.train_op_d = control_flow_ops.with_dependencies(
            [apply_d_gradient_op], self.d_losses[-1])
        self.train_op_g = control_flow_ops.with_dependencies(
            [apply_g_gradient_op], self.g_losses[-1])
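
The barrier idiom above, a no_op created under control_dependencies and then with_dependencies on each dependent tensor, can be isolated as follows (the constant losses stand in for the real GAN losses):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

d_loss = tf.constant(1.0)  # stand-ins for the discriminator/generator losses
g_loss = tf.constant(2.0)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

if update_ops:
    with tf.control_dependencies(update_ops):
        barrier = tf.no_op(name='update_barrier')
    # Fetching either loss now runs every update op first.
    d_loss = control_flow_ops.with_dependencies([barrier], d_loss)
    g_loss = control_flow_ops.with_dependencies([barrier], g_loss)
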
Example #43
def create_train_op(total_loss,
                    optimizer,
                    global_step=None,
                    update_ops=None,
                    variables_to_train=None,
                    clip_gradient_norm=0,
                    summarize_gradients=False,
                    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False,
                    gradient_multipliers=None):
    """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `None`, then slim.variables.global_step() is used.
    update_ops: an optional list of updates to execute. Note that the update_ops
      that are used are the union of those update_ops passed to the function and
      the value of slim.ops.GetUpdateOps(). Therefore, if `update_ops` is None,
      then the value of slim.ops.GetUpdateOps() is still used.
    variables_to_train: an optional list of variables to train. If None, it will
      default to all tf.trainable_variables().
    clip_gradient_norm: If greater than 0 then the gradients would be clipped
      by it.
    summarize_gradients: Whether or not add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.
    gradient_multipliers: A dictionary of either `Variables` or `Variable` op
      names to the coefficient by which the associated gradient should be
      scaled.
  Returns:
    A `Tensor` that when evaluated, computes the gradients and returns the total
      loss value.
  """
    if global_step is None:
        global_step = variables.get_or_create_global_step()

    # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None.
    global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    if update_ops is None:
        update_ops = global_update_ops
    else:
        update_ops = set(update_ops)
    if not global_update_ops.issubset(update_ops):
        logging.warning(
            'update_ops in create_train_op does not contain all the '
            'update_ops in GraphKeys.UPDATE_OPS')

    # Make sure update_ops are computed before total_loss.
    if update_ops:
        with ops.control_dependencies(update_ops):
            barrier = control_flow_ops.no_op(name='update_barrier')
        total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

    if variables_to_train is None:
        # Default to tf.trainable_variables()
        variables_to_train = tf_variables.trainable_variables()
    else:
        # Make sure that variables_to_train are in tf.trainable_variables()
        for v in variables_to_train:
            assert v in tf_variables.trainable_variables()

    assert variables_to_train

    # Create the gradients. Note that apply_gradients adds the gradient
    # computation to the current graph.
    grads = optimizer.compute_gradients(
        total_loss,
        variables_to_train,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)

    # Scale gradients.
    if gradient_multipliers:
        grads = multiply_gradients(grads, gradient_multipliers)

    # Clip gradients.
    if clip_gradient_norm > 0:
        grads = clip_gradient_norms(grads, clip_gradient_norm)

    # Summarize gradients.
    if summarize_gradients:
        add_gradients_summaries(grads)

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

    # Make sure total_loss is valid.
    total_loss = array_ops.check_numerics(total_loss,
                                          'LossTensor is inf or nan')

    # Ensure the train_tensor computes grad_updates.
    return control_flow_ops.with_dependencies([grad_updates], total_loss)
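
A hedged usage sketch of this helper with a toy loss and optimizer (TF-Slim's own slim.learning.create_train_op has the same calling convention; the names below are illustrative):

import tensorflow as tf

slim = tf.contrib.slim

w = tf.Variable(2.0)                 # toy parameter
total_loss = tf.square(w - 1.0)      # toy loss
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

# Returns a tensor that applies the gradient update and yields the
# numerics-checked loss value when fetched.
train_op = slim.learning.create_train_op(total_loss, optimizer)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        print(sess.run(train_op))    # loss decreases as w moves toward 1.0
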
Example #44
  def _create_scale_operator(self, identity_multiplier, diag, tril,
                             perturb_diag, perturb_factor, shift,
                             validate_args):
    """Construct `scale` from various components.

    Args:
      identity_multiplier: floating point rank 0 `Tensor` representing a scaling
        done to the identity matrix.
      diag: Floating-point `Tensor` representing the diagonal matrix.
        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
        diagonal matrix.
      tril: Floating-point `Tensor` representing the lower triangular matrix.
        `scale_tril` has shape [N1, N2, ...  k, k], which represents a k x k
        lower triangular matrix.
      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
        the low rank update.
      perturb_factor: Floating-point `Tensor` representing factor matrix.
      shift: Floating-point `Tensor` representing the shift in `scale @ X + shift`.
      validate_args: Python `bool` indicating whether arguments should be
        checked for correctness.

    Returns:
      scale. In the case of scaling by a constant, scale is a
      floating point `Tensor`. Otherwise, scale is a `LinearOperator`.

    Raises:
      ValueError: if all of `tril`, `diag` and `identity_multiplier` are `None`.
    """
    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
    diag = _as_tensor(diag, "diag")
    tril = _as_tensor(tril, "tril")
    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")

    # If possible, use the low rank update to infer the shape of
    # the identity matrix, when scale represents a scaled identity matrix
    # with a low rank update.
    shape_hint = None
    if perturb_factor is not None:
      shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)

    if self._is_only_identity_multiplier:
      if validate_args:
        return control_flow_ops.with_dependencies(
            [check_ops.assert_none_equal(
                identity_multiplier,
                array_ops.zeros([], identity_multiplier.dtype),
                ["identity_multiplier should be non-zero."])],
            identity_multiplier)
      return identity_multiplier

    scale = distribution_util.make_tril_scale(
        loc=shift,
        scale_tril=tril,
        scale_diag=diag,
        scale_identity_multiplier=identity_multiplier,
        validate_args=validate_args,
        assert_positive=False,
        shape_hint=shape_hint)

    if perturb_factor is not None:
      return linalg.LinearOperatorLowRankUpdate(
          scale,
          u=perturb_factor,
          diag_update=perturb_diag,
          is_diag_update_positive=perturb_diag is None,
          is_non_singular=True,  # Implied by is_positive_definite=True.
          is_self_adjoint=True,
          is_positive_definite=True,
          is_square=True)

    return scale
Example #45
def resize_image_bboxes_with_crop_or_pad(image, bboxes, target_height,
                                         target_width):
    """Crops and/or pads an image to a target width and height.
    Resizes an image to a target width and height by either centrally
    cropping the image or padding it evenly with zeros.

    If `width` or `height` is greater than the specified `target_width` or
    `target_height` respectively, this op centrally crops along that dimension.
    If `width` or `height` is smaller than the specified `target_width` or
    `target_height` respectively, this op centrally pads with 0 along that
    dimension.
    Args:
      image: 3-D tensor of shape `[height, width, channels]`.
      bboxes: Tensor of bounding boxes associated with `image`; they are
        adjusted to match the cropped and/or padded image.
      target_height: Target height.
      target_width: Target width.
    Raises:
      ValueError: if `target_height` or `target_width` are zero or negative.
    Returns:
      Cropped and/or padded image of shape
        `[target_height, target_width, channels]`
    """
    with tf.name_scope('resize_with_crop_or_pad'):
        image = ops.convert_to_tensor(image, name='image')

        assert_ops = []
        assert_ops += _Check3DImage(image, require_static=False)
        assert_ops += _assert(target_width > 0, ValueError,
                              'target_width must be > 0.')
        assert_ops += _assert(target_height > 0, ValueError,
                              'target_height must be > 0.')

        image = control_flow_ops.with_dependencies(assert_ops, image)
        # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
        # Make sure our checks come first, so that error messages are clearer.
        if _is_tensor(target_height):
            target_height = control_flow_ops.with_dependencies(
                assert_ops, target_height)
        if _is_tensor(target_width):
            target_width = control_flow_ops.with_dependencies(
                assert_ops, target_width)

        def max_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.maximum(x, y)
            else:
                return max(x, y)

        def min_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.minimum(x, y)
            else:
                return min(x, y)

        def equal_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.equal(x, y)
            else:
                return x == y

        height, width, _ = _ImageDimensions(image)
        width_diff = target_width - width
        offset_crop_width = max_(-width_diff // 2, 0)
        offset_pad_width = max_(width_diff // 2, 0)

        height_diff = target_height - height
        offset_crop_height = max_(-height_diff // 2, 0)
        offset_pad_height = max_(height_diff // 2, 0)

        # Maybe crop if needed.
        height_crop = min_(target_height, height)
        width_crop = min_(target_width, width)
        cropped = tf.image.crop_to_bounding_box(image, offset_crop_height,
                                                offset_crop_width, height_crop,
                                                width_crop)
        bboxes = bboxes_crop_or_pad(bboxes, height, width, -offset_crop_height,
                                    -offset_crop_width, height_crop,
                                    width_crop)
        # Maybe pad if needed.
        resized = tf.image.pad_to_bounding_box(cropped, offset_pad_height,
                                               offset_pad_width, target_height,
                                               target_width)
        bboxes = bboxes_crop_or_pad(bboxes, height_crop, width_crop,
                                    offset_pad_height, offset_pad_width,
                                    target_height, target_width)

        # In theory all the checks below are redundant.
        if resized.get_shape().ndims is None:
            raise ValueError('resized contains no shape.')

        resized_height, resized_width, _ = _ImageDimensions(resized)

        assert_ops = []
        assert_ops += _assert(equal_(resized_height, target_height),
                              ValueError, 'resized height is not correct.')
        assert_ops += _assert(equal_(resized_width, target_width), ValueError,
                              'resized width is not correct.')

        resized = control_flow_ops.with_dependencies(assert_ops, resized)
        return resized, bboxes
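
A possible call site, assuming an input pipeline that yields a decoded image tensor and its bounding boxes (the placeholders below are illustrative):

import tensorflow as tf

image = tf.placeholder(tf.float32, shape=[None, None, 3])  # hypothetical image
bboxes = tf.placeholder(tf.float32, shape=[None, 4])       # hypothetical boxes

# Center-crop or zero-pad to 300x300 and adjust the boxes to match.
resized_image, resized_bboxes = resize_image_bboxes_with_crop_or_pad(
    image, bboxes, target_height=300, target_width=300)
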
Example #46
def body(i):
    new_u = state_ops.assign_add(u, v)
    new_i = math_ops.add(i, 1)
    op = control_flow_ops.group(new_u)
    new_i = control_flow_ops.with_dependencies([op], new_i)
    return [new_i]
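
This body is meant to drive a while_loop; a self-contained sketch under the assumption that u and v are float variables and the loop runs a fixed number of steps:

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops, math_ops, state_ops

u = tf.Variable(0.0)  # accumulates v on every iteration
v = tf.Variable(1.0)

def cond(i):
    return math_ops.less(i, 5)

# with_dependencies inside body() guarantees the assign_add runs before the
# loop counter advances on each iteration.
loop = control_flow_ops.while_loop(cond, body, [tf.constant(0)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(loop)
    print(sess.run(u))  # expected: 5.0 after five iterations
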
Example #47
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None):
    """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of tf.Optimizer that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be an instantiation of a tf.Optimizer sub-class
                 and have `compute_gradients` and `apply_gradients` functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float or `None`, clips gradients by this value.
    moving_average_decay: float or None, takes into account previous loss
                          to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: tf.train.exponential_decay.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.

  Returns:
    Training op.

  Raises:
    ValueError: if optimizer is wrong type.
  """
    with vs.variable_op_scope([loss, global_step], name, "OptimizeLoss"):
        # Update ops take UPDATE_OPS collection if not provided.
        update_ops = (set(update_ops or [])
                      or set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)))
        # Make sure update ops are run before computing the loss.
        if update_ops:
            with ops.control_dependencies(update_ops):
                barrier = control_flow_ops.no_op(name="update_barrier")
            loss = control_flow_ops.with_dependencies([barrier], loss)

        # Moving average of the loss with decay.
        if moving_average_decay is not None:
            # Generate moving averages of the loss.
            loss_averages = train.ExponentialMovingAverage(
                moving_average_decay, name="avg")
            loss_averages_op = loss_averages.apply([loss])
            logging_ops.scalar_summary("loss/mean",
                                       loss_averages.average(loss))
            loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

        # Learning rate variable, with possible decay.
        if (isinstance(learning_rate, ops.Tensor)
                and learning_rate.get_shape().ndims == 0):
            lr = learning_rate
        elif isinstance(learning_rate, float):
            lr = vs.get_variable(
                "learning_rate", [],
                trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))
        if learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif isinstance(optimizer, type) and issubclass(
                optimizer, optimizer_.Optimizer):
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(loss, variables)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)

        # Optionally clip gradients by global norm.
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        # Add scalar summary for loss.
        logging_ops.scalar_summary("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                logging_ops.histogram_summary(variable.name, variable)
                logging_ops.histogram_summary(variable.name + "/gradients",
                                              grad_values)
                logging_ops.histogram_summary(
                    variable.name + "/gradient_norm",
                    clip_ops.global_norm([grad_values]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")
        # Make sure total_loss is valid.
        final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates],
                                                          final_loss)

        return train_tensor
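
A hedged usage sketch of optimize_loss with a scalar loss and a string optimizer name; the contrib wrapper tf.contrib.layers.optimize_loss exposes a very similar interface (the toy loss below is illustrative):

import tensorflow as tf

w = tf.Variable(3.0)                 # toy parameter
loss = tf.square(w)                  # toy scalar loss
global_step = tf.train.get_or_create_global_step()

train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=global_step,
    learning_rate=0.01,
    optimizer='Adam',
    clip_gradients=5.0)
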
Example #48
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Configure model_deploy, keeping the TF-Slim models structure.
        # Useful if multiple GPUs and/or servers are needed in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'object/label',
                                                             'object/bbox'])
            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT)
            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            batch_shape = [1] + [len(ssd_anchors)] * 3

            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple
            clones of network_fn."""
            # Dequeue batch.
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct SSD network.
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                             dataset.num_samples,
                                                             global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Optimize over all clones, returning the total loss and the clone gradients.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)
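
# ------------------------------------------------------------------------- #
# Standalone sketch (assumptions: TF 1.x graph mode, a toy variable and
# optimizer) of the update_ops + with_dependencies pattern used above to build
# `train_tensor`: fetching the returned tensor forces every grouped update op
# (batch-norm updates, moving averages, the gradient application) to run in the
# same step, and yields the loss value.
# ------------------------------------------------------------------------- #
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

w = tf.Variable(2.0)
loss = tf.square(w - 1.0)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

update_ops = list(tf.get_collection(tf.GraphKeys.UPDATE_OPS))  # empty in this toy graph
update_ops.append(optimizer.minimize(loss))
update_op = tf.group(*update_ops)
train_tensor = control_flow_ops.with_dependencies([update_op], loss,
                                                  name='train_op')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        print(sess.run(train_tensor))  # the printed loss decreases across steps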
Example #49
def deploy(config,
           model_fn,
           args=None,
           kwargs=None,
           optimizer=None,
           summarize_gradients=False):
    """Deploys a Slim-constructed model across multiple clones.

    The deployment options are specified by the config object and support
    deploying one or several clones on different GPUs and one or several replicas
    of such clones.

    The argument `model_fn` is called `config.num_clones` times to create the
    model clones as `model_fn(*args, **kwargs)`.

    The optional argument `optimizer` is an `Optimizer` object.  If not `None`,
    the deployed model is configured for training with that optimizer.

    If `config` specifies deployment on multiple replicas then the default
    tensorflow device is set appropriately for each call to `model_fn` and for the
    slim variable creation functions: model and global variables will be created
    on the `ps` device, the clone operations will be on the `worker` device.

    Args:
      config: A `DeploymentConfig` object.
      model_fn: A callable. Called as `model_fn(*args, **kwargs)`
      args: Optional list of arguments to pass to `model_fn`.
      kwargs: Optional dict of keyword arguments to pass to `model_fn`.
      optimizer: Optional `Optimizer` object.  If passed the model is deployed
          for training with that optimizer.
      summarize_gradients: Whether or not to add summaries to the gradients.

    Returns:
      A `DeployedModel` namedtuple.

    """
    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Create Clones.
    clones = create_clones(config, model_fn, args, kwargs)
    first_clone = clones[0]

    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by model_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)

    train_op = None
    total_loss = None
    with tf.device(config.optimizer_device()):
        if optimizer:
            # Place the global step on the device storing the variables.
            with tf.device(config.variables_device()):
                global_step = slim.get_or_create_global_step()

            # Compute the gradients for the clones.
            total_loss, clones_gradients = optimize_clones(clones, optimizer)

            if clones_gradients:
                if summarize_gradients:
                    # Add summaries to the gradients.
                    summaries |= set(
                        _add_gradients_summaries(clones_gradients))

                # Create gradient updates.
                grad_updates = optimizer.apply_gradients(
                    clones_gradients, global_step=global_step)
                update_ops.append(grad_updates)

                update_op = tf.group(*update_ops)
                train_op = control_flow_ops.with_dependencies([update_op],
                                                              total_loss,
                                                              name='train_op')
        else:
            clones_losses = []
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            for clone in clones:
                with tf.name_scope(clone.scope):
                    clone_loss = _gather_clone_loss(clone, len(clones),
                                                    regularization_losses)
                    if clone_loss is not None:
                        clones_losses.append(clone_loss)
                    # Only use regularization_losses for the first clone
                    regularization_losses = None
            if clones_losses:
                total_loss = tf.add_n(clones_losses, name='total_loss')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone.scope))

        if total_loss is not None:
            # Add total_loss to summary.
            summaries.add(tf.summary.scalar('total_loss', total_loss))

        if summaries:
            # Merge all summaries together.
            summary_op = tf.merge_summary(list(summaries), name='summary_op')
        else:
            summary_op = None

    return DeployedModel(train_op, summary_op, total_loss, clones)
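
# ------------------------------------------------------------------------- #
# Hedged usage sketch for deploy() above. Assumptions: the function lives in
# slim's model_deploy module, importable here as `deployment.model_deploy`,
# TF 1.x graph mode, and the toy inputs/model below are illustrative only.
# ------------------------------------------------------------------------- #
import tensorflow as tf
from deployment import model_deploy  # assumed path of the slim deployment package

slim = tf.contrib.slim

def toy_model_fn(inputs, labels):
    logits = slim.fully_connected(inputs, 10, activation_fn=None)
    tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

with tf.Graph().as_default():
    config = model_deploy.DeploymentConfig(num_clones=1)
    with tf.device(config.inputs_device()):
        inputs = tf.random_normal([32, 100])
        labels = tf.one_hot(tf.random_uniform([32], maxval=10, dtype=tf.int32), 10)
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    model = model_deploy.deploy(config, toy_model_fn,
                                args=[inputs, labels],
                                optimizer=optimizer,
                                summarize_gradients=True)
    # model.train_op groups the gradient update with UPDATE_OPS and, when
    # fetched, returns model.total_loss; model.summary_op merges the summaries.

Example #50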
    def _forward_log_det_jacobian(self, x):
        # Let Y be a symmetric, positive definite matrix and write:
        #   Y = X X.T
        # where X is lower-triangular.
        #
        # Observe that,
        #   dY[i,j]/dX[a,b]
        #   = d/dX[a,b] { X[i,:] X[j,:] }
        #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
        #
        # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
        # symmetric and X is lower-triangular, we need vectors of dimension:
        #   d = p (p + 1) / 2
        # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
        #   k = { i (i + 1) / 2 + j   i>=j
        #       { undef               i<j
        # and assume zero-based indexes. When k is undef, the element is dropped.
        # Example:
        #           j      k
        #        0 1 2 3  /
        #    0 [ 0 . . . ]
        # i  1 [ 1 2 . . ]
        #    2 [ 3 4 5 . ]
        #    3 [ 6 7 8 9 ]
        # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
        # slight abuse: k(i,j)=undef means the element is dropped.)
        #
        # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
        # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
        # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
        # (1) j<=i<a thus i,j!=a.
        # (2) i=a>j  thus i,j!=a.
        #
        # Since the Jacobian is lower-triangular, we need only compute the product
        # of diagonal elements:
        #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
        #   = X[j,j] + I[i=j] X[i,j]
        #   = 2 X[j,j].
        # Since there is a 2 X[j,j] term for every lower-triangular element of X we
        # conclude:
        #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
        diag = array_ops.matrix_diag_part(x)

        # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
        # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
        # output is unchanged.
        diag = self._make_columnar(diag)

        if self.validate_args:
            is_matrix = check_ops.assert_rank_at_least(
                x, 2, message="Input must be a (batch of) matrix.")
            shape = array_ops.shape(x)
            is_square = check_ops.assert_equal(
                shape[-2],
                shape[-1],
                message="Input must be a (batch of) square matrix.")
            # Assuming lower-triangular means we only need check diag>0.
            is_positive_definite = check_ops.assert_positive(
                diag, message="Input must be positive definite.")
            x = control_flow_ops.with_dependencies(
                [is_matrix, is_square, is_positive_definite], x)

        # Create a vector equal to: [p, p-1, ..., 2, 1].
        if x.get_shape().ndims is None or x.get_shape().dims[-1].value is None:
            p_int = array_ops.shape(x)[-1]
            p_float = math_ops.cast(p_int, dtype=x.dtype)
        else:
            p_int = x.get_shape().dims[-1].value
            p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
        exponents = math_ops.linspace(p_float, 1., p_int)

        sum_weighted_log_diag = array_ops.squeeze(math_ops.matmul(
            math_ops.log(diag), exponents[..., array_ops.newaxis]),
                                                  axis=-1)
        fldj = p_float * np.log(2.) + sum_weighted_log_diag

        # We finally need to undo adding an extra column in non-scalar cases
        # where there is a single matrix as input.
        if x.get_shape().ndims is not None:
            if x.get_shape().ndims == 2:
                fldj = array_ops.squeeze(fldj, axis=-1)
            return fldj

        shape = array_ops.shape(fldj)
        maybe_squeeze_shape = array_ops.concat([
            shape[:-1],
            distribution_util.pick_vector(math_ops.equal(array_ops.rank(x), 2),
                                          np.array([], dtype=np.int32),
                                          shape[-1:])
        ], 0)
        return array_ops.reshape(fldj, maybe_squeeze_shape)
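
# ------------------------------------------------------------------------- #
# Standalone NumPy check (not part of the bijector code above) of the claimed
# log-det-Jacobian for Y = X X.T with X lower-triangular:
#   log|Jac| = p * log(2) + sum_{j=0}^{p-1} (p - j) * log(X[j, j]),
# using the row-major lower-triangular vectorization k(i, j) = i*(i+1)/2 + j
# described in the comments above. The matrix size and seed are arbitrary.
# ------------------------------------------------------------------------- #
import numpy as np

def check_fldj(p=3, eps=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    x = np.tril(rng.rand(p, p)) + np.eye(p)      # lower-triangular, positive diagonal
    tril_idx = [(i, j) for i in range(p) for j in range(i + 1)]
    d = len(tril_idx)                            # d = p * (p + 1) / 2

    def vec_y(mat):
        y = mat.dot(mat.T)
        return np.array([y[i, j] for i, j in tril_idx])

    # Finite-difference Jacobian of vec[Y] with respect to vec[X].
    jac = np.zeros((d, d))
    for col, (a, b) in enumerate(tril_idx):
        dx = np.zeros_like(x)
        dx[a, b] = eps
        jac[:, col] = (vec_y(x + dx) - vec_y(x - dx)) / (2.0 * eps)

    numeric = np.log(abs(np.linalg.det(jac)))
    closed_form = p * np.log(2.0) + sum((p - j) * np.log(x[j, j]) for j in range(p))
    np.testing.assert_allclose(numeric, closed_form, rtol=1e-4)

check_fldj()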
Example #51
def random_crop(image_list, crop_height, crop_width):
    """Crops the given list of images.
    The function applies the same crop to each image in the list. This can be
    effectively applied when there are multiple image inputs of the same
    dimension such as:
      image, depths, normals = _random_crop([image, depths, normals], 120, 150)
    Args:
      image_list: a list of image tensors of the same dimension but possibly varying channel.
      crop_height: the new height.
      crop_width: the new width.
    Returns:
      the image_list with cropped images.
    Raises:
      ValueError: if there are multiple image inputs provided with different size or the images are smaller than the
       crop dimensions.
    """
    if not image_list:
        raise ValueError('Empty image_list.')

    # Compute the rank assertions.
    rank_assertions = []
    for i in range(len(image_list)):
        image_rank = tf.rank(image_list[i])
        rank_assert = tf.Assert(tf.equal(image_rank, 3), [
            'Wrong rank for tensor  %s [expected] [actual]',
            image_list[i].name, 3, image_rank
        ])
        rank_assertions.append(rank_assert)

    image_shape = control_flow_ops.with_dependencies([rank_assertions[0]],
                                                     tf.shape(image_list[0]))
    image_height = image_shape[0]
    image_width = image_shape[1]
    crop_size_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(image_height, crop_height),
                       tf.greater_equal(image_width, crop_width)),
        ['Crop size greater than the image size.'])
    asserts = [rank_assertions[0], crop_size_assert]

    for i in range(1, len(image_list)):
        image = image_list[i]
        asserts.append(rank_assertions[i])
        shape = control_flow_ops.with_dependencies([rank_assertions[i]],
                                                   tf.shape(image))
        height = shape[0]
        width = shape[1]

        height_assert = tf.Assert(tf.equal(height, image_height), [
            'Wrong height for tensor %s [expected][actual]', image.name,
            height, image_height
        ])
        width_assert = tf.Assert(tf.equal(width, image_width), [
            'Wrong width for tensor %s [expected][actual]', image.name, width,
            image_width
        ])
        asserts.extend([height_assert, width_assert])

    # Create a random bounding box.
    max_offset_height = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_height - crop_height + 1, []))
    max_offset_width = control_flow_ops.with_dependencies(
        asserts, tf.reshape(image_width - crop_width + 1, []))
    offset_height = tf.random_uniform([],
                                      maxval=max_offset_height,
                                      dtype=tf.int32)
    offset_width = tf.random_uniform([],
                                     maxval=max_offset_width,
                                     dtype=tf.int32)

    return [
        crop(image, offset_height, offset_width, crop_height, crop_width)
        for image in image_list
    ]
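
# ------------------------------------------------------------------------- #
# Standalone sketch (TF 1.x graph mode, a toy zero image) of the
# assertion-gated offset pattern used by random_crop() above: the maximum
# offset only becomes available once the crop-size check has passed, so an
# oversized crop fails at run time instead of silently producing garbage.
# ------------------------------------------------------------------------- #
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

image = tf.zeros([40, 60, 3])
crop_height, crop_width = 32, 32
shape = tf.shape(image)
size_ok = tf.Assert(
    tf.logical_and(tf.greater_equal(shape[0], crop_height),
                   tf.greater_equal(shape[1], crop_width)),
    ['Crop size greater than the image size.'])
max_offset_height = control_flow_ops.with_dependencies(
    [size_ok], tf.reshape(shape[0] - crop_height + 1, []))
offset_height = tf.random_uniform([], maxval=max_offset_height, dtype=tf.int32)

with tf.Session() as sess:
    print(sess.run(offset_height))  # some value in [0, 40 - 32]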
Example #52
  def __init__(self,
               mix_loc,
               temperature,
               distribution,
               loc=None,
               scale=None,
               quadrature_size=8,
               quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorDiffeomixture"):
    """Constructs the VectorDiffeomixture on `R^d`.

    The vector diffeomixture (VDM) approximates the compound distribution

    ```none
    p(x) = int p(x | z) p(z) dz,
    where z is in the K-simplex, and
    p(x | z) := p(x | loc=sum_k z[k] loc[k], scale=sum_k z[k] scale[k])
    ```

    Args:
      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
        In terms of samples, larger `mix_loc[..., k]` ==>
        `Z` is more likely to put more weight on its `kth` component.
      temperature: `float`-like `Tensor`. Broadcastable with `mix_loc`.
        In terms of samples, smaller `temperature` means one component is more
        likely to dominate.  I.e., smaller `temperature` makes the VDM look more
        like a standard mixture of `K` components.
      distribution: `tf.Distribution`-like instance. Distribution from which `d`
        iid samples are used as input to the selected affine transformation.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a VectorDiffeomixture sample and the `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element
        represents the `shift` used for the `k`-th affine transformation.  If
        the `k`-th item is `None`, `loc` is implicitly `0`.  When specified,
        must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event
        size.
      scale: Length-`K` list of `LinearOperator`s. Each should be
        positive-definite and operate on a `d`-dimensional vector space. The
        `k`-th element represents the `scale` used for the `k`-th affine
        transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
        `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices
      quadrature_size: Python `int` scalar representing number of
        quadrature points.  Larger `quadrature_size` means `q_N(x)` better
        approximates `p(x)`.
      quadrature_fn: Python callable taking `normal_loc`, `normal_scale`,
        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
        representing the SoftmaxNormal grid and corresponding normalized weight.
        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if `not scale or len(scale) < 2`.
      ValueError: if `len(loc) != len(scale)`
      ValueError: if `quadrature_grid_and_probs is not None` and
        `len(quadrature_grid_and_probs[0]) != len(quadrature_grid_and_probs[1])`
      ValueError: if `validate_args` and any not scale.is_positive_definite.
      TypeError: if any scale.dtype != scale[0].dtype.
      TypeError: if any loc.dtype != scale[0].dtype.
      NotImplementedError: if `len(scale) != 2`.
      ValueError: if `not distribution.is_scalar_batch`.
      ValueError: if `not distribution.is_scalar_event`.
    """
    parameters = dict(locals())
    with ops.name_scope(name, values=[mix_loc, temperature]) as name:
      if not scale or len(scale) < 2:
        raise ValueError("Must specify list (or list-like object) of scale "
                         "LinearOperators, one for each component with "
                         "num_component >= 2.")

      if loc is None:
        loc = [None]*len(scale)

      if len(loc) != len(scale):
        raise ValueError("loc/scale must be same-length lists "
                         "(or same-length list-like objects).")

      dtype = scale[0].dtype.base_dtype

      loc = [ops.convert_to_tensor(loc_, dtype=dtype, name="loc{}".format(k))
             if loc_ is not None else None
             for k, loc_ in enumerate(loc)]

      for k, scale_ in enumerate(scale):
        if validate_args and not scale_.is_positive_definite:
          raise ValueError("scale[{}].is_positive_definite = {} != True".format(
              k, scale_.is_positive_definite))
        if scale_.dtype.base_dtype != dtype:
          raise TypeError(
              "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\"".format(
                  k, scale_.dtype.base_dtype.name, dtype.name))

      self._endpoint_affine = [
          AffineLinearOperator(shift=loc_,
                               scale=scale_,
                               validate_args=validate_args,
                               name="endpoint_affine_{}".format(k))
          for k, (loc_, scale_) in enumerate(zip(loc, scale))]

      # TODO(jvdillon): Remove once we support k-mixtures.
      # We make this assertion here because otherwise `grid` would need to be a
      # vector not a scalar.
      if len(scale) != 2:
        raise NotImplementedError("Currently only bimixtures are supported; "
                                  "len(scale)={} is not 2.".format(len(scale)))

      mix_loc = ops.convert_to_tensor(
          mix_loc, dtype=dtype, name="mix_loc")
      temperature = ops.convert_to_tensor(
          temperature, dtype=dtype, name="temperature")
      self._grid, probs = tuple(quadrature_fn(
          mix_loc / temperature,
          1. / temperature,
          quadrature_size,
          validate_args))

      # Note: by creating the logits as `log(prob)` we ensure that
      # `self.mixture_distribution.logits` is equivalent to
      # `math_ops.log(self.mixture_distribution.probs)`.
      self._mixture_distribution = categorical_lib.Categorical(
          logits=math_ops.log(probs),
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats)

      asserts = distribution_util.maybe_check_scalar_distribution(
          distribution, dtype, validate_args)
      if asserts:
        self._grid = control_flow_ops.with_dependencies(
            asserts, self._grid)
      self._distribution = distribution

      self._interpolated_affine = [
          AffineLinearOperator(shift=loc_,
                               scale=scale_,
                               validate_args=validate_args,
                               name="interpolated_affine_{}".format(k))
          for k, (loc_, scale_) in enumerate(zip(
              interpolate_loc(self._grid, loc),
              interpolate_scale(self._grid, scale)))]

      [
          self._batch_shape_,
          self._batch_shape_tensor_,
          self._event_shape_,
          self._event_shape_tensor_,
      ] = determine_batch_event_shapes(self._grid,
                                       self._endpoint_affine)

      super(VectorDiffeomixture, self).__init__(
          dtype=dtype,
          # We hard-code `FULLY_REPARAMETERIZED` because when
          # `validate_args=True` we verify that indeed
          # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`. A
          # distribution which is a function of only non-trainable parameters
          # also implies we can use `FULLY_REPARAMETERIZED`. However, we cannot
          # easily test for that possibility thus we use `validate_args=False`
          # as a "back-door" to allow users a way to use non
          # `FULLY_REPARAMETERIZED` distribution. In such cases IT IS THE USERS
          # RESPONSIBILITY to verify that the base distribution is a function of
          # non-trainable parameters.
          reparameterization_type=distribution_lib.FULLY_REPARAMETERIZED,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          graph_parents=(
              distribution._graph_parents  # pylint: disable=protected-access
              + [loc_ for loc_ in loc if loc_ is not None]
              + [p for scale_ in scale for p in scale_.graph_parents]),
          name=name)
Example #53
    def model_fn(self, features, mode, config):
        """Model function for the estimator.

    Note that this does not take a `labels` arg. This works, but `input_fn` must
    return either `features` or, equivalently, `(features, None)`.

    Args:
      features: The input points. See `tf.estimator.Estimator`.
      mode: See `tf.estimator.Estimator`.
      config: See `tf.estimator.Estimator`.

    Returns:
      A `tf.estimator.EstimatorSpec` (see `tf.estimator.Estimator`) specifying
      this behavior:
        * `train_op`: Execute one mini-batch or full-batch run of Lloyd's
             algorithm.
        * `loss`: The sum of the squared distances from each input point to its
             closest center.
        * `eval_metric_ops`: Maps `SCORE` to `loss`.
        * `predictions`: Maps `ALL_DISTANCES` to the distance from each input
             point to each cluster center; maps `CLUSTER_INDEX` to the index of
             the closest cluster center for each input point.
    """
        # input_points is a single Tensor. Therefore, the sharding functionality
        # in clustering_ops is unused, and some of the values below are lists of a
        # single item.
        input_points = _parse_features_if_necessary(features,
                                                    self._feature_columns)

        # Let N = the number of input_points.
        # all_distances: A list of one matrix of shape (N, num_clusters). Each value
        #   is the distance from an input point to a cluster center.
        # model_predictions: A list of one vector of shape (N). Each value is the
        #   cluster id of an input point.
        # losses: Similar to cluster_idx but provides the distance to the cluster
        #   center.
        # is_initialized: scalar indicating whether the initial cluster centers
        #   have been chosen; see init_op.
        # init_op: an op to choose the initial cluster centers. A single worker
        #   repeatedly executes init_op until is_initialized becomes True.
        # training_op: an op that runs an iteration of training, either an entire
        #   Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers
        #   may execute this op, but only after is_initialized becomes True.
        (all_distances, model_predictions, losses, is_initialized, init_op,
         training_op) = clustering_ops.KMeans(
             inputs=input_points,
             num_clusters=self._num_clusters,
             initial_clusters=self._initial_clusters,
             distance_metric=self._distance_metric,
             use_mini_batch=self._use_mini_batch,
             mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration,
             random_seed=self._random_seed,
             kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
         ).training_graph()

        loss = math_ops.reduce_sum(losses)
        summary.scalar('loss/raw', loss)

        incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
        training_op = control_flow_ops.with_dependencies(
            [training_op, incr_step], loss)

        training_hooks = [
            _InitializeClustersHook(init_op, is_initialized, config.is_chief)
        ]
        if self._relative_tolerance is not None:
            training_hooks.append(
                _LossRelativeChangeHook(loss, self._relative_tolerance))

        export_outputs = {
            KMeansClustering.ALL_DISTANCES:
            export_output.PredictOutput(all_distances[0]),
            KMeansClustering.CLUSTER_INDEX:
            export_output.PredictOutput(model_predictions[0]),
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            export_output.PredictOutput(model_predictions[0])
        }

        return model_fn_lib.EstimatorSpec(
            mode=mode,
            predictions={
                KMeansClustering.ALL_DISTANCES: all_distances[0],
                KMeansClustering.CLUSTER_INDEX: model_predictions[0],
            },
            loss=loss,
            train_op=training_op,
            eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)},
            training_hooks=training_hooks,
            export_outputs=export_outputs)
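
# ------------------------------------------------------------------------- #
# Standalone sketch (TF 1.x, toy variables) of the pattern used above to build
# the k-means training op: with_dependencies ties the clustering update and the
# global-step increment to the loss tensor, so fetching the loss runs exactly
# one training iteration and bumps the step counter.
# ------------------------------------------------------------------------- #
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

global_step = tf.train.get_or_create_global_step()
state = tf.Variable(0.0)                     # stands in for the clustering state
training_op = tf.assign_add(state, 1.0)      # stands in for one Lloyd update
loss = tf.square(state)                      # toy "loss"
incr_step = tf.assign_add(global_step, 1)
train_op = control_flow_ops.with_dependencies([training_op, incr_step], loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run(global_step))  # 1: fetching train_op incremented the step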
Example #54
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
        global_step = tf.train.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    # dataset = dataset_factory.get_dataset(
    #     FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    dataset = ucf11.get_split(FLAGS.dataset_split_name, FLAGS.dataset_dir)
    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=(dataset.num_classes -
                                                          FLAGS.labels_offset),
                                             weight_decay=FLAGS.weight_decay,
                                             is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    train_image_size = FLAGS.train_image_size or network_fn.default_image_size

    input, label = ucf11.build_data(dataset)
    input = image_preprocessing_fn(input, train_image_size, train_image_size)

    inputs, labels = tf.train.batch(
        [input, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)
    labels = slim.one_hot_encoding(labels,
                                   dataset.num_classes - FLAGS.labels_offset)
    batch_queue = slim.prefetch_queue.prefetch_queue([inputs, labels],
                                                     capacity=2 *
                                                     deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
        """Allows data parallelism by creating multiple clones of network_fn."""
        inputs, labels = batch_queue.dequeue()
        images = tf.unstack(inputs, axis=1)

        logits, end_points = network_fn(images[0])

        #############################
        # Specify the loss function #
        #############################
        tf.losses.softmax_cross_entropy(logits=logits,
                                        onehot_labels=labels,
                                        label_smoothing=FLAGS.label_smoothing,
                                        weights=1.0)
        return end_points

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
        x = end_points[end_point]
        summaries.add(tf.summary.histogram('activations/' + end_point, x))
        summaries.add(
            tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
        summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
        summaries.add(tf.summary.histogram(variable.op.name, variable))

    # # Accuracy
    # logits = end_points['Logits']
    # pred = tf.nn.softmax(logits)
    # predictions = tf.argmax(pred, axis=1)
    # truth = tf.argmax(labels, axis=1)
    # truth = tf.squeeze(truth)
    # # # accuracy, accuracy_update = tf.metrics.accuracy(truth, predictions)
    # # #
    # # # update_ops.append(accuracy_update)
    # # # summaries.add(tf.summary.scalar('Accuracy', accuracy))
    # #
    # names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
    #     'Accuracy': slim.metrics.streaming_accuracy(predictions, truth),
    #     'Recall_5': slim.metrics.streaming_recall_at_k(
    #         logits, truth, 5),
    # })
    #
    # # Print the summaries to screen.
    # for name, value in names_to_values.items():
    #     summary_name = 'train/%s' % name
    #     op = tf.summary.scalar(summary_name, value, collections=[])
    #     op = tf.Print(op, [value], summary_name)
    #     summaries.add(op)
    #
    # update_ops.append(names_to_updates)

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
        moving_average_variables = slim.get_model_variables()
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
    else:
        moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
        learning_rate = _configure_learning_rate(dataset.num_samples,
                                                 global_step)
        optimizer = _configure_optimizer(learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
        # If sync_replicas is enabled, the averaging will be done in the chief
        # queue runner.
        optimizer = tf.train.SyncReplicasOptimizer(
            opt=optimizer,
            replicas_to_aggregate=FLAGS.replicas_to_aggregate,
            variable_averages=variable_averages,
            variables_to_average=moving_average_variables,
            replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
            total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
        # Update ops executed locally by trainer.
        update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Optimize over all clones, returning the total loss and the clone gradients.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones, optimizer, var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    train_tensor = control_flow_ops.with_dependencies([update_op],
                                                      total_loss,
                                                      name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        session_config=session_config)
Example #55
def split(value: ragged_tensor.Ragged,
          num_or_size_splits,
          axis=0,
          num=None,
          name=None):
    """Splits a RaggedTensor `value` into a list of sub RaggedTensors.

  If `num_or_size_splits` is an `int`,  then it splits `value` along the
  dimension `axis` into `num_or_size_splits` smaller RaggedTensors. This
  requires that `value.shape[axis]` is divisible by `num_or_size_splits`.

  If `num_or_size_splits` is a 1-D Tensor (or list), then `value` is split into
  `len(num_or_size_splits)` elements. The shape of the `i`-th element has the
  same size as the `value` except along dimension `axis` where the size is
  `num_or_size_splits[i]`.

  Splitting along a ragged dimension is not allowed.

  For example:

  >>> rt = tf.RaggedTensor.from_row_lengths(
  ...      np.arange(6 * 3).reshape(6, 3), row_lengths=[1, 2, 2, 1])
  >>> rt.shape
  TensorShape([4, None, 3])
  >>>
  >>> rt1, rt2 = tf.split(rt, 2)  # uniform splits
  >>> rt1.shape
  TensorShape([2, None, 3])
  >>> rt2.shape
  TensorShape([2, None, 3])
  >>>
  >>> rt3, rt4, rt5 = tf.split(rt, [1, 2, 1])  # ragged splits
  >>> rt3.shape
  TensorShape([1, None, 3])
  >>> rt4.shape
  TensorShape([2, None, 3])
  >>> rt5.shape
  TensorShape([1, None, 3])
  >>>
  >>> rt6, rt7 = tf.split(rt, [1, 2], axis=2)  # splits along axis 2
  >>> rt6.shape
  TensorShape([4, None, 1])
  >>> rt7.shape
  TensorShape([4, None, 2])

  Args:
    value: The `RaggedTensor` to split.
    num_or_size_splits: Either an `int` indicating the number of splits
      along `axis` or a 1-D integer `Tensor` or Python list containing the sizes
      of each output tensor along `axis`. If a Python int, then it must evenly
      divide `value.shape[axis]`; otherwise the sum of sizes along the split
      axis must match that of the `value`.
    axis: An `int` or scalar `int32` `Tensor`. The dimension along which
      to split. Must be in the range `[-rank(value), rank(value))`. Defaults to
      0.
    num: An `int` used to specify the number of outputs when
      `num_or_size_splits` is a 1-D list or `Tensor` and its length is
      statically unknown, e.g., specifying `tf.TensorSpec(None)` with
      the `input_signature` argument of `tf.function` (optional).
    name: A name for the operation (optional).

  Returns:
    if `num_or_size_splits` is an `int` returns a list of `num_or_size_splits`
    `RaggedTensor` objects; if `num_or_size_splits` is a 1-D Tensor returns
    `num_or_size_splits.get_shape()[0]` `RaggedTensor` objects resulting from
    splitting `value`.

  Raises:
    ValueError: If the dimension `axis` of `value` is a ragged dimension.
    ValueError: If `num` is unspecified and cannot be inferred.
    ValueError: If `num` is specified but doesn't match the length of
      `num_or_size_splits`.
    ValueError: If `num_or_size_splits` is an `int` and less than 1.
    TypeError: If `num_or_size_splits` is not an `int` or 1-D
      list or 1-D `Tensor`.
    InvalidArgumentError: If the `axis` dimension of `value` cannot be exactly
      split by `num_or_size_splits`.
    InvalidArgumentError: If `num_or_size_splits` contains negative integers.
    InvalidArgumentError: If `num_or_size_splits`'s static shape is unknown and
      its dynamic shape is inconsistent with `num`.
    InvalidArgumentError: If `num_or_size_splits`'s static rank is unknown and
      `axis` is a negative integer.
  """
    with ops.name_scope(name, 'RaggedSplit'):
        if isinstance(num_or_size_splits, int) and num_or_size_splits == 1:
            return [value]

        # static assert
        check_ops.assert_integer_v2(
            num_or_size_splits,
            message=('`num_or_size_splits` must be an `int` or 1-D list or '
                     '`Tensor` of integers.'))
        value_shape = ragged_shape.RaggedShape.from_tensor(value)
        axis = array_ops.get_positive_axis(axis, value_shape.rank)
        try:
            dim_size = value_shape[axis]
        except ValueError:
            raise ValueError(
                'Cannot split a ragged dimension. Got `value` with '
                f'shape {value_shape} and `axis` {axis}.')
        if isinstance(num_or_size_splits, int):
            # Uniform split
            num_splits = num_or_size_splits
            if num_splits < 1:
                raise ValueError(
                    '`num_or_size_splits` must be >=1 if it is an `int`.'
                    f'Received {num_or_size_splits}.')
            split_length = math_ops.floordiv(dim_size, num_splits)
            split_lengths = array_ops.repeat(split_length, num_splits)
        else:
            # Ragged split
            num_splits = None
            split_lengths = ops.convert_to_tensor(num_or_size_splits)
            if split_lengths.shape.ndims is not None:
                if split_lengths.shape.ndims != 1:
                    raise TypeError(
                        '`num_or_size_splits` must be an `int` or 1-D list '
                        f'or `Tensor`. Received {num_or_size_splits}.')
                num_splits = tensor_shape.dimension_value(
                    split_lengths.shape[0])

            if num_splits is None:
                if num is None:
                    raise ValueError(
                        '`num` must be specified as an `int` when the '
                        'size of `num_or_size_split` is statically '
                        f'unknown. Received `num`: {num} and '
                        f'`num_or_size_split`: {num_or_size_splits}.')
                num_splits = num
            else:
                if num is not None and num != num_splits:
                    raise ValueError(
                        '`num` does not match the size of '
                        f'`num_or_size_split`. Received `num`: {num} and '
                        f'size of `num_or_size_split`: {num_splits}.')

        splits = array_ops.concat([[0], math_ops.cumsum(split_lengths)],
                                  axis=0)
        checks = []
        checks.append(
            check_ops.assert_non_negative_v2(
                num_or_size_splits,
                message='`num_or_size_splits` must be non-negative.'))
        checks.append(
            check_ops.assert_equal_v2(
                num_splits,
                array_ops.shape(split_lengths)[0],
                message=
                '`num` is inconsistent with `num_or_size_split.shape[0]`.'))
        checks.append(
            check_ops.assert_equal_v2(
                math_ops.cast(dim_size, splits.dtype),
                splits[-1],
                message=(
                    'Cannot exactly split the `axis` dimension of `value` '
                    'with the given `num_or_size_split`.')))
        splits = control_flow_ops.with_dependencies(checks, splits)
        splited_rts = []
        slices = [slice(None)] * (axis + 1)
        for i in range(num_splits):
            slices[-1] = slice(splits[i], splits[i + 1])
            splited_rts.append(value[tuple(slices)])
        return splited_rts
Example #56
def optimize_loss(loss,
                  optimizer,
                  optimizer_params,
                  learning_rate_decay_fn,
                  dtype=tf.float32,
                  clip_gradients=None,
                  summaries=None,
                  larc_params=None,
                  loss_scaling=1.0,
                  loss_scaling_params=None,
                  on_horovod=False,
                  iter_size=1,
                  skip_update_ph=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Scalar `Tensor`.
    optimizer: string or class of optimizer, used as trainer.
        string should be name of optimizer, like 'SGD',
        'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
        class should be sub-class of `tf.Optimizer` that implements
        `compute_gradients` and `apply_gradients` functions.
    optimizer_params: parameters of the optimizer.
    dtype: model dtype (tf.float16, tf.float32 or "mixed").
    learning_rate_decay_fn: function, takes `global_step`
        `Tensor`s, returns `Tensor`.
        Can be used to implement any learning rate decay
        functions.
        For example: `tf.train.exponential_decay`.
        Ignored if `learning_rate` is not supplied.
    clip_gradients: float, max gradient norm to clip to.
    summaries: List of internal quantities to visualize on tensorboard. If not
        set only the loss and the learning rate will be reported. The
        complete list is in OPTIMIZER_SUMMARIES.
    larc_params: If not None, LARC re-scaling will
        be applied with corresponding parameters.
    loss_scaling: could be float or string. If float, static loss scaling
        is applied. If string, the corresponding automatic
        loss scaling algorithm is used. Must be one of 'Backoff'
        or 'LogMax' (case insensitive). Only used when dtype="mixed".
    on_horovod: whether the model is run on horovod.

  Returns:
    training op.
  """
  if summaries is None:
    summaries = ["learning_rate", "global_gradient_norm", "loss_scale"]
  else:
    for summ in summaries:
      if summ not in OPTIMIZER_SUMMARIES:
        raise ValueError(
            "Summaries should be one of [{}], you provided {}.".format(
                ", ".join(OPTIMIZER_SUMMARIES), summ,
            )
        )
  if clip_gradients is not None and larc_params is not None:
    raise AttributeError(
        "LARC and gradient norm clipping should not be used together"
    )

  global_step = tf.train.get_or_create_global_step()
  lr = learning_rate_decay_fn(global_step)
  if "learning_rate" in summaries:
    tf.summary.scalar("learning_rate", lr)

  with tf.variable_scope("Loss_Optimization"):
    update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [{}], you provided {}.".format(
                ", ".join(OPTIMIZER_CLS_NAMES), optimizer
            )
        )
      optimizer = OPTIMIZER_CLS_NAMES[optimizer]
    opt = optimizer(learning_rate=lr, **optimizer_params)

    if isinstance(loss_scaling, six.string_types):
      loss_scaling = AutomaticLossScaler(
          algorithm=loss_scaling,
          params=loss_scaling_params
      )
      if "loss_scale" in summaries:
        tf.summary.scalar("loss_scale", loss_scaling.loss_scale)

    if dtype == 'mixed':
      opt = MixedPrecisionOptimizerWrapper(opt, loss_scale=loss_scaling)

    # Compute gradients.
    grads_and_vars = opt.compute_gradients(
        loss, colocate_gradients_with_ops=True,
    )

    if on_horovod:
      if iter_size > 1:
        grads_and_vars_accum = []
        accum_ops = []
        for grad, var in grads_and_vars:
          # necessary to use tf.Variable directly to instantiate cudnn rnn cells
          # which don't have explicit shape.
          grad_accum = tf.Variable(
              initial_value=tf.zeros_like(var),
              name=grad.name.split(":")[0] + "_accum",
              expected_shape=var.shape,
              dtype=grad.dtype,
              trainable=False,
              validate_shape=bool(var.get_shape())
          )
          if isinstance(grad, tf.IndexedSlices):
            add_grads = tf.scatter_nd_add(grad_accum, grad.indices,
                                          grad.values / iter_size)
          else:
            add_grads = grad_accum + grad / iter_size

          accum_ops.append(tf.assign(grad_accum, add_grads))
          grads_and_vars_accum.append((grad_accum, var))

        accum_op = tf.group(accum_ops)

        def update_and_clear_op():
          with tf.control_dependencies([accum_op]):
            red_grad_updates = opt.apply_gradients(
                post_process_gradients(
                    reduce_gradients(grads_and_vars_accum, on_horovod=True),
                    lr=lr,
                    clip_gradients=clip_gradients,
                    larc_params=larc_params,
                    summaries=summaries,
                ),
                global_step=global_step,
            )

          with tf.control_dependencies([red_grad_updates]):
            return tf.group([tf.assign(g, tf.zeros_like(g))
                             for g, v in grads_and_vars_accum])

        grad_updates = tf.cond(
            pred=skip_update_ph,
            true_fn=lambda: accum_op,
            false_fn=update_and_clear_op,
        )
      else:
        grad_updates = opt.apply_gradients(
            post_process_gradients(
                reduce_gradients(grads_and_vars, on_horovod=True),
                lr=lr,
                clip_gradients=clip_gradients,
                larc_params=larc_params,
                summaries=summaries,
            ),
            global_step=global_step,
        )
    else:
      grad_updates = opt.apply_gradients(
          post_process_gradients(
              grads_and_vars,
              lr=lr,
              clip_gradients=clip_gradients,
              larc_params=larc_params,
              summaries=summaries,
          ),
          global_step=global_step,
      )

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
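
# ------------------------------------------------------------------------- #
# Hedged usage sketch for optimize_loss() above. It assumes the call happens
# inside the same module (so OPTIMIZER_CLS_NAMES, post_process_gradients, etc.
# are in scope); the loss, decay schedule, and hyper-parameters below are
# illustrative placeholders, not values from the original code.
# ------------------------------------------------------------------------- #
# loss = build_model_loss()                      # hypothetical scalar loss
# train_op = optimize_loss(
#     loss,
#     optimizer="Adam",
#     optimizer_params={"epsilon": 1e-08},
#     learning_rate_decay_fn=lambda step: tf.train.exponential_decay(
#         1e-3, step, decay_steps=10000, decay_rate=0.5, staircase=True),
#     dtype=tf.float32,
#     clip_gradients=5.0,
#     summaries=["learning_rate", "global_gradient_norm"],
# )
# # Fetching train_op first runs UPDATE_OPS and the gradient application, then
# # returns the loss value for that step.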
Example #57
def optimize_loss(loss,
                  optimizer,
                  optimizer_params,
                  learning_rate_decay_fn,
                  var_list = None,
                  dtype = tf.float32,
                  clip_gradients = None,
                  summaries = None,
                  larc_params = None,
                  loss_scaling = 1.0,
                  loss_scaling_params = None,
                  iter_size = 1,
                  skip_update_ph = None,
                  model = None):

    """
    Given loss and parameters for optimizer, returns a training op.
    """

    if summaries is None:
        summaries = ["learning_rate", "global_gradient_norm", "loss_scale"]
    else:
        for sumn in summaries:
            if sumn not in OPTIMIZER_SUMMARIES:
                raise ValueError(
                        "Summaries should be one of [{}], you provided {}.".format(
                            ",".join(OPTIMIZER_SUMMARIES), sumn))

    if clip_gradients is not None and larc_params is not None:
        raise AttributeError(
                "LARC and gradient norm clipping should not be used together")

    global_step = tf.train.get_or_create_global_step()
    lr = learning_rate_decay_fn(global_step)
    
    if "learning_rate" in summaries:
        tf.summary.scalar("learning_rate", lr)

    with tf.variable_scope("LossOptimization"):
        update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        """
        contro_flow_ops.with_dependencies 实现图节点之间的依赖控制
        with_dependencies(dependencies, output_tensor, name = None)

        """
        loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        if optimizer == "AdamW":
            optimizer_params["weight_decay"] = optimizer_params["weight_decay"] * lr

        # Create optimizer, given specified parameters
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError("Optimizer name should be one of [{}], you provided {}".format(", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            optimizer = OPTIMIZER_CLS_NAMES[optimizer]

        opt = optimizer(learning_rate = lr, **optimizer_params)

        if isinstance(loss_scaling, six.string_types):
            loss_scaling = AutomaticLossScaler(algorithm = loss_scaling,
                                               params = loss_scaling_params)
        # if "loss_scale" in summaries:
        #    tf.summary.scalar("loss_scale", loss_scaling.loss_scale)

        #if dtype == "mixed":
        #    opt = MixedPrecisionOptimizerWrapper(opt, loss_scale = loss_scaling)

        """
        Compute gradients
        Inputs:
            var_list: A list or tuple of tf.Variable to update to minimize loss.
                      Defaults to the list of variables collected in the graph 
                      under the key GraphKeys.TRAINABLE_VARIABLES
        Returns:
            A list of (gradient, variable) pairs. Variable is always present, but the gradient can be None.
        """
        grads_and_vars = opt.compute_gradients(
                loss, colocate_gradients_with_ops = True, var_list = var_list)
        print("#################\n", grads_and_vars, "\n##################\n")

        """
        apply_gradients returns an Operation that applies gradients.
        Inputs
            grads_and_vars: List of (gradients, variable) pairs as returned by compute_gradients()
            global_step: Optional Varibale to increment by one after the variables have been updated
        Returns:
            If global_step was not None, that operation also increments gloabl_step
        """
        grad_updates = opt.apply_gradients(
                post_process_gradients(
                    grads_and_vars,
                    lr = lr,
                    clip_gradients = clip_gradients,
                    larc_params = larc_params,
                    summaries = summaries),
                global_step = global_step)
        
        # ensure the train tensor computes grad_updates
        print("###########\n {} \n#########\n".format(grad_updates))
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)
        print("###########\n {} \n#########\n".format(train_tensor))
        return train_tensor, grads_and_vars
Example #58
  def __init__(self,
               loc=None,
               covariance_matrix=None,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateNormalFullCovariance"):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` matrix is a `k x k` symmetric positive
    definite matrix.  In other words it is (real) symmetric with all eigenvalues
    strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
        same `dtype` as `loc`.  The strict upper triangle of `covariance_matrix`
        is ignored, so if `covariance_matrix` is not symmetric no error will be
        raised (unless `validate_args is True`).  `covariance_matrix` has shape
        `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
    parameters = locals()

    # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
    with ops.name_scope(name) as name:
      with ops.name_scope("init", values=[loc, covariance_matrix]):
        if covariance_matrix is None:
          scale_tril = None
        else:
          covariance_matrix = ops.convert_to_tensor(
              covariance_matrix, name="covariance_matrix")
          if validate_args:
            covariance_matrix = control_flow_ops.with_dependencies([
                check_ops.assert_near(
                    covariance_matrix,
                    array_ops.matrix_transpose(covariance_matrix),
                    message="Matrix was not symmetric")], covariance_matrix)
          # No need to validate that covariance_matrix is non-singular.
          # LinearOperatorLowerTriangular has an assert_non_singular method that
          # is called by the Bijector.
          # However, cholesky() ignores the upper triangular part, so we do need
          # to separately assert symmetric.
          scale_tril = linalg_ops.cholesky(covariance_matrix)
        super(MultivariateNormalFullCovariance, self).__init__(
            loc=loc,
            scale_tril=scale_tril,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            name=name)
    self._parameters = parameters
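A small usage sketch for the constructor above, assuming the TensorFlow Probability build of this class (tfp.distributions.MultivariateNormalFullCovariance) with the same arguments; the mean vector and covariance values are illustrative only.

import tensorflow_probability as tfp

mu = [1., 2., 3.]
cov = [[ 0.36,  0.12,  0.06],
       [ 0.12,  0.29, -0.13],
       [ 0.06, -0.13,  0.26]]  # symmetric positive definite

mvn = tfp.distributions.MultivariateNormalFullCovariance(
    loc=mu, covariance_matrix=cov, validate_args=True)

samples = mvn.sample(5)      # shape [5, 3]
log_prob = mvn.log_prob(mu)  # log-density at the mean, scalar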
Example #59
    def __init__(self,
                 loc,
                 scale,
                 skewness=None,
                 tailweight=None,
                 distribution=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="SinhArcsinh"):
        """Construct SinhArcsinh distribution on `(-inf, inf)`.

    Arguments `(loc, scale, skewness, tailweight)` must have broadcastable shape
    (indexing batch dimensions).  They must all have the same `dtype`.

    Args:
      loc: Floating-point `Tensor`.
      scale:  `Tensor` of same `dtype` as `loc`.
      skewness:  Skewness parameter.  Default is `0.0` (no skew).
      tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight)
      distribution: `tf.Distribution`-like instance. Distribution that is
        transformed to produce this distribution.
        Default is `tf.distributions.Normal(0., 1.)`.
        Must be a scalar-batch, scalar-event distribution.  Typically
        `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
        a function of non-trainable parameters. WARNING: If you backprop through
        a `SinhArcsinh` sample and `distribution` is not
        `FULLY_REPARAMETERIZED` yet is a function of trainable variables, then
        the gradient will be incorrect!
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())

        with tf.name_scope(name, values=[loc, scale, skewness,
                                         tailweight]) as name:
            loc = tf.convert_to_tensor(loc, name="loc")
            dtype = loc.dtype
            scale = tf.convert_to_tensor(scale, name="scale", dtype=dtype)
            tailweight = 1. if tailweight is None else tailweight
            has_default_skewness = skewness is None
            skewness = 0. if skewness is None else skewness
            tailweight = tf.convert_to_tensor(tailweight,
                                              name="tailweight",
                                              dtype=dtype)
            skewness = tf.convert_to_tensor(skewness,
                                            name="skewness",
                                            dtype=dtype)

            batch_shape = distribution_util.get_broadcast_shape(
                loc, scale, tailweight, skewness)

            # Recall, with Z a random variable,
            #   Y := loc + C * F(Z),
            #   F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight )
            #   F_0(Z) := Sinh( Arcsinh(Z) * tailweight )
            #   C := 2 * scale / F_0(2)
            if distribution is None:
                distribution = tf.distributions.Normal(
                    loc=tf.zeros([], dtype=dtype),
                    scale=tf.ones([], dtype=dtype),
                    allow_nan_stats=allow_nan_stats)
            else:
                asserts = distribution_util.maybe_check_scalar_distribution(
                    distribution, dtype, validate_args)
                if asserts:
                    loc = control_flow_ops.with_dependencies(asserts, loc)

            # Make the SAS bijector, 'F'.
            f = bijectors.SinhArcsinh(skewness=skewness, tailweight=tailweight)
            if has_default_skewness:
                f_noskew = f
            else:
                f_noskew = bijectors.SinhArcsinh(
                    skewness=skewness.dtype.as_numpy_dtype(0.),
                    tailweight=tailweight)

            # Make the AffineScalar bijector, Z --> loc + scale * Z (2 / F_0(2))
            c = 2 * scale / f_noskew.forward(
                tf.convert_to_tensor(2, dtype=dtype))
            affine = bijectors.AffineScalar(shift=loc,
                                            scale=c,
                                            validate_args=validate_args)

            bijector = bijectors.Chain([affine, f])

            super(SinhArcsinh, self).__init__(distribution=distribution,
                                              bijector=bijector,
                                              batch_shape=batch_shape,
                                              validate_args=validate_args,
                                              name=name)
        self._parameters = parameters
        self._loc = loc
        self._scale = scale
        self._tailweight = tailweight
        self._skewness = skewness
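A brief usage sketch, assuming the same constructor is exposed as tfp.distributions.SinhArcsinh; with skewness=0 and tailweight=1 the distribution reduces to Normal(loc, scale). The parameter values below are illustrative.

import tensorflow_probability as tfp

sas = tfp.distributions.SinhArcsinh(
    loc=0., scale=1., skewness=0.5, tailweight=2.0)

x = sas.sample(1000)   # right-skewed, heavier-tailed than Normal(0, 1)
lp = sas.log_prob(0.)  # log-density at zero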
Example #60
def ragged_assert_compatible_and_get_flat_values(values, mask=None):
    """If ragged, it checks the compatibility and then returns the flat_values.

     Note: If two tensors are dense, it does not check their compatibility.
     Note: Although two ragged tensors with different ragged ranks could have
           identical overall rank and dimension sizes and hence be compatible,
           we do not support those cases.
  Args:
     values: A list of potentially ragged tensor of the same ragged_rank.
     mask: A potentially ragged tensor of the same ragged_rank as elements in
       Values.

  Returns:
     A tuple in which the first element is the list of tensors and the second
     is the mask tensor. ([Values], mask). Mask and the element in Values
     are equal to the flat_values of the input arguments (if they were ragged).
  """
    if isinstance(values, list):
        is_all_ragged = \
            all(isinstance(rt, ragged_tensor.RaggedTensor) for rt in values)
        is_any_ragged = \
            any(isinstance(rt, ragged_tensor.RaggedTensor) for rt in values)
    else:
        is_all_ragged = isinstance(values, ragged_tensor.RaggedTensor)
        is_any_ragged = is_all_ragged
    if (is_all_ragged and
        ((mask is None) or isinstance(mask, ragged_tensor.RaggedTensor))):
        to_be_stripped = False
        if not isinstance(values, list):
            values = [values]
            to_be_stripped = True

        # NOTE: we leave the flat_values compatibility to
        # tf.TensorShape `assert_is_compatible_with`
        # check if both dynamic dimensions are equal and then use the flat_values.
        nested_row_split_list = [rt.nested_row_splits for rt in values]
        assertion_list = _assert_splits_match(nested_row_split_list)

        # if both are ragged sample_weights also should be ragged with same dims.
        if isinstance(mask, ragged_tensor.RaggedTensor):
            assertion_list_for_mask = _assert_splits_match(
                [nested_row_split_list[0], mask.nested_row_splits])
            tmp = control_flow_ops.with_dependencies(assertion_list_for_mask,
                                                     mask.flat_values)
            mask = array_ops.expand_dims(tmp, -1)

        # values has at least 1 element.
        flat_values = []
        for value in values:
            tmp = control_flow_ops.with_dependencies(assertion_list,
                                                     value.flat_values)
            flat_values.append(array_ops.expand_dims(tmp, -1))

        values = flat_values[0] if to_be_stripped else flat_values

    elif is_any_ragged:
        raise TypeError('One of the inputs does not have acceptable types.')
    # values are empty or value are not ragged and mask is ragged.
    elif isinstance(mask, ragged_tensor.RaggedTensor):
        raise TypeError('Ragged mask is not allowed with non-ragged inputs.')

    return values, mask
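A hypothetical call sketch for the helper above (assuming it is importable alongside the standard ragged ops): compatible ragged inputs are reduced to their flat_values, each given a trailing unit dimension by expand_dims.

import tensorflow as tf

y_true = tf.ragged.constant([[1., 2.], [3.]])
y_pred = tf.ragged.constant([[0.9, 2.1], [2.8]])
mask = tf.ragged.constant([[1., 1.], [1.]])

values, flat_mask = ragged_assert_compatible_and_get_flat_values(
    [y_true, y_pred], mask)
# values[0], values[1], and flat_mask all have dense shape [3, 1].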