Example #1
 def __call__(self, inputs, state, scope=None):
   """Long short-term memory cell with attention (LSTMA)."""
   with vs.variable_scope(scope or type(self).__name__):
     if self._state_is_tuple:
       state, attns, attn_states = state
     else:
       states = state
       state = array_ops.slice(states, [0, 0], [-1, self._cell.state_size])
       attns = array_ops.slice(
           states, [0, self._cell.state_size], [-1, self._attn_size])
       attn_states = array_ops.slice(
           states, [0, self._cell.state_size + self._attn_size],
           [-1, self._attn_size * self._attn_length])
     attn_states = array_ops.reshape(attn_states,
                                     [-1, self._attn_length, self._attn_size])
     input_size = self._input_size
     if input_size is None:
       input_size = inputs.get_shape().as_list()[1]
     inputs = _linear([inputs, attns], input_size, True)
     lstm_output, new_state = self._cell(inputs, state)
     if self._state_is_tuple:
       new_state_cat = array_ops.concat(1, _unpacked_state(new_state))
     else:
       new_state_cat = new_state
     new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
     with vs.variable_scope("AttnOutputProjection"):
       output = _linear([lstm_output, new_attns], self._attn_size, True)
     new_attn_states = array_ops.concat(1, [new_attn_states,
                                            array_ops.expand_dims(output, 1)])
     new_attn_states = array_ops.reshape(
         new_attn_states, [-1, self._attn_length * self._attn_size])
     new_state = (new_state, new_attns, new_attn_states)
     if not self._state_is_tuple:
       new_state = array_ops.concat(1, list(new_state))
     return output, new_state
Example #2
 def _testGradientsSimple(self, use_gpu):
   # Test both positive and negative concat axis.
   # -2 and 1 correspond to the same axis for 3-dimensional tensors.
   for axis in [-2, 1]:
     with self.test_session(use_gpu=use_gpu):
       inp = []
       inp_tensors = []
       for x in [1, 2, 6]:
         shape = [10, x, 2]
         t = np.random.rand(*shape).astype("f")
         inp.append(t)
         inp_tensors.append(
             constant_op.constant(
                 [float(y) for y in t.flatten()],
                 shape=shape,
                 dtype=dtypes.float32))
       c = array_ops.concat(inp_tensors, axis)
       output_shape = [10, 9, 2]
       grad_inp = np.random.rand(*output_shape).astype("f")
       grad_tensor = constant_op.constant(
           [float(x) for x in grad_inp.flatten()], shape=output_shape)
       grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
       concated_grad = array_ops.concat(grad, axis)
       result = concated_grad.eval()
   self.assertAllEqual(result, grad_inp)
Example #3
def _build_recursive_hd_scatter(input_tensors, devices):
  """Construct the scatter phase of recursive halving-doublng all-reduce.

  Args:
    input_tensors: list of T `tf.Tensor` that are fully-reduced shards.
    devices: a list of strings naming the devices on which the reconstituted
      full tensors should be placed.

  Returns:
    list of T `tf.Tensor` which are the fully reduced tensors.
  """
  num_devices = len(devices)
  num_hops = int(math.log(num_devices, 2))
  assert num_devices == (2 ** num_hops), "num_devices must be a power of 2"
  chunks = input_tensors
  for h in reversed(range(0, num_hops)):
    span = 2 ** h
    group_size = span * 2
    new_chunks = [[] for _ in devices]
    for d in range(0, num_devices):
      if (d % group_size) >= (group_size / 2):
        # skip right half of a pair
        continue
      left_idx = d
      right_idx = d + span
      left_dev = devices[left_idx]
      right_dev = devices[right_idx]
      with ops.device(left_dev):
        new_chunks[left_idx] = array_ops.concat([chunks[left_idx],
                                                 chunks[right_idx]], 0)
      with ops.device(right_dev):
        new_chunks[right_idx] = array_ops.concat([chunks[left_idx],
                                                  chunks[right_idx]], 0)
    chunks = new_chunks
  return chunks
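For intuition, here is a NumPy-only sketch of the same hop pattern (an illustration with made-up one-element shards, not the TensorFlow helper itself): after log2(num_devices) concat hops, every simulated device holds the full tensor.

# NumPy-only illustration of the halving-doubling hop pattern above (assumption: four
# simulated devices, one-element shards); not part of the TensorFlow example.
import math
import numpy as np

shards = [np.array([float(i)]) for i in range(4)]  # one shard per simulated device
num_devices = len(shards)
num_hops = int(math.log(num_devices, 2))
chunks = shards
for h in reversed(range(num_hops)):
  span = 2 ** h
  group_size = span * 2
  new_chunks = [None] * num_devices
  for d in range(num_devices):
    if (d % group_size) >= span:
      continue  # the right half of each pair is handled with its left partner
    merged = np.concatenate([chunks[d], chunks[d + span]], axis=0)
    new_chunks[d] = merged
    new_chunks[d + span] = merged
  chunks = new_chunks
# Every device ends up with the same tensor containing all four shard values.
assert all(np.array_equal(c, chunks[0]) for c in chunks)
assert all(sorted(c.tolist()) == [0.0, 1.0, 2.0, 3.0] for c in chunks)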
Example #4
 def testConcatTuple(self):
   c1 = np.random.rand(4, 4)
   c2 = np.random.rand(4, 4)
   with self.test_session():
     concat_list_t = array_ops.concat(0, [c1, c2])
     concat_tuple_t = array_ops.concat(0, (c1, c2))
     self.assertAllEqual(concat_list_t.eval(), concat_tuple_t.eval())
Example #5
    def circular_pad(input_, width, kernel_size):
      """Pad input_ for computing (circular) convolution.

      Args:
        input_: the input tensor
        width: the width of the tensor.
        kernel_size: the kernel size of the filter.
      Returns:
        a tensor whose width is (width + kernel_size - 1).
      """
      beginning = kernel_size // 2
      end = kernel_size - 1 - beginning

      tmp_up = array_ops.slice(input_, [0, width - beginning, 0, 0],
                               [-1, beginning, width, -1])
      tmp_down = array_ops.slice(input_, [0, 0, 0, 0], [-1, end, width, -1])
      tmp = array_ops.concat([tmp_up, input_, tmp_down], 1)

      new_width = width + kernel_size - 1
      tmp_left = array_ops.slice(tmp, [0, 0, width - beginning, 0],
                                 [-1, new_width, beginning, -1])
      tmp_right = array_ops.slice(tmp, [0, 0, 0, 0], [-1, new_width, end, -1])

      final = array_ops.concat([tmp_left, tmp, tmp_right], 2)
      return final
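A NumPy-only analogue of the slice-and-concat circular padding above, reduced to one dimension (`circular_pad_1d` is a hypothetical helper written for illustration, not part of the example):

# Illustrative sketch assuming only NumPy: circular padding along one axis via
# slicing and concatenation, mirroring the pattern used in circular_pad above.
import numpy as np

def circular_pad_1d(x, kernel_size):
  # Wrap `beginning` values from the end onto the front and `end` values from
  # the front onto the back, so the result has length len(x) + kernel_size - 1.
  beginning = kernel_size // 2
  end = kernel_size - 1 - beginning
  if beginning:
    return np.concatenate([x[-beginning:], x, x[:end]])
  return np.concatenate([x, x[:end]])

x = np.arange(5)
print(circular_pad_1d(x, 3))  # [4 0 1 2 3 4 0], length 5 + 3 - 1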
Example #6
 def testConcatTuple(self):
   c1 = np.random.rand(4, 4)
   c2 = np.random.rand(4, 4)
   with self.cached_session():
     concat_list_t = array_ops.concat([c1, c2], 0)
     concat_tuple_t = array_ops.concat((c1, c2), 0)
     self.assertAllEqual(concat_list_t.eval(), self.evaluate(concat_tuple_t))
Example #7
 def testConcatNoScalars(self):
   with self.cached_session():
     scalar = constant_op.constant(7)
     dim = array_ops.placeholder(dtypes.int32)
     with self.assertRaisesRegexp(
         ValueError, r"Can't concatenate scalars \(use tf\.stack instead\)"):
       array_ops.concat([scalar, scalar, scalar], dim)
Example #8
  def testGradientsLastDim(self):
    # Test both positive and negative concat axis.
    # -1 and 2 correspond to the same axis for 3-dimensional tensors.
    for axis in [-1, 2]:
      with self.cached_session(use_gpu=True):
        inp = []
        inp_tensors = []
        for x in [1, 2, 6]:
          shape = [10, 2, x]
          t = np.random.rand(*shape).astype("f")
          inp.append(t)
          inp_tensors.append(
              constant_op.constant(
                  t.flatten(),
                  shape=shape,
                  dtype=dtypes.float32))
        c = array_ops.concat(inp_tensors, 2)
        output_shape = [10, 2, 9]
        grad_inp = np.random.rand(*output_shape).astype("f")
        grad_tensor = constant_op.constant(
            grad_inp.flatten(), shape=output_shape)
        grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
        concated_grad = array_ops.concat(grad, axis)
        result = self.evaluate(concated_grad)

    self.assertAllEqual(result, grad_inp)
Example #9
  def _RunAndVerifyGradientsRandom(self):
    # Random dims of rank 5
    input_shape = np.random.randint(1, 5, size=5)
    # Random number of tensors
    num_tensors = np.random.randint(12, 20)
    # Random dim to concat on
    concat_dim = np.random.randint(5)
    concat_dim_sizes = np.random.randint(1, 5, size=num_tensors)
    with self.cached_session(use_gpu=True):
      inp = []
      inp_tensors = []
      for x in concat_dim_sizes:
        shape = input_shape
        shape[concat_dim] = x
        t = np.random.rand(*shape).astype("f")
        inp.append(t)
        inp_tensors.append(
            constant_op.constant(t.flatten(), shape=shape,
                                 dtype=dtypes.float32))
      c = array_ops.concat(inp_tensors, concat_dim)
      output_shape = input_shape
      output_shape[concat_dim] = concat_dim_sizes.sum()
      grad_inp = np.random.rand(*output_shape).astype("f")
      grad_tensor = constant_op.constant(grad_inp.flatten(), shape=output_shape)
      grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
      concated_grad = array_ops.concat(grad, concat_dim)
      result = self.evaluate(concated_grad)

    self.assertAllEqual(result, grad_inp)
Example #10
 def _entropy(self):
   if (not self.distribution.is_continuous or
       not self.bijector.is_constant_jacobian):
     raise NotImplementedError("entropy is not implemented")
   # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
   # can be shown that:
   #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
   # If is_constant_jacobian then:
   #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
   # where c can by anything.
   entropy = self.distribution.entropy()
   if self._is_maybe_event_override:
     # H[X] = sum_i H[X_i] if X_i are mutually independent.
     # This means that a reduce_sum is a simple rescaling.
     entropy *= math_ops.cast(math_ops.reduce_prod(self._override_event_shape),
                              dtype=entropy.dtype.base_dtype)
   if self._is_maybe_batch_override:
     new_shape = array_ops.concat([
         _ones_like(self._override_batch_shape),
         self.distribution.batch_shape_tensor()
     ], 0)
     entropy = array_ops.reshape(entropy, new_shape)
     multiples = array_ops.concat([
         self._override_batch_shape,
         _ones_like(self.distribution.batch_shape_tensor())
     ], 0)
     entropy = array_ops.tile(entropy, multiples)
   dummy = array_ops.zeros([], self.dtype)
   entropy -= self.bijector.inverse_log_det_jacobian(dummy)
   entropy.set_shape(self.batch_shape)
   return entropy
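The reshape-and-tile broadcast used above for the batch override can be checked with a small NumPy sketch (made-up shapes, for illustration only; this is not TFP code):

# Illustration assuming NumPy only: broadcast a per-distribution entropy of shape
# `base_batch_shape` across an override batch shape prepended in front of it.
import numpy as np

base_entropy = np.array([0.5, 1.5])   # base batch_shape = [2]
override_batch_shape = np.array([3])  # prepend a batch dimension of size 3

new_shape = np.concatenate([np.ones_like(override_batch_shape), base_entropy.shape])
multiples = np.concatenate([override_batch_shape, np.ones_like(base_entropy.shape)])
tiled = np.tile(base_entropy.reshape(new_shape), multiples)
print(tiled.shape)  # (3, 2): the same entropy repeated for each override batch member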
Example #11
 def _testGradientsSimple(self, dtype):
   # Test both positive and negative concat axis.
   # -2 and 1 correspond to the same axis for 3-dimensional tensors.
   for axis in [-2, 1]:
     with self.cached_session(use_gpu=True):
       inp = []
       inp_tensors = []
       for x in [1, 2, 6]:
         shape = [10, x, 2]
         t = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
         if dtype.is_complex:
           t += -1j * t
         inp.append(t)
         inp_tensors.append(
             constant_op.constant(
                 t.flatten(),
                 shape=shape,
                 dtype=dtype))
       c = array_ops.concat(inp_tensors, axis)
       output_shape = [10, 9, 2]
       grad_inp = np.random.rand(*output_shape).astype(dtype.as_numpy_dtype)
       if dtype.is_complex:
         grad_inp += -1j * grad_inp
       grad_tensor = constant_op.constant(
           grad_inp.flatten(), shape=output_shape)
       grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
       concated_grad = array_ops.concat(grad, axis)
       result = self.evaluate(concated_grad)
   self.assertAllEqual(result, grad_inp)
Example #12
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)
  identity_like_power_matrix = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity_like_power_matrix.set_shape(
      ops.convert_to_tensor(power_matrix).get_shape())
  transition_powers = functional_ops.scan(
      lambda previous_power, _: math_ops.matmul(previous_power, power_matrix),
      math_ops.range(array_size - 1),
      initializer=identity_like_power_matrix)
  summed = math_ops.cumsum(
      array_ops.concat([
          array_ops.expand_dims(multiplier, 0), math_ops.matmul(
              batch_times_matrix(transition_powers, multiplier),
              transition_powers,
              adjoint_b=True)
      ], 0))
  return array_ops.concat(
      [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
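A NumPy sanity sketch of the quantity being accumulated (arbitrary small matrices chosen for illustration; `power_sum` is a hypothetical helper, not part of the example):

# Direct NumPy evaluation of S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T for small N,
# matching the recurrence described in the docstring above.
import numpy as np

A = np.array([[1.0, 0.1], [0.0, 0.9]])
B = np.array([[2.0, 0.0], [0.0, 3.0]])

def power_sum(n):
  total = np.zeros_like(B)
  for i in range(n):
    Ai = np.linalg.matrix_power(A, i)
    total += Ai @ B @ Ai.T
  return total

assert np.allclose(power_sum(0), np.zeros_like(B))   # S[0] is the zero matrix
assert np.allclose(power_sum(1), B)                  # S[1] is B
assert np.allclose(power_sum(2), A @ B @ A.T + B)    # S[2] is A B A^T + B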
Example #13
    def _testConfMatrixOnTensors(self, tf_dtype, np_dtype):
        with self.test_session() as sess:
            m_neg = array_ops.placeholder(dtype=dtypes.float32)
            m_pos = array_ops.placeholder(dtype=dtypes.float32)
            s = array_ops.placeholder(dtype=dtypes.float32)

            neg = random_ops.random_normal([20], mean=m_neg, stddev=s, dtype=dtypes.float32)
            pos = random_ops.random_normal([20], mean=m_pos, stddev=s, dtype=dtypes.float32)

            data = array_ops.concat([neg, pos], 0)
            data = math_ops.cast(math_ops.round(data), tf_dtype)
            data = math_ops.minimum(math_ops.maximum(data, 0), 1)
            lab = array_ops.concat([array_ops.zeros([20], dtype=tf_dtype), array_ops.ones([20], dtype=tf_dtype)], 0)

            cm = confusion_matrix.confusion_matrix(lab, data, dtype=tf_dtype, num_classes=2)

            d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0, m_pos: 1.0, s: 1.0})

            truth = np.zeros([2, 2], dtype=np_dtype)
            try:
                range_builder = xrange
            except NameError:  # In Python 3.
                range_builder = range
            for i in range_builder(len(d)):
                truth[l[i], d[i]] += 1

            self.assertEqual(cm_out.dtype, np_dtype)
            self.assertAllClose(cm_out, truth, atol=1e-10)
Example #14
 def testConcatTuple(self):
   c1 = np.random.rand(4, 4)
   c2 = np.random.rand(4, 4)
   concat_list_t = array_ops.concat([c1, c2], 0)
   concat_tuple_t = array_ops.concat((c1, c2), 0)
   self.assertAllEqual(
       self.evaluate(concat_list_t), self.evaluate(concat_tuple_t))
Example #15
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """

  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat([
        array_ops.ones_like(shape[:-3]), bias_shape,
        array_ops.ones_like(shape[-2:])
    ], 0)
    tile_mults = array_ops.concat([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
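The shape arithmetic for the default (NHWC) branch can be traced with a NumPy sketch (made-up shapes, for illustration only):

# Illustration assuming only NumPy: the incoming bias gradient is reshaped to ones
# everywhere except the channel dimension, then tiled back to the full input shape.
import numpy as np

input_shape = np.array([2, 3, 4, 5])             # N, H, W, C
received_grad = np.arange(5, dtype=np.float64)   # gradient w.r.t. the bias, shape [C]

expanded_shape = np.concatenate([np.ones_like(input_shape[:-1]), received_grad.shape])
tile_mults = np.concatenate([input_shape[:-1], [1]])

expanded_grad = received_grad.reshape(expanded_shape)  # shape [1, 1, 1, 5]
result = np.tile(expanded_grad, tile_mults)            # shape [2, 3, 4, 5]
print(result.shape)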
Example #16
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat([[n], batch_shape, event_shape], 0)

    # Complexity: O(nbk**2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    expanded_df = self.df * array_ops.ones(
        self.scale_operator.batch_shape_tensor(),
        dtype=self.df.dtype.base_dtype)
    g = random_ops.random_gamma(shape=[n],
                                alpha=self._multi_gamma_sequence(
                                    0.5 * expanded_df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk**2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk**2)
    perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so
    # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
    # each matmul is O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator.matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
Example #17
def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients."""
  x_indices = op.inputs[0]
  x_shape = op.inputs[2]
  y = op.inputs[3]

  y_shape = math_ops.to_int64(array_ops.shape(y))
  num_added_dims = array_ops.expand_dims(
      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
  augmented_y_shape = array_ops.concat(
      [array_ops.ones(num_added_dims, ops.dtypes.int64), y_shape], 0)

  scaling = x_shape // augmented_y_shape
  scaled_indices = x_indices // scaling
  scaled_indices = array_ops.slice(scaled_indices,
                                   array_ops.concat([[0], num_added_dims], 0),
                                   [-1, -1])
  dense_vals = array_ops.gather_nd(y, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * op.inputs[1]
  else:
    dx = grad / dense_vals
    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
  # indices can repeat after scaling, so we can't use sparse_to_dense().
  dy = sparse_ops.sparse_add(
      array_ops.zeros_like(y),
      sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)
Example #18
  def _matmul(self, x, adjoint=False, adjoint_arg=False):
    if self._assert_proper_shapes:
      x = linalg.adjoint(x) if adjoint_arg else x
      aps = linear_operator_util.assert_compatible_matrix_dimensions(self, x)
      x = control_flow_ops.with_dependencies([aps], x)
    if self.is_square:
      # Note that adjoint has no effect since this matrix is self-adjoint.
      if adjoint_arg:
        output_shape = array_ops.concat([
            array_ops.shape(x)[:-2],
            [array_ops.shape(x)[-1], array_ops.shape(x)[-2]]], axis=0)
      else:
        output_shape = array_ops.shape(x)

      return self._possibly_broadcast_batch_shape(
          array_ops.zeros(shape=output_shape, dtype=x.dtype))

    x_shape = array_ops.shape(x)
    n = self._num_columns if adjoint else self._num_rows
    m = x_shape[-2] if adjoint_arg else x_shape[-1]

    output_shape = array_ops.concat([x_shape[:-2], [n, m]], axis=0)

    zeros = array_ops.zeros(shape=output_shape, dtype=x.dtype)
    return self._possibly_broadcast_batch_shape(zeros)
Example #19
  def test_round_robin_placement(self):
    ps_devices = [
        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
    ]
    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))

    local_device_setter = replicate_model_fn._local_device_setter(
        ps_devices=ps_devices,
        ps_strategy=round_robin,
        worker_device='/device:GPU:2')

    with ops_lib.device(local_device_setter):
      a = variables.Variable(0.01)
      self.assertEqual('/device:GPU:0', a.device)

      b = variables.Variable(0.02)
      self.assertEqual('/device:GPU:1', b.device)

      c = variables.Variable(0.03)
      self.assertEqual('/device:GPU:3', c.device)

      a_op = array_ops.concat(a, axis=0)
      self.assertEqual('/device:GPU:2', a_op.device)

      b_op = array_ops.concat(b, axis=0)
      self.assertEqual('/device:GPU:2', b_op.device)

      c = variables.Variable(0.03)
      self.assertEqual('/device:GPU:4', c.device)

      d = variables.Variable(0.03)
      self.assertEqual('/device:GPU:0', d.device)

      c_op = array_ops.concat(c, axis=0)
      self.assertEqual('/device:GPU:2', c_op.device)
Example #20
  def _format_for_tpu_embedding_sparse_batch(self, sparse_features):
    """Format sparse features for `enqueue_tpu_embedding_sparse_batch()`.

    Args:
      sparse_features: a `Dict` of `SparseTensor`s for embedding.

    Returns:
      Arguments for `enqueue_tpu_embedding_sparse_batch()`.
    """

    sample_idcs, embedding_idcs, aggregation_weights = list(), list(), list()
    for table in self._table_to_features_dict:
      sample_t, indices_t, weights_t = list(), list(), list()

      features = self._table_to_features_dict[table]
      for i, feature in enumerate(features):
        tensor = sparse_features[feature]
        sample_indices = tensor.indices[:, 0]
        embedding_indices = tensor.values
        weights = array_ops.ones_like(embedding_indices)
        sample_t.append(i * self._batch_size_per_core + sample_indices)
        indices_t.append(embedding_indices)
        weights_t.append(weights)

      sample_idcs.append(
          math_ops.cast(array_ops.concat(sample_t, axis=0), dtype=dtypes.int32))
      embedding_idcs.append(
          math_ops.cast(
              array_ops.concat(indices_t, axis=0), dtype=dtypes.int32))
      aggregation_weights.append(
          math_ops.cast(
              array_ops.concat(weights_t, axis=0), dtype=dtypes.float32))

    return sample_idcs, embedding_idcs, aggregation_weights
Example #21
def _get_batch(per_class_queues, probs, batch_size):
  """Generates batches according to per-class-probabilities."""
  num_classes = probs.size
  # Number of examples per class is governed by a multinomial distribution.
  # Note: multinomial takes unnormalized log probabilities for its first
  # argument, of dimension [batch_size, num_classes].
  examples = random_ops.multinomial(
      np.expand_dims(np.log(probs), 0), batch_size)

  # Prepare the data and label batches.
  val_list = []
  label_list = []
  for i in range(num_classes):
    num_examples = math_ops.reduce_sum(
        math_ops.cast(math_ops.equal(examples, i), dtypes.int32))
    val_list.append(per_class_queues[i].dequeue_many(num_examples))
    label_list.append(array_ops.ones([num_examples], dtype=dtypes.int32) * i)

  # Create a tensor of labels.
  batch_labels = array_ops.concat(0, label_list)
  batch_labels.set_shape([batch_size])

  # Debug instrumentation.
  sample_tags = ['stratified_sample/samples_class%i' % i for i in
                 range(num_classes)]
  logging_ops.scalar_summary(sample_tags, math_ops.reduce_sum(
      array_ops.one_hot(batch_labels, num_classes), 0))

  return array_ops.concat(0, val_list), batch_labels
Example #22
  def quantiles_ready():
    """The subgraph for when the quantiles are ready."""
    quantized_feature = quantile_ops.quantiles([sparse_column_values], [],
                                               [quantile_buckets], [])
    quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64)
    quantized_feature = array_ops.reshape(quantized_feature, [-1])
    example_indices, _ = array_ops.split(
        sparse_column_indices, num_or_size_splits=2, axis=1)
    example_indices = array_ops.squeeze(example_indices, [1])
    filtered_gradients = array_ops.gather(gradients, example_indices)
    filtered_hessians = array_ops.gather(hessians, example_indices)
    filtered_partition_ids = array_ops.gather(example_partition_ids,
                                              example_indices)
    unique_partitions, mapped_partitions = array_ops.unique(
        example_partition_ids)

    # Compute aggregate stats for each partition.
    per_partition_gradients = math_ops.unsorted_segment_sum(
        gradients, mapped_partitions, array_ops.size(unique_partitions))
    per_partition_hessians = math_ops.unsorted_segment_sum(
        hessians, mapped_partitions, array_ops.size(unique_partitions))

    # Prepend a bias feature per partition that accumulates the stats for all
    # examples in that partition.
    bias_feature_ids = array_ops.fill(
        array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
    bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
    partition_ids = array_ops.concat(
        [unique_partitions, filtered_partition_ids], 0)
    filtered_gradients = array_ops.concat(
        [per_partition_gradients, filtered_gradients], 0)
    filtered_hessians = array_ops.concat(
        [per_partition_hessians, filtered_hessians], 0)
    bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)
    return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
Example #23
def _concat_along_batch_dim(tensor_list):
  """Concatenate tensors along batch (first) dimension.

  Args:
    tensor_list: list of Tensors or list of tuples of Tensors.

  Returns:
    Tensor or tuple of Tensors.

  Raises:
    ValueError: If 'tensor_list' is empty.

  """
  if not tensor_list:
    raise ValueError(
        "Cannot concatenate Tensors if there are no Tensors to concatenate.")

  if isinstance(tensor_list[0], (tuple, list)):
    # [(tensor1a, tensor1b),
    #  (tensor2a, tensor2b), ...] --> (tensor_a, tensor_b)
    return tuple(
        array_ops.concat(tensors, axis=0) for tensors in zip(*tensor_list))
  else:
    # [tensor1, tensor2] --> tensor
    return array_ops.concat(tensor_list, axis=0)
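A NumPy analogue of the two branches (an illustrative sketch, not the helper itself): a list of arrays concatenates directly, while a list of tuples is transposed first so each tuple position is concatenated separately.

# Sketch assuming only NumPy; `concat_along_batch_dim` is a hypothetical analogue.
import numpy as np

def concat_along_batch_dim(items):
  if isinstance(items[0], (tuple, list)):
    return tuple(np.concatenate(arrays, axis=0) for arrays in zip(*items))
  return np.concatenate(items, axis=0)

a, b = np.zeros([2, 3]), np.ones([4, 3])
print(concat_along_batch_dim([a, b]).shape)                          # (6, 3)
print([x.shape for x in concat_along_batch_dim([(a, a), (b, b)])])   # [(6, 3), (6, 3)]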
Example #24
  def _forward(self, x):
    y = x
    # Pad the event_ndims with a zeros vector. We need this because it lets
    # us infer the scale in the inverse function.
    if self._static_event_ndims == 0:
      y = array_ops.expand_dims(y, dim=-1)
      zeros = array_ops.zeros_like(y)
    else:
      shape = array_ops.concat(0, (array_ops.shape(x)[:-1], [1]))
      zeros = array_ops.zeros(shape, dtype=y.dtype)
    y = array_ops.concat(array_ops.rank(y)-1, (y, zeros))

    # Set shape hints.
    if x.get_shape().ndims is not None:
      shape = x.get_shape().as_list()
      if self._static_event_ndims == 0:
        shape += [2]
      elif shape[-1] is not None:
        shape[-1] += 1
      shape = tensor_shape.TensorShape(shape)
      y.get_shape().assert_is_compatible_with(shape)
      y.set_shape(shape)

    # Since we only support event_ndims in [0, 1] and we do padding, we always
    # reduce over the last dimension, i.e., dim=-1 (which is the default).
    return nn_ops.softmax(y)
Example #25
 def _get_sparse_tensors(self, inputs, weight_collections=None,
                         trainable=None):
   sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)
   id_tensor = sparse_tensors.id_tensor
   weight_tensor = sparse_tensors.weight_tensor
   # Expands final dimension, so that embeddings are not combined during
   # embedding lookup.
   check_id_rank = check_ops.assert_equal(
       array_ops.rank(id_tensor), 2,
       data=[
           'Column {} expected ID tensor of rank 2. '.format(self.name),
           'id_tensor shape: ', array_ops.shape(id_tensor)])
   with ops.control_dependencies([check_id_rank]):
     id_tensor = sparse_ops.sparse_reshape(
         id_tensor,
         shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
   if weight_tensor is not None:
     check_weight_rank = check_ops.assert_equal(
         array_ops.rank(weight_tensor), 2,
         data=[
             'Column {} expected weight tensor of rank 2.'.format(self.name),
             'weight_tensor shape:', array_ops.shape(weight_tensor)])
     with ops.control_dependencies([check_weight_rank]):
       weight_tensor = sparse_ops.sparse_reshape(
           weight_tensor,
           shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
   return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
Example #26
  def testZerosCacheDoesntLeakAcrossModes(self):
    with ops.Graph().as_default():
      t = random_ops.random_normal(shape=[100, 2])
      x = random_ops.random_normal(shape=[100, 4])
      dy = random_ops.random_normal(shape=[100, 4])
      with backprop.GradientTape() as gradient_tape:
        gradient_tape.watch(x)
        x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
        y1 = x1 ** 2.
        y = array_ops.concat([y1, t], axis=1)

      dx = gradient_tape.gradient(y, x, output_gradients=dy)
      with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        sess.run(dx)

    t = random_ops.random_normal(shape=[100, 2])
    x = random_ops.random_normal(shape=[100, 4])
    dy = random_ops.random_normal(shape=[100, 4])
    with backprop.GradientTape() as gradient_tape:
      gradient_tape.watch(x)
      x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
      y1 = x1 ** 2.
      y = array_ops.concat([y1, t], axis=1)

    dx = gradient_tape.gradient(y, x, output_gradients=dy)
Example #27
  def _to_dense(self):
    num_cols = 0
    rows = []
    broadcasted_blocks = [operator.to_dense() for operator in self.operators]
    broadcasted_blocks = linear_operator_util.broadcast_matrix_batch_dims(
        broadcasted_blocks)
    for block in broadcasted_blocks:
      batch_row_shape = array_ops.shape(block)[:-1]

      zeros_to_pad_before_shape = array_ops.concat(
          [batch_row_shape, [num_cols]], axis=-1)
      zeros_to_pad_before = array_ops.zeros(
          shape=zeros_to_pad_before_shape, dtype=block.dtype)
      num_cols += array_ops.shape(block)[-1]
      zeros_to_pad_after_shape = array_ops.concat(
          [batch_row_shape,
           [self.domain_dimension_tensor() - num_cols]], axis=-1)
      zeros_to_pad_after = array_ops.zeros(
          shape=zeros_to_pad_after_shape, dtype=block.dtype)

      rows.append(array_ops.concat(
          [zeros_to_pad_before, block, zeros_to_pad_after], axis=-1))

    mat = array_ops.concat(rows, axis=-2)
    mat.set_shape(self.shape)
    return mat
Example #28
  def sample(self, n, seed=None, name="sample"):
    """Generate `n` samples.

    Args:
      n: scalar.  Number of samples to draw from each distribution.
      seed: Python integer seed for RNG.
      name: name to give to the op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape` with values of type
          `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.p, n], name):
        n = ops.convert_to_tensor(n, name="n")
        p_2d = array_ops.reshape(self.p, array_ops.pack([-1, 1]))
        q_2d = 1. - p_2d
        probs = array_ops.concat(1, [q_2d, p_2d])
        samples = random_ops.multinomial(math_ops.log(probs), n, seed=seed)
        ret = array_ops.reshape(
            array_ops.transpose(samples),
            array_ops.concat(0,
                             [array_ops.expand_dims(n, 0), self.batch_shape()]))
        ret.set_shape(tensor_shape.vector(tensor_util.constant_value(n))
                      .concatenate(self.get_batch_shape()))
        return math_ops.cast(ret, self.dtype)
Example #29
def same_dynamic_shape(a, b):
  """Returns whether a and b have the same dynamic shape.

  Args:
    a: `Tensor`
    b: `Tensor`

  Returns:
    `Boolean` `Tensor` representing if both tensors have the same shape.
  """
  a = ops.convert_to_tensor(a, name="a")
  b = ops.convert_to_tensor(b, name="b")

  # One of the shapes isn't fully defined, so we need to use the dynamic
  # shape.
  return control_flow_ops.cond(
      math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
      # Here we can't just do math_ops.equal(a.shape, b.shape), since
      # static shape inference may break the equality comparison between
      # shape(a) and shape(b) in math_ops.equal.
      lambda: math_ops.reduce_all(math_ops.equal(
          array_ops.concat((
              array_ops.shape(a),
              array_ops.shape(b)), 0),
          array_ops.concat((
              array_ops.shape(b),
              array_ops.shape(a)), 0))),
      lambda: constant_op.constant(False))
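Why comparing the two concatenations works (a NumPy sketch with hypothetical shapes, assuming the ranks already match): shape(a) followed by shape(b) equals shape(b) followed by shape(a) elementwise exactly when the two shapes are identical.

# Illustration assuming only NumPy; `shapes_match` is a hypothetical helper.
import numpy as np

def shapes_match(shape_a, shape_b):
  if len(shape_a) != len(shape_b):
    return False
  return bool(np.all(np.concatenate([shape_a, shape_b]) ==
                     np.concatenate([shape_b, shape_a])))

print(shapes_match([2, 3], [2, 3]))  # True
print(shapes_match([2, 3], [2, 4]))  # False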
Example #30
def ReferenceDepthwiseConv2D(input_tensor, filter_tensor, strides, padding,
                             data_format=None):
  # Reference implementation of depthwise convolution that uses regular
  # convolution.
  convs = []
  in_channels = filter_tensor.shape[2]
  # Use a custom implementation of depthwise conv2d using slicing.
  for channel in xrange(in_channels):
    # Slice the input along channel
    if data_format == "NCHW":
      input_slice = input_tensor[:, channel:channel+1, :, :]
    else:
      input_slice = input_tensor[:, :, :, channel:channel+1]

    # Slice the filters.  Filters are  H, W, InC, DepthMultiplier
    filter_slice = filter_tensor[:, :, channel:channel+1, :]
    # Do conv
    convs.append(nn_ops.conv2d(input_slice, filter_slice,
                               strides, padding,
                               data_format=data_format,
                               name="depthwise_slice_%d" % channel))

  # Concat along dimension.
  if data_format == "NCHW":
    return array_ops.concat(convs, 1)
  else:
    return array_ops.concat(convs, 3)
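A tiny NumPy check of the slice-then-concat structure, restricted to the 1x1-kernel, depth-multiplier-1 case where depthwise convolution reduces to a per-channel scale (illustration only, with made-up shapes):

# Sketch assuming only NumPy: per-channel slicing, scaling, and channel-axis concat
# agree with the direct broadcasted computation for NHWC data and a 1x1 kernel.
import numpy as np

x = np.random.rand(1, 4, 4, 3)   # NHWC input
f = np.random.rand(1, 1, 3, 1)   # H, W, InC, DepthMultiplier

per_channel = [x[:, :, :, c:c + 1] * f[0, 0, c, 0] for c in range(3)]
sliced = np.concatenate(per_channel, axis=3)   # concat along the channel dimension
direct = x * f[0, 0, :, 0]                     # broadcast over channels
assert np.allclose(sliced, direct)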
Example #31
 def call(self, inputs):
   return concat([inputs, inputs], axis=-1)
Example #32
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
    """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

  Note: this function does not verify the implied matrix is actually invertible
  nor is this condition checked even when `validate_args=True`.

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if `matmul(P,
      matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if `matmul(P, matmul(L, U)) =
      X` then `perm = argmax(P)`.
    rhs: Matrix-shaped float `Tensor` representing targets for which to solve;
      `A X = RHS`. To handle vector cases, use: `lu_solve(..., rhs[...,
        tf.newaxis])[..., 0]`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness. Note: this function does not verify the implied matrix is
        actually invertible, even when `validate_args=True`.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., 'lu_solve').

  Returns:
    x: The `X` in `A @ X = RHS`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[1., 2],
        [3, 4]],
       [[7, 8],
        [3, 4]]]
  inv_x = tf.linalg.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
  tf.assert_near(tf.matrix_inverse(x), inv_x)
  # ==> True
  ```

  """

    with ops.name_scope(name or 'lu_solve'):
        lower_upper = ops.convert_to_tensor(lower_upper,
                                            dtype_hint=dtypes.float32,
                                            name='lower_upper')
        perm = ops.convert_to_tensor(perm,
                                     dtype_hint=dtypes.int32,
                                     name='perm')
        rhs = ops.convert_to_tensor(rhs,
                                    dtype_hint=lower_upper.dtype,
                                    name='rhs')

        assertions = _lu_solve_assertions(lower_upper, perm, rhs,
                                          validate_args)
        if assertions:
            with ops.control_dependencies(assertions):
                lower_upper = array_ops.identity(lower_upper)
                perm = array_ops.identity(perm)
                rhs = array_ops.identity(rhs)

        if (rhs.shape.rank == 2 and perm.shape.rank == 1):
            # Both rhs and perm have scalar batch_shape.
            permuted_rhs = array_ops.gather(rhs, perm, axis=-2)
        else:
            # Either rhs or perm have non-scalar batch_shape or we can't determine
            # this information statically.
            rhs_shape = array_ops.shape(rhs)
            broadcast_batch_shape = array_ops.broadcast_dynamic_shape(
                rhs_shape[:-2],
                array_ops.shape(perm)[:-1])
            d, m = rhs_shape[-2], rhs_shape[-1]
            rhs_broadcast_shape = array_ops.concat(
                [broadcast_batch_shape, [d, m]], axis=0)

            # Tile out rhs.
            broadcast_rhs = array_ops.broadcast_to(rhs, rhs_broadcast_shape)
            broadcast_rhs = array_ops.reshape(broadcast_rhs, [-1, d, m])

            # Tile out perm and add batch indices.
            broadcast_perm = array_ops.broadcast_to(perm,
                                                    rhs_broadcast_shape[:-1])
            broadcast_perm = array_ops.reshape(broadcast_perm, [-1, d])
            broadcast_batch_size = math_ops.reduce_prod(broadcast_batch_shape)
            broadcast_batch_indices = array_ops.broadcast_to(
                math_ops.range(broadcast_batch_size)[:, array_ops.newaxis],
                [broadcast_batch_size, d])
            broadcast_perm = array_ops.stack(
                [broadcast_batch_indices, broadcast_perm], axis=-1)

            permuted_rhs = array_ops.gather_nd(broadcast_rhs, broadcast_perm)
            permuted_rhs = array_ops.reshape(permuted_rhs, rhs_broadcast_shape)

        lower = set_diag(
            band_part(lower_upper, num_lower=-1, num_upper=0),
            array_ops.ones(array_ops.shape(lower_upper)[:-1],
                           dtype=lower_upper.dtype))
        return triangular_solve(
            lower_upper,  # Only upper is accessed.
            triangular_solve(lower, permuted_rhs),
            lower=False)
Example #33
def matrix_exponential(input, name=None):  # pylint: disable=redefined-builtin
    r"""Computes the matrix exponential of one or more square matrices.

  exp(A) = \sum_{n=0}^\infty A^n/n!

  The exponential is computed using a combination of the scaling and squaring
  method and the Pade approximation. Details can be found in:
  Nicholas J. Higham, "The scaling and squaring method for the matrix
  exponential revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.

  The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
  form square matrices. The output is a tensor of the same shape as the input
  containing the exponential for all input submatrices `[..., :, :]`.

  Args:
    input: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`, or
      `complex128` with shape `[..., M, M]`.
    name:  A name to give this `Op` (optional).

  Returns:
    the matrix exponential of the input.

  Raises:
    ValueError: An unsupported type is provided as input.

  @compatibility(scipy)
  Equivalent to scipy.linalg.expm
  @end_compatibility
  """
    with ops.name_scope(name, 'matrix_exponential', [input]):
        matrix = ops.convert_to_tensor(input, name='input')
        if matrix.shape[-2:] == [0, 0]:
            return matrix
        batch_shape = matrix.shape[:-2]
        if not batch_shape.is_fully_defined():
            batch_shape = array_ops.shape(matrix)[:-2]

        # reshaping the batch makes the where statements work better
        matrix = array_ops.reshape(
            matrix,
            array_ops.concat(([-1], array_ops.shape(matrix)[-2:]), axis=0))
        l1_norm = math_ops.reduce_max(math_ops.reduce_sum(
            math_ops.abs(matrix),
            axis=array_ops.size(array_ops.shape(matrix)) - 2),
                                      axis=-1)[..., array_ops.newaxis,
                                               array_ops.newaxis]
        const = lambda x: constant_op.constant(x, l1_norm.dtype)

        def _nest_where(vals, cases):
            assert len(vals) == len(cases) - 1
            if len(vals) == 1:
                return array_ops.where_v2(
                    math_ops.less(l1_norm, const(vals[0])), cases[0], cases[1])
            else:
                return array_ops.where_v2(
                    math_ops.less(l1_norm, const(vals[0])), cases[0],
                    _nest_where(vals[1:], cases[1:]))

        if matrix.dtype in [dtypes.float16, dtypes.float32, dtypes.complex64]:
            maxnorm = const(3.925724783138660)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix / math_ops.cast(
                math_ops.pow(const(2.0), squarings), matrix.dtype))
            conds = (4.258730016922831e-001, 1.880152677804762e+000)
            u = _nest_where(conds, (u3, u5, u7))
            v = _nest_where(conds, (v3, v5, v7))
        elif matrix.dtype in [dtypes.float64, dtypes.complex128]:
            maxnorm = const(5.371920351148152)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix)
            u9, v9 = _matrix_exp_pade9(matrix)
            u13, v13 = _matrix_exp_pade13(matrix / math_ops.cast(
                math_ops.pow(const(2.0), squarings), matrix.dtype))
            conds = (1.495585217958292e-002, 2.539398330063230e-001,
                     9.504178996162932e-001, 2.097847961257068e+000)
            u = _nest_where(conds, (u3, u5, u7, u9, u13))
            v = _nest_where(conds, (v3, v5, v7, v9, v13))
        else:
            raise ValueError(
                'tf.linalg.expm does not support matrices of type %s' %
                matrix.dtype)
        numer = u + v
        denom = -u + v
        result = linalg_ops.matrix_solve(denom, numer)
        max_squarings = math_ops.reduce_max(squarings)

        i = const(0.0)
        c = lambda i, r: math_ops.less(i, max_squarings)

        def b(i, r):
            return i + 1, array_ops.where_v2(math_ops.less(i, squarings),
                                             math_ops.matmul(r, r), r)

        _, result = control_flow_ops.while_loop(c, b, [i, result])
        if not matrix.shape.is_fully_defined():
            return array_ops.reshape(
                result,
                array_ops.concat((batch_shape, array_ops.shape(result)[-2:]),
                                 axis=0))
        return array_ops.reshape(result,
                                 batch_shape.concatenate(result.shape[-2:]))
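A minimal sanity sketch comparing against SciPy (assumes TensorFlow 2.x and SciPy are installed; it exercises the public `tf.linalg.expm`, not the private implementation above):

# Compare the batched TF matrix exponential against a per-matrix SciPy reference.
import numpy as np
import scipy.linalg
import tensorflow as tf

a = np.random.rand(2, 3, 3)                          # a small batch of 3x3 matrices
tf_exp = tf.linalg.expm(a).numpy()                   # batched matrix exponential
ref = np.stack([scipy.linalg.expm(m) for m in a])    # per-matrix SciPy reference
print(np.allclose(tf_exp, ref, atol=1e-6))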
Example #34
 def Elman(theta, state0, inputs):
     h0, w, b, x = state0.h, theta.w, theta.b, inputs.x
     xw = math_ops.matmul(array_ops.concat([x, h0], axis=1), w)
     h1 = math_ops.sigmoid(xw + b)
     state1 = _ElmanState(h=h1)
     return (state1, state1)
Example #35
def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
  """Get the aggregated gradients for op.

  Args:
    grads: The map of memoized gradients.
    op: The op to get gradients for.
    loop_state: An object for maintaining the state of the while loops in the
                graph. It is of type ControlFlowState. None if the graph
                contains no while loops.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of gradients, one per each output of `op`. If the gradients
      for a particular output is a list, this function aggregates it
      before returning.

  Raises:
    TypeError: if the incoming grads are not Tensors or IndexedSlices.
    ValueError: if the arguments are invalid.

  """
  if aggregation_method is None:
    aggregation_method = AggregationMethod.DEFAULT
  if aggregation_method not in [
      AggregationMethod.ADD_N, AggregationMethod.EXPERIMENTAL_TREE,
      AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
  ]:
    raise ValueError("Invalid aggregation_method specified %s." %
                     aggregation_method)
  out_grads = _GetGrads(grads, op)
  for i, out_grad in enumerate(out_grads):
    if loop_state:
      if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)):
        assert control_flow_ops.IsLoopSwitch(op)
        continue
    # Grads have to be Tensors or IndexedSlices
    if (isinstance(out_grad, collections.Sequence) and not all([
        isinstance(g, (ops.Tensor, ops.IndexedSlices)) for g in out_grad
        if g is not None
    ])):
      raise TypeError("gradients have to be either all Tensors "
                      "or all IndexedSlices")
    # Aggregate multiple gradients, and convert [] to None.
    if out_grad:
      if len(out_grad) < 2:
        used = "nop"
        out_grads[i] = out_grad[0]
      elif all([isinstance(g, ops.Tensor) for g in out_grad if g is not None]):
        tensor_shape = _AccumulatorShape(out_grad)
        if (aggregation_method == AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
            and len(out_grad) > 2 and tensor_shape.is_fully_defined()):
          # The benefit of using AccumulateN is that its inputs can be combined
          # in any order and this can allow the expression to be evaluated with
          # a smaller memory footprint.  When used with gpu_allocator_retry,
          # it is possible to compute a sum of terms which are much larger than
          # total GPU memory.
          # AccumulateN can currently only be used if we know the shape for
          # an accumulator variable.  If this is not known, or if we only have
          # 2 grads then we fall through to the "tree" case below.
          used = "accumulate_n"
          out_grads[i] = math_ops.accumulate_n(out_grad)
        elif aggregation_method in [
            AggregationMethod.EXPERIMENTAL_TREE,
            AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
        ]:
          # Aggregate all gradients by doing pairwise sums: this may
          # reduce performance, but it can improve memory because the
          # gradients can be released earlier.
          #
          # TODO(vrv): Consider replacing this with a version of
          # tf.AddN() that eagerly frees its inputs as soon as they are
          # ready, so the order of this tree does not become a problem.
          used = "tree"
          with ops.name_scope(op.name + "_gradient_sum"):
            running_sum = out_grad[0]
            for grad in out_grad[1:]:
              running_sum = math_ops.add_n([running_sum, grad])
            out_grads[i] = running_sum
        else:
          used = "add_n"
          out_grads[i] = _MultiDeviceAddN(out_grad)
        logging.vlog(2, "  _AggregatedGrads %d x %s using %s",
                     len(out_grad), tensor_shape, used)
      else:
        out_grad = math_ops._as_indexed_slices_list(
            [g for g in out_grad if g is not None])
        out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad]
        # Form IndexedSlices out of the concatenated values and
        # indices.
        out_grads[i] = ops.IndexedSlices(
            array_ops.concat([x.values for x in out_grad], 0),
            array_ops.concat([x.indices for x in out_grad], 0),
            out_grad[0].dense_shape)
    else:  # not out_grad
      # out_grads[i] is [], thus its aggregation is simply None.
      out_grads[i] = None
  return out_grads
Example #36
0
 def reduce_fn(state, value):
   s1, s2 = state
   v1, v2 = value
   return array_ops.concat([s1, [v1]], 0), s2 + v2
Example #37
 def reduce_fn(x, _):
   # Statically known rank, but dynamic length.
   larger_dim = array_ops.concat([x[0], x[0]], 0)
   # Statically unknown rank.
   larger_rank = array_ops.expand_dims(x[1], 0)
   return larger_dim, larger_rank
Example #38
def _ConcatGrad(op, grad):
    """Gradient for concat op."""
    def _CreateDenseMaskAndBegin(sizes, concat_dim):
        """Create variables for iteratively slicing a dense gradients tensor."""
        # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
        shape_of_shape = array_ops.shape(sizes[0])
        # Make a vector of length equal to the input's dimensions,
        # with 0's everywhere and 1 in the concat dim position.
        # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
        mask = array_ops.concat(0, [
            array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
            array_ops.fill(shape_of_shape - concat_dim - 1, 0)
        ])
        begin = array_ops.fill(shape_of_shape, 0)
        return mask, begin

    # Degenerate concatenation, just return grad.
    if len(op.inputs) == 2:
        return [None, grad]

    concat_dim = op.inputs[0]
    out_grads = []
    if isinstance(grad, ops.Tensor):
        # Get the inputs' tensor shapes
        sizes = array_ops.shape_n(op.inputs[1:])
        # pylint: disable=protected-access
        offset = gen_array_ops._concat_offset(concat_dim, sizes)
        # pylint: enable=protected-access
        for (begin, size) in zip(offset, sizes):
            out_grads.append(array_ops.slice(grad, begin, size))
    elif isinstance(grad, ops.IndexedSlices):
        concat_dim_static = tensor_util.constant_value(concat_dim)
        if concat_dim_static is None:
            raise ValueError("Can only compute IndexedSlices gradient with "
                             "statically-known concat_dim")
        # Get the inputs' tensor shapes
        sizes = [array_ops.shape(x) for x in op.inputs[1:]]
        if concat_dim_static > 0:
            # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients
            # with all the indices, but with grad.values sliced accordingly. This
            # is like the Tensor case, except shape(grad.values)[0] is not equal to
            # shape(sizes[i])[0], since only a subset of the dim-0 values are stored.
            mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim)
            for size in sizes:
                new_values = array_ops.slice(
                    grad.values, begin,
                    array_ops.concat(
                        0, [[-1], array_ops.slice(size, [1], [-1])]))
                out_grads.append(
                    ops.IndexedSlices(new_values, grad.indices, size))
                # Lint complains begin = begin + ...
                begin = math_ops.add(begin, size * mask)
        else:
            # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
            # only for the relevant indices.
            start = constant_op.constant(0, dtype=grad.indices.dtype)
            for size in sizes:
                size_concat_dim = array_ops.gather(size, concat_dim)
                if size_concat_dim.dtype != grad.indices.dtype:
                    size_concat_dim = math_ops.cast(size_concat_dim,
                                                    dtype=grad.indices.dtype)
                end = start + size_concat_dim
                # Compute the 1-D Tensor of indices relevant for this input.
                indices_to_select = array_ops.squeeze(array_ops.where(
                    math_ops.logical_and(grad.indices >= start,
                                         grad.indices < end)),
                                                      squeeze_dims=[1])
                new_indices = array_ops.gather(grad.indices,
                                               indices_to_select) - start
                new_values = array_ops.gather(grad.values, indices_to_select)
                out_grads.append(
                    ops.IndexedSlices(new_values, new_indices, size))
                start = end
    else:
        raise TypeError("Expected Tensor or IndexedSlices, got %s" %
                        type(grad))

    return [None] + out_grads
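The dense-Tensor branch amounts to slicing the upstream gradient back into input-sized pieces along the concat axis, which a NumPy sketch can illustrate (made-up sizes, illustration only):

# Sketch assuming only NumPy: slice the gradient of a concat output back into
# pieces whose sizes match the concatenated inputs along axis 0.
import numpy as np

sizes = [2, 3, 4]                          # sizes of the inputs along the concat axis
upstream = np.random.rand(sum(sizes), 5)   # gradient flowing into the concat output

offsets = np.cumsum([0] + sizes[:-1])
grads = [upstream[o:o + s] for o, s in zip(offsets, sizes)]
assert [g.shape[0] for g in grads] == sizes
assert np.allclose(np.concatenate(grads, axis=0), upstream)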
Example #39
    def call(self, inputs, state):
        dtype = inputs.dtype
        time_now_score = tf.expand_dims(inputs[:, -1], -1)
        time_last_score = tf.expand_dims(inputs[:, -2], -1)
        inputs = inputs[:, :-2]
        input_size = inputs.get_shape().with_rank(2)[1]
        # decay gates
        scope = variable_scope.get_variable_scope()
        with variable_scope.variable_scope(scope) as unit_scope:
            with variable_scope.variable_scope(unit_scope):
                self._time_input_w1 = variable_scope.get_variable(
                    "_time_input_w1", shape=[self._num_units], dtype=dtype)
                self._time_input_bias1 = variable_scope.get_variable(
                    "_time_input_bias1", shape=[self._num_units], dtype=dtype)
                self._time_input_w2 = variable_scope.get_variable(
                    "_time_input_w2", shape=[self._num_units], dtype=dtype)
                self._time_input_bias2 = variable_scope.get_variable(
                    "_time_input_bias2", shape=[self._num_units], dtype=dtype)
                self._time_kernel_w1 = variable_scope.get_variable(
                    "_time_kernel_w1", shape=[input_size, self._num_units], dtype=dtype)
                self._time_kernel_t1 = variable_scope.get_variable(
                    "_time_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)
                self._time_bias1 = variable_scope.get_variable(
                    "_time_bias1", shape=[self._num_units], dtype=dtype)
                self._time_kernel_w2 = variable_scope.get_variable(
                    "_time_kernel_w2", shape=[input_size, self._num_units], dtype=dtype)
                self._time_kernel_t2 = variable_scope.get_variable(
                    "_time_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)
                self._time_bias2 = variable_scope.get_variable(
                    "_time_bias2", shape=[self._num_units], dtype=dtype)
                #self._o_kernel_t1 = variable_scope.get_variable(
                    #"_o_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)
                #self._o_kernel_t2 = variable_scope.get_variable(
                    #"_o_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)
        #time_now_input = tf.nn.tanh(tf.log(1+time_now_score) * self._time_input_w1 + self._time_input_bias1)
        #time_last_input = tf.nn.tanh(tf.log(1+time_last_score) * self._time_input_w2 + self._time_input_bias2)
        time_now_input = tf.nn.tanh(time_now_score * self._time_input_w1 + self._time_input_bias1)
        time_last_input = tf.nn.tanh(time_last_score * self._time_input_w2 + self._time_input_bias2)

        time_now_state = math_ops.matmul(inputs, self._time_kernel_w1) + \
                         math_ops.matmul(time_now_input,self._time_kernel_t1) + self._time_bias1
        time_last_state = math_ops.matmul(inputs, self._time_kernel_w2) + \
                          math_ops.matmul(time_last_input,self._time_kernel_t2) + self._time_bias2

        gate_inputs = math_ops.matmul(
            array_ops.concat([inputs, state], 1), self._gate_kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._gate_bias)

        value = math_ops.sigmoid(gate_inputs)
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state

        candidate = math_ops.matmul(
            array_ops.concat([inputs, r_state], 1), self._candidate_kernel)
        candidate = nn_ops.bias_add(candidate, self._candidate_bias)

        c = self._activation(candidate)
        #new_h = u * state * sigmoid(time_last_state) + (1 - u) * c * sigmoid(time_now_state)
        new_h = u * state * sigmoid(time_now_state) + (1 - u) * c * sigmoid(time_last_state)
        return new_h, new_h
Example #40
    def call(self, mel_input, state_mix):
        """
        Gated recurrent unit (GRU) with nunits cells.
        :param mel_input: coarse_input(cur) + fine_input(cur) + mel(cur) [teacher force] or mel(cur)
        :param state_mix: input(prev) + state
        :return: outputs(coarse and fine outputs), state
        """
        if self._state_is_tuple:
            (input_prev, state) = state_mix
            state_h, state_l = tf.split(state, 2, axis=1)
        else:
            input_prev = tf.slice(state_mix, [0, 0], [-1, 2])
            state = tf.slice(state_mix, [0, 2], [-1, -1])
            #state_h = tf.slice(state_mix, [0, 2], [-1, self._num_units])
            #state_l = tf.slice(state_mix, [0, (2+self._num_units)], [-1, self._num_units])

        if self._teacher_force:
            coarse_input_cur = tf.slice(mel_input, [0, 0], [-1, 1])
            fine_input_cur = tf.slice(mel_input, [0, 1], [-1, 1])
            mel_cur = tf.slice(mel_input, [0, 2], [-1, -1])
        else:
            mel_cur = mel_input

        input_prev_and_mel = tf.concat([input_prev, mel_cur], axis=1)          # 258
        X = tf.matmul(input_prev_and_mel, self._weight_external) + self._bias_external
        Xh, Xl = tf.split(X, 2, axis=1)

        state_h, state_l = tf.split(state, 2, axis=1)
        state = tf.reshape(state, [-1, self._num_units])
        H = tf.matmul(state, self._weight_internal) + self._bias_internal
        Hh, Hl = tf.split(H, 2, axis=0)
        # Hh = tf.matmul(state_h, self._weight_internal) + self._bias_internal   # 2 * 896; 896 * 2688; 2 * 2688
        # Hl = tf.matmul(state_l, self._weight_internal) + self._bias_internal
        Hhr, Hhu, Hhe_ = tf.split(Hh, 3, axis=1)
        Hlr, Hlu, Hle_ = tf.split(Hl, 3, axis=1)
        Xhr, Xhu, Xhe_ = tf.split(Xh, 3, axis=1)
        uh = tf.nn.sigmoid(Xhu + Hhu)
        rh = tf.nn.sigmoid(Xhr + Hhr)

        candidate_h = tf.tanh(rh * Hhe_ + Xhe_)
        state_h = state_h * uh + candidate_h * (1 - uh)

        relu_outputs_h = self._affine_relu_C(state_h)
        output_outputs_h = self._affine_C(relu_outputs_h)

        if not self._teacher_force:
            sample_h = tf.multinomial(output_outputs_h, 1, name='multinomial')
            coarse_input_cur = tf.cast(sample_h[0, 0], dtype=tf.int32)
            coarse_input_cur = tf.cast(coarse_input_cur, dtype=tf.float32)

        Xl_C = coarse_input_cur * self._weight_external_C_input + self._bias_internal_C_input
        Xl = Xl + Xl_C
        Xlr, Xlu, Xle_ = tf.split(Xl, 3, axis=1)

        ul = tf.nn.sigmoid(Xlu + Hlu)
        rl = tf.nn.sigmoid(Xlr + Hlr)
        candidate_l = tf.tanh(rl * Hle_ + Xle_)
        state_l = state_l * ul + candidate_l * (1 - ul)

        print("state l", state_l)
        print("state h", state_h)

        relu_outputs_l = self._affine_relu_F(state_l)
        output_outputs_l = self._affine_F(relu_outputs_l)

        if not self._teacher_force:
            sample_l = tf.multinomial(output_outputs_l, 1, name='multinomial')
            fine_input_cur = tf.cast(sample_l[0, 0], dtype=tf.int32)
            fine_input_cur = tf.cast(fine_input_cur, dtype=tf.float32)

        if not self._teacher_force:
            coarse_output_array = tf.reshape(coarse_input_cur, [1, 1])
            fine_output_array = tf.reshape(fine_input_cur, [1, 1])
            coarse_input_cur = tf.truediv(coarse_input_cur, 127.5) - 1.0
            coarse_input_cur = tf.reshape(coarse_input_cur, [1, 1])
            fine_input_cur = tf.truediv(fine_input_cur, 127.5) - 1.0
            fine_input_cur = tf.reshape(fine_input_cur, [1, 1])

        input_cur = tf.concat([coarse_input_cur, fine_input_cur], axis=1)

        if self._state_is_tuple:
            state = tf.concat([state_h, state_l], axis=1)
            state = tf.nn.rnn_cell.LSTMStateTuple(input_cur, state)
        else:
            state = tf.concat([state_h, state_l], axis=1)  # + state_l
            state = array_ops.concat([input_cur, state], axis=1)

        if self._is_synthesis:
            output_array = tf.concat([coarse_output_array, fine_output_array], axis=1)
            return output_array, state
        else:
            output_outputs = tf.concat([output_outputs_h, output_outputs_l], axis=1)
            return output_outputs, state
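The reshape/split trick above folds the coarse (state_h) and fine (state_l) halves of the state into the batch axis so that a single matmul against self._weight_internal covers both halves, instead of the two matmuls in the commented-out lines. A minimal NumPy sketch with a hypothetical weight matrix; note it relies on batch size 1 (as in the synthesis path), since for larger batches the reshape interleaves the halves.

import numpy as np

num_units = 4
rng = np.random.RandomState(0)
state_h = rng.randn(1, num_units)                 # coarse half of the state
state_l = rng.randn(1, num_units)                 # fine half of the state
weight_internal = rng.randn(num_units, 3 * num_units)

# One matmul over both halves: stack them along the batch axis first.
state = np.concatenate([state_h, state_l], axis=1)        # [1, 2 * num_units]
stacked = state.reshape(-1, num_units)                    # [2, num_units]; rows are [h, l]
H = stacked @ weight_internal                             # [2, 3 * num_units]
Hh, Hl = np.split(H, 2, axis=0)

# Equivalent to the commented-out per-half matmuls (for batch size 1).
assert np.allclose(Hh, state_h @ weight_internal)
assert np.allclose(Hl, state_l @ weight_internal)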
Example #41
0
 def _sample_n(self, n, seed=None):
   new_shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
   uniform = random_ops.random_uniform(
       new_shape, seed=seed, dtype=self.probs.dtype)
   sample = math_ops.less(uniform, self.probs)
   return math_ops.cast(sample, self.dtype)
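The sampler above draws Bernoulli variates by thresholding uniforms against probs: since P(U < p) = p for U ~ Uniform(0, 1), each comparison is a coin flip with the right bias. A NumPy sketch of the same idea, with a made-up vector of probabilities.

import numpy as np

rng = np.random.RandomState(0)
probs = np.array([0.1, 0.5, 0.9])                # hypothetical per-component probabilities
n = 100000

uniform = rng.uniform(size=(n,) + probs.shape)
samples = (uniform < probs).astype(np.float32)   # same trick as math_ops.less above

print(samples.mean(axis=0))  # close to [0.1, 0.5, 0.9]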
Example #42
0
def _SplitGrad(op, *grads):
    return None, array_ops.concat(op.inputs[0], list(grads))
Example #43
0
 def loop_fn(i):
     x1 = array_ops.gather(x, i)
     return array_ops.concat([x1, x1, y],
                             axis=0), array_ops.concat([x1, x1, y],
                                                       axis=-1)
Example #44
0
    def call(self, inputs, state):
        dtype = inputs.dtype
        time_now_score = tf.expand_dims(inputs[:, -1], -1)
        time_last_score = tf.expand_dims(inputs[:, -2], -1)
        inputs = inputs[:, :-2]
        input_size = inputs.get_shape().with_rank(2)[1]
        # decay gates
        scope = variable_scope.get_variable_scope()
        with variable_scope.variable_scope(scope) as unit_scope:
            with variable_scope.variable_scope(unit_scope):
                #weights for time now
                self._time_kernel_w1 = variable_scope.get_variable(
                    "_time_kernel_w1", shape=[self._num_units], dtype=dtype)
                self._time_kernel_b1 = variable_scope.get_variable(
                    "_time_kernel_b1", shape=[self._num_units], dtype=dtype)
                self._time_history_w1 = variable_scope.get_variable(
                    "_time_history_w1", shape=[self._num_units], dtype=dtype)
                self._time_history_b1 = variable_scope.get_variable(
                    "_time_history_b1", shape=[self._num_units], dtype=dtype)
                self._time_w1 = variable_scope.get_variable(
                    "_time_w1", shape=[self._num_units], dtype=dtype)
                self._time_w12 = variable_scope.get_variable(
                    "_time_w12", shape=[self._num_units], dtype=dtype)
                self._time_b1 = variable_scope.get_variable(
                    "_time_b1", shape=[self._num_units], dtype=dtype)
                self._time_b12 = variable_scope.get_variable(
                    "_time_b12", shape=[self._num_units], dtype=dtype)
                #weight for time last
                self._time_kernel_w2 = variable_scope.get_variable(
                    "_time_kernel_w2", shape=[self._num_units], dtype=dtype)
                self._time_kernel_b2 = variable_scope.get_variable(
                    "_time_kernel_b2", shape=[self._num_units], dtype=dtype)
                self._time_history_w2 = variable_scope.get_variable(
                    "_time_history_w2", shape=[self._num_units], dtype=dtype)
                self._time_history_b2 = variable_scope.get_variable(
                    "_time_history_b2", shape=[self._num_units], dtype=dtype)
                self._time_w2 = variable_scope.get_variable(
                    "_time_w2", shape=[self._num_units], dtype=dtype)
                self._time_b2 = variable_scope.get_variable(
                    "_time_b2", shape=[self._num_units], dtype=dtype)

        #time_now_weight = tf.nn.relu( inputs * self._time_kernel_w1+self._time_kernel_b1)
        time_last_weight = tf.nn.relu(inputs * self._time_kernel_w1 + self._time_kernel_b1 + state * self._time_history_w1)
        #time_now_state = tf.sigmoid( time_now_weight+ self._time_w1*tf.log(time_now_score+1)+self._time_b12)

        #time_last_weight =  tf.nn.relu(inputs* self._time_kernel_w2+self._time_kernel_b2 +state * self._time_history_w2)
        #time_last_state = tf.sigmoid( time_last_weight+ self._time_w2*tf.log(time_last_score+1)+self._time_b2)

        #version 2
        #time_last_score =  tf.nn.relu(self._time_w1 * tf.log(time_last_score + 1) + self._time_b1)
        time_last_score = tf.nn.relu(self._time_w1 * time_last_score + self._time_b1)
        time_last_state = tf.sigmoid(self._time_kernel_w2 * time_last_weight + self._time_w12 * time_last_score + self._time_b12)
        #time_last_score = tf.nn.relu(self._time_w2 * tf.log(time_last_score + 1) + self._time_b2)




        gate_inputs = math_ops.matmul(
            array_ops.concat([inputs, state], 1), self._gate_kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._gate_bias)

        value = math_ops.sigmoid(gate_inputs)
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state

        candidate = math_ops.matmul(
            array_ops.concat([inputs, r_state], 1), self._candidate_kernel)
        candidate = nn_ops.bias_add(candidate, self._candidate_bias)

        c = self._activation(candidate)
        #time_last_weight = tf.nn.relu(inputs * self._time_kernel_w2 + self._time_kernel_b2 + state * self._time_history_w2)
        #time_last_state = tf.sigmoid(time_last_weight + self._time_w2 * tf.log(time_last_score + 1) + self._time_b2)
        #new_h = u * state *  time_now_state + (1 - u) * c * time_last_state 0.0185 0.0136
        #new_h = u * state + (1 - u) * c * time_now_state 0.0237 0.013
        #new_h = u * state * time_now_state + (1 - u) * c * time_last_state #no position 0.0211 0.0137
        #new_h = u * state + (1 - u) * c * time_now_state #no position 0.0211 0.0143
        #new_h = u * state + (1 - u) * c 0.0185 0.0138
        #####
        #sli_rec no position 0.026 0.0144
        #####
        #new_h = u * state + (1 - u) * c * time_last_state #0.0237 0.0157
        new_h = u * state + (1 - u) * c * time_last_state
        return new_h, new_h
Example #45
0
 def _eval_metric_ops(self, labels, probabilities, weights, unreduced_loss,
                      regularization_loss):
     """Returns a dict of metrics for eval_metric_ops."""
     with ops.name_scope(None, 'metrics', [
             labels, probabilities, weights, unreduced_loss,
             regularization_loss
     ]):
         keys = metric_keys.MetricKeys
         metric_ops = {
             # Estimator already adds a metric for loss.
             head_lib._summary_key(self._name, keys.LOSS_MEAN):  # pylint:disable=protected-access
                 metrics_lib.mean(
                     values=unreduced_loss,
                     weights=weights,
                     name=keys.LOSS_MEAN),
             head_lib._summary_key(self._name, keys.AUC):  # pylint:disable=protected-access
                 metrics_lib.auc(labels=labels, predictions=probabilities,
                                 weights=weights, name=keys.AUC),
             head_lib._summary_key(self._name, keys.AUC_PR):  # pylint:disable=protected-access
                 metrics_lib.auc(labels=labels, predictions=probabilities,
                                 weights=weights, curve='PR',
                                 name=keys.AUC_PR),
         }
         if regularization_loss is not None:
             loss_regularization_key = head_lib._summary_key(  # pylint:disable=protected-access
                 self._name, keys.LOSS_REGULARIZATION)
             metric_ops[loss_regularization_key] = (metrics_lib.mean(
                 values=regularization_loss, name=keys.LOSS_REGULARIZATION))
         for threshold in self._thresholds:
             accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
             metric_ops[head_lib._summary_key(self._name, accuracy_key)] = (  # pylint:disable=protected-access
                 head_lib._accuracy_at_threshold(  # pylint:disable=protected-access
                     labels=labels,
                     predictions=probabilities,
                     weights=weights,
                     threshold=threshold,
                     name=accuracy_key))
             # Precision for positive examples.
             precision_key = keys.PRECISION_AT_THRESHOLD % threshold
             metric_ops[head_lib._summary_key(
                 self._name, precision_key)] = (  # pylint:disable=protected-access
                     head_lib._precision_at_threshold(  # pylint:disable=protected-access
                         labels=labels,
                         predictions=probabilities,
                         weights=weights,
                         threshold=threshold,
                         name=precision_key))
             # Recall for positive examples.
             recall_key = keys.RECALL_AT_THRESHOLD % threshold
             metric_ops[head_lib._summary_key(self._name, recall_key)] = (  # pylint:disable=protected-access
                 head_lib._recall_at_threshold(  # pylint:disable=protected-access
                     labels=labels,
                     predictions=probabilities,
                     weights=weights,
                     threshold=threshold,
                     name=recall_key))
         for class_id in self._classes_for_class_based_metrics:
             batch_rank = array_ops.rank(probabilities) - 1
             begin = array_ops.concat([
                 array_ops.zeros([batch_rank], dtype=dtypes.int32),
                 [class_id]
             ],
                                      axis=0)
             size = array_ops.concat([
                 -1 * array_ops.ones([batch_rank], dtype=dtypes.int32), [1]
             ],
                                     axis=0)
             class_probabilities = array_ops.slice(probabilities,
                                                   begin=begin,
                                                   size=size)
             class_labels = array_ops.slice(labels, begin=begin, size=size)
             prob_key = keys.PROBABILITY_MEAN_AT_CLASS % class_id
             metric_ops[head_lib._summary_key(self._name, prob_key)] = (  # pylint:disable=protected-access
                 head_lib._predictions_mean(  # pylint:disable=protected-access
                     predictions=class_probabilities,
                     weights=weights,
                     name=prob_key))
             auc_key = keys.AUC_AT_CLASS % class_id
             metric_ops[head_lib._summary_key(self._name, auc_key)] = (  # pylint:disable=protected-access
                 head_lib._auc(  # pylint:disable=protected-access
                     labels=class_labels,
                     predictions=class_probabilities,
                     weights=weights,
                     name=auc_key))
             auc_pr_key = keys.AUC_PR_AT_CLASS % class_id
             metric_ops[head_lib._summary_key(self._name, auc_pr_key)] = (  # pylint:disable=protected-access
                 head_lib._auc(  # pylint:disable=protected-access
                     labels=class_labels,
                     predictions=class_probabilities,
                     weights=weights,
                     curve='PR',
                     name=auc_pr_key))
     return metric_ops
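The begin/size vectors built with array_ops.concat above slice a single class column out of probabilities regardless of its batch rank: zeros for every batch dimension plus the class index for begin, and -1 (take everything) for every batch dimension plus 1 for size. A minimal TensorFlow sketch, assuming eager execution and a made-up [batch, num_classes] tensor.

import tensorflow as tf

probabilities = tf.constant([[0.1, 0.7, 0.2],
                             [0.6, 0.3, 0.1]])       # hypothetical [batch, num_classes]
class_id = 1

batch_rank = tf.rank(probabilities) - 1
begin = tf.concat([tf.zeros([batch_rank], dtype=tf.int32), [class_id]], axis=0)
size = tf.concat([-1 * tf.ones([batch_rank], dtype=tf.int32), [1]], axis=0)

# Keeps every batch element, takes only the class_id column: shape [2, 1].
class_probabilities = tf.slice(probabilities, begin=begin, size=size)
print(class_probabilities.numpy())   # [[0.7], [0.3]]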
Example #46
0
def _solve_interpolation(train_points, train_values, order,
                         regularization_weight):
    """Solve for interpolation coefficients.

  Computes the coefficients of the polyharmonic interpolant for the 'training'
  data defined by (train_points, train_values) using the kernel phi.

  Args:
    train_points: `[b, n, d]` interpolation centers
    train_values: `[b, n, k]` function values
    order: order of the interpolation
    regularization_weight: weight to place on smoothness regularization term

  Returns:
    w: `[b, n, k]` weights on each interpolation center
    v: `[b, d, k]` weights on each input dimension
  Raises:
    ValueError: if d or k is not fully specified.
  """

    # These dimensions are set dynamically at runtime.
    b, n, _ = array_ops.unstack(array_ops.shape(train_points), num=3)

    d = train_points.shape[-1]
    if d.value is None:
        raise ValueError('The dimensionality of the input points (d) must be '
                         'statically-inferrable.')

    k = train_values.shape[-1]
    if k.value is None:
        raise ValueError('The dimensionality of the output values (k) must be '
                         'statically-inferrable.')

    # First, rename variables so that the notation (c, f, w, v, A, B, etc.)
    # follows https://en.wikipedia.org/wiki/Polyharmonic_spline.
    # To account for python style guidelines we use
    # matrix_a for A and matrix_b for B.

    c = train_points
    f = train_values

    # Next, construct the linear system.
    with ops.name_scope('construct_linear_system'):

        matrix_a = _phi(_pairwise_squared_distance_matrix(c),
                        order)  # [b, n, n]
        if regularization_weight > 0:
            batch_identity_matrix = array_ops.expand_dims(
                linalg_ops.eye(n, dtype=c.dtype), 0)
            matrix_a += regularization_weight * batch_identity_matrix

        # Append ones to the feature values for the bias term in the linear model.
        ones = array_ops.ones_like(c[..., :1], dtype=c.dtype)
        matrix_b = array_ops.concat([c, ones], 2)  # [b, n, d + 1]

        # [b, n + d + 1, n]
        left_block = array_ops.concat(
            [matrix_a, array_ops.transpose(matrix_b, [0, 2, 1])], 1)

        num_b_cols = matrix_b.get_shape()[2]  # d + 1
        lhs_zeros = array_ops.zeros([b, num_b_cols, num_b_cols],
                                    train_points.dtype)
        right_block = array_ops.concat([matrix_b, lhs_zeros],
                                       1)  # [b, n + d + 1, d + 1]
        lhs = array_ops.concat([left_block, right_block],
                               2)  # [b, n + d + 1, n + d + 1]

        rhs_zeros = array_ops.zeros([b, d + 1, k], train_points.dtype)
        rhs = array_ops.concat([f, rhs_zeros], 1)  # [b, n + d + 1, k]

    # Then, solve the linear system and unpack the results.
    with ops.name_scope('solve_linear_system'):
        w_v = linalg_ops.matrix_solve(lhs, rhs)
        w = w_v[:, :n, :]
        v = w_v[:, n:, :]

    return w, v
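The block system assembled above is [[A, B], [B^T, 0]] @ [w; v] = [f; 0], where A holds the kernel values phi applied to pairwise squared distances and B is the centers with an appended ones column for the affine term. A toy NumPy construction of the same blocks, with a hypothetical order-1 kernel standing in for _phi and the batch dimension dropped, just to show the layout.

import numpy as np

def phi(r2):
    # Hypothetical order-1 polyharmonic kernel applied to squared distances.
    return np.sqrt(np.maximum(r2, 1e-12))

rng = np.random.RandomState(0)
n, d, k = 5, 2, 1                      # centers, input dims, output dims (toy sizes)
c = rng.randn(n, d)                    # train_points (single batch element)
f = rng.randn(n, k)                    # train_values

sq_dists = np.sum((c[:, None, :] - c[None, :, :]) ** 2, axis=-1)   # [n, n]
matrix_a = phi(sq_dists)                                           # [n, n]
matrix_b = np.concatenate([c, np.ones((n, 1))], axis=1)            # [n, d + 1]

# Same block layout as the concat calls above.
lhs = np.block([[matrix_a, matrix_b],
                [matrix_b.T, np.zeros((d + 1, d + 1))]])           # [n + d + 1, n + d + 1]
rhs = np.concatenate([f, np.zeros((d + 1, k))], axis=0)            # [n + d + 1, k]

w_v = np.linalg.solve(lhs, rhs)
w, v = w_v[:n], w_v[n:]
print(w.shape, v.shape)   # (5, 1) (3, 1)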
Example #47
0
  def _shape_tensor(self):
    matrix_shape = array_ops.stack(
        (self._num_rows, self._num_rows), axis=0)

    batch_shape = array_ops.shape(self.multiplier)
    return array_ops.concat((batch_shape, matrix_shape), 0)
Example #48
0
 def _flip_front_dims_to_back():
     # Permutation corresponding to [N1,...,Nn] + [k, M1,...,Mm]
     perm = array_ops.concat(
         0, (math_ops.range(m, vec_rank), math_ops.range(0, m)))
     return array_ops.transpose(vec, perm=perm)
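The permutation above rotates the first m axes of vec to the back by concatenating two index ranges (note the old-style concat(axis, values) signature). A small sketch with the current tf.concat(values, axis) signature, assuming eager execution and made-up ranks.

import tensorflow as tf

vec = tf.zeros([2, 3, 4, 5])         # hypothetical tensor of rank 4
m = 2                                # number of leading axes to move to the back
vec_rank = tf.rank(vec)

# Permutation [m, ..., rank-1, 0, ..., m-1]: back axes first, then the former front axes.
perm = tf.concat([tf.range(m, vec_rank), tf.range(0, m)], axis=0)
flipped = tf.transpose(vec, perm=perm)
print(flipped.shape)                 # (4, 5, 2, 3)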
Example #49
0
    def _sample_n(self, n, seed=None):
        if self._use_static_graph:
            # This sampling approach is almost the same as the approach used by
            # `MixtureSameFamily`. The differences are due to having a list of
            # `Distribution` objects rather than a single object, and maintaining
            # random seed management that is consistent with the non-static code path.
            samples = []
            cat_samples = self.cat.sample(n, seed=seed)
            for c in range(self.num_components):
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples.append(self.components[c].sample(n, seed=seed))
            x = array_ops.stack(samples, -self._static_event_shape.ndims -
                                1)  # [n, B, k, E]
            npdt = x.dtype.as_numpy_dtype
            mask = array_ops.one_hot(
                indices=cat_samples,  # [n, B]
                depth=self._num_components,  # == k
                on_value=np.ones([], dtype=npdt),
                off_value=np.zeros([], dtype=npdt))  # [n, B, k]
            mask = distribution_utils.pad_mixture_dimensions(
                mask, self, self._cat,
                self._static_event_shape.ndims)  # [n, B, k, [1]*e]
            return math_ops.reduce_sum(
                x * mask,
                axis=-1 - self._static_event_shape.ndims)  # [n, B, E]

        with ops.control_dependencies(self._assertions):
            n = ops.convert_to_tensor(n, name="n")
            static_n = tensor_util.constant_value(n)
            n = int(static_n) if static_n is not None else n
            cat_samples = self.cat.sample(n, seed=seed)

            static_samples_shape = cat_samples.get_shape()
            if static_samples_shape.is_fully_defined():
                samples_shape = static_samples_shape.as_list()
                samples_size = static_samples_shape.num_elements()
            else:
                samples_shape = array_ops.shape(cat_samples)
                samples_size = array_ops.size(cat_samples)
            static_batch_shape = self.batch_shape
            if static_batch_shape.is_fully_defined():
                batch_shape = static_batch_shape.as_list()
                batch_size = static_batch_shape.num_elements()
            else:
                batch_shape = self.batch_shape_tensor()
                batch_size = math_ops.reduce_prod(batch_shape)
            static_event_shape = self.event_shape
            if static_event_shape.is_fully_defined():
                event_shape = np.array(static_event_shape.as_list(),
                                       dtype=np.int32)
            else:
                event_shape = self.event_shape_tensor()

            # Get indices into the raw cat sampling tensor. We will
            # need these to stitch sample values back out after sampling
            # within the component partitions.
            samples_raw_indices = array_ops.reshape(
                math_ops.range(0, samples_size), samples_shape)

            # Partition the raw indices so that we can use
            # dynamic_stitch later to reconstruct the samples from the
            # known partitions.
            partitioned_samples_indices = data_flow_ops.dynamic_partition(
                data=samples_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)

            # Copy the batch indices n times, as we will need to know
            # these to pull out the appropriate rows within the
            # component partitions.
            batch_raw_indices = array_ops.reshape(
                array_ops.tile(math_ops.range(0, batch_size), [n]),
                samples_shape)

            # Explanation of the dynamic partitioning below:
            #   batch indices are i.e., [0, 1, 0, 1, 0, 1]
            # Suppose partitions are:
            #     [1 1 0 0 1 1]
            # After partitioning, batch indices are cut as:
            #     [batch_indices[x] for x in 2, 3]
            #     [batch_indices[x] for x in 0, 1, 4, 5]
            # i.e.
            #     [1 1] and [0 0 0 0]
            # Now we sample n=2 from part 0 and n=4 from part 1.
            # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
            # and for part 1 we want samples from batch entries 0, 0, 0, 0
            #   (samples 0, 1, 2, 3).
            partitioned_batch_indices = data_flow_ops.dynamic_partition(
                data=batch_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)
            samples_class = [None for _ in range(self.num_components)]

            for c in range(self.num_components):
                n_class = array_ops.size(partitioned_samples_indices[c])
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples_class_c = self.components[c].sample(n_class, seed=seed)

                # Pull out the correct batch entries from each index.
                # To do this, we may have to flatten the batch shape.

                # For sample s, batch element b of component c, we get the
                # partitioned batch indices from
                # partitioned_batch_indices[c]; and shift each element by
                # the sample index. The final lookup can be thought of as
                # a matrix gather along locations (s, b) in
                # samples_class_c where the n_class rows correspond to
                # samples within this component and the batch_size columns
                # correspond to batch elements within the component.
                #
                # Thus the lookup index is
                #   lookup[c, i] = batch_size * s[i] + b[c, i]
                # for i = 0 ... n_class[c] - 1.
                lookup_partitioned_batch_indices = (
                    batch_size * math_ops.range(n_class) +
                    partitioned_batch_indices[c])
                samples_class_c = array_ops.reshape(
                    samples_class_c,
                    array_ops.concat([[n_class * batch_size], event_shape], 0))
                samples_class_c = array_ops.gather(
                    samples_class_c,
                    lookup_partitioned_batch_indices,
                    name="samples_class_c_gather")
                samples_class[c] = samples_class_c

            # Stitch back together the samples across the components.
            lhs_flat_ret = data_flow_ops.dynamic_stitch(
                indices=partitioned_samples_indices, data=samples_class)
            # Reshape back to proper sample, batch, and event shape.
            ret = array_ops.reshape(
                lhs_flat_ret,
                array_ops.concat(
                    [samples_shape, self.event_shape_tensor()], 0))
            ret.set_shape(
                tensor_shape.TensorShape(static_samples_shape).concatenate(
                    self.event_shape))
            return ret
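The comment block above describes partitioning per-sample indices by component and stitching the component samples back together. A self-contained sketch of that round trip with tf.dynamic_partition / tf.dynamic_stitch, reusing the toy partitions [1 1 0 0 1 1] from the comment and assuming eager execution; the sample values are made up.

import tensorflow as tf

cat_samples = tf.constant([1, 1, 0, 0, 1, 1])            # which component each draw came from
samples_raw_indices = tf.range(tf.size(cat_samples))     # [0, 1, 2, 3, 4, 5]

# Split the flat indices by component, exactly as in the code above.
partitioned_indices = tf.dynamic_partition(
    data=samples_raw_indices, partitions=cat_samples, num_partitions=2)
print([p.numpy().tolist() for p in partitioned_indices])  # [[2, 3], [0, 1, 4, 5]]

# Pretend each component produced its own batch of sample values.
samples_class = [tf.constant([20.0, 30.0]),               # draws for component 0
                 tf.constant([10.0, 11.0, 14.0, 15.0])]   # draws for component 1

# dynamic_stitch scatters them back into the original sample order.
stitched = tf.dynamic_stitch(partitioned_indices, samples_class)
print(stitched.numpy())   # [10. 11. 20. 30. 14. 15.]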
Example #50
0
    def _recurrence(self, inputs, hidden_state, cell_states, depth):
        """use recurrence to traverse the nested structure

    Args:
      inputs: A 2D `Tensor` of [batch_size x input_size] shape.
      hidden_state: A 2D `Tensor` of [batch_size x num_units] shape.
      cell_states: A `list` of 2D `Tensor` of [batch_size x num_units] shape.
      depth: `int`
        the current depth in the nested structure, begins at 0.

    Returns:
      new_h: A 2D `Tensor` of [batch_size x num_units] shape.
        the latest hidden state for current step.
      new_cs: A `list` of 2D `Tensor` of [batch_size x num_units] shape.
        The accumulated cell states for current step.
    """
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        c = cell_states[depth]
        h = hidden_state

        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1),
                                      self._kernels[depth])
        if self._use_bias:
            gate_inputs = nn_ops.bias_add(gate_inputs, self._biases[depth])
        if self._use_peepholes:
            peep_gate_inputs = math_ops.matmul(c, self._peep_kernels[depth])
            i_peep, f_peep, o_peep = array_ops.split(value=peep_gate_inputs,
                                                     num_or_size_splits=3,
                                                     axis=one)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4,
                                     axis=one)
        if self._use_peepholes:
            i += i_peep
            f += f_peep
            o += o_peep

        # Note that using `add` and `multiply` instead of `+` and `*` gives a
        # performance improvement. So using those at the cost of readability.
        add = math_ops.add
        multiply = math_ops.multiply

        if self._use_bias:
            forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                      dtype=f.dtype)
            f = add(f, forget_bias_tensor)

        inner_hidden = multiply(c, self._gate_activation(f))

        if depth == 0:
            inner_input = multiply(self._gate_activation(i),
                                   self._cell_activation(j))
        else:
            inner_input = multiply(self._gate_activation(i),
                                   self._activation(j))

        if depth == (self.depth - 1):
            new_c = add(inner_hidden, inner_input)
            new_cs = [new_c]
        else:
            new_c, new_cs = self._recurrence(inputs=inner_input,
                                             hidden_state=inner_hidden,
                                             cell_states=cell_states,
                                             depth=depth + 1)
        new_h = multiply(self._activation(new_c), self._gate_activation(o))
        new_cs = [new_h] + new_cs
        return new_h, new_cs
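The concat-then-split pattern above (one matmul producing all four gate pre-activations) is the usual fused-gate formulation. A minimal NumPy sketch with hypothetical shapes, showing how a single kernel of width 4 * num_units is carved into the i, j, f, o blocks.

import numpy as np

batch, input_size, num_units = 2, 3, 4
rng = np.random.RandomState(0)
inputs = rng.randn(batch, input_size)
h = rng.randn(batch, num_units)
kernel = rng.randn(input_size + num_units, 4 * num_units)   # one fused gate kernel
bias = rng.randn(4 * num_units)

gate_inputs = np.concatenate([inputs, h], axis=1) @ kernel + bias   # [batch, 4 * num_units]
i, j, f, o = np.split(gate_inputs, 4, axis=1)                       # one [batch, num_units] block per gate
print(i.shape, j.shape, f.shape, o.shape)   # (2, 4) four times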
Example #51
0
def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.
  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.
  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.
  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c
  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.
  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.
  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with variable_scope.variable_scope(scope, 'InceptionV3', [inputs]):
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
        stride=1,
        padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = layers.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = layers.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = layers.conv2d(
          net, depth(64), [3, 3], padding='SAME', scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = layers.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = layers.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # 35 x 35 x 192.

      # Inception blocks
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
        stride=1,
        padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(32), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(64), [5, 5], scope='Conv_1_0c_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = layers.conv2d(
              branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_3: 17 x 17 x 768.
      end_point = 'Mixed_6a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net,
              depth(384), [3, 3],
              stride=2,
              padding='VALID',
              scope='Conv2d_1a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = layers.conv2d(
              branch_1,
              depth(96), [3, 3],
              stride=2,
              padding='VALID',
              scope='Conv2d_1a_1x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed4: 17 x 17 x 768.
      end_point = 'Mixed_6b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(128), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(
              branch_2, depth(128), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_5: 17 x 17 x 768.
      end_point = 'Mixed_6c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(
              branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [7, 1], scope='Conv2d_0b_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [1, 7], scope='Conv2d_0c_1x7')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [7, 1], scope='Conv2d_0d_7x1')
          branch_2 = layers.conv2d(
              branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7')
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_8: 8 x 8 x 1280.
      end_point = 'Mixed_7a'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = layers.conv2d(
              branch_0,
              depth(320), [3, 3],
              stride=2,
              padding='VALID',
              scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = layers.conv2d(
              branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = layers.conv2d(
              branch_1,
              depth(192), [3, 3],
              stride=2,
              padding='VALID',
              scope='Conv2d_1a_3x3')
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers_lib.max_pool2d(
              net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3')
        net = array_ops.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = array_ops.concat(
              [
                  layers.conv2d(
                      branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
                  layers.conv2d(
                      branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')
              ],
              3)
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = array_ops.concat(
              [
                  layers.conv2d(
                      branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
                  layers.conv2d(
                      branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
              ],
              3)
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with variable_scope.variable_scope(end_point):
        with variable_scope.variable_scope('Branch_0'):
          branch_0 = layers.conv2d(
              net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with variable_scope.variable_scope('Branch_1'):
          branch_1 = layers.conv2d(
              net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = array_ops.concat(
              [
                  layers.conv2d(
                      branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
                  layers.conv2d(
                      branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')
              ],
              3)
        with variable_scope.variable_scope('Branch_2'):
          branch_2 = layers.conv2d(
              net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = layers.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = array_ops.concat(
              [
                  layers.conv2d(
                      branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
                  layers.conv2d(
                      branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')
              ],
              3)
        with variable_scope.variable_scope('Branch_3'):
          branch_3 = layers_lib.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = layers.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
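The depth lambda above scales every layer width by depth_multiplier but never lets it drop below min_depth. A short standalone sketch of its behavior for a few assumed multipliers.

min_depth = 16

def depth(d, depth_multiplier):
    # Same rule as the lambda in inception_v3_base.
    return max(int(d * depth_multiplier), min_depth)

for m in (1.0, 0.5, 0.1):
    print(m, [depth(d, m) for d in (32, 64, 192)])
# 1.0 [32, 64, 192]
# 0.5 [16, 32, 96]
# 0.1 [16, 16, 19]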
Example #52
0
    def pack(self, grouped_grads_and_vars):
        """Pack tensors."""
        self.grouped_grads_and_vars = grouped_grads_and_vars
        self.all_tower_shapes = []
        self.all_tower_sizes = []

        device_grad_packs = []
        for tower_grads_and_vars in grouped_grads_and_vars:
            with ops.colocate_with(tower_grads_and_vars[0][0]):
                # Flatten all the grads.
                flat_grads = [
                    array_ops.reshape(g, [-1]) for g, _ in tower_grads_and_vars
                ]
                # Remember the original shape of all the grads.
                tower_shapes = [
                    array_ops.shape(g) for g, _ in tower_grads_and_vars
                ]
                # Remember the original sizes of all the grads.
                tower_sizes = [
                    array_ops.size(g) for g, _ in tower_grads_and_vars
                ]
                # Concat all the flat grads into a big flat tensor.
                concat_grads = array_ops.concat(flat_grads, 0)

                # Split the big tensor into num_splits packs. In cases where the
                # total size is not divisible by num_splits, the last pack gets
                # more elements.
                # TODO(zhengxq): it is also possible to optimize away all the concat
                # as well.
                num_splits = self.num_packs

                # The array_ops.size function will sometimes remove static shapes. So if
                # all gradient shapes are defined, we use another method to get the
                # total size.
                # TODO(yuefengz): move this logic to array_ops.size.
                if all([
                        g.shape.is_fully_defined()
                        for g, _ in tower_grads_and_vars
                ]):
                    total_grad_size = sum([
                        g.shape.num_elements() for g, _ in tower_grads_and_vars
                    ])
                else:
                    total_grad_size = array_ops.size(concat_grads)

                split_size = total_grad_size // num_splits
                split_size_last = total_grad_size - split_size * (num_splits -
                                                                  1)
                split_sizes = [split_size] * (num_splits - 1) + [
                    split_size_last
                ]
                grad_packs = array_ops.split(concat_grads, split_sizes)

                # Ready to aggregate the repacked gradients, with fake variables.
                # TODO(zhengxq): It is hacky to have to use fake variables.
                # We should remove the need for variables in
                # aggregate_gradients_using*.
                device_grad_packs.append(zip(grad_packs, [None] * num_splits))
                self.all_tower_shapes.append(tower_shapes)
                self.all_tower_sizes.append(tower_sizes)

        return device_grad_packs
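The packing step above flattens and concatenates all gradients, then splits the result into num_packs pieces, with the last piece absorbing the remainder. A NumPy sketch of the size arithmetic and of the round trip back to the original shapes, using made-up gradient shapes.

import numpy as np

rng = np.random.RandomState(0)
grads = [rng.randn(3, 4), rng.randn(5), rng.randn(2, 2, 2)]   # hypothetical per-variable grads
shapes = [g.shape for g in grads]
sizes = [g.size for g in grads]

flat = np.concatenate([g.reshape(-1) for g in grads])          # one big flat tensor (25 elements)

num_splits = 4
total = flat.size
split_size = total // num_splits
split_sizes = [split_size] * (num_splits - 1) + [total - split_size * (num_splits - 1)]
print(split_sizes)          # [6, 6, 6, 7]; the last pack absorbs the remainder

packs = np.split(flat, np.cumsum(split_sizes)[:-1])

# Unpacking reverses the process: concatenate, then cut and reshape per variable.
restored_flat = np.concatenate(packs)
offsets = np.cumsum([0] + sizes)
restored = [restored_flat[offsets[i]:offsets[i + 1]].reshape(shapes[i]) for i in range(len(grads))]
assert all(np.allclose(a, b) for a, b in zip(grads, restored))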
Example #53
0
    def call(self, inputs, state):
        """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, `[batch, num_units]`.
      state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
        [batch, state_size]`.  If `state_is_tuple` is True, this must be a tuple
        of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.

    Returns:
      A tuple containing:

      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
        inputs = self._tflite_wrapper.add_input(inputs,
                                                tag="input",
                                                name="input",
                                                aggregate="stack",
                                                index_override=0)

        # Make sure inputs and bias_initializer have the same type.
        assert inputs.dtype == self.input_to_input_w.dtype

        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        # Note: For TfLite, cell_state is at index 19 while activation state at
        # index 18.
        c_prev = self._tflite_wrapper.add_input(c_prev,
                                                tag="c_prev",
                                                name="c_prev",
                                                aggregate="first",
                                                index_override=19)
        m_prev = self._tflite_wrapper.add_input(m_prev,
                                                tag="m_prev",
                                                name="m_prev",
                                                aggregate="first",
                                                index_override=18)

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)

        # i stands for input gate.
        # f stands for forget gate activation.
        # o stands for output gate.
        # j stands for the candidate cell input.
        # c is the final cell state.
        # m is the output.
        i = nn_ops.bias_add(
            math_ops.matmul(inputs_and_m_prev,
                            array_ops.concat(
                                [self.input_to_input_w, self.cell_to_input_w],
                                axis=1),
                            transpose_b=True), self.input_bias)
        f = nn_ops.bias_add(
            math_ops.matmul(
                inputs_and_m_prev,
                array_ops.concat(
                    [self.input_to_forget_w, self.cell_to_forget_w], axis=1),
                transpose_b=True), self.forget_bias)
        o = nn_ops.bias_add(
            math_ops.matmul(
                inputs_and_m_prev,
                array_ops.concat(
                    [self.input_to_output_w, self.cell_to_output_w], axis=1),
                transpose_b=True), self.output_bias)
        j = nn_ops.bias_add(
            math_ops.matmul(inputs_and_m_prev,
                            array_ops.concat(
                                [self.input_to_cell_w, self.cell_to_cell_w],
                                axis=1),
                            transpose_b=True), self.cell_bias)

        # Diagonal connections
        if self._use_peepholes:
            c = (sigmoid(f + self._w_f_diag * c_prev) * c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f) * c_prev + sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            transposed_proj_kernel = array_ops.transpose(self._proj_kernel)
            m = math_ops.matmul(m, transposed_proj_kernel)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        c = self._tflite_wrapper.add_output(c,
                                            tag="c",
                                            name="c",
                                            aggregate="last",
                                            index_override=1)
        m = self._tflite_wrapper.add_output(m,
                                            tag="m",
                                            name="m",
                                            index_override=2,
                                            aggregate="stack")

        new_state = (rnn_cell_impl.LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
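The gate computations above fuse the input and recurrent projections into a single matmul by concatenating the inputs with the previous output and the two weight blocks. A minimal NumPy sketch (illustrative only; all names and shapes below are made up) of why that fused form equals the two separate projections:

import numpy as np

batch, input_size, num_units = 2, 3, 4
x = np.random.rand(batch, input_size)          # current inputs
m_prev = np.random.rand(batch, num_units)      # previous output / hidden state
w_x = np.random.rand(num_units, input_size)    # input-to-gate weights
w_h = np.random.rand(num_units, num_units)     # recurrent-to-gate weights
bias = np.random.rand(num_units)

# Fused form, mirroring matmul(concat([inputs, m_prev], 1),
#                              concat([w_x, w_h], 1), transpose_b=True).
fused = (np.concatenate([x, m_prev], axis=1)
         @ np.concatenate([w_x, w_h], axis=1).T + bias)
# Unfused form: separate input and recurrent projections, summed.
unfused = x @ w_x.T + m_prev @ w_h.T + bias
assert np.allclose(fused, unfused)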
Example #54
0
    def __init__(self,
                 inputs,
                 sequence_length,
                 sampling_probability,
                 time_major=False,
                 seed=None,
                 next_inputs_fn=None,
                 auxiliary_inputs=None,
                 name=None):
        """Initializer.

    Args:
      inputs: A (structure) of input tensors.
      sequence_length: An int32 vector tensor.
      sampling_probability: A 0D `float32` tensor: the probability of sampling
        from the outputs instead of reading directly from the inputs.
      time_major: Python bool.  Whether the tensors in `inputs` are time major.
        If `False` (default), they are assumed to be batch major.
      seed: The sampling seed.
      next_inputs_fn: (Optional) callable to apply to the RNN outputs to create
        the next input when sampling. If `None` (default), the RNN outputs will
        be used as the next inputs.
      auxiliary_inputs: An optional (structure of) auxiliary input tensors with
        a shape that matches `inputs` in all but (potentially) the final
        dimension. These tensors will be concatenated to the sampled output or
        the `inputs` when not sampling for use as the next input.
      name: Name scope for any created operations.

    Raises:
      ValueError: if `sampling_probability` is not a scalar or vector.
    """
        with ops.name_scope(name, "ScheduledOutputTrainingHelper",
                            [inputs, auxiliary_inputs, sampling_probability]):
            self._sampling_probability = ops.convert_to_tensor(
                sampling_probability, name="sampling_probability")
            if self._sampling_probability.get_shape().ndims not in (0, 1):
                raise ValueError(
                    "sampling_probability must be either a scalar or a vector. "
                    "saw shape: %s" % (self._sampling_probability.get_shape()))

            if auxiliary_inputs is None:
                maybe_concatenated_inputs = inputs
            else:
                inputs = ops.convert_to_tensor(inputs, name="inputs")
                auxiliary_inputs = ops.convert_to_tensor(
                    auxiliary_inputs, name="auxiliary_inputs")
                maybe_concatenated_inputs = nest.map_structure(
                    lambda x, y: array_ops.concat((x, y), -1), inputs,
                    auxiliary_inputs)
                if not time_major:
                    auxiliary_inputs = nest.map_structure(
                        _transpose_batch_time, auxiliary_inputs)

            self._auxiliary_input_tas = (nest.map_structure(
                _unstack_ta, auxiliary_inputs) if auxiliary_inputs is not None
                                         else None)

            self._seed = seed

            self._next_inputs_fn = next_inputs_fn

            super(ScheduledOutputTrainingHelper,
                  self).__init__(inputs=maybe_concatenated_inputs,
                                 sequence_length=sequence_length,
                                 time_major=time_major,
                                 name=name)
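The auxiliary-input handling above concatenates tensors that match `inputs` in every dimension but the last onto the inputs along the final axis. A small NumPy sketch (illustrative only; the shapes are toy values) of that shape contract:

import numpy as np

batch, time, input_dim, aux_dim = 4, 5, 8, 3
inputs = np.zeros((batch, time, input_dim), dtype=np.float32)
auxiliary_inputs = np.ones((batch, time, aux_dim), dtype=np.float32)

# Mirrors array_ops.concat((x, y), -1) applied over the input structure.
combined = np.concatenate((inputs, auxiliary_inputs), axis=-1)
assert combined.shape == (batch, time, input_dim + aux_dim)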
def kernel_classifier_distance_and_std(real_images,
                                       generated_images,
                                       classifier_fn,
                                       num_classifier_batches=1,
                                       max_block_size=1024,
                                       dtype=None):
    """Kernel "classifier" distance for evaluating a generative model.

  This is based on the Kernel Inception distance, but for an arbitrary
  embedding. Also returns an estimate of the standard error of the distance
  estimator.

  This technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier score. If you would like to precompute many
  activations for real and generated images for large batches, or to compute
  multiple scores based on the same images, please use
  kernel_classifier_distance_and_std_from_activations(), which this method also
  uses.

  Args:
    real_images: Real images to use to compute Kernel Inception distance.
    generated_images: Generated images to use to compute Kernel Inception
      distance.
    classifier_fn: A function that takes images and produces activations based
      on a classifier.
    num_classifier_batches: Number of batches to split the images into in order
      to run them efficiently through the classifier network.
    max_block_size: integer, default 1024. The distance estimator
      splits samples into blocks for computational efficiency. Larger values are
      more computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
   The Kernel Inception Distance. A floating-point scalar of the same type
     as the output of the activations.
   An estimate of the standard error of the distance estimator (a scalar of
     the same type).
  """
    real_images_list = array_ops.split(
        real_images, num_or_size_splits=num_classifier_batches)
    generated_images_list = array_ops.split(
        generated_images, num_or_size_splits=num_classifier_batches)

    real_imgs = array_ops.stack(real_images_list)
    generated_imgs = array_ops.stack(generated_images_list)

    # Compute the activations using the memory-efficient `map_fn`.
    def compute_activations(elems):
        return functional_ops.map_fn(fn=classifier_fn,
                                     elems=elems,
                                     parallel_iterations=1,
                                     back_prop=False,
                                     swap_memory=True,
                                     name='RunClassifier')

    real_a = compute_activations(real_imgs)
    gen_a = compute_activations(generated_imgs)

    # Ensure the activations have the right shapes.
    real_a = array_ops.concat(array_ops.unstack(real_a), 0)
    gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)

    return kernel_classifier_distance_and_std_from_activations(
        real_a, gen_a, max_block_size=max_block_size)
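The function above mainly implements a memory-efficient batching pattern: split the images, run the classifier per batch via map_fn, then unstack and concatenate the activations. A NumPy sketch (assumptions: a toy linear "classifier" and made-up shapes, not the TF implementation) showing that the re-concatenated per-batch activations match a single full-batch pass:

import numpy as np

rng = np.random.RandomState(0)
images = rng.rand(8, 16)                 # 8 flattened toy "images"
proj = rng.rand(16, 4)                   # toy classifier: a fixed projection

def classifier_fn(batch):
    return batch @ proj                  # produces 4-d "activations"

num_batches = 2
batched = np.split(images, num_batches)  # like array_ops.split
activations = np.concatenate([classifier_fn(b) for b in batched], axis=0)
assert np.allclose(activations, classifier_fn(images))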
Example #56
0
 def cell_input_fn(self, inputs, attention):
     return self.attn_projection(array_ops.concat([inputs, attention], -1))
Example #57
0
 def targets(self):
     if all(target is None for target in self._targets_components):
         return None
     return array_ops.concat(self._targets_components, axis=0)
def kernel_classifier_distance_and_std_from_activations(
        real_activations,
        generated_activations,
        max_block_size=1024,
        dtype=None):
    """Kernel "classifier" distance for evaluating a generative model.

  This method computes the kernel classifier distance from activations of
  real images and generated images. This can be used independently of the
  kernel_classifier_distance() method, especially in the case of using large
  batches during evaluation where we would like to precompute all of the
  activations before computing the classifier distance, or if we want to
  compute multiple metrics based on the same images. It also returns a rough
  estimate of the standard error of the estimator.

  This technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
  The estimate of the standard error will also be more reliable when there are
  more blocks, i.e. when max_block_size is smaller.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
   The Kernel Inception Distance. A floating-point scalar of the same type
     as the output of the activations.
   An estimate of the standard error of the distance estimator (a scalar of
     the same type).
  """

    real_activations.shape.assert_has_rank(2)
    generated_activations.shape.assert_has_rank(2)
    real_activations.shape[1].assert_is_compatible_with(
        generated_activations.shape[1])

    if dtype is None:
        dtype = real_activations.dtype
        assert generated_activations.dtype == dtype
    else:
        real_activations = math_ops.cast(real_activations, dtype)
        generated_activations = math_ops.cast(generated_activations, dtype)

    # Figure out how to split the activations into blocks of approximately
    # equal size, with none larger than max_block_size.
    n_r = array_ops.shape(real_activations)[0]
    n_g = array_ops.shape(generated_activations)[0]

    n_bigger = math_ops.maximum(n_r, n_g)
    n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))

    v_r = n_r // n_blocks
    v_g = n_g // n_blocks

    n_plusone_r = n_r - v_r * n_blocks
    n_plusone_g = n_g - v_g * n_blocks

    sizes_r = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_r], v_r),
        array_ops.fill([n_plusone_r], v_r + 1),
    ], 0)
    sizes_g = array_ops.concat([
        array_ops.fill([n_blocks - n_plusone_g], v_g),
        array_ops.fill([n_plusone_g], v_g + 1),
    ], 0)

    zero = array_ops.zeros([1], dtype=dtypes.int32)
    inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
    inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)

    dim = math_ops.cast(real_activations.shape[1], dtype)

    def compute_kid_block(i):
        """Compute the ith block of the KID estimate."""
        r_s = inds_r[i]
        r_e = inds_r[i + 1]
        r = real_activations[r_s:r_e]
        m = math_ops.cast(r_e - r_s, dtype)

        g_s = inds_g[i]
        g_e = inds_g[i + 1]
        g = generated_activations[g_s:g_e]
        n = math_ops.cast(g_e - g_s, dtype)

        k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1)**3
        k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1)**3
        k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1)**3
        return (-2 * math_ops.reduce_mean(k_rg) +
                (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) /
                (m * (m - 1)) +
                (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n *
                                                                      (n - 1)))

    ests = functional_ops.map_fn(compute_kid_block,
                                 math_ops.range(n_blocks),
                                 dtype=dtype,
                                 back_prop=False)

    mn = math_ops.reduce_mean(ests)

    # nn_impl.moments doesn't use the Bessel correction, which we want here
    n_blocks_ = math_ops.cast(n_blocks, dtype)
    var = control_flow_ops.cond(
        math_ops.less_equal(n_blocks, 1),
        lambda: array_ops.constant(float('nan'), dtype=dtype),
        lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) /
        (n_blocks_ - 1))

    return mn, math_ops.sqrt(var / n_blocks_)
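The block-splitting arithmetic above (sizes_r/inds_r and their generated counterparts) divides n samples into n_blocks near-equal blocks, none larger than max_block_size, and the KID estimate is averaged over the blocks. A NumPy sketch of that logic with made-up sizes (illustrative only):

import numpy as np

n, max_block_size = 10, 4
n_blocks = int(np.ceil(n / max_block_size))              # 3 blocks
v = n // n_blocks                                        # base block size: 3
n_plusone = n - v * n_blocks                             # 1 block gets one extra row
sizes = np.concatenate([np.full(n_blocks - n_plusone, v),
                        np.full(n_plusone, v + 1)])      # [3, 3, 4]
inds = np.concatenate([[0], np.cumsum(sizes)])           # [0, 3, 6, 10]
assert sizes.sum() == n
# Block i covers activations[inds[i]:inds[i + 1]].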
Example #59
0
 def _create(s, d):
     return array_ops.zeros(array_ops.concat(([batch_size], _t(s)), axis=0),
                            dtype=d)
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
    """Classifier distance for evaluating a generative model.

  This is based on the Frechet Inception distance, but for an arbitrary
  classifier.

  This technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

              |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute Frechet classifier distance when comparing two
  generative models.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier score. If you would like to precompute many
  activations for real and generated images for large batches, please use
  frechet_classifier_distance_from_activations(), which this method also uses.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split the images into in order to
      run them efficiently through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of `classifier_fn`.
  """
    real_images_list = array_ops.split(real_images,
                                       num_or_size_splits=num_batches)
    generated_images_list = array_ops.split(generated_images,
                                            num_or_size_splits=num_batches)

    real_imgs = array_ops.stack(real_images_list)
    generated_imgs = array_ops.stack(generated_images_list)

    # Compute the activations using the memory-efficient `map_fn`.
    def compute_activations(elems):
        return functional_ops.map_fn(fn=classifier_fn,
                                     elems=elems,
                                     parallel_iterations=1,
                                     back_prop=False,
                                     swap_memory=True,
                                     name='RunClassifier')

    real_a = compute_activations(real_imgs)
    gen_a = compute_activations(generated_imgs)

    # Ensure the activations have the right shapes.
    real_a = array_ops.concat(array_ops.unstack(real_a), 0)
    gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)

    return frechet_classifier_distance_from_activations(real_a, gen_a)
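The docstring above quotes the Frechet formula |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)). An illustrative NumPy/SciPy sketch of that formula computed from toy activations (not the TF implementation; the sample sizes and shapes are made up):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
real_acts = rng.randn(64, 8)            # hypothetical real activations
gen_acts = 0.5 + rng.randn(64, 8)       # hypothetical generated activations

m, m_w = real_acts.mean(axis=0), gen_acts.mean(axis=0)
c, c_w = np.cov(real_acts, rowvar=False), np.cov(gen_acts, rowvar=False)

cov_sqrt = linalg.sqrtm(c @ c_w).real   # matrix square root; drop tiny imaginary part
fd = np.sum((m - m_w) ** 2) + np.trace(c + c_w - 2.0 * cov_sqrt)
print(fd)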