Example #1
 def _entropy(self):
     if (not self.distribution.is_continuous
             or not self.bijector.is_constant_jacobian):
         raise NotImplementedError("entropy is not implemented")
     # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
     # can be shown that:
     #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
     # If is_constant_jacobian then:
     #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
     # where c can be anything.
     entropy = self.distribution.entropy()
     if self._is_maybe_event_override:
         # H[X] = sum_i H[X_i] if X_i are mutually independent.
         # This means that a reduce_sum is a simple rescaling.
         entropy *= math_ops.cast(math_ops.reduce_prod(
             self._override_event_shape),
                                  dtype=entropy.dtype.base_dtype)
     if self._is_maybe_batch_override:
         new_shape = array_ops.concat_v2([
             _ones_like(self._override_batch_shape),
             self.distribution.batch_shape()
         ], 0)
         entropy = array_ops.reshape(entropy, new_shape)
         multiples = array_ops.concat_v2([
             self._override_batch_shape,
             _ones_like(self.distribution.batch_shape())
         ], 0)
         entropy = array_ops.tile(entropy, multiples)
     dummy = 0.
     return entropy - self.bijector.inverse_log_det_jacobian(dummy)
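The comment above reduces to H[Y] = H[X] + log|det J| when the Jacobian is constant. Below is a minimal NumPy sketch of that identity for a scalar affine bijector Y = a*X + b; the values are purely illustrative and not part of the TensorFlow example.
```python
import numpy as np

# Check H[Y] = H[X] + log|a| for Y = a*X + b with X ~ Normal(0, 1).
a, b = 3.0, -1.0
h_x = 0.5 * np.log(2.0 * np.pi * np.e)       # differential entropy of N(0, 1)
h_y_formula = h_x + np.log(abs(a))           # entropy plus the constant log|det J|

# Monte Carlo estimate of H[Y] using the density of Y ~ N(b, a**2).
samples = b + a * np.random.standard_normal(200000)
log_p_y = -0.5 * np.log(2.0 * np.pi * a**2) - (samples - b) ** 2 / (2.0 * a**2)
print(h_y_formula, -log_p_y.mean())          # the two values should agree closely
```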
Example #2
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """

  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat_v2([
        array_ops.ones_like(shape[:-3]), bias_shape, array_ops.ones_like(shape[
            -2:])
    ], 0)
    tile_mults = array_ops.concat_v2([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat_v2(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat_v2([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
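The gradient of BiasAddGrad is the incoming bias-shaped gradient broadcast back to the input shape, implemented here as a reshape followed by a tile. A small NumPy sketch of the default (NHWC) branch above, using hypothetical shapes:
```python
import numpy as np

# NHWC case: reshape the bias-shaped gradient to [1, ..., 1, C], then tile it
# back to the full input shape.
input_shape = (2, 5, 5, 3)                        # hypothetical NHWC input shape
received_grad = np.arange(3, dtype=np.float32)    # gradient w.r.t. the bias, shape (C,)

expanded_shape = (1,) * (len(input_shape) - 1) + received_grad.shape
tile_mults = input_shape[:-1] + (1,)

result = np.tile(received_grad.reshape(expanded_shape), tile_mults)
assert result.shape == input_shape                # same shape as the BiasAdd input
```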
Example #3
def same_dynamic_shape(a, b):
    """Returns whether a and b have the same dynamic shape.

  Args:
    a: `Tensor`
    b: `Tensor`

  Returns:
    `Boolean` `Tensor` representing if both tensors have the same shape.
  """
    a = ops.convert_to_tensor(a, name="a")
    b = ops.convert_to_tensor(b, name="b")

    # One of the shapes isn't fully defined, so we need to use the dynamic
    # shape.
    return control_flow_ops.cond(
        math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
        # Here we can't just do math_ops.equal(a.shape, b.shape), since
        # static shape inference may break the equality comparison between
        # shape(a) and shape(b) in math_ops.equal.
        lambda: math_ops.reduce_all(
            math_ops.equal(
                array_ops.concat_v2(
                    (array_ops.shape(a), array_ops.shape(b)), 0),
                array_ops.concat_v2(
                    (array_ops.shape(b), array_ops.shape(a)), 0))),
        lambda: constant_op.constant(False))
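One way to read the trick above: once the ranks are known to match, concat((shape(a), shape(b))) and concat((shape(b), shape(a))) are element-for-element equal exactly when the two shapes are equal, and both concatenations have the same length, which sidesteps the comparison problem mentioned in the comment. A rough NumPy sketch of the same idea (the function name is illustrative only):
```python
import numpy as np

def same_shape(shape_a, shape_b):
    # When the ranks match, the two concatenations line up element for element,
    # so they agree everywhere iff shape_a == shape_b.
    if len(shape_a) != len(shape_b):
        return False
    ab = np.concatenate((shape_a, shape_b))
    ba = np.concatenate((shape_b, shape_a))
    return bool(np.all(ab == ba))

print(same_shape(np.array([2, 3]), np.array([2, 3])))   # True
print(same_shape(np.array([2, 3]), np.array([3, 2])))   # False
```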
Example #4
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """

  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat_v2([
        array_ops.ones_like(shape[:-3]), bias_shape, array_ops.ones_like(shape[
            -2:])
    ], 0)
    tile_mults = array_ops.concat_v2([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat_v2(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat_v2([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
Example #5
def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
    """Common code for SparseDenseCwise{Mul,Div} gradients."""
    x_indices = op.inputs[0]
    x_shape = op.inputs[2]
    y = op.inputs[3]

    y_shape = math_ops.to_int64(array_ops.shape(y))
    num_added_dims = array_ops.expand_dims(
        array_ops.size(x_shape) - array_ops.size(y_shape), 0)
    augmented_y_shape = array_ops.concat_v2(
        [array_ops.ones(num_added_dims, ops.dtypes.int64), y_shape], 0)

    scaling = x_shape // augmented_y_shape
    scaled_indices = x_indices // scaling
    scaled_indices = array_ops.slice(
        scaled_indices, array_ops.concat_v2([[0], num_added_dims], 0),
        [-1, -1])
    dense_vals = array_ops.gather_nd(y, scaled_indices)

    if is_mul:
        dx = grad * dense_vals
        dy_val = grad * op.inputs[1]
    else:
        dx = grad / dense_vals
        dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
    # indices can repeat after scaling, so we can't use sparse_to_dense().
    dy = sparse_ops.sparse_add(
        array_ops.zeros_like(y),
        sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

    # (sp_indices, sp_vals, sp_shape, dense)
    return (None, dx, None, dy)
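The concat_v2 calls here build a rank-matched ("augmented") shape for the dense operand so each sparse index can be integer-divided down to the dense entry it broadcasts against; as the comment notes, the scaled indices may repeat. A tiny NumPy sketch of that index scaling with made-up shapes:
```python
import numpy as np

# x is sparse with dense shape [4, 6]; y broadcasts against it with shape [1, 3].
x_shape = np.array([4, 6], dtype=np.int64)
augmented_y_shape = np.array([1, 3], dtype=np.int64)   # already padded to x's rank

scaling = x_shape // augmented_y_shape                 # [4, 2]
x_indices = np.array([[0, 0], [1, 5], [3, 4]])
scaled_indices = x_indices // scaling                  # indices into y, may repeat
print(scaled_indices)                                  # [[0 0] [0 2] [0 2]]
```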
Example #6
 def _entropy(self):
   if (not self.distribution.is_continuous or
       not self.bijector.is_constant_jacobian):
     raise NotImplementedError("entropy is not implemented")
   # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
   # can be shown that:
   #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
   # If is_constant_jacobian then:
   #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
   # where c can be anything.
   entropy = self.distribution.entropy()
   if self._is_maybe_event_override:
     # H[X] = sum_i H[X_i] if X_i are mutually independent.
     # This means that a reduce_sum is a simple rescaling.
     entropy *= math_ops.cast(math_ops.reduce_prod(self._override_event_shape),
                              dtype=entropy.dtype.base_dtype)
   if self._is_maybe_batch_override:
     new_shape = array_ops.concat_v2([
         _ones_like(self._override_batch_shape),
         self.distribution.batch_shape()], 0)
     entropy = array_ops.reshape(entropy, new_shape)
     multiples = array_ops.concat_v2([
         self._override_batch_shape,
         _ones_like(self.distribution.batch_shape())], 0)
     entropy = array_ops.tile(entropy, multiples)
   dummy = 0.
   return entropy - self.bijector.inverse_log_det_jacobian(dummy)
Example #7
def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients."""
  x_indices = op.inputs[0]
  x_shape = op.inputs[2]
  y = op.inputs[3]

  y_shape = math_ops.to_int64(array_ops.shape(y))
  num_added_dims = array_ops.expand_dims(
      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
  augmented_y_shape = array_ops.concat_v2(
      [array_ops.ones(num_added_dims, ops.dtypes.int64), y_shape], 0)

  scaling = x_shape // augmented_y_shape
  scaled_indices = x_indices // scaling
  scaled_indices = array_ops.slice(
      scaled_indices, array_ops.concat_v2([[0], num_added_dims], 0), [-1, -1])
  dense_vals = array_ops.gather_nd(y, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * op.inputs[1]
  else:
    dx = grad / dense_vals
    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
  # indices can repeat after scaling, so we can't use sparse_to_dense().
  dy = sparse_ops.sparse_add(
      array_ops.zeros_like(y),
      sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)
Example #8
def same_dynamic_shape(a, b):
  """Returns whether a and b have the same dynamic shape.

  Args:
    a: `Tensor`
    b: `Tensor`

  Returns:
    `Boolean` `Tensor` representing if both tensors have the same shape.
  """
  a = ops.convert_to_tensor(a, name="a")
  b = ops.convert_to_tensor(b, name="b")

  # One of the shapes isn't fully defined, so we need to use the dynamic
  # shape.
  return control_flow_ops.cond(
      math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
      # Here we can't just do math_ops.equal(a.shape, b.shape), since
      # static shape inference may break the equality comparison between
      # shape(a) and shape(b) in math_ops.equal.
      lambda: math_ops.reduce_all(math_ops.equal(
          array_ops.concat_v2((
              array_ops.shape(a),
              array_ops.shape(b)), 0),
          array_ops.concat_v2((
              array_ops.shape(b),
              array_ops.shape(a)), 0))),
      lambda: constant_op.constant(False))
Example #9
    def _define_partial_maximization_operation(self, shard_id, shard):
        """Computes the partial statistics of the means and covariances.

    Args:
      shard_id: current shard id.
      shard: current data shard, 1 X num_examples X dimensions.
    """
        # Soft assignment of each data point to each of the two clusters.
        self._points_in_k[shard_id] = math_ops.reduce_sum(self._w[shard_id],
                                                          0,
                                                          keep_dims=True)
        # Partial means.
        w_mul_x = array_ops.expand_dims(
            math_ops.matmul(self._w[shard_id],
                            array_ops.squeeze(shard, [0]),
                            transpose_a=True), 1)
        self._w_mul_x.append(w_mul_x)
        # Partial covariances.
        x = array_ops.concat_v2([shard for _ in range(self._num_classes)], 0)
        x_trans = array_ops.transpose(x, perm=[0, 2, 1])
        x_mul_w = array_ops.concat_v2([
            array_ops.expand_dims(x_trans[k, :, :] * self._w[shard_id][:, k],
                                  0) for k in range(self._num_classes)
        ], 0)
        self._w_mul_x2.append(math_ops.matmul(x_mul_w, x))
Example #10
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
    """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the number
      of columns in each batch matrix.  Defaults to `num_rows`.
    batch_shape:  `int32` `Tensor`.  If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype:  The type of an element in the resulting `Tensor`
    name:  A name for this `Op`.  Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`
  """
    with ops.name_scope(name,
                        default_name='eye',
                        values=[num_rows, num_columns, batch_shape]):

        batch_shape = [] if batch_shape is None else batch_shape
        batch_shape = ops.convert_to_tensor(batch_shape,
                                            name='shape',
                                            dtype=dtypes.int32)

        if num_columns is None:
            diag_size = num_rows
        else:
            diag_size = math_ops.minimum(num_rows, num_columns)
        diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
        diag_ones = array_ops.ones(diag_shape, dtype=dtype)

        if num_columns is None:
            return array_ops.matrix_diag(diag_ones)
        else:
            shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]),
                                        0)
            zero_matrix = array_ops.zeros(shape, dtype=dtype)
            return array_ops.matrix_set_diag(zero_matrix, diag_ones)
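The identity matrix is assembled from a ones vector of shape batch_shape + [diag_size] placed on the diagonal of a zero matrix, with concat_v2 building both shapes. A rough NumPy analogue of that construction (batch_eye is a hypothetical helper, not a TensorFlow function):
```python
import numpy as np

def batch_eye(num_rows, num_columns=None, batch_shape=(), dtype=np.float32):
    num_columns = num_rows if num_columns is None else num_columns
    diag_size = min(num_rows, num_columns)
    out = np.zeros(tuple(batch_shape) + (num_rows, num_columns), dtype=dtype)
    idx = np.arange(diag_size)
    out[..., idx, idx] = 1                    # rough analogue of matrix_set_diag
    return out

print(batch_eye(2, num_columns=3, batch_shape=(3,)).shape)   # (3, 2, 3)
```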
Example #11
def _GRUBlockCellGrad(op, *grad):
  r"""Gradient for GRUBlockCell.

  Args:
    op: Op for which the gradient is defined.
    *grad: Gradients of the optimization function wrt output
      for the Op.

  Returns:
    d_x: Gradients wrt to x
    d_h: Gradients wrt to h
    d_w_ru: Gradients wrt to w_ru
    d_w_c: Gradients wrt to w_c
    d_b_ru: Gradients wrt to b_ru
    d_b_c: Gradients wrt to b_c

  Mathematics behind the Gradients below:
  ```
  d_c_bar = d_h \circ (1-u) \circ (1-c \circ c)
  d_u_bar = d_h \circ (h-c) \circ u \circ (1-u)

  d_r_bar_u_bar = [d_r_bar d_u_bar]

  [d_x_component_1 d_h_prev_component_1] = d_r_bar_u_bar * w_ru^T

  [d_x_component_2 d_h_prevr] = d_c_bar * w_c^T

  d_x = d_x_component_1 + d_x_component_2

  d_h_prev = d_h_prev_component_1 + d_h_prevr \circ r + d_h \circ u
  ```
  Below calculation is performed in the python wrapper for the Gradients
  (not in the gradient kernel.)
  ```
  d_w_ru = x_h_prev^T * d_r_bar_u_bar

  d_w_c = x_h_prevr^T * d_c_bar

  d_b_ru = sum of d_r_bar_u_bar along axis = 0

  d_b_c = sum of d_c_bar along axis = 0
  ```
  """
  x, h_prev, w_ru, w_c, b_ru, b_c = op.inputs
  r, u, c, _ = op.outputs
  _, _, _, d_h = grad

  d_x, d_h_prev, d_c_bar, d_r_bar_u_bar = _gru_ops_so.gru_block_cell_grad(
      x, h_prev, w_ru, w_c, b_ru, b_c, r, u, c, d_h)

  x_h_prev = array_ops.concat_v2([x, h_prev], 1)
  d_w_ru = math_ops.matmul(x_h_prev, d_r_bar_u_bar, transpose_a=True)
  d_b_ru = nn_ops.bias_add_grad(d_r_bar_u_bar)

  x_h_prevr = array_ops.concat_v2([x, h_prev * r], 1)
  d_w_c = math_ops.matmul(x_h_prevr, d_c_bar, transpose_a=True)
  d_b_c = nn_ops.bias_add_grad(d_c_bar)

  return d_x, d_h_prev, d_w_ru, d_w_c, d_b_ru, d_b_c
Example #12
 def testAttentionCellWrapperCorrectResult(self):
   num_units = 4
   attn_length = 6
   batch_size = 2
   expected_output = np.array(
       [[0.955392, 0.408507, -0.60122, 0.270718],
        [0.903681, 0.331165, -0.500238, 0.224052]],
       dtype=np.float32)
   expected_state = np.array(
       [[0.81331915, 0.32036272, 0.28079176, 1.08888793, 0.41264394,
         0.1062041, 0.10444493, 0.32050529, 0.64655536, 0.70794445,
         0.51896095, 0.31809306, 0.58086717, 0.49446869, 0.7641536,
         0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
         0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
         0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
         0.99211812, 0.12295902, 1.01412082, 0.33123279, -0.71114945,
         0.40583119],
        [0.59962207, 0.42597458, -0.22491696, 0.98063421, 0.32548007,
         0.11623692, -0.10100613, 0.27708149, 0.76956916, 0.6360054,
         0.51719815, 0.50458527, 0.73000264, 0.66986895, 0.73576689,
         0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
         0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
         0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
         0.36127412, 0.12125921, 0.99780077, 0.31886846, -0.67595094,
         0.56531656]],
       dtype=np.float32)
   seed = 12345
   random_seed.set_random_seed(seed)
   for state_is_tuple in [False, True]:
     with session.Session() as sess:
       with variable_scope.variable_scope(
           "state_is_tuple", reuse=state_is_tuple):
         lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
             num_units, state_is_tuple=state_is_tuple)
         cell = rnn_cell.AttentionCellWrapper(
             lstm_cell, attn_length, state_is_tuple=state_is_tuple)
         zeros1 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 1)
         zeros2 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 2)
         zeros3 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 3)
         attn_state_zeros = random_ops.random_uniform(
             (batch_size, attn_length * num_units), 0.0, 1.0, seed=seed + 4)
         zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
         if not state_is_tuple:
           zero_state = array_ops.concat_v2([
               zero_state[0][0], zero_state[0][1], zero_state[1], zero_state[2]
           ], 1)
         inputs = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 5)
         output, state = cell(inputs, zero_state)
         if state_is_tuple:
           state = array_ops.concat_v2(
               [state[0][0], state[0][1], state[1], state[2]], 1)
         sess.run(variables.global_variables_initializer())
         self.assertAllClose(sess.run(output), expected_output)
         self.assertAllClose(sess.run(state), expected_state)
Example #13
def eye(
    num_rows,
    num_columns=None,
    batch_shape=None,
    dtype=dtypes.float32,
    name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the number
      of columns in each batch matrix.  Defaults to `num_rows`.
    batch_shape:  `int32` `Tensor`.  If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype:  The type of an element in the resulting `Tensor`
    name:  A name for this `Op`.  Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`
  """
  with ops.name_scope(
      name, default_name="eye", values=[num_rows, num_columns, batch_shape]):

    batch_shape = [] if batch_shape is None else batch_shape
    batch_shape = ops.convert_to_tensor(
        batch_shape, name="shape", dtype=dtypes.int32)

    if num_columns is None:
      diag_size = num_rows
    else:
      diag_size = math_ops.minimum(num_rows, num_columns)
    diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
    diag_ones = array_ops.ones(diag_shape, dtype=dtype)

    if num_columns is None:
      return array_ops.matrix_diag(diag_ones)
    else:
      shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]), 0)
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
Example #14
def boston_eval_fn():
  boston = base.load_boston()
  n_examples = len(boston.target)
  features = array_ops.reshape(
      constant_op.constant(boston.data), [n_examples, _BOSTON_INPUT_DIM])
  labels = array_ops.reshape(
      constant_op.constant(boston.target), [n_examples, 1])
  return array_ops.concat_v2([features, features], 0), array_ops.concat_v2(
      [labels, labels], 0)
Example #15
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape()
    event_shape = self.event_shape()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat_v2(((n,), batch_shape, event_shape), 0)

    # Complexity: O(nbk^2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    g = random_ops.random_gamma(shape=(n,),
                                alpha=self._multi_gamma_sequence(
                                    0.5 * self.df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk^2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk^2)
    perm = array_ops.concat_v2((math_ops.range(1, ndims), (0,)), 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat_v2((batch_shape, (event_shape[0], -1)), 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
    # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
    # O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator_pd.sqrt_matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk^2)
    shape = array_ops.concat_v2((batch_shape, event_shape, (n,)), 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat_v2(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
Example #16
def _propagate(dim_indices, conf, cell, c_prev, m_prev, new_output, new_state,
               first_call):
    """Propagates through all the cells in dim_indices dimensions.
  """
    if len(dim_indices) == 0:
        return

    # Because of the way RNNCells are implemented, we take the last dimension
    # (H_{N-1}) out and feed it as the state of the RNN cell
    # (in `last_dim_output`).
    # The inputs of the cell (H_0 to H_{N-2}) are concatenated into `cell_inputs`.
    if conf.num_dims > 1:
        ls_cell_inputs = [None] * (conf.num_dims - 1)
        for d in conf.dims[:-1]:
            ls_cell_inputs[d.idx] = new_output[d.idx] if new_output[
                d.idx] is not None else m_prev[d.idx]
        cell_inputs = array_ops.concat_v2(ls_cell_inputs, 1)
    else:
        cell_inputs = array_ops.zeros([m_prev[0].get_shape().as_list()[0], 0],
                                      m_prev[0].dtype)

    last_dim_output = new_output[-1] if new_output[-1] is not None else m_prev[
        -1]

    for i in dim_indices:
        d = conf.dims[i]
        if d.non_recurrent_fn:
            linear_args = array_ops.concat_v2(
                [cell_inputs, last_dim_output],
                1) if conf.num_dims > 1 else last_dim_output
            with vs.variable_scope('non_recurrent' if conf.tied else
                                   'non_recurrent/cell_{}'.format(i)):
                if conf.tied and not (first_call and i == dim_indices[0]):
                    vs.get_variable_scope().reuse_variables()
                new_output[d.idx] = layers.legacy_fully_connected(
                    linear_args,
                    num_output_units=conf.num_units,
                    activation_fn=d.non_recurrent_fn,
                    weight_init=vs.get_variable_scope().initializer
                    or layers.initializers.xavier_initializer)
        else:
            if c_prev[i] is not None:
                cell_state = array_ops.concat_v2([c_prev[i], last_dim_output],
                                                 1)
            else:
                # for GRU/RNN, the state is just the previous output
                cell_state = last_dim_output

            with vs.variable_scope('recurrent' if conf.
                                   tied else 'recurrent/cell_{}'.format(i)):
                if conf.tied and not (first_call and i == dim_indices[0]):
                    vs.get_variable_scope().reuse_variables()
                new_output[d.idx], new_state[d.idx] = cell(
                    cell_inputs, cell_state)
Example #17
 def testOpsBetweenCut(self):
   with ops.Graph().as_default() as g:
     t1 = constant(1.0)
     t2 = constant(2.0)
     t3 = array_ops.stack([t1, t2])
     t4 = constant([1.0])
     t5 = array_ops.concat_v2([t4, t3], 0)
     t6 = constant([2.0])
     t7 = array_ops.concat_v2([t5, t6], 0)
   self._assertOpListEqual([t7.op, t5.op, t4.op],
                           _OpsBetween(g, [t7.op], [t4.op]))
Example #18
 def testOpsBetweenCut(self):
     with ops.Graph().as_default() as g:
         t1 = constant(1.0)
         t2 = constant(2.0)
         t3 = array_ops.stack([t1, t2])
         t4 = constant([1.0])
         t5 = array_ops.concat_v2([t4, t3], 0)
         t6 = constant([2.0])
         t7 = array_ops.concat_v2([t5, t6], 0)
     self._assertOpListEqual([t7.op, t5.op, t4.op],
                             _OpsBetween(g, [t7.op], [t4.op]))
Example #19
  def testConcat(self):
    tf_val = array_ops.concat_v2(
        [[16, 37], array_ops.placeholder(
            dtypes.int32, shape=(2,))], 0)
    c_val = tensor_util.constant_value_as_shape(tf_val)
    self.assertEqual([16, 37, None, None], c_val.as_list())

    tf_val = array_ops.concat_v2(
        [[16, 37], array_ops.placeholder(
            dtypes.int32, shape=(1,)), [48]], 0)
    c_val = tensor_util.constant_value_as_shape(tf_val)
    self.assertEqual([16, 37, None, 48], c_val.as_list())
Example #20
    def _sample_n(self, n, seed):
        batch_shape = self.batch_shape()
        event_shape = self.event_shape()
        batch_ndims = array_ops.shape(batch_shape)[0]

        ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
        shape = array_ops.concat_v2(((n,), batch_shape, event_shape), 0)

        # Complexity: O(nbk^2)
        x = random_ops.random_normal(shape=shape, mean=0.0, stddev=1.0, dtype=self.dtype, seed=seed)

        # Complexity: O(nbk)
        # This parametrization is equivalent to Chi2, i.e.,
        # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
        g = random_ops.random_gamma(
            shape=(n,),
            alpha=self._multi_gamma_sequence(0.5 * self.df, self.dimension),
            beta=0.5,
            dtype=self.dtype,
            seed=distribution_util.gen_new_seed(seed, "wishart"),
        )

        # Complexity: O(nbk^2)
        x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

        # Complexity: O(nbk)
        x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

        # Make batch-op ready.
        # Complexity: O(nbk^2)
        perm = array_ops.concat_v2((math_ops.range(1, ndims), (0,)), 0)
        x = array_ops.transpose(x, perm)
        shape = array_ops.concat_v2((batch_shape, (event_shape[0], -1)), 0)
        x = array_ops.reshape(x, shape)

        # Complexity: O(nbM) where M is the complexity of the operator solving a
        # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
        # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
        # O(k^3) so this step has complexity O(nbk^3).
        x = self.scale_operator_pd.sqrt_matmul(x)

        # Undo make batch-op ready.
        # Complexity: O(nbk^2)
        shape = array_ops.concat_v2((batch_shape, event_shape, (n,)), 0)
        x = array_ops.reshape(x, shape)
        perm = array_ops.concat_v2(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
        x = array_ops.transpose(x, perm)

        if not self.cholesky_input_output_matrices:
            # Complexity: O(nbk^3)
            x = math_ops.matmul(x, x, adjoint_b=True)

        return x
Example #21
File: topn.py Project: kadeng/tensorflow
 def refresh_shortlist():
   """Update the shortlist with the highest scores in id_to_score."""
   new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
   smallest_new_score = math_ops.reduce_min(new_scores)
   new_length = math_ops.reduce_sum(
       math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
   u1 = self.sl_ids.assign(
       math_ops.to_int64(array_ops.concat_v2([[new_length], new_ids], 0)))
   u2 = self.sl_scores.assign(
       array_ops.concat_v2([[smallest_new_score], new_scores], 0))
   self.last_ops = [u1, u2]
   return control_flow_ops.group(u1, u2)
Example #22
    def testConcat(self):
        tf_val = array_ops.concat_v2(
            [[16, 37],
             array_ops.placeholder(dtypes.int32, shape=(2, ))], 0)
        c_val = tensor_util.constant_value_as_shape(tf_val)
        self.assertEqual([16, 37, None, None], c_val.as_list())

        tf_val = array_ops.concat_v2(
            [[16, 37],
             array_ops.placeholder(dtypes.int32, shape=(1, )), [48]], 0)
        c_val = tensor_util.constant_value_as_shape(tf_val)
        self.assertEqual([16, 37, None, 48], c_val.as_list())
Example #23
def _flip_matrix_to_vector_dynamic(mat, batch_shape):
    """Flip matrix to vector with dynamic shapes."""
    mat_rank = array_ops.rank(mat)
    k = array_ops.gather(array_ops.shape(mat), mat_rank - 2)
    final_shape = array_ops.concat_v2((batch_shape, [k]), 0)

    # mat.shape = matrix_batch_shape + [k, M]
    # Permutation corresponding to [M] + matrix_batch_shape + [k]
    perm = array_ops.concat_v2(
        ([mat_rank - 1], math_ops.range(0, mat_rank - 1)), 0)
    mat_with_end_at_beginning = array_ops.transpose(mat, perm=perm)
    vector = array_ops.reshape(mat_with_end_at_beginning, final_shape)
    return vector
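The flip works by rotating the trailing M axis to the front and then reshaping into batch_shape + [k]. A NumPy sketch with concrete, made-up shapes; here batch_shape happens to equal [M] + matrix_batch_shape, so the final reshape only relabels axes:
```python
import numpy as np

# mat has shape matrix_batch_shape + [k, M]; here matrix_batch_shape=(2,), k=3, M=4.
mat = np.arange(2 * 3 * 4).reshape(2, 3, 4)
batch_shape = (4, 2)                                   # hypothetical target batch shape

perm = (mat.ndim - 1,) + tuple(range(mat.ndim - 1))    # [M] + matrix_batch_shape + [k]
vector = np.transpose(mat, perm).reshape(batch_shape + (3,))
print(vector.shape)                                    # (4, 2, 3)
```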
Example #24
def _flip_matrix_to_vector_dynamic(mat, batch_shape):
  """Flip matrix to vector with dynamic shapes."""
  mat_rank = array_ops.rank(mat)
  k = array_ops.gather(array_ops.shape(mat), mat_rank - 2)
  final_shape = array_ops.concat_v2((batch_shape, [k]), 0)

  # mat.shape = matrix_batch_shape + [k, M]
  # Permutation corresponding to [M] + matrix_batch_shape + [k]
  perm = array_ops.concat_v2(
      ([mat_rank - 1], math_ops.range(0, mat_rank - 1)), 0)
  mat_with_end_at_beginning = array_ops.transpose(mat, perm=perm)
  vector = array_ops.reshape(mat_with_end_at_beginning, final_shape)
  return vector
Example #25
def _propagate(dim_indices, conf, cell, c_prev, m_prev, new_output, new_state,
               first_call):
  """Propagates through all the cells in dim_indices dimensions.
  """
  if len(dim_indices) == 0:
    return

  # Because of the way RNNCells are implemented, we take the last dimension
  # (H_{N-1}) out and feed it as the state of the RNN cell
  # (in `last_dim_output`).
  # The inputs of the cell (H_0 to H_{N-2}) are concatenated into `cell_inputs`.
  if conf.num_dims > 1:
    ls_cell_inputs = [None] * (conf.num_dims - 1)
    for d in conf.dims[:-1]:
      ls_cell_inputs[d.idx] = new_output[d.idx] if new_output[
          d.idx] is not None else m_prev[d.idx]
    cell_inputs = array_ops.concat_v2(ls_cell_inputs, 1)
  else:
    cell_inputs = array_ops.zeros([m_prev[0].get_shape().as_list()[0], 0],
                                  m_prev[0].dtype)

  last_dim_output = new_output[-1] if new_output[-1] is not None else m_prev[-1]

  for i in dim_indices:
    d = conf.dims[i]
    if d.non_recurrent_fn:
      linear_args = array_ops.concat_v2(
          [cell_inputs, last_dim_output],
          1) if conf.num_dims > 1 else last_dim_output
      with vs.variable_scope('non_recurrent' if conf.tied else
                             'non_recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        new_output[d.idx] = layers.legacy_fully_connected(
            linear_args,
            num_output_units=conf.num_units,
            activation_fn=d.non_recurrent_fn,
            weight_init=vs.get_variable_scope().initializer or
            layers.initializers.xavier_initializer)
    else:
      if c_prev[i] is not None:
        cell_state = array_ops.concat_v2([c_prev[i], last_dim_output], 1)
      else:
        # for GRU/RNN, the state is just the previous output
        cell_state = last_dim_output

      with vs.variable_scope('recurrent' if conf.tied else
                             'recurrent/cell_{}'.format(i)):
        if conf.tied and not (first_call and i == dim_indices[0]):
          vs.get_variable_scope().reuse_variables()
        new_output[d.idx], new_state[d.idx] = cell(cell_inputs, cell_state)
Example #26
 def _testGradientsForAxis(self,
                           inp_tensors,
                           axis,
                           output_shape,
                           feed_dict=None):
     with self.test_session():
         c = array_ops.concat_v2(inp_tensors, axis)
         grad_inp = np.random.rand(*output_shape).astype("f")
         grad_tensor = constant_op.constant(
             [float(x) for x in grad_inp.flatten()], shape=output_shape)
         grad = gradients_impl.gradients([c], inp_tensors, [grad_tensor])
         concated_grad = array_ops.concat_v2(grad, axis)
         result = concated_grad.eval(feed_dict=feed_dict)
         self.assertAllEqual(result, grad_inp)
Example #27
 def testOpsBetweenCycle(self):
     with ops.Graph().as_default() as g:
         t1 = constant(1.0)
         t2 = constant(2.0)
         t3 = array_ops.pack([t1, t2])
         t4 = array_ops.concat_v2([t3, t3, t3], 0)
         t5 = constant([1.0])
         t6 = array_ops.concat_v2([t4, t5], 0)
         t7 = array_ops.concat_v2([t6, t3], 0)
     self._assertOpListEqual([t6.op, t4.op, t3.op],
                             _OpsBetween(g, [t6.op], [t3.op]))
     self._assertOpListEqual([t7.op, t6.op, t5.op, t4.op, t3.op, t1.op],
                             _OpsBetween(g, [t7.op], [t1.op, t5.op]))
     self._assertOpListEqual([t6.op, t5.op, t4.op, t3.op, t2.op],
                             _OpsBetween(g, [t6.op], [t2.op, t5.op]))
Example #28
 def testOpsBetweenCycle(self):
   with ops.Graph().as_default() as g:
     t1 = constant(1.0)
     t2 = constant(2.0)
     t3 = array_ops.pack([t1, t2])
     t4 = array_ops.concat_v2([t3, t3, t3], 0)
     t5 = constant([1.0])
     t6 = array_ops.concat_v2([t4, t5], 0)
     t7 = array_ops.concat_v2([t6, t3], 0)
   self._assertOpListEqual([t6.op, t4.op, t3.op],
                           _OpsBetween(g, [t6.op], [t3.op]))
   self._assertOpListEqual([t7.op, t6.op, t5.op, t4.op, t3.op, t1.op],
                           _OpsBetween(g, [t7.op], [t1.op, t5.op]))
   self._assertOpListEqual([t6.op, t5.op, t4.op, t3.op, t2.op],
                           _OpsBetween(g, [t6.op], [t2.op, t5.op]))
Example #29
 def refresh_shortlist():
     """Update the shortlist with the highest scores in id_to_score."""
     new_scores, new_ids = nn_ops.top_k(self.id_to_score,
                                        self.shortlist_size)
     smallest_new_score = math_ops.reduce_min(new_scores)
     new_length = math_ops.reduce_sum(
         math_ops.to_int32(
             math_ops.greater(new_scores, dtypes.float32.min)))
     u1 = self.sl_ids.assign(
         math_ops.to_int64(
             array_ops.concat_v2([[new_length], new_ids], 0)))
     u2 = self.sl_scores.assign(
         array_ops.concat_v2([[smallest_new_score], new_scores], 0))
     self.last_ops = [u1, u2]
     return control_flow_ops.group(u1, u2)
Example #30
  def test_broadcast_apply_and_solve(self):
    # These cannot be done in the automated (base test class) tests since they
    # test shapes that tf.matmul cannot handle.
    # In particular, tf.matmul does not broadcast.
    with self.test_session() as sess:
      x = random_ops.random_normal(shape=(2, 2, 3, 4))

      # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve
      # and apply with 'x' as the argument.
      diag = random_ops.random_uniform(shape=(2, 1, 3))
      operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True)
      self.assertAllEqual((2, 1, 3, 3), operator.shape)

      # Create a batch matrix with the broadcast shape of operator.
      diag_broadcast = array_ops.concat_v2((diag, diag), 1)
      mat = array_ops.matrix_diag(diag_broadcast)
      self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

      operator_apply = operator.apply(x)
      mat_apply = math_ops.matmul(mat, x)
      self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape())
      self.assertAllClose(*sess.run([operator_apply, mat_apply]))

      operator_solve = operator.solve(x)
      mat_solve = linalg_ops.matrix_solve(mat, x)
      self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
      self.assertAllClose(*sess.run([operator_solve, mat_solve]))
Example #31
 def construct_fn(attention_query, attention_keys, attention_values):
   context = attention_score_fn(attention_query, attention_keys,
                                attention_values)
   concat_input = array_ops.concat_v2([attention_query, context], 1)
   attention = layers.linear(
       concat_input, num_units, biases_initializer=None, scope=scope)
   return attention
Example #32
  def _concat(self):
    """Returns the overall concatenated value as a `Tensor`.

    This is different from using the partitioned variable directly as a tensor
    (through tensor conversion and `as_tensor`) in that it creates a new set of
    operations that keeps the control dependencies from its scope.

    Returns:
      `Tensor` containing the concatenated value.
    """
    if len(self._variable_list) == 1:
      with ops.name_scope(None):
        return array_ops.identity(self._variable_list[0], name=self._name)

    partition_axes = self._partition_axes()

    if len(partition_axes) > 1:
      raise NotImplementedError(
          "Cannot concatenate along more than one dimension: %s.  "
          "Multi-axis partition concat is not supported" % str(partition_axes))
    partition_ix = partition_axes[0]

    with ops.name_scope(self._name + "/ConcatPartitions/"):
      concatenated = array_ops.concat_v2(self._variable_list, partition_ix)

    with ops.name_scope(None):
      return array_ops.identity(concatenated, name=self._name)
Example #33
def _transpose_batch_time(x):
  """Transpose the batch and time dimensions of a Tensor.

  Retains as much of the static shape information as possible.

  Args:
    x: A tensor of rank 2 or higher.

  Returns:
    x transposed along the first two dimensions.

  Raises:
    ValueError: if `x` is rank 1 or lower.
  """
  x_static_shape = x.get_shape()
  if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
    raise ValueError(
        "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
        (x, x_static_shape))
  x_rank = array_ops.rank(x)
  x_t = array_ops.transpose(
      x, array_ops.concat_v2(
          ([1, 0], math_ops.range(2, x_rank)), axis=0))
  x_t.set_shape(
      tensor_shape.TensorShape([
          x_static_shape[1].value, x_static_shape[0].value
      ]).concatenate(x_static_shape[2:]))
  return x_t
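The permutation concat_v2(([1, 0], range(2, rank))) swaps the first two axes while leaving any trailing axes untouched, which is what lets this helper handle tensors of any rank >= 2. A short NumPy sketch of the same perm:
```python
import numpy as np

x = np.zeros((5, 2, 7, 3))                             # [time, batch, ...]
perm = np.concatenate(([1, 0], np.arange(2, x.ndim)))  # swap axes 0 and 1, keep the rest
x_t = np.transpose(x, perm)
print(x_t.shape)                                       # (2, 5, 7, 3)
```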
Example #34
  def sample(self, sample_shape=(), seed=None, name="sample",
             **condition_kwargs):
    """Generate samples of the specified shape.

    Note that a call to `sample()` without arguments will generate a single
    sample.

    Args:
      sample_shape: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
      seed: Python integer seed for RNG
      name: name to give to the op.
      **condition_kwargs: Named arguments forwarded to subclass implementation.

    Returns:
      samples: a `Tensor` with prepended dimensions `sample_shape`.
    """
    with self._name_scope(name, values=[sample_shape]):
      sample_shape = ops.convert_to_tensor(
          sample_shape, dtype=dtypes.int32, name="sample_shape")
      sample_shape, n = self._expand_sample_shape_to_vector(
          sample_shape, "sample_shape")
      samples = self._sample_n(n, seed, **condition_kwargs)
      batch_event_shape = array_ops.shape(samples)[1:]
      final_shape = array_ops.concat_v2([sample_shape, batch_event_shape], 0)
      samples = array_ops.reshape(samples, final_shape)
      samples = self._set_sample_static_shape(samples, sample_shape)
      return samples
Example #35
    def testPartialShapes(self):
        x = array_ops.placeholder(dtypes.float32)

        # Unknown input shape, partial new shape.
        y = array_ops.reshape(x, [1, 1, -1, 1])
        self.assertEqual([1, 1, None, 1], y.get_shape().as_list())

        # Unknown input shape, unknown new shape.
        y = array_ops.reshape(x, array_ops.placeholder(dtypes.int32))
        self.assertEqual(None, y.get_shape().ndims)

        # Unknown input shape, known rank for new shape.
        y = array_ops.reshape(x, array_ops.placeholder(dtypes.int32, shape=(3,)))
        self.assertEqual([None, None, None], y.get_shape().as_list())

        # Unknown input shape, partial new shape using `tf.stack()`.
        y = array_ops.reshape(x, [array_ops.placeholder(dtypes.int32), 37])
        self.assertEqual([None, 37], y.get_shape().as_list())

        # Unknown input shape, partial new shape using `tf.concat_v2()`.
        y = array_ops.reshape(x, array_ops.concat_v2([array_ops.placeholder(dtypes.int32, shape=(2,)), [37, 42]], 0))
        self.assertEqual([None, None, 37, 42], y.get_shape().as_list())

        # Unknown input shape, partial new shape using `tf.shape()`.
        y = array_ops.reshape(x, array_ops.shape(array_ops.placeholder(dtypes.float32, shape=[None, 37, None])))
        self.assertEqual([None, 37, None], y.get_shape().as_list())
Example #36
 def _sample_n(self, n, seed=None):
     shape = array_ops.concat_v2(([n], self.batch_shape()), 0)
     samples = random_ops.random_uniform(shape=shape,
                                         dtype=self.dtype,
                                         seed=seed)
     return (array_ops.expand_dims(self.a, 0) +
             array_ops.expand_dims(self.range(), 0) * samples)
Example #37
    def testDynamicAttentionDecoderStateIsTuple(self):
      with self.test_session() as sess:
        with variable_scope.variable_scope(
            "root", initializer=init_ops.constant_initializer(0.5)):
          cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
          cell = core_rnn_cell_impl.MultiRNNCell(
              cells=[cell] * 2, state_is_tuple=True)
          inp = constant_op.constant(0.5, shape=[2, 2, 2])
          enc_outputs, enc_state = core_rnn.static_rnn(
              cell, inp, dtype=dtypes.float32)
          attn_states = array_ops.concat_v2([
              array_ops.reshape(e, [-1, 1, cell.output_size]) for e in
              enc_outputs
          ], 1)
          dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
          dec, mem = seq2seq_lib.attention_decoder(
              dec_inp, enc_state, attn_states, cell, output_size=4)
          sess.run([variables.global_variables_initializer()])
          res = sess.run(dec)
          self.assertEqual(3, len(res))
          self.assertEqual((2, 4), res[0].shape)

          res = sess.run([mem])
          self.assertEqual(2, len(res[0]))
          self.assertEqual((2, 2), res[0][0].c.shape)
          self.assertEqual((2, 2), res[0][0].h.shape)
          self.assertEqual((2, 2), res[0][1].c.shape)
          self.assertEqual((2, 2), res[0][1].h.shape)
Example #38
  def logits_to_predictions(self, logits, proba=False):
    if proba:
      raise ValueError(
          "logits to probabilities is not supported for _BinarySvmTargetColumn")

    logits = array_ops.concat_v2([array_ops.zeros_like(logits), logits], 1)
    return math_ops.argmax(logits, 1)
Example #39
    def testTimeReversedFusedRNN(self):
        with self.test_session() as sess:
            initializer = init_ops.random_uniform_initializer(-0.01,
                                                              0.01,
                                                              seed=19890213)
            cell = core_rnn_cell_impl.BasicRNNCell(10)
            batch_size = 5
            input_size = 20
            timelen = 15
            inputs = constant_op.constant(
                np.random.randn(timelen, batch_size, input_size))

            # test bi-directional rnn
            with variable_scope.variable_scope("basic",
                                               initializer=initializer):
                unpacked_inputs = array_ops.unstack(inputs)
                outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
                    cell, cell, unpacked_inputs, dtype=dtypes.float64)
                packed_outputs = array_ops.stack(outputs)
                basic_vars = [
                    v for v in variables.trainable_variables()
                    if v.name.startswith("basic/")
                ]
                sess.run([variables.global_variables_initializer()])
                basic_outputs, basic_fw_state, basic_bw_state = sess.run(
                    [packed_outputs, fw_state, bw_state])
                basic_grads = sess.run(
                    gradients_impl.gradients(packed_outputs, inputs))
                basic_wgrads = sess.run(
                    gradients_impl.gradients(packed_outputs, basic_vars))

            with variable_scope.variable_scope("fused",
                                               initializer=initializer):
                fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(cell)
                fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(fused_cell)
                fw_outputs, fw_state = fused_cell(inputs,
                                                  dtype=dtypes.float64,
                                                  scope="fw")
                bw_outputs, bw_state = fused_bw_cell(inputs,
                                                     dtype=dtypes.float64,
                                                     scope="bw")
                outputs = array_ops.concat_v2([fw_outputs, bw_outputs], 2)
                fused_vars = [
                    v for v in variables.trainable_variables()
                    if v.name.startswith("fused/")
                ]
                sess.run([variables.global_variables_initializer()])
                fused_outputs, fused_fw_state, fused_bw_state = sess.run(
                    [outputs, fw_state, bw_state])
                fused_grads = sess.run(
                    gradients_impl.gradients(outputs, inputs))
                fused_wgrads = sess.run(
                    gradients_impl.gradients(outputs, fused_vars))

            self.assertAllClose(basic_outputs, fused_outputs)
            self.assertAllClose(basic_fw_state, fused_fw_state)
            self.assertAllClose(basic_bw_state, fused_bw_state)
            self.assertAllClose(basic_grads, fused_grads)
            for basic, fused in zip(basic_wgrads, fused_wgrads):
                self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
Example #40
  def _concat(self):
    """Returns the overall concatenated value as a `Tensor`.

    This is different from using the partitioned variable directly as a tensor
    (through tensor conversion and `as_tensor`) in that it creates a new set of
    operations that keeps the control dependencies from its scope.

    Returns:
      `Tensor` containing the concatenated value.
    """
    if len(self._variable_list) == 1:
      with ops.name_scope(None):
        return array_ops.identity(self._variable_list[0], name=self._name)

    partition_axes = self._partition_axes()

    if len(partition_axes) > 1:
      raise NotImplementedError(
          "Cannot concatenate along more than one dimension: %s.  "
          "Multi-axis partition concat is not supported" % str(partition_axes))
    partition_ix = partition_axes[0]

    with ops.name_scope(self._name + "/ConcatPartitions/"):
      concatenated = array_ops.concat_v2(self._variable_list, partition_ix)

    with ops.name_scope(None):
      return array_ops.identity(concatenated, name=self._name)
Example #41
def _SplitVGrad(op, *grads):
    returnval = array_ops.concat_v2(list(grads), op.inputs[2])
    returnval = [returnval] + [None] * (len(op.inputs) - 1)
    print(returnval)
    return returnval
Example #42
    def sample(self,
               sample_shape=(),
               seed=None,
               name="sample",
               **condition_kwargs):
        """Generate samples of the specified shape.

    Note that a call to `sample()` without arguments will generate a single
    sample.

    Args:
      sample_shape: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
      seed: Python integer seed for RNG
      name: name to give to the op.
      **condition_kwargs: Named arguments forwarded to subclass implementation.

    Returns:
      samples: a `Tensor` with prepended dimensions `sample_shape`.
    """
        with self._name_scope(name, values=[sample_shape]):
            sample_shape = ops.convert_to_tensor(sample_shape,
                                                 dtype=dtypes.int32,
                                                 name="sample_shape")
            sample_shape, n = self._expand_sample_shape_to_vector(
                sample_shape, "sample_shape")
            samples = self._sample_n(n, seed, **condition_kwargs)
            batch_event_shape = array_ops.shape(samples)[1:]
            final_shape = array_ops.concat_v2(
                [sample_shape, batch_event_shape], 0)
            samples = array_ops.reshape(samples, final_shape)
            samples = self._set_sample_static_shape(samples, sample_shape)
            return samples
Example #43
    def __call__(self, inputs, state, scope=None):
        """LSTM as mentioned in paper."""
        with vs.variable_scope(scope or "basic_lstm_cell"):
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(
                    value=state, num_or_size_splits=2, axis=1)
            g = tf.concat([inputs, h], 1)
            concat = linear([g], 4 * self._num_units)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(
                value=concat, num_or_size_splits=4, axis=1)

            new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat_v2([new_c, new_h], 1)
            return new_h, new_state
Example #44
  def _possibly_broadcast_batch_shape(self, x):
    """Return 'x', possibly after broadcasting the leading dimensions."""
    # If we have no batch shape, our batch shape broadcasts with everything!
    if self._batch_shape_arg is None:
      return x

    # Static attempt:
    #   If we determine that no broadcast is necessary, pass x through
    #   If we need a broadcast, add to an array of zeros.
    #
    # special_shape is the shape that, when broadcast with x's shape, will give
    # the correct broadcast_shape.  Note that
    #   We have already verified the second to last dimension of self.shape
    #   matches x's shape in assert_compatible_matrix_dimensions.
    #   Also, the final dimension of 'x' can have any shape.
    #   Therefore, the final two dimensions of special_shape are 1's.
    special_shape = self.batch_shape.concatenate([1, 1])
    bshape = array_ops.broadcast_static_shape(x.get_shape(), special_shape)
    if special_shape.is_fully_defined():
      # bshape.is_fully_defined iff special_shape.is_fully_defined.
      if bshape == x.get_shape():
        return x
      # Use the built in broadcasting of addition.
      zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
      return x + zeros

    # Dynamic broadcast:
    #   Always add to an array of zeros, rather than using a "cond", since a
    #   cond would require copying data from GPU --> CPU.
    special_shape = array_ops.concat_v2(
        (self.batch_shape_dynamic(), [1, 1]), 0)
    zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype)
    return x + zeros
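The dynamic branch avoids a cond by adding a zeros tensor of shape batch_shape + [1, 1]; broadcasting in the addition does the batch expansion while the trailing matrix dimensions pass through unchanged. A NumPy sketch with illustrative shapes:
```python
import numpy as np

batch_shape = (2, 3)
x = np.random.randn(3, 4, 5)            # batch dim 3 broadcasts; [4, 5] is the matrix part
zeros = np.zeros(batch_shape + (1, 1))  # analogue of special_shape above
x_broadcast = x + zeros                 # broadcast happens inside the addition
print(x_broadcast.shape)                # (2, 3, 4, 5)
```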
Example #45
    def tensors_to_item(self, keys_to_tensors):
        indices = keys_to_tensors[self._indices_key]
        values = keys_to_tensors[self._values_key]
        if self._shape_key:
            shape = keys_to_tensors[self._shape_key]
            if isinstance(shape, sparse_tensor.SparseTensor):
                shape = sparse_ops.sparse_tensor_to_dense(shape)
        elif self._shape:
            shape = self._shape
        else:
            shape = indices.dense_shape
        indices_shape = array_ops.shape(indices.indices)
        rank = indices_shape[1]
        ids = math_ops.to_int64(indices.values)
        indices_columns_to_preserve = array_ops.slice(
            indices.indices, [0, 0], array_ops.stack([-1, rank - 1]))
        new_indices = array_ops.concat_v2(
            [indices_columns_to_preserve,
             array_ops.reshape(ids, [-1, 1])], 1)

        tensor = sparse_tensor.SparseTensor(new_indices, values.values, shape)
        if self._densify:
            tensor = sparse_ops.sparse_tensor_to_dense(tensor,
                                                       self._default_value)
        return tensor
Example #46
  def make_batch_of_event_sample_matrices(
      self, x, expand_batch_dim=True,
      name="make_batch_of_event_sample_matrices"):
    """Reshapes/transposes `Distribution` `Tensor` from S+B+E to B_+E_+S_.

    Where:
      - `B_ = B if B or not expand_batch_dim  else [1]`,
      - `E_ = E if E else [1]`,
      - `S_ = [tf.reduce_prod(S)]`.

    Args:
      x: `Tensor`.
      expand_batch_dim: Python `Boolean` scalar. If `True` the batch dims will
        be expanded such that batch_ndims>=1.
      name: `String`. The name to give this op.

    Returns:
      x: `Tensor`. Input transposed/reshaped to `B_+E_+S_`.
      sample_shape: `Tensor` (1D, `int32`).
    """
    with self._name_scope(name, values=[x]):
      x = ops.convert_to_tensor(x, name="x")
      sample_shape, batch_shape, event_shape = self.get_shape(x)
      event_shape = distribution_util.pick_vector(
          self._event_ndims_is_0, [1], event_shape)
      if expand_batch_dim:
        batch_shape = distribution_util.pick_vector(
            self._batch_ndims_is_0, [1], batch_shape)
      new_shape = array_ops.concat_v2([[-1], batch_shape, event_shape], 0)
      x = array_ops.reshape(x, shape=new_shape)
      x = distribution_util.rotate_transpose(x, shift=-1)
      return x, sample_shape
Example #47
  def _shape_dynamic(self):
    matrix_shape = array_ops.stack(
        (self._num_rows, self._num_rows), axis=0)
    if self._batch_shape_arg is None:
      return matrix_shape

    return array_ops.concat_v2((self._batch_shape_arg, matrix_shape), 0)
Example #48
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup;
  then they are unflattened to match the original ids shape, plus an extra
  trailing dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    embeds_flat = nn.embedding_lookup(params, ids_flat, name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
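The pattern here is: flatten the ids, do a flat lookup, then reshape with a trailing -1 so the embedding dimension is appended back onto the original ids shape. A NumPy sketch with made-up sizes:
```python
import numpy as np

params = np.random.randn(10, 4)                   # 10 embeddings of width 4
ids = np.array([[1, 2], [3, 4], [5, 0]])          # ids of arbitrary rank

ids_flat = ids.reshape(-1)
embeds_flat = params[ids_flat]                    # flat lookup
embeds = embeds_flat.reshape(ids.shape + (-1,))   # analogue of concat_v2([shape, [-1]], 0)
print(embeds.shape)                               # (3, 2, 4)
```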
Example #49
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except it supports multi-dimensional `ids`,
  which avoids reshaping the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and shape
    `[ids1, ids2, ..., d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat_v2(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
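
A NumPy sketch of the de-duplication trick, with made-up data in which the ids are heavily repeated (note np.unique sorts the unique values, whereas tf.unique keeps first-occurrence order; the gathered result is the same either way):

import numpy as np

params = np.random.randn(10, 4)
ids = np.array([[7, 7, 7], [7, 2, 7]])
ids_flat = ids.reshape([-1])
unique_ids, idx = np.unique(ids_flat, return_inverse=True)   # only ids 2 and 7 are looked up
unique_embeddings = params[unique_ids]                       # (2, 4)
embeds = unique_embeddings[idx].reshape(ids.shape + params.shape[1:])
print(unique_ids, embeds.shape)                              # [2 7] (2, 3, 4)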
예제 #50
0
def _flip_vector_to_matrix_dynamic(vec, batch_shape):
    """flip_vector_to_matrix with dynamic shapes."""
    # Shapes associated with batch_shape
    batch_rank = array_ops.size(batch_shape)

    # Shapes associated with vec.
    vec = ops.convert_to_tensor(vec, name="vec")
    vec_shape = array_ops.shape(vec)
    vec_rank = array_ops.rank(vec)
    vec_batch_rank = vec_rank - 1

    m = vec_batch_rank - batch_rank
    # vec_shape_left = [M1,...,Mm] or [].
    vec_shape_left = array_ops.strided_slice(vec_shape, [0], [m])
    # If vec_shape_left = [], then condensed_shape = [1] since reduce_prod([]) = 1
    # If vec_shape_left = [M1,...,Mm], condensed_shape = [M1*...*Mm]
    condensed_shape = [math_ops.reduce_prod(vec_shape_left)]
    k = array_ops.gather(vec_shape, vec_rank - 1)
    new_shape = array_ops.concat_v2((batch_shape, [k], condensed_shape), 0)

    def _flip_front_dims_to_back():
        # Permutation corresponding to [N1,...,Nn] + [k, M1,...,Mm]
        perm = array_ops.concat_v2(
            (math_ops.range(m, vec_rank), math_ops.range(0, m)), 0)
        return array_ops.transpose(vec, perm=perm)

    x_flipped = control_flow_ops.cond(math_ops.less(0, m),
                                      _flip_front_dims_to_back,
                                      lambda: array_ops.expand_dims(vec, -1))

    return array_ops.reshape(x_flipped, new_shape)
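
A NumPy sketch of the same flip, assuming hypothetical extra leading dims M = [2, 3], batch_shape = [5] and k = 4, so vec has shape [2, 3, 5, 4] and the result has shape [5, 4, 6]:

import numpy as np

vec = np.zeros([2, 3, 5, 4])           # [M1, M2] + batch_shape + [k]
batch_rank = 1
m = (vec.ndim - 1) - batch_rank        # number of leading M dims, here 2
flipped = np.transpose(vec, axes=list(range(m, vec.ndim)) + list(range(0, m)))
matrix = flipped.reshape([5, 4, -1])   # batch_shape + [k, M1*M2]
print(flipped.shape, matrix.shape)     # (5, 4, 2, 3) (5, 4, 6)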
예제 #51
0
 def _sample_n(self, n, seed=None):
     new_shape = array_ops.concat_v2(([n], self.batch_shape()), 0)
     uniform = random_ops.random_uniform(new_shape,
                                         seed=seed,
                                         dtype=self.p.dtype)
     sample = math_ops.less(uniform, self.p)
     return math_ops.cast(sample, self.dtype)
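
The sampling trick above is the inverse-CDF rule for a Bernoulli: draw a uniform and compare it against p. A minimal NumPy sketch with a made-up batch of probabilities:

import numpy as np

p = np.array([0.1, 0.5, 0.9])                      # batch_shape = (3,)
n = 4
uniform = np.random.uniform(size=(n,) + p.shape)   # [n] + batch_shape
sample = (uniform < p).astype(np.float32)          # 1.0 with probability p
print(sample.shape)                                # (4, 3)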
예제 #52
0
def embedding_lookup(params, ids, name='embedding_lookup'):
    """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup
  then, they are unflattend to match the original ids shape plus an extra
  leading dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indecies in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
    with ops.name_scope(name, 'embedding_lookup', [params, ids]):
        params = ops.convert_to_tensor(params)
        ids = ops.convert_to_tensor(ids)
        shape = array_ops_.shape(ids)
        ids_flat = array_ops_.reshape(
            ids, math_ops.reduce_prod(shape, keep_dims=True))
        embeds_flat = nn.embedding_lookup(params, ids_flat, name)
        embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
        embeds = array_ops_.reshape(embeds_flat, embed_shape)
        embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
        return embeds
예제 #53
0
def embedding_lookup_unique(params, ids, name=None):
    """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except that it supports multi-dimensional
  `ids`, which avoids having to reshape the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and shape
    `[ids1, ids2, ..., d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
    with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
        ids = ops.convert_to_tensor(ids)
        shape = array_ops.shape(ids)
        ids_flat = array_ops.reshape(
            ids, math_ops.reduce_prod(shape, keep_dims=True))
        unique_ids, idx = array_ops.unique(ids_flat)
        unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
        embeds_flat = array_ops.gather(unique_embeddings, idx)
        embed_shape = array_ops.concat_v2(
            [shape, array_ops.shape(unique_embeddings)[1:]], 0)
        embeds = array_ops.reshape(embeds_flat, embed_shape)
        embeds.set_shape(ids.get_shape().concatenate(
            unique_embeddings.get_shape()[1:]))
        return embeds
예제 #54
0
def ParseLabelTensorOrDict(labels):
  """Return a tensor to use for input labels to tensor_forest.

  The incoming targets can be a dict where keys are the string names of the
  columns, which we turn into a single 1-D tensor for classification or
  2-D tensor for regression.

  Converts sparse tensors to dense ones.

  Args:
    labels: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A 2-D tensor for labels/outputs.
  """
  if isinstance(labels, dict):
    return math_ops.to_float(
        array_ops.concat_v2(
            [
                sparse_ops.sparse_tensor_to_dense(
                    labels[k], default_value=-1) if isinstance(
                        labels[k], sparse_tensor.SparseTensor) else labels[k]
                for k in sorted(labels.keys())
            ],
            1))
  else:
    if isinstance(labels, sparse_tensor.SparseTensor):
      return math_ops.to_float(sparse_ops.sparse_tensor_to_dense(
          labels, default_value=-1))
    else:
      return math_ops.to_float(labels)
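
A NumPy sketch of the dict branch with made-up dense label columns: the columns are concatenated in sorted key order into a single 2-D float tensor (sparse columns would first be densified with a default value of -1):

import numpy as np

labels = {"b": np.array([[1.], [0.]]), "a": np.array([[3.], [4.]])}
parsed = np.concatenate(
    [labels[k] for k in sorted(labels.keys())], axis=1).astype(np.float32)
print(parsed)    # [[3. 1.]
                 #  [4. 0.]]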
예제 #55
0
  def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell = core_rnn_cell_impl.GRUCell(2)
        enc_outputs, enc_state = core_rnn.static_rnn(
            cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat_v2([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp,
            enc_state,
            attn_states,
            cell,
            num_symbols=4,
            embedding_size=2,
            output_size=3)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 3), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)