Example #1
0
  def to_weighted_sum(self,
                      input_tensor,
                      num_outputs=1,
                      weight_collections=None,
                      trainable=True):
    """Returns a Tensor as linear predictions and a list of created Variable."""
    dimension = self.source_column.dimension
    batch_size = array_ops.shape(input_tensor)[0]

    if dimension > 1:
      i1 = array_ops.reshape(array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, batch_size), 1), [1, dimension]), [-1])
      i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
      # Flatten the bucket indices and unique them across dimensions
      # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
      # TODO(chapelle): move that logic to insert_transformed_feature to ensure
      #   unique buckets across dimensions after crossing.
      bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
    else:
      # Simpler indices when dimension=1
      i1 = math_ops.range(0, batch_size)
      i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
      bucket_indices = array_ops.reshape(input_tensor, [-1])

    indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
    shape = math_ops.to_int64(array_ops.pack([batch_size, 1]))
    sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
    vocab_size = self.length * self.source_column.dimension

    return _create_embedding_lookup(
        sparse_id_values, vocab_size, num_outputs,
        _add_variable_collection(weight_collections), 0., "sum",
        trainable, self.name + "_weights")
def crop_to_1d_bounding_box(image, offset_height, target_height,
                         dynamic_shape=False):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
                   the input.
    target_height: Height of the result.
    dynamic_shape: Whether the input image has undertermined shape. If set to
      `True`, shape information will be retrieved at run time. Default to
      `False`.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
    `target_*` arguments, and `dynamic_shape` is set to `False`.
  """
  image = tf.convert_to_tensor(image, name='image')
  height, _ = _ImageDimensions(image, dynamic_shape=dynamic_shape)

  cropped = array_ops.slice(image,
                            array_ops.pack([offset_height, 0]),
                            array_ops.pack([target_height, -1]))

  return cropped
Example #3
0
  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
    the shapes `[batch_size x s]` for each s in `state_size`.
    """
    state_size = self.state_size
    if nest.is_sequence(state_size):
      state_size_flat = nest.flatten(state_size)
      zeros_flat = [
          array_ops.zeros(
              array_ops.pack(_state_size_with_prefix(s, prefix=[batch_size])),
              dtype=dtype)
          for s in state_size_flat]
      for s, z in zip(state_size_flat, zeros_flat):
        z.set_shape(_state_size_with_prefix(s, prefix=[None]))
      zeros = nest.pack_sequence_as(structure=state_size,
                                    flat_sequence=zeros_flat)
    else:
      zeros_size = _state_size_with_prefix(state_size, prefix=[batch_size])
      zeros = array_ops.zeros(array_ops.pack(zeros_size), dtype=dtype)
      zeros.set_shape(_state_size_with_prefix(state_size, prefix=[None]))

    return zeros
Example #4
0
  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int, then the return value is a `2-D` tensor of
      shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
    the shapes `[batch_size x s]` for each s in `state_size`.
    """
    state_size = self.state_size
    if _is_sequence(state_size):
      state_size_flat = _unpacked_state(state_size)
      zeros_flat = [
          array_ops.zeros(array_ops.pack([batch_size, s]), dtype=dtype)
          for s in state_size_flat]
      for s, z in zip(state_size_flat, zeros_flat):
        z.set_shape([None, s])
      zeros = _packed_state(structure=state_size, state=zeros_flat)
    else:
      zeros = array_ops.zeros(
          array_ops.pack([batch_size, state_size]), dtype=dtype)
      zeros.set_shape([None, state_size])

    return zeros
  def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
      if is_list:
        inputs = array_ops.pack(inputs)
      outputs, state = rnn.dynamic_rnn(
          self._cell,
          inputs,
          sequence_length=sequence_length,
          initial_state=initial_state,
          dtype=dtype,
          time_major=True,
          scope=scope)
      if is_list:
        # Convert outputs back to list
        outputs = array_ops.unpack(outputs)
    else:  # non-dynamic rnn
      if not is_list:
        inputs = array_ops.unpack(inputs)
      outputs, state = rnn.rnn(self._cell,
                               inputs,
                               initial_state=initial_state,
                               dtype=dtype,
                               sequence_length=sequence_length,
                               scope=scope)
      if not is_list:
        # Convert outputs back to tensor
        outputs = array_ops.pack(outputs)

    return outputs, state
Example #6
0
  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int, then the return value is a `2-D` tensor of
      shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a list or tuple of ints, then the return value is
      a tuple of `2-D` tensors with shape
      `[batch_size x s] for s in state_size`.
    """
    state_size = self.state_size
    if isinstance(state_size, (list, tuple)):
      zeros = tuple(
          array_ops.zeros(array_ops.pack([batch_size, s]), dtype=dtype)
          for s in state_size)
      for s, z in zip(state_size, zeros):
        z.set_shape([None, s])
    else:
      zeros = array_ops.zeros(
          array_ops.pack([batch_size, state_size]), dtype=dtype)
      zeros.set_shape([None, state_size])

    return zeros
  def testConst(self):
    np.random.seed(7)
    with self.test_session(use_gpu=True):
      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
        data = np.random.randn(*shape).astype(np.float32)
        # Pack back into a single tensorflow tensor directly using np array
        c = array_ops.pack(data)
        # This is implemented via a Const:
        self.assertEqual(c.op.type, "Const")
        self.assertAllEqual(c.eval(), data)

        # Python lists also work for 1-D case:
        if len(shape) == 1:
          data_list = list(data)
          cl = array_ops.pack(data_list)
          self.assertEqual(cl.op.type, "Const")
          self.assertAllEqual(cl.eval(), data)

          cl = array_ops.stack(data_list)
          self.assertEqual(cl.op.type, "Const")
          self.assertAllEqual(cl.eval(), data)

      # Verify that shape induction works with shapes produced via const pack
      a = constant_op.constant([1, 2, 3, 4, 5, 6])
      b = array_ops.reshape(a, array_ops.pack([2, 3]))
      self.assertAllEqual(b.get_shape(), [2, 3])

      b = array_ops.reshape(a, array_ops.stack([2, 3]))
      self.assertAllEqual(b.get_shape(), [2, 3])
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0]
       [0, 1, 0]
       [0, 1, 0]
       [0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represents the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array represeting the predictions for a given
                 classification.
    labels: A 1-D represeting the real labels for the classification task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k X k matrix represeting the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and do not
                have the same size.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
Example #9
0
    def build_memory(self, M_prev, read_w_prev, write_w_prev, last_output):
        with tf.variable_scope("memory"):
            # 3.1 Reading
            if self.read_head_size == 1:
                read_w, read = self.build_read_head(M_prev, tf.reshape(read_w_prev, [-1, 1]), last_output, 0)
            else:
                read_w_list = []
                read_list = []

                for idx in xrange(self.read_head_size):
                    read_w_prev_idx = tf.reshape(tf.gather(read_w_prev, idx), [-1, 1])

                    read_w_idx, read_idx = self.build_read_head(M_prev, read_w_prev_idx, last_output, idx)

                    read_w_list.append(tf.transpose(read_w_idx))
                    read_list.append(tf.reshape(read_idx, [1, self.mem_size, self.mem_dim]))

                read_w = array_ops.pack(read_w_list)
                read = array_ops.pack(read_list)

            # 3.2 Writing
            if self.write_head_size == 1:
                write_w, write, erase = self.build_write_head(M_prev, tf.reshape(write_w_prev, [-1, 1]),
                                                              last_output, 0)

                M_erase = tf.ones([self.mem_size, self.mem_dim]) - OuterProd(write_w, erase)
                M_write = OuterProd(write_w, write)
            else:
                write_w_list = []
                write_list = []
                erase_list = []

                M_erases = []
                M_writes = []

                for idx in xrange(self.write_head_size):
                    write_w_prev_idx = tf.reshape(tf.gather(write_w_prev, idx), [-1, 1])

                    write_w_idx, write_idx, erase_idx = self.build_write_head(M_prev, write_w_prev_idx,
                                                                              last_output, idx)

                    write_w_list.append(tf.transpose(write_w_idx))
                    write_list.append(tf.reshape(write_idx, [1, self.mem_size, self.mem_dim]))
                    erase_list.append(tf.reshape(erase_idx, [1, 1, self.mem_dim]))

                    M_erases.append(tf.ones([self.mem_size, self.mem_dim]) * OuterProd(write_w_idx, erase_idx))
                    M_writes.append(OuterProd(write_w_idx, write_idx))

                write_w = array_ops.pack(write_w_list)
                write = array_ops.pack(write_list)
                erase = array_ops.pack(erase_list)

                M_erase = reduce(lambda x, y: x*y, M_erases)
                M_write = tf.add_n(M_writes)

            M = M_prev * M_erase + M_write

            return M, read_w, write_w, read
Example #10
0
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 3-D tensor with shape `[height, width, channels]`
    offset_height: Vertical coordinate of the top-left corner of the result in
                   the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
                  the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    3-D tensor of image with shape `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative, or either `target_height` or `target_width` is not positive.
  """
  image = ops.convert_to_tensor(image, name='image')

  assert_ops = []
  assert_ops += _Check3DImage(image, require_static=False)

  height, width, depth = _ImageDimensions(image, static_only=False)

  assert_ops += _assert(offset_width >= 0, ValueError,
                        'offset_width must be >= 0.')
  assert_ops += _assert(offset_height >= 0, ValueError,
                        'offset_height must be >= 0.')
  assert_ops += _assert(target_width > 0, ValueError,
                        'target_width must be > 0.')
  assert_ops += _assert(target_height > 0, ValueError,
                        'target_height must be > 0.')
  assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                        'width must be >= target + offset.')
  assert_ops += _assert(height >= (target_height + offset_height), ValueError,
                        'height must be >= target + offset.')
  image = control_flow_ops.with_dependencies(assert_ops, image)

  cropped = array_ops.slice(
    image,
    array_ops.pack([offset_height, offset_width, 0]),
    array_ops.pack([target_height, target_width, -1]))

  cropped_shape = [None if is_tensor(i) else i
                   for i in [target_height, target_width, depth]]
  cropped.set_shape(cropped_shape)

  return cropped
Example #11
0
 def testOpsBetweenUnreachable(self):
   with ops.Graph().as_default() as g:
     t1 = constant(1.0)
     t2 = constant(2.0)
     _ = array_ops.pack([t1, t2])
     t4 = constant(1.0)
     t5 = constant(2.0)
     t6 = array_ops.pack([t4, t5])
   # Elements of to_ops are always listed.
   self._assertOpListEqual([t6.op], _OpsBetween(g, [t6.op], [t1.op]))
Example #12
0
 def testIndexedSlicesToTensorList(self):
   with self.test_session():
     numpy_list = []
     dense_list = []
     sparse_list = []
     for _ in range(3):
       np_val = np.random.rand(4, 4, 4, 4).astype(np.float32)
       c = constant_op.constant(np_val)
       c_sparse = math_ops._as_indexed_slices(c)
       numpy_list.append(np_val)
       dense_list.append(c)
       sparse_list.append(c_sparse)
     packed_dense = array_ops.pack(dense_list)
     packed_sparse = array_ops.pack(sparse_list)
     self.assertAllClose(packed_dense.eval(), packed_sparse.eval())
Example #13
0
  def sample(self, n, seed=None, name=None):
    """Sample `n` observations from the Multivariate Normal Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the hyperparameters.
    """
    with ops.op_scope(
        [self._mu, self._sigma_chol, n], name, "MultivariateNormalSample"):
      # TODO(ebrevdo): Is there a better way to get broadcast_shape?
      broadcast_shape = self.mu.get_shape()
      n = ops.convert_to_tensor(n)
      sigma_shape_left = array_ops.slice(
          array_ops.shape(self._sigma_chol),
          [0], array_ops.pack([array_ops.rank(self._sigma_chol) - 2]))

      k_n = array_ops.pack([self._k, n])
      shape = array_ops.concat(0, [sigma_shape_left, k_n])
      white_samples = random_ops.random_normal(
          shape=shape, mean=0, stddev=1, dtype=self._mu.dtype, seed=seed)

      correlated_samples = math_ops.batch_matmul(
          self._sigma_chol, white_samples)

      # Move the last dimension to the front
      perm = array_ops.concat(
          0,
          (array_ops.pack([array_ops.rank(correlated_samples) - 1]),
           math_ops.range(0, array_ops.rank(correlated_samples) - 1)))

      # TODO(ebrevdo): Once we get a proper tensor contraction op,
      # perform the inner product using that instead of batch_matmul
      # and this slow transpose can go away!
      correlated_samples = array_ops.transpose(correlated_samples, perm)

      samples = correlated_samples + self.mu

      # Provide some hints to shape inference
      n_val = tensor_util.constant_value(n)
      final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
      samples.set_shape(final_shape)

      return samples
Example #14
0
  def call(self, inputs):
    inputs_shape = array_ops.shape(inputs)
    batch_size = inputs_shape[0]
    if self.data_format == 'channels_first':
      c_axis, h_axis, w_axis = 1, 2, 3
    else:
      c_axis, h_axis, w_axis = 3, 1, 2

    height, width = inputs_shape[h_axis], inputs_shape[w_axis]
    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
      if isinstance(dim_size, ops.Tensor):
        dim_size = math_ops.mul(dim_size, stride_size)
      elif dim_size is not None:
        dim_size *= stride_size

      if padding == 'valid' and dim_size is not None:
        dim_size += max(kernel_size - stride_size, 0)
      return dim_size

    # Infer the dynamic output shape:
    out_height = get_deconv_dim(height, stride_h, kernel_h, self.padding)
    out_width = get_deconv_dim(width, stride_w, kernel_w, self.padding)

    if self.data_format == 'channels_first':
      output_shape = (batch_size, self.filters, out_height, out_width)
      strides = (1, 1, stride_h, stride_w)
    else:
      output_shape = (batch_size, out_height, out_width, self.filters)
      strides = (1, stride_h, stride_w, 1)

    output_shape_tensor = array_ops.pack(output_shape)
    outputs = nn.conv2d_transpose(
        inputs,
        self.kernel,
        output_shape_tensor,
        strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, ndim=4))

    # Infer the static output shape:
    out_shape = inputs.get_shape().as_list()
    out_shape[c_axis] = self.filters
    out_shape[h_axis] = get_deconv_dim(
        out_shape[h_axis], stride_h, kernel_h, self.padding)
    out_shape[w_axis] = get_deconv_dim(
        out_shape[w_axis], stride_w, kernel_w, self.padding)
    outputs.set_shape(out_shape)

    if self.bias:
      outputs = nn.bias_add(
          outputs,
          self.bias,
          data_format=utils.convert_data_format(self.data_format, ndim=4))

    if self.activation is not None:
      return self.activation(outputs)
    return outputs
Example #15
0
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
  """Returns the names of all uninitialized resources in resource_list.

  If the returned tensor is empty then all resources have been initialized.

  Args:
   resource_list: resources to check. If None, will use shared_resources() +
    local_resources().
   name: name for the resource-checking op.

  Returns:
   Tensor containing names of the handles of all resources which have not
   yet been initialized.

  """
  if resource_list is None:
    resource_list = shared_resources() + local_resources()
  with ops.name_scope(name):
    if not resource_list:
      # Return an empty tensor so we only need to check for returned tensor
      # size being 0 as an indication of model ready.
      return array_ops.constant([], dtype=dtypes.string)
    # Get a 1-D boolean tensor listing whether each resource is initialized.
    variables_mask = math_ops.logical_not(array_ops.pack(
        [r.is_initialized for r in resource_list]))
    # Get a 1-D string tensor containing all the resource names.
    variable_names_tensor = array_ops.constant(
        [s.handle.name for s in resource_list])
    # Return a 1-D tensor containing all the names of uninitialized resources.
    return array_ops.boolean_mask(variable_names_tensor, variables_mask)
Example #16
0
  def __init__(
      self,
      logits,
      dtype=dtypes.int32,
      strict=True,
      strict_statistics=True,
      name="Categorical"):
    """Initialize Categorical distributions using class log-probabilities.

    Args:
      logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities
          of a set of Categorical distributions. The first `N - 1` dimensions
          index into a batch of independent distributions and the last dimension
          indexes into the classes.
      dtype: The type of the event samples (default: int32).
      strict: Unused in this distribution.
      strict_statistics:  Boolean, default True.  If True, raise an exception if
        a statistic (e.g. mean/mode/etc...) is undefined for any batch member.
        If False, batch members with valid parameters leading to undefined
        statistics will return NaN for this statistic.
      name: A name for this distribution (optional).
    """
    self._strict_statistics = strict_statistics
    self._name = name
    self._dtype = dtype
    self._strict = strict
    with ops.op_scope([logits], name):
      self._logits = ops.convert_to_tensor(logits, name="logits")
      logits_shape = array_ops.shape(self._logits)
      self._batch_rank = array_ops.size(logits_shape) - 1
      self._batch_shape = array_ops.slice(
          logits_shape, [0], array_ops.pack([self._batch_rank]))
      self._num_classes = array_ops.gather(logits_shape, self._batch_rank)
Example #17
0
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Categorical distribution.

    Args:
      n: 0-D.  Number of independent samples to draw for each distribution.
      seed: Random seed (optional).
      name: A name for this operation (optional).

    Returns:
      An `int64` `Tensor` with shape `[n, batch_shape, event_shape]`
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.logits, n], name):
        n = ops.convert_to_tensor(n, name="n")
        logits_2d = array_ops.reshape(
            self.logits, array_ops.pack([-1, self.num_classes]))
        samples = random_ops.multinomial(logits_2d, n, seed=seed)
        samples = math_ops.cast(samples, self._dtype)
        ret = array_ops.reshape(
            array_ops.transpose(samples),
            array_ops.concat(
                0, [array_ops.expand_dims(n, 0), self.batch_shape()]))
        ret.set_shape(tensor_shape.vector(tensor_util.constant_value(n))
                      .concatenate(self.get_batch_shape()))
        return ret
Example #18
0
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
  """Returns predictions and loss for sequence of predictions.

  Args:
    decoding: List of Tensors with predictions.
    labels: List of Tensors with labels.
    sampling_decoding: Optional, List of Tensor with predictions to be used
      in sampling. E.g. they shouldn't have dependncy on outputs.
      If not provided, decoding is used.
    name: Operation name.

  Returns:
    Predictions and losses tensors.
  """
  with ops.name_scope(name, "sequence_classifier", [decoding, labels]):
    predictions, xent_list = [], []
    for i, pred in enumerate(decoding):
      xent_list.append(nn.softmax_cross_entropy_with_logits(
          pred, labels[i],
          name="sequence_loss/xent_raw{0}".format(i)))
      if sampling_decoding:
        predictions.append(nn.softmax(sampling_decoding[i]))
      else:
        predictions.append(nn.softmax(pred))
    xent = math_ops.add_n(xent_list, name="sequence_loss/xent")
    loss = math_ops.reduce_sum(xent, name="sequence_loss")
    return array_ops_.pack(predictions, axis=1), loss
Example #19
0
  def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Categorical distribution.

    Args:
      n: `Scalar` `Tensor` of type `int32` or `int64`, the number of
        observations to sample.
      seed: Random seed (optional).
      name: A name for this operation (optional).

    Returns:
      An `int64` `Tensor` with shape `[n, batch_shape, event_shape]`
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self.logits, n]):
        n = ops.convert_to_tensor(n, name="n")
        logits_2d = array_ops.reshape(
            self.logits, array_ops.pack([-1, self.num_classes]))
        samples = random_ops.multinomial(logits_2d, n, seed=seed)
        samples = math_ops.cast(samples, self._dtype)
        ret = array_ops.reshape(
            array_ops.transpose(samples),
            array_ops.concat(0, ([n], self.batch_shape())))
        ret.set_shape(tensor_shape.vector(tensor_util.constant_value(n))
                      .concatenate(self.get_batch_shape()))
        return ret
Example #20
0
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Uniform Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
          with values of type `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.a, self.b, n], name):
        n = ops.convert_to_tensor(n, name="n")
        n_val = tensor_util.constant_value(n)

        shape = array_ops.concat(0, [array_ops.pack([n]), self.batch_shape()])
        samples = random_ops.random_uniform(shape=shape,
                                            dtype=self.dtype,
                                            seed=seed)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        samples.set_shape(inferred_shape)

        return (array_ops.expand_dims(self.a, 0) + array_ops.expand_dims(
            self.range(), 0) * samples)
Example #21
0
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.pack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(ind_2d + array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  return [array_ops.reshape(
      sparse_ops.sparse_to_dense(ind,
                                 array_ops.reshape(
                                     math_ops.reduce_prod(in_shape), [1]),
                                 array_ops.reshape(grad, [-1]),
                                 validate_indices=False),
      in_shape), array_ops.zeros(
          [], dtype=dtypes.int32)]
Example #22
0
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply
               reverses the list.

  Returns:
    time-reversed sequence
  """
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops_.pack(input_seq)

  # Reverse along dimension 0
  s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops_.unpack(s_reversed)
  return result
Example #23
0
 def _assert_has_shape(x, shape):
     x_shape = array_ops.shape(x)
     packed_shape = array_ops.pack(shape)
     return logging_ops.Assert(
         math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
         ["Expected shape for Tensor %s is " % x.name, packed_shape, " but saw shape: ", x_shape],
     )
Example #24
0
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.unknown_shape(ndims=input_seq[0].get_shape().ndims)
    for input_ in input_seq:
        input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
        lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
Example #25
0
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._loc, self._scale, n], name):
        n = ops.convert_to_tensor(n)
        n_val = tensor_util.constant_value(n)
        shape = array_ops.concat(
            0, [array_ops.pack([n]), self.batch_shape()])
        # Sample uniformly-at-random from the open-interval (-1, 1).
        uniform_samples = random_ops.random_uniform(
            shape=shape,
            minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                self.dtype.as_numpy_dtype(0.)),
            maxval=self.dtype.as_numpy_dtype(1.),
            dtype=self.dtype,
            seed=seed)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        uniform_samples.set_shape(inferred_shape)

        return (self._loc - self._scale * math_ops.sign(uniform_samples) *
                math_ops.log(1. - math_ops.abs(uniform_samples)))
Example #26
0
  def inference_graph(self, input_data, data_spec=None, **inference_args):
    """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      **inference_args: Keyword arguments to pass through to each tree.

    Returns:
      The last op in the random forest inference graph.
    """
    data_spec = [constants.DATA_FLOAT] if data_spec is None else data_spec
    probabilities = []
    for i in range(self.params.num_trees):
      with ops.device(self.device_assigner.get_device(i)):
        tree_data = input_data
        if self.params.bagged_features:
          tree_data = self._bag_features(i, input_data)
        probabilities.append(self.trees[i].inference_graph(
            tree_data, data_spec, **inference_args))
    with ops.device(self.device_assigner.get_device(0)):
      all_predict = array_ops.pack(probabilities)
      return math_ops.div(
          math_ops.reduce_sum(all_predict, 0), self.params.num_trees,
          name='probabilities')
Example #27
0
def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None):
  """Processes inputs for Sequence to Sequence models.

  Args:
    x: Input Tensor [batch_size, input_length, embed_dim].
    y: Output Tensor [batch_size, output_length, embed_dim].
    input_length: length of input x.
    output_length: length of output y.
    sentinel: optional first input to decoder and final output expected.
      If sentinel is not provided, zeros are used. Due to fact that y is not
      available in sampling time, shape of sentinel will be inferred from x.
    name: Operation name.

  Returns:
    Encoder input from x, and decoder inputs and outputs from y.
  """
  with ops.op_scope([x, y], name, "seq2seq_inputs"):
    in_x = array_ops.split_squeeze(1, input_length, x)
    y = array_ops.split_squeeze(1, output_length, y)
    if not sentinel:
      # Set to zeros of shape of y[0], using x for batch size.
      sentinel_shape = array_ops_.pack(
          [array_ops_.shape(x)[0], y[0].get_shape()[1]])
      sentinel = array_ops_.zeros(sentinel_shape)
      sentinel.set_shape(y[0].get_shape())
    in_y = [sentinel] + y
    out_y = y + [sentinel]
    return in_x, in_y, out_y
Example #28
0
  def sample(self, n, seed=None, name="sample"):
    """Generate `n` samples.

    Args:
      n: scalar.  Number of samples to draw from each distribution.
      seed: Python integer seed for RNG.
      name: name to give to the op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape` with values of type
          `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.p, n], name):
        n = ops.convert_to_tensor(n, name="n")
        p_2d = array_ops.reshape(self.p, array_ops.pack([-1, 1]))
        q_2d = 1. - p_2d
        probs = array_ops.concat(1, [q_2d, p_2d])
        samples = random_ops.multinomial(math_ops.log(probs), n, seed=seed)
        ret = array_ops.reshape(
            array_ops.transpose(samples),
            array_ops.concat(0,
                             [array_ops.expand_dims(n, 0), self.batch_shape()]))
        ret.set_shape(tensor_shape.vector(tensor_util.constant_value(n))
                      .concatenate(self.get_batch_shape()))
        return math_ops.cast(ret, self.dtype)
Example #29
0
  def sample(self, n, seed=None, name=None):
    """Sample `n` observations from the Exponential Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by the hyperparameters.
    """
    broadcast_shape = self._lam.get_shape()
    with ops.op_scope([self.lam, n], name, "ExponentialSample"):
      n = ops.convert_to_tensor(n, name="n")
      shape = array_ops.concat(
          0, [array_ops.pack([n]), array_ops.shape(self._lam)])
      sampled = random_ops.random_uniform(
          shape, maxval=math_ops.cast(1.0, dtype=self.dtype),
          dtype=self.dtype)

      n_val = tensor_util.constant_value(n)
      final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
      sampled.set_shape(final_shape)

      return -math_ops.log(sampled) / self._lam
Example #30
0
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Normal Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the hyperparameters.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._mu, self._sigma, n], name):
        broadcast_shape = (self._mu + self._sigma).get_shape()
        n = ops.convert_to_tensor(n)
        shape = array_ops.concat(
            0, [array_ops.pack([n]), array_ops.shape(self.mean())])
        sampled = random_ops.random_normal(
            shape=shape, mean=0, stddev=1, dtype=self._mu.dtype, seed=seed)

        # Provide some hints to shape inference
        n_val = tensor_util.constant_value(n)
        final_shape = tensor_shape.vector(n_val).concatenate(broadcast_shape)
        sampled.set_shape(final_shape)

        return sampled * self._sigma + self._mu
Example #31
0
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, six.string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  with ops.op_scope(t_list, name, "global_norm") as name:
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]
    squared_norms = array_ops.pack(
        [math_ops.reduce_sum(v * v) for v in values if v])

    norm = math_ops.sqrt(
        math_ops.reduce_sum(squared_norms), name="global_norm")

  return norm
Example #32
0
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If "None" is specified, simply reverses
               the list.

  Returns:
    time-reversed sequence
  """
    if lengths is None:
        return list(reversed(input_seq))

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)
    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    return result
Example #33
0
def random_flip_up_down(image, seed=None):
    """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
  dimension, which is `height`.  Otherwise output the image as-is.

  Args:
    image: A 3-D tensor of shape `[height, width, channels].`
    seed: A Python integer. Used to create a random seed. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.

  Returns:
    A 3-D tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
    _Check3DImage(image, require_static=False)
    uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
    mirror = math_ops.less(array_ops.pack([uniform_random, 1.0, 1.0]), 0.5)
    return array_ops.reverse(image, mirror)
Example #34
0
def _SliceGrad(op, grad):
    """Gradient for Slice op."""
    # Create an Nx2 padding where the first column represents how many
    # zeros are to be prepended for each dimension, and the second
    # column indicates how many zeros are appended.
    #
    # The number of zeros to append is the shape of the input
    # elementwise-subtracted by both the begin vector and sizes vector.
    #
    # Some more reshaping is needed to assemble this tensor with the
    # right dimensions.
    input_vec = op.inputs[0]
    begin_vec = op.inputs[1]
    input_rank = array_ops.rank(input_vec)
    slice_size = array_ops.shape(op.outputs[0])

    shape = array_ops.pack([input_rank, 1])
    before_pad = array_ops.reshape(begin_vec, shape)
    after_pad = array_ops.reshape(
        array_ops.shape(input_vec) - slice_size - begin_vec, shape)
    paddings = array_ops.concat(1, [before_pad, after_pad])
    return array_ops.pad(grad, paddings), None, None
Example #35
0
def assert_variables_initialized(var_list=None):
    """Returns an Op to check if variables are initialized.

  NOTE: This function is obsolete and will be removed in 6 months.  Please
  change your implementation to use `report_uninitialized_variables()`.

  When run, the returned Op will raise the exception `FailedPreconditionError`
  if any of the variables has not yet been initialized.

  Note: This function is implemented by trying to fetch the values of the
  variables. If one of the variables is not initialized a message may be
  logged by the C++ runtime. This is expected.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `all_variables().`

  Returns:
    An Op, or None if there are no variables.
  """
    if var_list is None:
        var_list = all_variables() + local_variables()
    # Backwards compatibility for old-style variables. TODO(touts): remove.
    if not var_list:
        var_list = []
        for op in ops.get_default_graph().get_operations():
            if op.type in ["Variable", "AutoReloadVariable"]:
                var_list.append(op.outputs[0])
    if not var_list:
        return None
    else:
        ranks = []
        for var in var_list:
            with ops.colocate_with(var.op):
                ranks.append(array_ops.rank_internal(var, optimize=False))
        if len(ranks) == 1:
            return ranks[0]
        else:
            return array_ops.pack(ranks)
Example #36
0
  def _sample_n(self, n, seed=None):
    # Recall _assert_valid_mu ensures mu and self._cov have same batch shape.
    shape = array_ops.concat(0, [self._cov.vector_shape(), [n]])
    white_samples = random_ops.random_normal(shape=shape,
                                             mean=0.,
                                             stddev=1.,
                                             dtype=self.dtype,
                                             seed=seed)

    correlated_samples = self._cov.sqrt_matmul(white_samples)

    # Move the last dimension to the front
    perm = array_ops.concat(0, (
        array_ops.pack([array_ops.rank(correlated_samples) - 1]),
        math_ops.range(0, array_ops.rank(correlated_samples) - 1)))

    # TODO(ebrevdo): Once we get a proper tensor contraction op,
    # perform the inner product using that instead of batch_matmul
    # and this slow transpose can go away!
    correlated_samples = array_ops.transpose(correlated_samples, perm)
    samples = correlated_samples + self.mu
    return samples
Example #37
0
def _TopKGrad(op, grad, _):
    """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  """
    in_shape = array_ops.shape(op.inputs[0])
    ind_shape = array_ops.shape(op.outputs[1])

    ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
    # Flatten indices to 2D.
    ind_2d = array_ops.reshape(op.outputs[1], array_ops.pack([-1,
                                                              ind_lastdim]))

    in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
    outerdim = array_ops.shape(ind_2d)[0]
    # Compute linear indices (flattened to 1D).
    ind = array_ops.reshape(
        ind_2d + array_ops.expand_dims(
            math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

    # Substitute grad to appropriate locations and fill the rest with zeros,
    # finally reshaping it to the original input shape.
    return [
        array_ops.reshape(
            sparse_ops.sparse_to_dense(ind,
                                       array_ops.reshape(
                                           math_ops.reduce_prod(in_shape),
                                           [1]),
                                       array_ops.reshape(grad, [-1]),
                                       validate_indices=False), in_shape),
        array_ops.zeros([], dtype=dtypes.int32)
    ]
Example #38
0
    def call(self, inputs):
        shape = inputs.get_shape().as_list()
        input_dim = shape[-1]
        output_shape = shape[:-1] + [self.units]
        if len(output_shape) > 2:
            # Reshape the input to 2D.
            output_shape_tensors = array_ops.unpack(array_ops.shape(inputs))
            output_shape_tensors[-1] = self.units
            output_shape_tensor = array_ops.pack(output_shape_tensors)
            inputs = array_ops.reshape(inputs, [-1, input_dim])

        outputs = standard_ops.matmul(inputs, self.w)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)

        if len(output_shape) > 2:
            # Reshape the output back to the original ndim of the input.
            outputs = array_ops.reshape(outputs, output_shape_tensor)
            outputs.set_shape(output_shape)

        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs
Example #39
0
  def tensors_to_item(self, keys_to_tensors):
    indices = keys_to_tensors[self._indices_key]
    values = keys_to_tensors[self._values_key]
    if self._shape_key:
      shape = keys_to_tensors[self._shape_key]
      if isinstance(shape, ops.SparseTensor):
        shape = sparse_ops.sparse_tensor_to_dense(shape)
    elif self._shape:
      shape = self._shape
    else:
      shape = indices.shape
    indices_shape = array_ops.shape(indices.indices)
    rank = indices_shape[1]
    ids = math_ops.to_int64(indices.values)
    indices_columns_to_preserve = array_ops.slice(
        indices.indices, [0, 0], array_ops.pack([-1, rank - 1]))
    new_indices = array_ops.concat(1, [indices_columns_to_preserve,
                                       array_ops.reshape(ids, [-1, 1])])

    tensor = ops.SparseTensor(new_indices, values.values, shape)
    if self._densify:
      tensor = sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
    return tensor
    def _flip_front_dims_to_back(self, x):
        """Flip x to make x.shape = chol.shape[:-1] + [M1*...*Mr]."""
        # E.g. suppose
        # chol.shape =         [N1,...,Nn, k, k]
        # x.shape = [M1,...,Mm, N1,...,Nn, k]
        # Then we want to return x_flipped where
        # x_flipped.shape = [N1,...,Nn, k, M1*...*Mm].
        x_shape = array_ops.shape(x)
        x_rank = array_ops.rank(x)
        m = x_rank + 1 - self.rank()
        x_shape_left = array_ops.slice(x_shape, [0], [m])

        # Permutation corresponding to [N1,...,Nn, k, M1,...,Mm]
        perm = array_ops.concat(
            0, (math_ops.range(m, x_rank), math_ops.range(0, m)))
        x_permuted = array_ops.transpose(x, perm=perm)

        # Now that things are ordered correctly, condense the last dimensions.
        # condensed_shape = [M1*...*Mm]
        condensed_shape = array_ops.pack([math_ops.reduce_prod(x_shape_left)])
        new_shape = array_ops.concat(0, (self.vector_shape(), condensed_shape))

        return array_ops.reshape(x_permuted, new_shape)
Example #41
0
  def __init__(self, logits, dtype=dtypes.int32, strict=True,
               name="Categorical"):
    """Initialize Categorical distributions using class log-probabilities.

    Args:
      logits: An N-D `Tensor`, `N >= 1`, representing the log probabilities
          of a set of Categorical distributions. The first `N - 1` dimensions
          index into a batch of independent distributions and the last dimension
          indexes into the classes.
      dtype: The type of the event samples (default: int32).
      strict: Unused in this distribution.
      name: A name for this distribution (optional).
    """
    self._name = name
    self._dtype = dtype
    self._strict = strict
    with ops.op_scope([logits], name):
      self._logits = ops.convert_to_tensor(logits, name="logits")
      logits_shape = array_ops.shape(self._logits)
      self._batch_rank = array_ops.size(logits_shape) - 1
      self._batch_shape = array_ops.slice(
          logits_shape, [0], array_ops.pack([self._batch_rank]))
      self._num_classes = array_ops.gather(logits_shape, self._batch_rank)
Example #42
0
def reverse_seq(input_seq, lengths):
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.matrix(None, None)
    for input_ in input_seq:
        input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    if lengths is not None:
        lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
Example #43
0
def RNN(x, output_w, output_b):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)  # x:(40 list,every list--120*200 tensor)

    batch_size = array_ops.shape(x[0])[0]
    state = array_ops.zeros(array_ops.pack([batch_size, n_hidden]),
                            dtype="float")
    state.set_shape([None, n_hidden])

    outputs = []
    with tf.variable_scope("RNN_network") as scope:

        for time_step in range(n_steps):
            if time_step > 0: scope.reuse_variables()

            for i in range(len(periods)):
                if time_step % periods[i] == 0:
                    group_index = periods[i]

            WI_i = tf.matmul(x[time_step],
                             input_w[:, 0:(group_index * n_hidden // n_steps)])
            hidden_mask = tf.mul(hidden_w, clockwork_mask)
            WH_i = tf.matmul(
                state, hidden_mask[:, 0:(group_index * n_hidden // n_steps)])

            y_update = tf.add(WH_i, WI_i)
            y_update = tanh(y_update)

            state = tf.concat(1, [
                y_update, state[:,
                                (group_index * n_hidden // n_steps):n_hidden]
            ])
            outputs.append(state)
        output = tf.matmul(outputs[-1], output_w['out_w'])
        output = tf.nn.bias_add(output, output_b['out_b'])
        return output, outputs
Example #44
0
def report_uninitialized_variables(var_list=None,
                                   name="report_uninitialized_variables"):
  """Adds ops to list the names of uninitialized variables.

  When run, it returns a 1-D tensor containing the names of uninitialized
  variables if there are any, or an empty array if there are none.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `global_variables() + local_variables()`
    name: Optional name of the `Operation`.

  Returns:
    A 1-D tensor containing names of the uninitialized variables, or an empty
    1-D tensor if there are no variables or no uninitialized variables.
  """
  if var_list is None:
    var_list = global_variables() + local_variables()
    # Backwards compatibility for old-style variables. TODO(touts): remove.
    if not var_list:
      var_list = []
      for op in ops.get_default_graph().get_operations():
        if op.type in ["Variable", "AutoReloadVariable"]:
          var_list.append(op.outputs[0])
  with ops.name_scope(name):
    if not var_list:
      # Return an empty tensor so we only need to check for returned tensor
      # size being 0 as an indication of model ready.
      return array_ops.constant([], dtype=dtypes.string)
    else:
      # Get a 1-D boolean tensor listing whether each variable is initialized.
      variables_mask = math_ops.logical_not(array_ops.pack(
          [state_ops.is_variable_initialized(v) for v in var_list]))
      # Get a 1-D string tensor containing all the variable names.
      variable_names_tensor = array_ops.constant([s.op.name for s in var_list])
      # Return a 1-D tensor containing all the names of uninitialized variables.
      return array_ops.boolean_mask(variable_names_tensor, variables_mask)
Example #45
0
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Categorical distribution.

    Args:
      n: 0-D.  Number of independent samples to draw for each distribution.
      seed: Random seed (optional).
      name: A name for this operation (optional).

    Returns:
      An `int64` `Tensor` with shape `[n, batch_shape, event_shape]`
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.logits, n], name):
        n = ops.convert_to_tensor(n, name="n")
        logits_2d = array_ops.reshape(
            self.logits, array_ops.pack([-1, self.num_classes]))
        samples = random_ops.multinomial(logits_2d, n, seed=seed)
        ret = array_ops.reshape(
            array_ops.transpose(samples),
            array_ops.concat(
                0, [array_ops.expand_dims(n, 0), self.batch_shape()]))
        ret.set_shape(tensor_shape.vector(tensor_util.constant_value(n))
                      .concatenate(self.get_batch_shape()))
        return ret
Example #46
0
    def sample(self, n, seed=None, name="sample"):
        """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
        with ops.name_scope(self.name):
            with ops.op_scope([self._loc, self._scale, n], name):
                n = ops.convert_to_tensor(n)
                n_val = tensor_util.constant_value(n)
                shape = array_ops.concat(
                    0, [array_ops.pack([n]),
                        self.batch_shape()])
                # Sample uniformly-at-random from the open-interval (-1, 1).
                uniform_samples = random_ops.random_uniform(
                    shape=shape,
                    minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                        self.dtype.as_numpy_dtype(0.)),
                    maxval=self.dtype.as_numpy_dtype(1.),
                    dtype=self.dtype,
                    seed=seed)

                # Provide some hints to shape inference
                inferred_shape = tensor_shape.vector(n_val).concatenate(
                    self.get_batch_shape())
                uniform_samples.set_shape(inferred_shape)

                return (self._loc -
                        self._scale * math_ops.sign(uniform_samples) *
                        math_ops.log(1. - math_ops.abs(uniform_samples)))
Example #47
0
def _assert_compatible_shapes(mu, sigma):
  r_mu = array_ops.rank(mu)
  r_sigma = array_ops.rank(sigma)
  sigma_shape = array_ops.shape(sigma)
  sigma_rank = array_ops.rank(sigma)
  mu_shape = array_ops.shape(mu)
  return control_flow_ops.group(
      logging_ops.Assert(
          math_ops.equal(r_mu + 1, r_sigma),
          ["Rank of mu should be one less than rank of sigma, but saw: ",
           r_mu, " vs. ", r_sigma]),
      logging_ops.Assert(
          math_ops.equal(
              array_ops.gather(sigma_shape, sigma_rank - 2),
              array_ops.gather(sigma_shape, sigma_rank - 1)),
          ["Last two dimensions of sigma (%s) must be equal: " % sigma.name,
           sigma_shape]),
      logging_ops.Assert(
          math_ops.reduce_all(math_ops.equal(
              mu_shape,
              array_ops.slice(
                  sigma_shape, [0], array_ops.pack([sigma_rank - 1])))),
          ["mu.shape and sigma.shape[:-1] must match, but saw: ",
           mu_shape, " vs. ", sigma_shape]))
Example #48
0
def assert_variables_initialized(var_list=None):
    """Returns an Op to check if variables are initialized.

  When run, the returned Op will raise the exception `FailedPreconditionError`
  if any of the variables has not yet been initialized.

  Note: This function is implemented by trying to fetch the values of the
  variables. If one of the variables is not initialized a message may be
  logged by the C++ runtime. This is expected.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `all_variables().`

  Returns:
    An Op, or None if there are no variables.
  """
    if var_list is None:
        var_list = all_variables()
    # Backwards compatibility for old-style variables. TODO(mdevin): remove.
    if not var_list:
        var_list = []
        for op in ops.get_default_graph().get_operations():
            if op.type in ["Variable", "AutoReloadVariable"]:
                var_list.append(op.outputs[0])
    if not var_list:
        return None
    else:
        ranks = []
        for var in var_list:
            with ops.device(var.device):
                ranks.append(array_ops.rank(var))
        if len(ranks) == 1:
            return ranks[0]
        else:
            return array_ops.pack(ranks)
Example #49
0
  def inference_graph(self, input_data, data_spec=None):
    """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.

    Returns:
      The last op in the random forest inference graph.
    """
    data_spec = ([constants.DATA_FLOAT] * self.params.num_features
                 if data_spec is None else data_spec)
    probabilities = []
    for i in range(self.params.num_trees):
      with ops.device(self.device_assigner.get_device(i)):
        tree_data = input_data
        if self.params.bagged_features:
          tree_data = self._bag_features(i, input_data)
        probabilities.append(self.trees[i].inference_graph(tree_data,
                                                           data_spec))
    with ops.device(self.device_assigner.get_device(0)):
      all_predict = array_ops.pack(probabilities)
      return math_ops.reduce_sum(all_predict, 0) / self.params.num_trees
Example #50
0
    def __call__(self,
                 inputs,
                 initial_state=None,
                 dtype=None,
                 sequence_length=None,
                 scope=None):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
        or a list of `time_len` tensors of shape `[batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, the
        cell is expected to create a zero initial state of type `dtype`.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len).`
        Defaults to `time_len` for each element.
      scope: `VariableScope` for the created subgraph; defaults to class name.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
        with vs.variable_scope(scope or type(self).__name__):
            is_list = isinstance(inputs, list)
            if is_list:
                inputs = array_ops.pack(inputs)
            inputs_shape = inputs.get_shape().with_rank(3)
            if not inputs_shape[2]:
                raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                                 inputs_shape)
            batch_size = inputs_shape[1].value
            if batch_size is None:
                batch_size = array_ops.shape(inputs)[1]
            time_len = inputs_shape[0].value
            if time_len is None:
                time_len = array_ops.shape(inputs)[0]

            # Provide default values for initial_state and dtype
            if initial_state is None:
                if dtype is None:
                    raise ValueError(
                        "Either initial_state or dtype needs to be specified")
                z = array_ops.zeros(array_ops.pack(
                    [batch_size, self.num_units]),
                                    dtype=dtype)
                initial_state = z, z
            else:
                if len(initial_state) != 2:
                    raise ValueError(
                        "Expecting initial_state to be a tuple with length 2 or None"
                    )
                if dtype is None:
                    dtype = initial_state[0].dtype

            # create the actual cell
            if sequence_length is not None:
                sequence_length = ops.convert_to_tensor(sequence_length)
            initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
            cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                                   initial_output, dtype,
                                                   sequence_length)

            if sequence_length is not None:
                # Mask out the part beyond sequence_length
                mask = array_ops.transpose(
                    array_ops.sequence_mask(sequence_length,
                                            time_len,
                                            dtype=dtype), [1, 0])
                mask = array_ops.tile(array_ops.expand_dims(mask, [-1]),
                                      [1, 1, self.num_units])
                outputs *= mask
                # Prepend initial states to cell_states and outputs for indexing to work
                # correctly,since we want to access the last valid state at
                # sequence_length - 1, which can even be -1, corresponding to the
                # initial state.
                mod_cell_states = array_ops.concat(0, [
                    array_ops.expand_dims(initial_cell_state, [0]), cell_states
                ])
                mod_outputs = array_ops.concat(
                    0, [array_ops.expand_dims(initial_output, [0]), outputs])
                final_cell_state = self._gather_states(mod_cell_states,
                                                       sequence_length,
                                                       batch_size)
                final_output = self._gather_states(mod_outputs,
                                                   sequence_length, batch_size)
            else:
                # No sequence_lengths used: final state is the last state
                final_cell_state = cell_states[-1]
                final_output = outputs[-1]

            if is_list:
                # Input was a list, so return a list
                outputs = array_ops.unpack(outputs)

            return outputs, (final_cell_state, final_output)
Example #51
0
def _block_lstm(seq_len_max,
                x,
                w,
                b,
                cs_prev=None,
                h_prev=None,
                wci=None,
                wcf=None,
                wco=None,
                forget_bias=None,
                cell_clip=None,
                use_peephole=None,
                name=None):
    r"""TODO(williamchan): add doc.

  Args:
    seq_len_max: A `Tensor` of type `int64`.
    x: A list of at least 1 `Tensor` objects of the same type in: `float32`.
    w: A `Tensor`. Must have the same type as `x`.
    b: A `Tensor`. Must have the same type as `x`.
    cs_prev: A `Tensor`. Must have the same type as `x`.
    h_prev: A `Tensor`. Must have the same type as `x`.
    wci: A `Tensor`. Must have the same type as `x`.
    wcf: A `Tensor`. Must have the same type as `x`.
    wco: A `Tensor`. Must have the same type as `x`.
    forget_bias: An optional `float`. Defaults to `1`.
    cell_clip: An optional `float`. Defaults to `3`.
    use_peephole: An optional `bool`. Defaults to `False`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (i, cs, f, o, ci, co, h).
    i: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    cs: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    f: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    o: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    ci: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    co: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    h: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.

  Raises:
    ValueError: If `b` does not have a valid shape.
  """
    batch_size = x[0].get_shape().with_rank(2)[0].value
    cell_size4 = b.get_shape().with_rank(1)[0].value
    if cell_size4 is None:
        raise ValueError("`b` shape must not be None.")
    cell_size = cell_size4 / 4
    zero_state = None
    if cs_prev is None or h_prev is None:
        zero_state = array_ops.constant(0,
                                        dtype=dtypes.float32,
                                        shape=[batch_size, cell_size])
    if cs_prev is None:
        cs_prev = zero_state
    if h_prev is None:
        h_prev = zero_state
    if wci is None:
        wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size])
        wco = wci
        wcf = wci

    # pylint: disable=protected-access
    i, cs, f, o, ci, co, h = _lstm_ops_so.block_lstm(seq_len_max=seq_len_max,
                                                     x=array_ops.pack(x),
                                                     cs_prev=cs_prev,
                                                     h_prev=h_prev,
                                                     w=w,
                                                     wci=wci,
                                                     wco=wco,
                                                     wcf=wcf,
                                                     b=b,
                                                     forget_bias=forget_bias,
                                                     cell_clip=cell_clip,
                                                     name=name,
                                                     use_peephole=use_peephole)

    return array_ops.unpack(i), array_ops.unpack(cs), array_ops.unpack(
        f), array_ops.unpack(o), array_ops.unpack(ci), array_ops.unpack(
            co), array_ops.unpack(h)
Example #52
0
 def _event_shape(self):
     return array_ops.pack([self._cov.vector_space_dimension()])
Example #53
0
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    # Be copied and modified from tensorflow-0.12.0.contrib.layer.fully_connected,
    # add weights_nomalizer_* options.
    """Adds a fully connected layer.

    `fully_connected` creates a variable called `weights`, representing a fully
    connected weight matrix, which is multiplied by the `inputs` to produce a
    `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable would be
    created and added the hidden units. Finally, if `activation_fn` is not `None`,
    it is applied to the hidden units as well.

    Note: that if `inputs` have a rank greater than 2, then `inputs` is flattened
    prior to the initial matrix multiply by `weights`.

    Args:
      inputs: A tensor of with at least rank 2 and value for the last dimension,
        i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
      num_outputs: Integer or long, the number of output units in the layer.
      activation_fn: activation function, set to None to skip it and maintain
        a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor added.
        default set to None for no normalizer function
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: Optional list of collections for all the variables or
        a dictionary containing a different list of collections per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
       the tensor variable representing the result of the series of operations.

    Raises:
      ValueError: if x has rank less than 2 or if its last dimension is not set.
    """
    if not (isinstance(num_outputs, six.integer_types)):
        raise ValueError('num_outputs should be int or long, got %s.',
                         num_outputs)
    with variable_scope.variable_scope(scope,
                                       'fully_connected', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        inputs_shape = inputs.get_shape()
        num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

        static_shape = inputs_shape.as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs),
                                     len(static_shape))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(weights,
                                            **weights_normalizer_params)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
Example #54
0
def fill_lower_triangular(x,
                          validate_args=False,
                          name="fill_lower_triangular"):
    """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n^2), large constants and sub-optimal
  vectorization means the complexity of this function is 5x slower than zeroing
  out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`.  This
  function becomes competitive only when several matmul/cholesky/etc ops can be
  ellided in constructing the input.  Example: wiring a fully connected layer as
  a covariance matrix; this function reduces the final layer by 2x and possibly
  reduces the network arch complexity considerably.  In most cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: `Boolean`, default `False`.  Whether to ensure the shape of
      `x` can be mapped to a lower triangular matrix (controls non-static checks
      only).
    name: `String`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if shape if `x` has static shape which cannot be mapped to a
      lower triangular matrix.
  """
    # TODO(jvdillon): Replace this code with dedicated op when it exists.
    with ops.name_scope(name, values=(x, )):
        x = ops.convert_to_tensor(x, name="x")
        if (x.get_shape().ndims is not None
                and x.get_shape()[-1].value is not None):
            d = x.get_shape()[-1].value
            # d = n(n+1)/2 implies n is:
            n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
            d_inferred = n * (n + 1) / 2
            if d != d_inferred:
                raise ValueError(
                    "Input cannot be mapped to a lower triangular; "
                    "n*(n+1)/2 = %d != %d" % (d_inferred, d))
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([n, n]))
        else:
            d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
            # d = n(n+1)/2 implies n is:
            n = math_ops.cast(0.5 * (dtypes.sqrt(1. + 8. * d) - 1.),
                              dtype=dtypes.int32)
            if validate_args:
                is_valid_input_shape = check_ops.assert_equal(
                    n * (n + 1) / 2,
                    d,
                    message="Input cannot be mapped to a lower triangular.")
                n = control_flow_ops.with_dependencies([is_valid_input_shape],
                                                       n)
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([None, None]))

        def tril_ids(n):
            """Internal helper to create vector of linear indices into y."""
            # Build the ids statically; chose 512 because it implies 1MiB.
            if not contrib_framework.is_tensor(n) and n <= 512:
                ids = np.arange(n**2, dtype=np.int32)
                rows = (ids / n).astype(np.int32)  # Implicit floor.
                # We need to stop incrementing the index when we encounter
                # upper-triangular elements.  The idea here is to compute the
                # lower-right number of zeros then by "symmetry" subtract this from the
                # total number of zeros, n(n-1)/2.
                # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
                offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
                # We could also zero out when (rows < cols) == (rows < ids-n*rows).
                # mask = (ids <= (n + 1) * rows).astype(np.int32)
            else:
                ids = math_ops.range(n**2)
                rows = math_ops.cast(ids / n, dtype=dtypes.int32)
                offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                                       dtype=dtypes.int32)
            return ids - offset

        # Special-case non-batch case.
        if x.get_shape().ndims == 1:
            y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
            y = array_ops.matrix_band_part(y, -1, 0)
            y.set_shape(y.get_shape().merge_with(final_shape))
            return y

        # Make ids for each batch dim.
        if (x.get_shape().ndims is not None
                and x.get_shape()[:-1].is_fully_defined()):
            batch_shape = np.asarray(x.get_shape()[:-1].as_list(),
                                     dtype=np.int32)
            m = np.prod(batch_shape).astype(np.int32)
        else:
            batch_shape = array_ops.shape(x)[:-1]
            m = array_ops.reduce_prod(array_ops.shape(x)[:-1])
        batch_ids = math_ops.range(m)

        # Assemble the tril_ids into batch,tril_id pairs.
        idx = array_ops.pack([
            array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
            array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
        ])
        idx = array_ops.transpose(idx, [1, 2, 0])

        # Gather up, reshape, and return.
        y = array_ops.reshape(x, [-1, d])
        y = array_ops.gather_nd(y, idx)
        y = array_ops.reshape(y, array_ops.concat_v2([batch_shape, [n, n]], 0))
        y = array_ops.matrix_band_part(y, -1, 0)
        y.set_shape(y.get_shape().merge_with(final_shape))
        return y
Example #55
0
def sparse_to_indicator(sp_input, vocab_size, name=None):
    """Converts a `SparseTensor` of ids into a dense bool indicator tensor.

  The last dimension of `sp_input` is discarded and replaced with the values of
  `sp_input`.  If `sp_input.shape = [D0, D1, ..., Dn, K]`, then
  `output.shape = [D0, D1, ..., Dn, vocab_size]`, where

      output[d_0, d_1, ..., d_n, sp_input[d_0, d_1, ..., d_n, k]] = True

  and False elsewhere in `output`.

  For example, if `sp_input.shape = [2, 3, 4]` with non-empty values:

      [0, 0, 0]: 0
      [0, 1, 0]: 10
      [1, 0, 3]: 103
      [1, 1, 2]: 150
      [1, 1, 3]: 149
      [1, 1, 4]: 150
      [1, 2, 1]: 121

  and `vocab_size = 200`, then the output will be a `[2, 3, 200]` dense bool
  tensor with False everywhere except at positions

      (0, 0, 0), (0, 1, 10), (1, 0, 103), (1, 1, 149), (1, 1, 150),
      (1, 2, 121).

  Note that repeats are allowed in the input SparseTensor.
  This op is useful for converting `SparseTensor`s into dense formats for
  compatibility with ops that expect dense tensors.

  The input `SparseTensor` must be in row-major order.

  Args:
    sp_input: A `SparseTensor` of type `int32` or `int64`.
    vocab_size: The new size of the last dimension, with
      `all(0 <= sp_input.values < vocab_size)`.
    name: A name prefix for the returned tensors (optional)

  Returns:
    A dense bool indicator tensor representing the indices with specified value.

  Raises:
    TypeError: If `sp_input` is not a `SparseTensor`.
  """
    if not isinstance(sp_input, ops.SparseTensor):
        raise TypeError("Input must be a SparseTensor")

    with ops.op_scope([sp_input], name, "SparseToIndicator") as name:
        indices_shape = array_ops.shape(sp_input.indices)
        num_entries = indices_shape[0]
        rank = indices_shape[1]

        ids = sp_input.values
        if ids.dtype != dtypes.int64:
            ids = math_ops.cast(ids, dtypes.int64)

        # Slice off the last dimension of indices, then then tack on the ids
        indices_columns_to_preserve = array_ops.slice(
            sp_input.indices, [0, 0], array_ops.pack([-1, rank - 1]))
        new_indices = array_ops.concat(
            1, [indices_columns_to_preserve,
                array_ops.reshape(ids, [-1, 1])])

        new_values = array_ops.fill(array_ops.expand_dims(num_entries, 0),
                                    True)
        new_shape = array_ops.concat(0, [
            array_ops.slice(sp_input.shape, [0],
                            array_ops.expand_dims(rank - 1, 0)), [vocab_size]
        ])

        sp_new = ops.SparseTensor(new_indices, new_values, new_shape)

        # validate_indices may be False because we allow duplicates in new_indices:
        # repeated indices are allowed when creating an indicator matrix.
        return sparse_tensor_to_dense(sp_new,
                                      default_value=False,
                                      validate_indices=False,
                                      name=name)
Example #56
0
def dynamic_distraction_m2_decoder(decoder_inputs,
                      initial_state,
                      distract_initial_state,
                      attention_states,
                      attention_states_query,
                      cell1,cell2,
                      distraction_cell,
                      output_size=None,
                      num_heads=1,
                      loop_function=None,
                      dtype=None,
                      scope=None,
                      initial_state_attention=False):
  """RNN decoder with attention for the sequence-to-sequence model.

  In this context "attention" means that, during decoding, the RNN can look up
  information in the additional tensor attention_states, and it does this by
  focusing on a few entries from the tensor. This model has proven to yield
  especially good results in a number of sequence-to-sequence tasks. This
  implementation is based on http://arxiv.org/abs/1412.7449 (see below for
  details). It is recommended for complex sequence-to-sequence tasks.

  Args:
    decoder_inputs: A list of 2D Tensors [batch_size x input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    output_size: Size of the output vectors; if None, we use cell.output_size.
    num_heads: Number of attention heads that read from attention_states.
    loop_function: If not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/abs/1506.03099.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x input_size].
    dtype: The dtype to use for the RNN initial state (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states -- useful when we wish to resume decoding from a previously
      stored decoder state and attention states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors of
        shape [batch_size x output_size]. These represent the generated outputs.
        Output i is computed from input i (which is either the i-th element
        of decoder_inputs or loop_function(output {i-1}, i)) as follows.
        First, we run the cell on a combination of the input and previous
        attention masks:
          cell_output, new_state = cell(linear(input, prev_attn), prev_state).
        Then, we calculate new attention masks:
          new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
        and then we calculate the output:
          output = linear(cell_output, new_attn).
      state: The state of each decoder cell the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, shapes
      of attention_states are not set, or input size cannot be inferb_a     from the input.
  """
  if decoder_inputs is None:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if attention_states.get_shape()[2].value is None:
    raise ValueError("Shape[2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell1.output_size

  with variable_scope.variable_scope(
      scope or "dynamic_distraction_m2_decoder", dtype=dtype) as scope:
    dtype = scope.dtype

    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length_state = attention_states.get_shape()[1].value
    attn_length_query = attention_states_query.get_shape()[1].value

    dim_1 = initial_state.get_shape()[1].value
    dim_2 = cell1.output_size
    project_initial_state_W = variable_scope.get_variable("Initial_State_W", [dim_1, dim_2])
    project_initial_state_B = variable_scope.get_variable("Initial_State_Bias", [dim_2])

    print ("Preksha " + scope.name)
    if attn_length_state is None:
      attn_length_state = shape(attention_states)[1]

    if attn_length_query is None:
      attn_length_query = shape(attention_states_query)[1]

    attn_size_state = attention_states.get_shape()[2].value
    attn_size_query = attention_states_query.get_shape()[2].value
    b_a = variable_scope.get_variable("b_a", [1, attn_size_state])

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden_states = array_ops.reshape(
        attention_states, [-1, attn_length_state, 1, attn_size_state])

    hidden_states_query = array_ops.reshape(
        attention_states_query, [-1, attn_length_query, 1, attn_size_query])

    hidden_features_states = []
    hidden_features_query  = []

    v_state = []
    attention_vec_size_state  = attn_size_state  # Size of query vectors for attention.
    
    for a in xrange(num_heads):
      k = variable_scope.get_variable("AttnW_State_%d" % a,
                                      [1, 1, attn_size_state, attention_vec_size_state])

      hidden_features_states.append(nn_ops.conv2d(hidden_states, k, [1, 1, 1, 1], "SAME"))
      
      v_state.append(
          variable_scope.get_variable("AttnV_State_%d" % a, [attention_vec_size_state]))


    v_query = []
    attention_vec_size_query  = attn_size_query  # Size of query vectors for attention.

    for a in xrange(num_heads):
      k = variable_scope.get_variable("AttnW_Query_%d" %a, 
                                      [1, 1, attn_size_query, attention_vec_size_query])

      hidden_features_query.append(nn_ops.conv2d(hidden_states_query, k, [1, 1, 1, 1], "SAME"))
      
      v_query.append(
          variable_scope.get_variable("AttnV_Query_%d" % a, [attention_vec_size_query]))


    state_1 = math_ops.matmul(initial_state, project_initial_state_W) + project_initial_state_B
    state_2 = state_1


    prev_states = []

    for i in range(attn_length_state):
      prev_states.append(array_ops.zeros([batch_size]))

    def attention(query, prev_states, b_a):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      if nest.is_sequence(query):  # If the query is a tuple, flatten it.
        query_list = nest.flatten(query)
        for q in query_list:  # Check that ndims == 2 if specified.
          ndims = q.get_shape().ndims
          if ndims:
            assert ndims == 2
        query = array_ops.concat(1, query_list)
      for a in xrange(num_heads):
        with variable_scope.variable_scope("Attention_%d" % a):
          y = linear(query, attention_vec_size_state, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size_state])
          # Attention mask is a softmax of v^T * tanh(...).


          temp = hidden_features_states[a] + y
          new_states = array_ops.squeeze(temp, [2])

          new_states_list = array_ops.unpack(new_states, axis=1)
          #print(temp.get_shape(), new_states.get_shape(), len(new_states_list), new_states_list[0].get_shape())
          distract_states_list = []
          for i, _ in enumerate(new_states_list):
              temp = array_ops.reshape(prev_states[i], [-1, 1])
              t1 = math_ops.matmul(temp, b_a)
              print ("b_a size and prev_states size", temp.get_shape(), prev_states[i].get_shape(), b_a.get_shape(), t1.get_shape())
              distract_states_list.append(new_states_list[i] - t1)

          distract_states = array_ops.pack(distract_states_list, axis=1)

          print (len(distract_states_list), distract_states.get_shape())
          s = math_ops.reduce_sum(
              v_state[a] * math_ops.tanh(distract_states), [2])

          print(s.get_shape())
          a = nn_ops.softmax(s)
          prev_states = array_ops.pack(prev_states,  axis=1)
          prev_states = prev_states + a
          
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length_state, 1, 1]) * hidden_states,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size_state]))
      return ds, array_ops.unpack(prev_states, axis=1)

    def attention_query(query):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      if nest.is_sequence(query):  # If the query is a tuple, flatten it.
        query_list = nest.flatten(query)
        for q in query_list:  # Check that ndims == 2 if specified.
          ndims = q.get_shape().ndims
          if ndims:
            assert ndims == 2
        query = array_ops.concat(1, query_list)
      for a in xrange(num_heads):
        with variable_scope.variable_scope("Attention_Query_%d" % a):
          y = linear(query, attention_vec_size_query, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size_query])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v_query[a] * math_ops.tanh(hidden_features_query[a] + y), [2, 3])
          a = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length_query, 1, 1]) * hidden_states_query,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size_query]))


      return ds[0]


    outputs = []
    prev = None

    batch_attn_size_state = array_ops.pack([batch_size, attn_size_state])
    batch_attn_size_query = array_ops.pack([batch_size, attn_size_query])


    attns_state = [array_ops.zeros(batch_attn_size_state, dtype=dtype)
             for _ in xrange(num_heads)]

    attns_query = [array_ops.zeros(batch_attn_size_query, dtype=dtype)
             for _ in xrange(num_heads)]

    for a in attns_state:  # Ensure the second shape of attention vectors is set.
      a.set_shape([None, attn_size_state])


    for a in attns_query:  # Ensure the second shape of attention vectors is set.
      a.set_shape([None, attn_size_query])


    acc_ctx = array_ops.zeros([batch_size, attn_size_state])

    if initial_state_attention:
      attns_query = attention_query(initial_state)
      list_of_queries = [initial_state, attns_query]
      attns_state, prev_states = attention(list_of_queries, prev_states)

    for i, inp in enumerate(decoder_inputs):
      if i > 0:
        variable_scope.get_variable_scope().reuse_variables()
      # If loop_function is set, we use it instead of decoder_inputs.
      if loop_function is not None and prev is not None:
        with variable_scope.variable_scope("loop_function", reuse=True):
          inp = loop_function(prev, i)
      # Merge input and previous attentions into one vector of the right size.
      input_size = inp.get_shape().with_rank(2)[1]
      if input_size.value is None:
        raise ValueError("Could not infer input size from input: %s" % inp.name)
      

      with variable_scope.variable_scope("Cell2"):
        input_2 = linear([state_1] + [inp], input_size, True)
        output_2, state_2 = cell2(input_2, state_2)
      

      # Run the RNN.
      #print (x.get_shape())
      
      # Run the attention mechanism.

      if i == 0 and initial_state_attention:
        with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                           reuse=True):
          attns_query = attention_query(output_2)
          list_of_queries = [state, attns_query]
          attns_state, prev_states = attention(list_of_queries, prev_states, b_a)
      else:
        attns_query = attention_query(output_2)
        list_of_queries = [output_2, attns_query]
        attns_state, prev_states = attention(list_of_queries, prev_states, b_a)


      with variable_scope.variable_scope("AttnOutputProjection"):

        #W = variable_scope.get_variable("W", [1,attn_size_state])
        #U = variable_scope.get_variable("U", [1,attn_size_state])

        #new_ctx = math_ops.mul(W, attns_state[0]) - math_ops.mul(U, acc_ctx)
        #new_ctx = math_ops.tanh(new_ctx)

        #acc_ctx = acc_ctx + new_ctx

        with variable_scope.variable_scope("Cell1"):
          input_1 = linear([output_2] + [attns_state[0]], input_size, True)
          output_1, state_1 = cell1(input_1, state_1)

        output = math_ops.tanh(linear([inp] + [output_1] + [attns_state[0]], output_size, True))
        #x_shape = variable_scope.get_variable(name = 'x_shape',shape=cell_output.get_shape())
        if loop_function is not None:
          prev = output
        outputs.append(output)

  return outputs, state_1
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
  # pylint: disable=anomalous-backslash-in-string
  r"""Adds the parameters for a fully connected layer and returns the output.
  A fully connected layer is generally defined as a matrix multiply:
  `y = f(w * x + b)` where `f` is given by `activation_fn`. If
  `activation_fn` is `None`, the result of `y = w * x + b` is
  returned.
  If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
  with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
  multiply along the first dimensions. The result r is a tensor of shape
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
  where \\\( r_{i_0, ..., i_{n-1}, k} =
  \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
  This is accomplished by reshaping `x` to 2-D
  [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
  before the matrix multiply and afterwards reshaping it to
  [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].
  This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
  `bias_init` to `None`.
  The variable creation is compatible with `tf.variable_scope` and so can be
  reused with `tf.variable_scope` or `tf.make_template`.
  Most of the details of variable creation can be controlled by specifying the
  initializers (`weight_init` and `bias_init`) and in which collections to place
  the created variables (`weight_collections` and `bias_collections`; note that
  the variables are always added to the `VARIABLES` collection). The output of
  the layer can be placed in custom collections using `output_collections`.
  The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
  respectively.
  A per layer regularization can be specified by setting `weight_regularizer`
  and `bias_regularizer`, which are applied to the weights and biases
  respectively, and whose output is added to the `REGULARIZATION_LOSSES`
  collection.
  Args:
    x: The input `Tensor`.
    num_output_units: The size of the output.
    activation_fn: A function that requires a single Tensor that is applied as a
      non-linearity. If None is used, do not apply any activation.
    weight_init: An optional weight initialization, defaults to
      `xavier_initializer`.
    bias_init: An initializer for the bias, defaults to 0. Set to `None` in
      order to disable bias.
    name: The name for this operation is used to name operations and to find
      variables. If specified it must be unique for this scope, otherwise a
      unique name starting with "fully_connected" will be created.  See
      `tf.variable_op_scope` for details.
    weight_collections: List of graph collections to which weights are added.
    bias_collections: List of graph collections to which biases are added.
    output_collections: List of graph collections to which outputs are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    weight_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for weights.
    bias_regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`. Used for biases.
  Returns:
    The output of the fully connected layer.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  with variable_scope.variable_op_scope([x], name, 'fully_connected'):
    dims = x.get_shape().dims
    if dims is None:
      raise ValueError('dims of x must be known but is None')
    if len(dims) < 2:
      raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
    num_input_units = dims[-1].value
    if num_input_units is None:
      raise ValueError('last dimension of x must be known but is None')
    dtype = x.dtype.base_dtype

    weight_collections = set(list(weight_collections or []) +
                             [ops.GraphKeys.VARIABLES])
    w = variable_scope.get_variable('weights',
                                    shape=[num_input_units, num_output_units],
                                    dtype=dtype,
                                    initializer=weight_init,
                                    collections=weight_collections,
                                    regularizer=weight_regularizer,
                                    trainable=trainable)
    x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
                                                         [-1, num_input_units])
    y = standard_ops.matmul(x_2_dim, w)

    if bias_init is not None:
      bias_collections = set(list(bias_collections or []) +
                             [ops.GraphKeys.VARIABLES])
      b = variable_scope.get_variable('bias',
                                      shape=[num_output_units],
                                      dtype=dtype,
                                      initializer=bias_init,
                                      collections=bias_collections,
                                      regularizer=bias_regularizer,
                                      trainable=trainable)

      y = nn.bias_add(y, b)

    if len(dims) > 2:
      out_shape = array_ops.unpack(array_ops.shape(x))
      out_shape[-1] = num_output_units

      y = array_ops.reshape(y, array_ops.pack(out_shape))

      static_shape = x.get_shape().as_list()
      static_shape[-1] = num_output_units
      y.set_shape(static_shape)

    return _apply_activation(y, activation_fn, output_collections)
Example #58
0
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  """Helper function for nce_loss and sampled_softmax_loss functions.

  Computes sampled output training logits and labels suitable for implementing
  e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see
  sampled_softmax_loss).

  Note: In the case where num_true > 1, we assign to each target class
  the target probability 1 / num_true so that the target probabilities
  sum to 1 per-example.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        `[num_classes, dim]`.  The (possibly-partitioned) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    labels: A `Tensor` of type `int64` and shape `[batch_size,
        num_true]`. The target classes.  Note that this format differs from
        the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    num_true: An `int`.  The number of target classes per training example.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    subtract_log_q: A `bool`.  whether to subtract the log expected count of
        the labels in the sample to get the logits of the true labels.
        Default is True.  Turn off for Negative Sampling.
    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        False.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).
  Returns:
    out_logits, out_labels: `Tensor` objects each with shape
        `[batch_size, num_true + num_sampled]`, for passing to either
        `nn.sigmoid_cross_entropy_with_logits` (NCE) or
        `nn.softmax_cross_entropy_with_logits` (sampled softmax).
  """

  if not isinstance(weights, list):
    weights = [weights]

  with ops.op_scope(
      weights + [biases, inputs, labels], name, "compute_sampled_logits"):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: [num_sampled] tensor
    #   true_expected_count shape = [batch_size, 1] tensor
    #   sampled_expected_count shape = [num_sampled] tensor
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # labels_flat is a [batch_size * num_true] tensor
    # sampled is a [num_sampled] int tensor
    all_ids = array_ops.concat(0, [labels_flat, sampled])

    # weights shape is [num_classes, dim]
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)
    all_b = embedding_ops.embedding_lookup(biases, all_ids)
    # true_w shape is [batch_size * num_true, dim]
    # true_b is a [batch_size * num_true] tensor
    true_w = array_ops.slice(
        all_w, [0, 0], array_ops.pack([array_ops.shape(labels_flat)[0], -1]))
    true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    # Lookup weights and biases for sampled labels.
    #   sampled_w shape is [num_sampled, dim]
    #   sampled_b is a [num_sampled] float tensor
    sampled_w = array_ops.slice(
        all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
    sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # sampled_b has shape [num_sampled]
    # Apply X*W'+B, which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(inputs,
                                     sampled_w,
                                     transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(math_ops.cast(
          acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          0,
          [array_ops.shape(labels)[:1], array_ops.expand_dims(num_sampled, 0)])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at col 0.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels sum
    # to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(
        1, [array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)])

  return out_logits, out_labels
Example #59
0
def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
  """RNN decoder with attention for the sequence-to-sequence model.

  Args:
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    output_size: size of the output vectors; if None, we use cell.output_size.
    num_heads: number of attention heads that read from attention_states.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    dtype: The dtype to use for the RNN initial state (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states -- useful when we wish to resume decoding from a previously
      stored decoder state and attention states.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
      [batch_size x output_size]. These represent the generated outputs.
      Output i is computed from input i (which is either i-th decoder_inputs or
      loop_function(output {i-1}, i)) as follows. First, we run the cell
      on a combination of the input and previous attention masks:
        cell_output, new_state = cell(linear(input, prev_attn), prev_state).
      Then, we calculate new attention masks:
        new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
      and then we calculate the output:
        output = linear(cell_output, new_attn).
    state: The state of each decoder cell the final time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, or shapes
      of attention_states are not set.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if not attention_states.get_shape()[1:2].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with vs.variable_scope(scope or "attention_decoder"):
    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for a in xrange(num_heads):
      k = vs.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(vs.get_variable("AttnV_%d" % a, [attention_vec_size]))

    state = initial_state

    def attention(query):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      for a in xrange(num_heads):
        with vs.variable_scope("Attention_%d" % a):
          y = rnn_cell.linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
          a = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size]))
      return ds

    outputs = []
    prev = None
    batch_attn_size = array_ops.pack([batch_size, attn_size])
    attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
             for _ in xrange(num_heads)]
    for a in attns:  # Ensure the second shape of attention vectors is set.
      a.set_shape([None, attn_size])
    if initial_state_attention:
      attns = attention(initial_state)
    for i in xrange(len(decoder_inputs)):
      if i > 0:
        vs.get_variable_scope().reuse_variables()
      inp = decoder_inputs[i]
      # If loop_function is set, we use it instead of decoder_inputs.
      if loop_function is not None and prev is not None:
        with vs.variable_scope("loop_function", reuse=True):
          inp = array_ops.stop_gradient(loop_function(prev, i))
      # Merge input and previous attentions into one vector of the right size.
      x = rnn_cell.linear([inp] + attns, cell.input_size, True)
      # Run the RNN.
      cell_output, state = cell(x, state)
      # Run the attention mechanism.
      if i == 0 and initial_state_attention:
        with vs.variable_scope(vs.get_variable_scope(), reuse=True):
          attns = attention(state)
      else:
        attns = attention(state)

      with vs.variable_scope("AttnOutputProjection"):
        output = rnn_cell.linear([cell_output] + attns, output_size, True)
      if loop_function is not None:
        # We do not propagate gradients over the loop function.
        prev = array_ops.stop_gradient(output)
      outputs.append(output)

  return outputs, state
Example #60
0
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
    """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are be assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.


  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if combiner is None:
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if embedding_weights is None or len(embedding_weights) < 1:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    dtype = sparse_weights.dtype if sparse_weights is not None else None
    if isinstance(embedding_weights, variables.PartitionedVariable):
        embedding_weights = list(embedding_weights)
    embedding_weights = [
        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
    ]

    contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                                [sparse_weights])

    with ops.name_scope(name, "embedding_lookup", embedding_weights +
                        [sparse_ids, sparse_weights]) as scope:
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.shape
        original_rank_dim = sparse_ids.shape.get_shape()[0]
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim.value is None else
                         original_rank_dim.value)
        sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
            math_ops.reduce_prod(
                array_ops.slice(original_shape, [0], [original_rank - 1])),
            array_ops.gather(original_shape, original_rank - 1)
        ])
        if sparse_weights is not None:
            sparse_weights = ops.SparseTensor(sparse_ids.indices,
                                              sparse_weights.values,
                                              sparse_ids.shape)

        # Prune invalid ids and weights.
        sparse_ids, sparse_weights = _prune_invalid_ids(
            sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result = embedding_ops.embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=None if default_id is None else scope)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.pack([1, array_ops.shape(result)[1]]))

            result = math_ops.select(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name=scope)

        # Reshape back from linear ids back into higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat(0, [
                array_ops.slice(math_ops.cast(original_shape, dtypes.int32),
                                [0], [original_rank - 1]),
                array_ops.slice(array_ops.shape(result), [1], [-1])
            ]))
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (original_rank_dim - 1).value).concatenate(
                    result.get_shape()[1:]))
        return final_result