Example #1
def frames(signal, frame_length, frame_step, name=None):
  """Frame a signal into overlapping frames.

  May be used in front of spectral functions.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frames(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `Tensor` of shape `[batch_size, signal_length]`.
    frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
    frame_step: An `int32` or `int64` `Tensor`. The step between frames.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.

  Raises:
    ValueError: if signal does not have rank 2.
  """
  with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")

    signal_rank = signal.shape.ndims

    if signal_rank != 2:
      raise ValueError(
          "expected signal to have rank 2 but was " + str(signal_rank))

    signal_length = array_ops.shape(signal)[1]

    num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
    num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)

    pad_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = array_ops.pad(signal, [[0, 0], [0,
                                                 pad_length - signal_length]])

    indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
    indices_frames = array_ops.tile(indices_frame, [num_frames, 1])

    indices_step = array_ops.expand_dims(
        math_ops.range(num_frames) * frame_step, 1)
    indices_steps = array_ops.tile(indices_step, [1, frame_length])

    indices = indices_frames + indices_steps

    # TODO(androbin): remove `transpose` when `gather` gets `axis` support
    pad_signal = array_ops.transpose(pad_signal)
    signal_frames = array_ops.gather(pad_signal, indices)
    signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])

    return signal_frames
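
The index arithmetic above is built entirely from `tile`. A minimal NumPy sketch (illustration only, not the TF code path), using toy frame parameters:

```python
import numpy as np

# One row of within-frame offsets is tiled down, one column of frame start
# positions is tiled across, and their sum is the 2-D gather index grid.
frame_length, frame_step, num_frames = 4, 2, 3
indices_frames = np.tile(np.arange(frame_length)[None, :], [num_frames, 1])
indices_steps = np.tile((np.arange(num_frames) * frame_step)[:, None],
                        [1, frame_length])
indices = indices_frames + indices_steps
print(indices)
# [[0 1 2 3]
#  [2 3 4 5]
#  [4 5 6 7]]
```
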
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  The result is that finished beams allocate all probability mass to eos and
  unfinished beams remain unchanged.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with all
    probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]
  # All finished examples are replaced with a vector that has all
  # probability on EOS
  finished_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=probs.dtype,
      on_value=0.,
      off_value=probs.dtype.min)
  finished_probs = array_ops.tile(
      array_ops.reshape(finished_row, [1, 1, -1]),
      array_ops.concat([array_ops.shape(finished), [1]], 0))
  finished_mask = array_ops.tile(
      array_ops.expand_dims(finished, 2), [1, 1, vocab_size])

  return array_ops.where(finished_mask, finished_probs, probs)
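
A hedged NumPy sketch of the masking effect (made-up values, not the library code):

```python
import numpy as np

# Finished beams get a row that puts all mass on EOS (0 log-prob there,
# dtype-min elsewhere); unfinished beams keep their original log-probs.
probs = np.log([[[0.2, 0.5, 0.3],     # beam 0: unfinished
                 [0.1, 0.1, 0.8]]])   # beam 1: finished
finished = np.array([[False, True]])
eos_token = 2
vocab_size = probs.shape[-1]

finished_row = np.full(vocab_size, np.finfo(probs.dtype).min)
finished_row[eos_token] = 0.0
finished_mask = np.tile(finished[..., None], [1, 1, vocab_size])
masked = np.where(finished_mask, finished_row, probs)
```
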
Example #3
def _SumGrad(op, grad):
  """Gradient for Sum."""
  # Fast path for when reducing to a scalar and ndims is known: adds only
  # Reshape and Tile ops (and possibly a Shape).
  input_0_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  if input_0_shape is not None:
    axes = tensor_util.constant_value(op.inputs[1])
    if axes is not None:
      rank = len(input_0_shape)
      if np.array_equal(axes, np.arange(rank)):  # Reduce all dims.
        grad = array_ops.reshape(grad, [1] * rank)
        # If shape is not fully defined (but rank is), we use Shape.
        if None not in input_0_shape:
          input_shape = input_0_shape
        else:
          input_shape = array_ops.shape(op.inputs[0])
        return [array_ops.tile(grad, input_shape), None]

  input_shape = array_ops.shape(op.inputs[0])
  # TODO(apassos) remove this once device placement for eager ops makes more
  # sense.
  with ops.colocate_with(input_shape):
    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
    tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  return [array_ops.tile(grad, tile_scaling), None]
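
A small worked NumPy sketch of the reshape-then-tile step above (toy shapes, not the actual op registration):

```python
import numpy as np

# Forward: y = x.sum(axis=1) for x of shape [2, 3].
grad_y = np.array([10.0, 20.0])            # upstream gradient, shape [2]
output_shape_kept_dims = [2, 1]            # reduced_shape(input_shape, axes=[1])
tile_scaling = [1, 3]                      # input_shape // kept_dims
grad_x = np.tile(np.reshape(grad_y, output_shape_kept_dims), tile_scaling)
# grad_x == [[10. 10. 10.]
#            [20. 20. 20.]], i.e. each input element receives its row's gradient.
```
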
Example #4
  def to_weighted_sum(self,
                      input_tensor,
                      num_outputs=1,
                      weight_collections=None,
                      trainable=True):
    """Returns a Tensor as linear predictions and a list of created Variable."""
    dimension = self.source_column.dimension
    batch_size = array_ops.shape(input_tensor)[0]

    if dimension > 1:
      i1 = array_ops.reshape(array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, batch_size), 1), [1, dimension]), [-1])
      i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
      # Flatten the bucket indices and unique them across dimensions
      # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
      # TODO(chapelle): move that logic to insert_transformed_feature to ensure
      #   unique buckets across dimensions after crossing.
      bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
    else:
      # Simpler indices when dimension=1
      i1 = math_ops.range(0, batch_size)
      i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
      bucket_indices = array_ops.reshape(input_tensor, [-1])

    indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
    shape = math_ops.to_int64(array_ops.pack([batch_size, 1]))
    sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
    vocab_size = self.length * self.source_column.dimension

    return _create_embedding_lookup(
        sparse_id_values, vocab_size, num_outputs,
        _add_variable_collection(weight_collections), 0., "sum",
        trainable, self.name + "_weights")
Example #5
  def testShapeFunctionEdgeCases(self):
    # Unknown multiples shape.
    inp = constant_op.constant(0.0, shape=[4, 4, 4, 4])
    tiled = array_ops.tile(inp, array_ops.placeholder(dtypes.int32))
    self.assertEqual([None, None, None, None], tiled.get_shape().as_list())

    # Unknown input shape.
    inp = array_ops.placeholder(dtypes.float32)
    tiled = array_ops.tile(inp, [2, 2, 2, 2])
    self.assertEqual([None, None, None, None], tiled.get_shape().as_list())

    # Unknown input and multiples shape.
    inp = array_ops.placeholder(dtypes.float32)
    tiled = array_ops.tile(inp, array_ops.placeholder(dtypes.int32))
    self.assertIs(None, tiled.get_shape().ndims)

    # Known input and partially known multiples.
    inp = constant_op.constant(0.0, shape=[1, 1])
    tiled = array_ops.tile(inp, [array_ops.placeholder(dtypes.int32), 7])
    self.assertEqual([None, 7], tiled.get_shape().as_list())

    # Mismatched input rank and multiples length.
    inp = array_ops.placeholder(dtypes.float32, shape=[None, None])
    with self.assertRaises(ValueError):
      tiled = array_ops.tile(
          inp, array_ops.placeholder(
              dtypes.int32, shape=[3]))
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """
  max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0]
  batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1]
  beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2]

  # Generate beam ids that will be reordered by gather_tree.
  beam_ids = array_ops.expand_dims(
      array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  max_sequence_lengths = math_ops.to_int32(
      math_ops.reduce_max(sequence_length, axis=1))
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  sorted_beam_ids = array_ops.where(
      math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width])
  batch_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  gather_from = t
  final_shape = array_ops.shape(gather_from)
  gather_from = array_ops.reshape(
      gather_from, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(gather_from, indices)
  ordered = array_ops.reshape(ordered, final_shape)

  return ordered
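
A rough NumPy sketch of how the `gather_nd` index grid is assembled (the sorted beam ids come from `gather_tree` and are simply invented here):

```python
import numpy as np

max_time, batch_size, beam_width = 2, 1, 3
time_ind = np.tile(np.arange(max_time).reshape(-1, 1, 1),
                   [1, batch_size, beam_width])
batch_ind = np.tile(np.arange(batch_size).reshape(-1, 1, 1),
                    [1, max_time, beam_width]).transpose(1, 0, 2)
sorted_beam_ids = np.array([[[0, 1, 2]],
                            [[1, 0, 2]]])          # placeholder values
indices = np.stack([time_ind, batch_ind, sorted_beam_ids], -1)
# indices[t, b, k] == [t, b, sorted_beam_ids[t, b, k]], ready for gather_nd.
```
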
Example #7
 def testInvalidDim(self):
   with self.test_session():
     inp = np.random.rand(4, 1).astype("f")
     a = constant_op.constant(
         [float(x) for x in inp.ravel(order="C")],
         shape=[4, 1],
         dtype=dtypes.float32)
     # Wrong length of multiples.
     with self.assertRaises(ValueError):
       array_ops.tile(a, [1, 4, 2])
     # Wrong rank for multiples.
     with self.assertRaises(ValueError):
       array_ops.tile(a, [[2, 3], [3, 4]]).eval()
 def make_tril_ids(n):
   """Internal helper to create vector of linear indices into y."""
   cols = array_ops.reshape(array_ops.tile(math_ops.range(n), [n]), [n, n])
   rows = array_ops.tile(
       array_ops.expand_dims(math_ops.range(n), -1), [1, n])
   pred = math_ops.greater(cols, rows)
   tril_ids = array_ops.tile(array_ops.reshape(
       math_ops.cumsum(math_ops.range(n)), [n, 1]), [1, n]) + cols
   tril_ids = math_ops.select(pred,
                              array_ops.zeros([n, n], dtype=dtypes.int32),
                              tril_ids + 1)
   tril_ids = array_ops.reshape(tril_ids, [-1])
   return tril_ids
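
A NumPy sketch of the same construction for a small `n` (illustration only; `math_ops.select` is the pre-1.0 name of what later became `tf.where`):

```python
import numpy as np

n = 3
cols = np.tile(np.arange(n), [n]).reshape(n, n)
rows = np.tile(np.arange(n)[:, None], [1, n])
tril_ids = np.tile(np.cumsum(np.arange(n))[:, None], [1, n]) + cols
tril_ids = np.where(cols > rows, 0, tril_ids + 1).reshape(-1)
# tril_ids -> [1 0 0 2 3 0 4 5 6]: 1-based positions into a packed lower
# triangle, with 0 marking the (zeroed) upper-triangular entries.
```
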
Example #9
  def _initialize_variables(self, data, initial_means=None):
    """Initializes variables.

    Args:
      data: a list of Tensors with data, each row is a new example.
      initial_means: a Tensor with a matrix of means.
    """
    first_shard = data[0]
    # Initialize means: num_classes X 1 X dimensions.
    if initial_means is not None:
      means = array_ops.expand_dims(initial_means, 1)
    else:
      # Sample data randomly
      means = array_ops.expand_dims(
          _init_clusters_random(data, self._num_classes, self._random_seed), 1)

    # Initialize covariances.
    if self._covariance_type == FULL_COVARIANCE:
      cov = _covariance(first_shard, False) + self._min_var
      # A matrix per class, num_classes X dimensions X dimensions
      covs = array_ops.tile(
          array_ops.expand_dims(cov, 0), [self._num_classes, 1, 1])
    elif self._covariance_type == DIAG_COVARIANCE:
      cov = _covariance(first_shard, True) + self._min_var
      # A diagonal per row, num_classes X dimensions.
      covs = array_ops.tile(
          array_ops.expand_dims(array_ops.diag_part(cov), 0),
          [self._num_classes, 1])

    with ops.colocate_with(self._cluster_centers_initialized):
      initialized = control_flow_ops.with_dependencies(
          [means, covs],
          array_ops.identity(self._cluster_centers_initialized))
    self._init_ops = []
    with ops.colocate_with(self._means):
      init_means = state_ops.assign(self._means, means, validate_shape=False)
      init_means = control_flow_ops.with_dependencies(
          [init_means],
          state_ops.assign(self._cluster_centers_initialized, True))
      self._init_ops.append(control_flow_ops.cond(initialized,
                                                  control_flow_ops.no_op,
                                                  lambda: init_means).op)
    with ops.colocate_with(self._covs):
      init_covs = state_ops.assign(self._covs, covs, validate_shape=False)
      init_covs = control_flow_ops.with_dependencies(
          [init_covs],
          state_ops.assign(self._cluster_centers_initialized, True))
      self._init_ops.append(control_flow_ops.cond(initialized,
                                                  control_flow_ops.no_op,
                                                  lambda: init_covs).op)
    def runFiniteDifferences(self,
                             shapes,
                             dtypes=(dtypes_lib.float32, dtypes_lib.float64),
                             scalarTest=False):
        with self.test_session(use_gpu=False):
            for shape in shapes:
                for batch in False, True:
                    for dtype in dtypes:
                        if not scalarTest:
                            x = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype)
                            tensor = math_ops.matmul(x, array_ops.transpose(x)) / shape[0]
                        else:
                            # This is designed to be a faster test for larger matrices.
                            x = constant_op.constant(np.random.randn(), dtype)
                            R = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype)
                            e = math_ops.mul(R, x)
                            tensor = math_ops.matmul(e, array_ops.transpose(e)) / shape[0]

                        # Inner-most matrices in tensor are positive definite.
                        if batch:
                            tensor = array_ops.tile(array_ops.expand_dims(tensor, 0), [4, 1, 1])
                        y = linalg_ops.cholesky(tensor)
                        if scalarTest:
                            y = math_ops.reduce_mean(y)
                        error = gradient_checker.compute_gradient_error(x, x._shape_as_list(), y, y._shape_as_list())
                        tf_logging.info("error = %f", error)
                        if dtype == dtypes_lib.float64:
                            self.assertLess(error, 1e-5)
                        else:
                            self.assertLess(error, 3e-3)
 def _entropy(self):
   if (not self.distribution.is_continuous or
       not self.bijector.is_constant_jacobian):
     raise NotImplementedError("entropy is not implemented")
   # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
   # can be shown that:
   #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
   # If is_constant_jacobian then:
   #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
   # where c can be anything.
   entropy = self.distribution.entropy()
   if self._is_maybe_event_override:
     # H[X] = sum_i H[X_i] if X_i are mutually independent.
     # This means that a reduce_sum is a simple rescaling.
     entropy *= math_ops.cast(math_ops.reduce_prod(self._override_event_shape),
                              dtype=entropy.dtype.base_dtype)
   if self._is_maybe_batch_override:
     new_shape = array_ops.concat([
         _ones_like(self._override_batch_shape),
         self.distribution.batch_shape_tensor()
     ], 0)
     entropy = array_ops.reshape(entropy, new_shape)
     multiples = array_ops.concat([
         self._override_batch_shape,
         _ones_like(self.distribution.batch_shape_tensor())
     ], 0)
     entropy = array_ops.tile(entropy, multiples)
   dummy = array_ops.zeros([], self.dtype)
   entropy -= self.bijector.inverse_log_det_jacobian(dummy)
   entropy.set_shape(self.batch_shape)
   return entropy
Example #12
def _align_matrices(x, y):
  """Aligns x and y tensors to allow computations over pairs of their rows."""
  x_matrix = _to_matrix(x)
  y_matrix = _to_matrix(y)
  x_shape = x_matrix.shape
  y_shape = y_matrix.shape
  if y_shape[1] != x_shape[1]:  # dimensions do not match.
    raise ValueError(
        'The innermost dimensions of the input tensors should match. Given: {} '
        'vs {}.'.format(y_shape[1], x_shape[1]))

  x_tile = array_ops.tile(
      array_ops.expand_dims(x_matrix, 1), [1, y_shape[0], 1])
  y_tile = array_ops.tile(
      array_ops.expand_dims(y_matrix, 0), [x_shape[0], 1, 1])
  return x_tile, y_tile
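
An illustrative NumPy sketch of the pairwise alignment (toy data): position `[i, j]` of the tiled tensors pairs row `i` of `x` with row `j` of `y`.

```python
import numpy as np

x = np.arange(6.0).reshape(3, 2)    # 3 rows, feature dim 2
y = np.arange(4.0).reshape(2, 2)    # 2 rows, feature dim 2
x_tile = np.tile(x[:, None, :], [1, y.shape[0], 1])    # shape [3, 2, 2]
y_tile = np.tile(y[None, :, :], [x.shape[0], 1, 1])    # shape [3, 2, 2]
pairwise_sqdist = ((x_tile - y_tile) ** 2).sum(-1)     # e.g. squared distances
```
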
  def testPrefetchBufferUtilization(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).map(
        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1)
    dataset = dataset_transformation(dataset, aggregator)
    iterator = dataset_ops.make_initializable_iterator(dataset)
    next_element = iterator.get_next()
    summary_t = aggregator.get_summary()

    with self.cached_session() as sess:
      self.evaluate(iterator.initializer)
      for i in range(100):
        self.assertAllEqual(
            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
        summary_str = self.evaluate(summary_t)
        self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                    float(i + 1))
        self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
        self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
        self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
                                    0, 1)
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)
      summary_str = self.evaluate(summary_t)
      self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                  100)
 def dataset_fn():
   dataset = dataset_ops.Dataset.range(10).map(
       lambda x: array_ops.tile([x], ops.convert_to_tensor([x])),
       num_parallel_calls=optimization.AUTOTUNE)
   options = dataset_ops.Options()
   options.experimental_autotune = True
   return dataset.with_options(options)
Example #15
  def inverse_stft_window_fn_inner(frame_length, dtype):
    """Computes a window that can be used in `inverse_stft`.

    Args:
      frame_length: An integer scalar `Tensor`. The window length in samples.
      dtype: Data type of waveform passed to `stft`.

    Returns:
      A window suitable for reconstructing original waveform in `inverse_stft`.

    Raises:
      ValueError: If `frame_length` is not scalar, `forward_window_fn` is not a
      callable that takes a window length and a `dtype` keyword argument and
      returns a `[window_length]` `Tensor` of samples in the provided datatype,
      or `frame_step` is not scalar.
    """
    with ops.name_scope(name, 'inverse_stft_window_fn', [forward_window_fn]):
      frame_length = ops.convert_to_tensor(frame_length, name='frame_length')
      frame_length.shape.assert_has_rank(0)

      # Use equation 7 from Griffin + Lim.
      forward_window = forward_window_fn(frame_length, dtype=dtype)
      denom = math_ops.square(forward_window)
      overlaps = -(-frame_length // frame_step)  # Ceiling division.
      denom = array_ops.pad(denom, [(0, overlaps * frame_step - frame_length)])
      denom = array_ops.reshape(denom, [overlaps, frame_step])
      denom = math_ops.reduce_sum(denom, 0, keep_dims=True)
      denom = array_ops.tile(denom, [overlaps, 1])
      denom = array_ops.reshape(denom, [overlaps * frame_step])

      return forward_window / denom[:frame_length]
def _ApplyLengthsToBatch(sequence_lengths, tf_output):
  # TODO(drpng): just use Update so that we don't carry over the gradients?
  """Sets the output to be zero at the end of the sequence."""
  # output is batch major.
  batch_size, max_time, vector_size = tf_output.shape
  output_time = array_ops.tile(math_ops.range(0, max_time), [batch_size])
  output_time = array_ops.reshape(output_time, [batch_size, max_time])
  lengths = array_ops.tile(
      array_ops.reshape(sequence_lengths, [-1, 1]), [1, max_time])
  is_less = math_ops.cast(
      math_ops.less(output_time, lengths), dtype=dtypes.float32)
  keep_mask = array_ops.tile(
      array_ops.expand_dims(is_less, -1),
      [1, 1, vector_size])
  final_output = keep_mask * tf_output
  return final_output
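
A small NumPy sketch of the masking pattern (invented values): time steps at or beyond each sequence's length are zeroed across the vector dimension.

```python
import numpy as np

batch_size, max_time, vector_size = 2, 3, 2
tf_output = np.ones([batch_size, max_time, vector_size])
sequence_lengths = np.array([1, 3])

output_time = np.tile(np.arange(max_time), [batch_size]).reshape(
    batch_size, max_time)
lengths = np.tile(sequence_lengths[:, None], [1, max_time])
is_less = (output_time < lengths).astype(tf_output.dtype)
keep_mask = np.tile(is_less[..., None], [1, 1, vector_size])
final_output = keep_mask * tf_output   # row 0 keeps only step 0, row 1 keeps all
```
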
Example #17
def _BiasAddGradGrad(op, received_grad):
  """Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  """

  try:
    data_format = op.get_attr("data_format")
  except ValueError:
    data_format = None

  shape = array_ops.shape(op.inputs[0])
  rank = array_ops.rank(op.inputs[0])
  bias_shape = array_ops.shape(received_grad)

  if data_format == b"NCHW":
    expanded_shape = array_ops.concat([
        array_ops.ones_like(shape[:-3]), bias_shape,
        array_ops.ones_like(shape[-2:])
    ], 0)
    tile_mults = array_ops.concat([shape[:-3], [1], shape[-2:]], 0)
  else:
    expanded_shape = array_ops.concat(
        [array_ops.ones_like(shape[:-1]), bias_shape], 0)
    tile_mults = array_ops.concat([shape[:-1], [1]], 0)

  expanded_grad = array_ops.reshape(received_grad, expanded_shape)
  return array_ops.tile(expanded_grad, tile_mults)
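
A hedged NumPy sketch of the default (NHWC) branch with toy shapes: the per-channel gradient is reshaped to rank 4 with singleton leading dims and tiled back over the batch and spatial dims.

```python
import numpy as np

shape = np.array([2, 3, 3, 4])        # N, H, W, C of the original BiasAdd input
received_grad = np.arange(4.0)        # gradient w.r.t. the bias, shape [C]

expanded_shape = np.concatenate([np.ones_like(shape[:-1]),
                                 [received_grad.size]])   # [1, 1, 1, 4]
tile_mults = np.concatenate([shape[:-1], [1]])            # [2, 3, 3, 1]
expanded_grad = np.tile(np.reshape(received_grad, expanded_shape), tile_mults)
# expanded_grad.shape == (2, 3, 3, 4)
```
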
Example #18
  def testUnknownInputShape(self):
    """Importing can call _TileShape without shape of <multiples> known."""
    with self.test_session():
      inp = array_ops.placeholder(dtypes.float32)  # unknown shape
      multiples = constant_op.constant([1, 2, 3, 4], dtype=np.int32)
      tiled = array_ops.tile(inp, multiples)
      gdef = tiled.graph.as_graph_def()

      # Move the tile op to the start of the graph so that shapes of its inputs
      # are not available when the shape function runs on import.
      swapped = False
      for i, n in enumerate(gdef.node):
        if n.op == "Tile":
          # Swap tile op to be first in gdef.node
          assert i != 0
          new_node = node_def_pb2.NodeDef()
          new_node.CopyFrom(gdef.node[i])
          gdef.node[i].CopyFrom(gdef.node[0])
          gdef.node[0].CopyFrom(new_node)
          swapped = True
      assert swapped

      tiled_imported, = importer.import_graph_def(
          gdef, return_elements=[tiled.name])
      self.assertEqual(4, tiled_imported.get_shape().ndims)
Example #19
def _SumGrad(op, grad):
  """Gradient for Sum."""
  input_shape = array_ops.shape(op.inputs[0])
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  return [array_ops.tile(grad, tile_scaling), None]
Example #20
 def _model_start_state_placeholders(
     self, batch_size_tensor, static_batch_size=None):
   """Creates placeholders with zeroed start state for the current model."""
   gathered_state = {}
   # Models may not know the shape of their state without creating some
   # variables/ops. Avoid polluting the default graph by making a new one. We
   # use only static metadata from the returned Tensors.
   with ops.Graph().as_default():
     self._model.initialize_graph()
     # Evaluate the initial state as same-dtype "zero" values. These zero
     # constants aren't used, but are necessary for feeding to
     # placeholder_with_default for the "cold start" case where state is not
     # fed to the model.
     def _zeros_like_constant(tensor):
       return tensor_util.constant_value(array_ops.zeros_like(tensor))
     start_state = nest.map_structure(
         _zeros_like_constant, self._model.get_start_state())
   for prefixed_state_name, state in ts_head_lib.state_to_dictionary(
       start_state).items():
     state_shape_with_batch = tensor_shape.TensorShape(
         (static_batch_size,)).concatenate(state.shape)
     default_state_broadcast = array_ops.tile(
         state[None, ...],
         multiples=array_ops.concat(
             [batch_size_tensor[None],
              array_ops.ones(len(state.shape), dtype=dtypes.int32)],
             axis=0))
     gathered_state[prefixed_state_name] = array_ops.placeholder_with_default(
         input=default_state_broadcast,
         name=prefixed_state_name,
         shape=state_shape_with_batch)
   return gathered_state
  def get_placements(self, *args, **kwargs):
    num_children = self.hparams.num_children
    with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)):
      actions_cache = variable_scope.get_local_variable(
          "actions_cache",
          initializer=init_ops.zeros_initializer,
          dtype=dtypes.int32,
          shape=[num_children, self.num_groups],
          trainable=False)

    x = array_ops.tile(self.seq2seq_input_layer, [num_children, 1, 1])
    last_c, last_h, attn_mem = self.encode(x)
    actions, log_probs = {}, {}
    actions["sample"], log_probs["sample"] = (
        self.decode(
            x, last_c, last_h, attn_mem, mode="sample"))
    actions["target"], log_probs["target"] = (
        self.decode(
            x,
            last_c,
            last_h,
            attn_mem,
            mode="target",
            y=actions_cache))
    actions["greedy"], log_probs["greedy"] = (
        self.decode(
            x, last_c, last_h, attn_mem, mode="greedy"))
    actions["sample"] = control_flow_ops.cond(
        self.global_step < self.hparams.stop_sampling,
        lambda: state_ops.assign(actions_cache, actions["sample"]),
        lambda: state_ops.assign(actions_cache, actions["target"]))
    self.actions_cache = actions_cache

    return actions, log_probs
  def testTile(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      multiple = array_ops.placeholder(dtype='int32')
      tile = array_ops.tile(conv, multiple)
      output = array_ops.identity(tile)

      multiple_val = [2, 3, 4, 1]
      with session.Session() as sess:
        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                multiple: multiple_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
  def testBytesProduced(self):
    stats_aggregator = stats_ops.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).map(
        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
            stats_ops.bytes_produced_stats("bytes_produced")).apply(
                stats_ops.set_stats_aggregator(stats_aggregator))
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    summary_t = stats_aggregator.get_summary()

    with self.test_session() as sess:
      sess.run(iterator.initializer)
      expected_sum = 0.0
      for i in range(100):
        self.assertAllEqual(
            np.array([i] * i, dtype=np.int64), sess.run(next_element))
        summary_str = sess.run(summary_t)
        self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
        expected_sum += i * 8.0
        self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
      summary_str = sess.run(summary_t)
      self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
      self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
 def testPrefetchBufferUtilization(self, dataset_transformation):
   aggregator = stats_aggregator.StatsAggregator()
   dataset = dataset_ops.Dataset.range(100).map(
       lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1)
   dataset = dataset_transformation(dataset, aggregator)
   next_element = self.getNext(dataset, requires_initialization=True)
   for i in range(100):
     self.assertAllEqual(
         np.array([i] * i, dtype=np.int64), self.evaluate(next_element()))
     summary_str = self.evaluate(aggregator.get_summary())
     self._assertSummaryHasCount(
         summary_str,
         self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
         float(i + 1))
     self._assertSummaryContains(
         summary_str,
         self.regexForNodeName("PrefetchDataset", "buffer_capacity"))
     self._assertSummaryContains(
         summary_str, self.regexForNodeName("PrefetchDataset", "buffer_size"))
     self._assertSummaryHasRange(
         summary_str,
         self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 0, 1)
   with self.assertRaises(errors.OutOfRangeError):
     self.evaluate(next_element())
   summary_str = self.evaluate(aggregator.get_summary())
   self._assertSummaryHasCount(
       summary_str,
       self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 100)
Example #25
 def testTypes(self):
   types_to_test = {
       "bool": (dtypes.bool, bool),
       "float32": (dtypes.float32, float),
       "float64": (dtypes.float64, float),
       "complex64": (dtypes.complex64, complex),
       "complex128": (dtypes.complex128, complex),
       "uint8": (dtypes.uint8, int),
       "int8": (dtypes.int8, int),
       "int16": (dtypes.int16, int),
       "int32": (dtypes.int32, int),
       "int64": (dtypes.int64, int),
       bytes: (dtypes.string, bytes)
   }
   for dtype_np, (dtype_tf, cast) in types_to_test.items():
     with self.cached_session(use_gpu=True):
       inp = np.random.rand(4, 1).astype(dtype_np)
       a = constant_op.constant(
           [cast(x) for x in inp.ravel(order="C")],
           shape=[4, 1],
           dtype=dtype_tf)
       tiled = array_ops.tile(a, [1, 4])
       result = self.evaluate(tiled)
     self.assertEqual(result.shape, (4, 4))
     self.assertEqual([4, 4], tiled.get_shape())
     self.assertAllEqual(result, np.tile(inp, (1, 4)))
Example #26
def _centered_bias_step(targets, loss_fn, num_label_columns):
  centered_bias = ops.get_collection("centered_bias")
  batch_size = array_ops.shape(targets)[0]
  logits = array_ops.reshape(
      array_ops.tile(centered_bias[0], [batch_size]),
      [batch_size, num_label_columns])
  loss = loss_fn(logits, targets)
  return train.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)
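
A quick NumPy sketch of the bias broadcast (toy numbers): the `[num_label_columns]` bias is tiled `batch_size` times and reshaped into per-example logits.

```python
import numpy as np

centered_bias = np.array([0.5, -1.0])   # num_label_columns == 2
batch_size = 3
logits = np.tile(centered_bias, [batch_size]).reshape(batch_size, 2)
# [[ 0.5 -1. ]
#  [ 0.5 -1. ]
#  [ 0.5 -1. ]]
```
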
Example #27
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  _, new_output_shape, input_shape = _ReductionGradAssist(op)
  tile_scaling = input_shape // new_output_shape
  grad = array_ops.reshape(grad * op.outputs[0], new_output_shape)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
Example #28
 def testEmpty(self):
   with self.test_session():
     inp = np.random.rand(2, 3).astype(np.float32)
     a = constant_op.constant(inp)
     tiled = array_ops.tile(a, [5, 0])
     result = tiled.eval()
   self.assertEqual(result.shape, (10, 0))
   self.assertEqual([10, 0], tiled.get_shape())
Example #29
 def testGradientStridedReductionGC(self):
   with self.test_session():
     inp = np.random.rand(4, 2).astype("f")
     a = constant_op.constant(
         [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32)
     tiled = array_ops.tile(a, [1, 2])
     err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4])
   self.assertLess(err, 1e-3)
Example #30
def _tile_ragged_values(rt_input, multiples, const_multiples=None):
  """Builds flat_values tensor for a tiled `RaggedTensor`.

  Returns a tensor that repeats the values in
  `rt_input.flat_values` in the
  appropriate pattern to construct a `RaggedTensor` that tiles `rt_input` as
  specified by `multiples`.

  Args:
    rt_input: The `RaggedTensor` whose values should be repeated.
    multiples: A 1-D integer `tensor`, indicating how many times each dimension
      should be repeated.
    const_multiples: Optional constant value for multiples.  Used to skip tiling
      dimensions where `multiples=1`.

  Returns:
    A `Tensor` with the same type and rank as `rt_input.flat_values`.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> _tile_ragged_values(rt, [3, 2])
    [1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3]
    ```
  """
  ragged_rank = rt_input.ragged_rank
  nested_splits = rt_input.nested_row_splits

  # Pointers to the values in `rt_input.flat_values`.
  inner_value_ids = math_ops.range(nested_splits[-1][-1])

  # For each ragged dimension (working from the innermost to outermost),
  # expand `inner_value_ids` as necessary to tile that dimension.
  prev_splits = None
  for axis in range(ragged_rank, 0, -1):
    # Ragged splits for this dimension.
    splits = nested_splits[axis - 1]

    # Adjust splits so they point into `inner_value_ids` (instead of just
    # pointing into the next dimension's values).
    if prev_splits is not None:  # Not the first pass through the loop.
      splits = array_ops.gather(prev_splits * multiples[axis + 1], splits)

    # Repeat each element in this ragged dimension `multiples[axis]` times.
    if const_multiples is None or const_multiples[axis] != 1:
      inner_value_ids = ragged_util.repeat_ranges(inner_value_ids, splits,
                                                  multiples[axis])

    prev_splits = splits

  # Gather the tiled inner values.
  ragged_tiled_values = array_ops.gather(rt_input.flat_values, inner_value_ids)

  # Tile the flat_values for the uniform dimensions (i.e., for `axis=0` plus
  # `axis=range(ragged_rank, rank)`).
  inner_repeats = array_ops.concat([multiples[:1], multiples[ragged_rank + 1:]],
                                   axis=0)
  return array_ops.tile(ragged_tiled_values, inner_repeats)
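
A hedged NumPy sketch that reproduces the docstring example's flat values by hand: tiling `[[1, 2], [3]]` by `[3, 2]` repeats each row twice (inner multiple) and the whole row pattern three times (outer multiple).

```python
import numpy as np

rows = [np.array([1, 2]), np.array([3])]
outer, inner = 3, 2                      # multiples = [3, 2]
one_copy = np.concatenate([np.tile(r, inner) for r in rows])
flat_values = np.tile(one_copy, outer)
# [1 2 1 2 3 3 1 2 1 2 3 3 1 2 1 2 3 3], matching the docstring above.
```
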
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if variables_to_update is None:
        return
    y_true = ops.convert_to_tensor(y_true)
    y_pred = ops.convert_to_tensor(y_pred)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
            math_ops.cast(y_pred, dtype=dtypes.float32),
            math_ops.cast(y_true, dtype=dtypes.bool), sample_weight)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    thresholds = to_list(thresholds)
    num_thresholds = len(thresholds)
    num_predictions = array_ops.size(y_pred)

    # Reshape predictions and labels.
    predictions_2d = array_ops.reshape(y_pred, [1, -1])
    labels_2d = array_ops.reshape(math_ops.cast(y_true, dtype=dtypes.bool),
                                  [1, -1])

    # Tile the thresholds for every prediction.
    thresh_tiled = array_ops.tile(
        array_ops.expand_dims(array_ops.constant(thresholds), 1),
        array_ops.stack([1, num_predictions]))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

    if sample_weight is not None:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
        weights_tiled = array_ops.tile(array_ops.reshape(weights, [1, -1]),
                                       [num_thresholds, 1])
    else:
        weights_tiled = None

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=dtypes.float32)
        if weights is not None:
            label_and_pred *= weights
        return state_ops.assign_add(var,
                                    math_ops.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))
    return control_flow_ops.group(update_ops)
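
A minimal NumPy sketch of the threshold/prediction tiling above (toy values): predictions become one row per threshold and thresholds one column per prediction, so a single comparison yields the `[num_thresholds, num_predictions]` grid.

```python
import numpy as np

y_pred = np.array([0.1, 0.6, 0.9])
thresholds = np.array([0.3, 0.5, 0.8])

predictions_2d = y_pred.reshape(1, -1)
thresh_tiled = np.tile(thresholds[:, None], [1, y_pred.size])
preds_tiled = np.tile(predictions_2d, [thresholds.size, 1])
pred_is_pos = preds_tiled > thresh_tiled
# [[False  True  True]
#  [False  True  True]
#  [False False  True]]
```
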
Example #32
 def map_fn(x):
     return array_ops.tile(x, x)
Example #33
    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              train_op_fn=None):
        """See `Head`."""
        # Predict.
        with ops.name_scope('head'):
            with ops.name_scope(None, 'predictions', (logits, )):
                pred_keys = prediction_keys.PredictionKeys
                logits = _check_logits(logits, self.logits_dimension)
                logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
                two_class_logits = array_ops.concat(
                    (array_ops.zeros_like(logits), logits),
                    1,
                    name='two_class_logits')
                scores = nn.softmax(two_class_logits,
                                    name=pred_keys.PROBABILITIES)
                class_ids = array_ops.reshape(math_ops.argmax(two_class_logits,
                                                              axis=1), (-1, 1),
                                              name='classes')
                if self._label_vocabulary:
                    table = lookup_ops.index_to_string_table_from_tensor(
                        vocabulary_list=self._label_vocabulary,
                        name='class_string_lookup')
                    classes = table.lookup(class_ids)
                else:
                    classes = string_ops.as_string(class_ids,
                                                   name='str_classes')
                predictions = {
                    pred_keys.LOGITS: logits,
                    pred_keys.LOGISTIC: logistic,
                    pred_keys.PROBABILITIES: scores,
                    pred_keys.CLASS_IDS: class_ids,
                    pred_keys.CLASSES: classes,
                }
            if mode == model_fn.ModeKeys.PREDICT:
                batch_size = array_ops.shape(logistic)[0]
                export_class_list = self._label_vocabulary
                if not export_class_list:
                    export_class_list = string_ops.as_string([0, 1])
                export_output_classes = array_ops.tile(
                    input=array_ops.expand_dims(input=export_class_list,
                                                axis=0),
                    multiples=[batch_size, 1])
                classifier_output = export_output.ClassificationOutput(
                    scores=scores,
                    # `ClassificationOutput` requires string classes.
                    classes=export_output_classes)
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.PREDICT,
                    predictions=predictions,
                    export_outputs={
                        '':
                        classifier_output,  # to be same as other heads.
                        'classification':
                        classifier_output,  # to be called by name.
                        _DEFAULT_SERVING_KEY:
                        classifier_output,  # default
                        'regression':
                        export_output.RegressionOutput(value=logistic)
                    })

            # Eval.
            unweighted_loss, processed_labels = self.create_loss(
                features=features, mode=mode, logits=logits, labels=labels)
            weights = _weights(features, self._weight_column)
            training_loss = losses.compute_weighted_loss(
                unweighted_loss,
                weights=weights,
                reduction=losses.Reduction.SUM)
            if mode == model_fn.ModeKeys.EVAL:
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.EVAL,
                    predictions=predictions,
                    loss=training_loss,
                    eval_metric_ops=self._eval_metric_ops(
                        labels=processed_labels,
                        logits=logits,
                        logistic=logistic,
                        scores=scores,
                        class_ids=class_ids,
                        unweighted_loss=unweighted_loss,
                        weights=weights))

            # Train.
            if train_op_fn is None:
                raise ValueError('train_op_fn can not be None.')
        with ops.name_scope(''):
            summary.scalar(
                _summary_key(self._head_name, metric_keys.MetricKeys.LOSS),
                training_loss)
            summary.scalar(
                _summary_key(self._head_name,
                             metric_keys.MetricKeys.LOSS_MEAN),
                losses.compute_weighted_loss(unweighted_loss,
                                             weights=weights,
                                             reduction=losses.Reduction.MEAN))
        return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN,
                                      predictions=predictions,
                                      loss=training_loss,
                                      train_op=train_op_fn(training_loss))
Example #34
    def call(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, the
        cell is expected to create a zero initial state of type `dtype`.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len).`
        Defaults to `time_len` for each element.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
        is_list = isinstance(inputs, list)
        if is_list:
            inputs = array_ops.stack(inputs)
        inputs_shape = inputs.get_shape().with_rank(3)
        if not inputs_shape[2]:
            raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                             inputs_shape)
        batch_size = inputs_shape.dims[1].value
        if batch_size is None:
            batch_size = array_ops.shape(inputs)[1]
        time_len = inputs_shape.dims[0].value
        if time_len is None:
            time_len = array_ops.shape(inputs)[0]

        # Provide default values for initial_state and dtype
        if initial_state is None:
            if dtype is None:
                raise ValueError(
                    "Either initial_state or dtype needs to be specified")
            z = array_ops.zeros(array_ops.stack([batch_size, self.num_units]),
                                dtype=dtype)
            initial_state = z, z
        else:
            if len(initial_state) != 2:
                raise ValueError(
                    "Expecting initial_state to be a tuple with length 2 or None"
                )
            if dtype is None:
                dtype = initial_state[0].dtype

        # create the actual cell
        if sequence_length is not None:
            sequence_length = ops.convert_to_tensor(sequence_length)
        initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
        cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                               initial_output, dtype,
                                               sequence_length)

        if sequence_length is not None:
            # Mask out the part beyond sequence_length
            mask = array_ops.transpose(
                array_ops.sequence_mask(sequence_length, time_len,
                                        dtype=dtype), [1, 0])
            mask = array_ops.tile(array_ops.expand_dims(mask, [-1]),
                                  [1, 1, self.num_units])
            outputs *= mask
            # Prepend initial states to cell_states and outputs for indexing to work
            # correctly, since we want to access the last valid state at
            # sequence_length - 1, which can even be -1, corresponding to the
            # initial state.
            mod_cell_states = array_ops.concat(
                [array_ops.expand_dims(initial_cell_state, [0]), cell_states],
                0)
            mod_outputs = array_ops.concat(
                [array_ops.expand_dims(initial_output, [0]), outputs], 0)
            final_cell_state = self._gather_states(mod_cell_states,
                                                   sequence_length, batch_size)
            final_output = self._gather_states(mod_outputs, sequence_length,
                                               batch_size)
        else:
            # No sequence_lengths used: final state is the last state
            final_cell_state = cell_states[-1]
            final_output = outputs[-1]

        if is_list:
            # Input was a list, so return a list
            outputs = array_ops.unstack(outputs)

        final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state,
                                                   final_output)
        return outputs, final_state
Example #35
 def dataset_fn():
   return dataset_ops.Dataset.range(10).map(
       lambda x: array_ops.tile([x], ops.convert_to_tensor([x])),
       num_parallel_calls=4)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.tile(array_ops.reshape(end_token, [1, 1]),
                               [batch_size, beam_width]),
        depth=vocab_size,
        on_value=constant_op.constant(0, dtype=dtypes.int64),
        off_value=constant_op.constant(1, dtype=dtypes.int64),
        dtype=dtypes.int64)
    add_mask = (1 - math_ops.to_int64(previously_finished))
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))

    # Calculate the scores for each beam
    scores = _get_scores(log_probs=total_probs,
                         sequence_lengths=new_prediction_lengths,
                         length_penalty_weight=length_penalty_weight)

    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_shape = array_ops.shape(scores)
    scores_flat = control_flow_ops.cond(
        time > 0, lambda: array_ops.reshape(scores, [batch_size, -1]),
        lambda: scores[:, 0])
    num_available_beam = control_flow_ops.cond(
        time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))

    # Pick the next beams according to the specified successors function
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(beam_width,
                              dtype=dtypes.int32,
                              name="beam_width"), num_available_beam)
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen predictions
    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(cell_state=next_cell_state,
                                        log_probs=next_beam_probs,
                                        lengths=next_prediction_len,
                                        finished=next_finished)

    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
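
A minimal NumPy sketch of the flattened top-k bookkeeping above: scores are reshaped to `[batch_size, beam_width * vocab_size]`, and each selected flat index decomposes into `word_id = index % vocab_size` and `parent_beam = index // vocab_size`. The sizes below are assumptions chosen only for illustration.

import numpy as np

batch_size, beam_width, vocab_size = 2, 3, 5   # assumed sizes for illustration
rng = np.random.default_rng(0)
scores = rng.normal(size=(batch_size, beam_width, vocab_size))

# Flatten beams and vocab together, as scores_flat does above (time > 0 case).
scores_flat = scores.reshape(batch_size, beam_width * vocab_size)
# Take the top beam_width entries per row (argsort stands in for nn_ops.top_k).
word_indices = np.argsort(-scores_flat, axis=1)[:, :beam_width]

next_word_ids = word_indices % vocab_size    # which vocab entry was chosen
next_beam_ids = word_indices // vocab_size   # which parent beam it came from

# The recovered (beam, word) coordinates point back at the selected scores.
for b in range(batch_size):
    for k in range(beam_width):
        assert scores[b, next_beam_ids[b, k], next_word_ids[b, k]] == \
            scores_flat[b, word_indices[b, k]]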
Example #37
        def _serving_input_receiver_fn():
            """A receiver function to be passed to export_savedmodel."""
            placeholders = {}
            time_placeholder = array_ops.placeholder(
                name=feature_keys.TrainEvalFeatures.TIMES,
                dtype=dtypes.int64,
                shape=[default_batch_size, default_series_length])
            placeholders[
                feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
            # Values are only necessary when filtering. For prediction the default
            # value will be ignored.
            placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
                array_ops.placeholder_with_default(
                    name=feature_keys.TrainEvalFeatures.VALUES,
                    input=array_ops.zeros(shape=[
                        default_batch_size if default_batch_size else 0,
                        default_series_length if default_series_length else 0,
                        self._model.num_features
                    ],
                                          dtype=self._model.dtype),
                    shape=(default_batch_size, default_series_length,
                           self._model.num_features)))
            if self._model.exogenous_feature_columns:
                with ops.Graph().as_default():
                    # Default placeholders have only an unknown batch dimension. Make them
                    # in a separate graph, then splice in the series length to the shapes
                    # and re-create them in the outer graph.
                    parsed_features = (feature_column.make_parse_example_spec(
                        self._model.exogenous_feature_columns))
                    placeholder_features = parsing_ops.parse_example(
                        serialized=array_ops.placeholder(shape=[None],
                                                         dtype=dtypes.string),
                        features=parsed_features)
                    exogenous_feature_shapes = {
                        key: (value.get_shape(), value.dtype)
                        for key, value in placeholder_features.items()
                    }
                for feature_key, (batch_only_feature_shape, value_dtype) in (
                        exogenous_feature_shapes.items()):
                    batch_only_feature_shape = (
                        batch_only_feature_shape.with_rank_at_least(
                            1).as_list())
                    feature_shape = (
                        [default_batch_size, default_series_length] +
                        batch_only_feature_shape[1:])
                    placeholders[feature_key] = array_ops.placeholder(
                        dtype=value_dtype,
                        name=feature_key,
                        shape=feature_shape)
            # Models may not know the shape of their state without creating some
            # variables/ops. Avoid polluting the default graph by making a new one. We
            # use only static metadata from the returned Tensors.
            with ops.Graph().as_default():
                self._model.initialize_graph()

                # Evaluate the initial state as same-dtype "zero" values. These zero
                # constants aren't used, but are necessary for feeding to
                # placeholder_with_default for the "cold start" case where state is not
                # fed to the model.
                def _zeros_like_constant(tensor):
                    return tensor_util.constant_value(
                        array_ops.zeros_like(tensor))

                start_state = nest.map_structure(_zeros_like_constant,
                                                 self._model.get_start_state())
            batch_size_tensor = array_ops.shape(time_placeholder)[0]
            for prefixed_state_name, state in ts_head_lib.state_to_dictionary(
                    start_state).items():
                state_shape_with_batch = tensor_shape.TensorShape(
                    (default_batch_size, )).concatenate(state.shape)
                default_state_broadcast = array_ops.tile(
                    state[None, ...],
                    multiples=array_ops.concat([
                        batch_size_tensor[None],
                        array_ops.ones(len(state.shape), dtype=dtypes.int32)
                    ],
                                               axis=0))
                placeholders[
                    prefixed_state_name] = array_ops.placeholder_with_default(
                        input=default_state_broadcast,
                        name=prefixed_state_name,
                        shape=state_shape_with_batch)
            return export_lib.ServingInputReceiver(placeholders, placeholders)
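
A minimal sketch of the default-state broadcasting used above, written against the public `tf.compat.v1` API instead of the internal `array_ops`/`ops` modules; the series length, state size, and zero start state are assumptions for illustration only.

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# The TIMES placeholder determines the batch size at serving time.
times = tf.placeholder(tf.int64, shape=[None, 4], name="times")
batch_size = tf.shape(times)[0]

# A static "zero" start state, tiled across the batch so it can act as the
# default value for the "cold start" case where no state is fed.
start_state = np.zeros([3], dtype=np.float32)
default_state = tf.tile(start_state[None, :], tf.stack([batch_size, 1]))
state = tf.placeholder_with_default(default_state, shape=[None, 3])

with tf.Session() as sess:
    out = sess.run(state, feed_dict={times: np.zeros([2, 4], dtype=np.int64)})
    print(out.shape)  # (2, 3): the default state broadcast to the fed batch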
Example #38
 def _init_val_initializer(shape, dtype=None, partition_info=None):
     del dtype, partition_info  # Unused by this unit-testing initializer.
     return array_ops.tile(
         constant_op.constant([[self.init_val]], dtype=dtypes.float32),
         shape)
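
A plain-NumPy sketch of what the unit-testing initializer above produces; `init_val` is an assumed stand-in for `self.init_val`.

import numpy as np

init_val = 2.0        # assumed value; the original reads it from self.init_val
shape = (3, 4)        # the variable shape handed to the initializer
filled = np.tile(np.array([[init_val]], dtype=np.float32), shape)
print(filled.shape)   # (3, 4), every entry equal to init_val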
Example #39
 def initialize(self, name=None):
   finished = array_ops.tile([False], [self._batch_size])
   return (finished, self._start_inputs)
Example #40
def lifted_struct_loss(labels, embeddings, margin=1.0):
  """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pairwise_distances = pairwise_distance(embeddings)

  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  diff = margin - pairwise_distances
  mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  # Safe maximum: temporarily shift the (possibly negative) differences above
  # zero before taking the max, so that the max is taken only among negatives.
  row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True)
  row_negative_maximums = math_ops.reduce_max(
      math_ops.multiply(
          diff - row_minimums, mask), 1, keep_dims=True) + row_minimums

  # Compute the loss.
  # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
  #   where m_i is the max of alpha - negative D_i's.
  # This matches the Caffe loss layer implementation at:
  #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

  max_elements = math_ops.maximum(
      row_negative_maximums, array_ops.transpose(row_negative_maximums))
  diff_tiled = array_ops.tile(diff, [batch_size, 1])
  mask_tiled = array_ops.tile(mask, [batch_size, 1])
  max_elements_vect = array_ops.reshape(
      array_ops.transpose(max_elements), [-1, 1])

  loss_exp_left = array_ops.reshape(
      math_ops.reduce_sum(math_ops.multiply(
          math_ops.exp(
              diff_tiled - max_elements_vect),
          mask_tiled), 1, keep_dims=True), [batch_size, batch_size])

  loss_mat = max_elements + math_ops.log(
      loss_exp_left + array_ops.transpose(loss_exp_left))
  # Add the positive distance.
  loss_mat += pairwise_distances

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
  num_positives = math_ops.reduce_sum(mask_positives) / 2.0

  lifted_loss = math_ops.truediv(
      0.25 * math_ops.reduce_sum(
          math_ops.square(
              math_ops.maximum(
                  math_ops.multiply(loss_mat, mask_positives), 0.0))),
      num_positives,
      name='liftedstruct_loss')
  return lifted_loss
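
The "safe maximum" trick above (shift by the row minimum so the masked max only sees negative-pair entries) can be sanity-checked with a small NumPy sketch; the random matrix and mask below are assumptions for illustration.

import numpy as np

rng = np.random.default_rng(0)
diff = rng.normal(size=(4, 4)).astype(np.float32)      # margin - distances
mask = (rng.random((4, 4)) > 0.5).astype(np.float32)   # 1.0 marks negatives

# Shift everything above zero so masked-out entries (multiplied by 0) cannot
# win the max, then shift back; this mirrors row_negative_maximums above.
row_min = diff.min(axis=1, keepdims=True)
masked_max = ((diff - row_min) * mask).max(axis=1, keepdims=True) + row_min

# Reference check: the plain maximum over the masked entries of each row.
for i in range(4):
    if mask[i].any():
        assert np.isclose(masked_max[i, 0], diff[i][mask[i] > 0].max())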
Example #41
def triplet_semihard_loss(labels, embeddings, margin=1.0):
  """Computes the triplet loss with semi-hard negative mining.

  The loss encourages the positive distances (between a pair of embeddings with
  the same labels) to be smaller than the minimum negative distance among those
  negatives that are greater than the positive distance plus the margin
  constant (the semi-hard negatives) in the mini-batch. If no such negative
  exists, the largest negative distance is used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pdist_matrix = pairwise_distance(embeddings, squared=True)
  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  # Compute the mask.
  pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
  mask = math_ops.logical_and(
      array_ops.tile(adjacency_not, [batch_size, 1]),
      math_ops.greater(
          pdist_matrix_tile, array_ops.reshape(
              array_ops.transpose(pdist_matrix), [-1, 1])))
  mask_final = array_ops.reshape(
      math_ops.greater(
          math_ops.reduce_sum(
              math_ops.cast(
                  mask, dtype=dtypes.float32), 1, keep_dims=True),
          0.0), [batch_size, batch_size])
  mask_final = array_ops.transpose(mask_final)

  adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  mask = math_ops.cast(mask, dtype=dtypes.float32)

  # negatives_outside: smallest D_an where D_an > D_ap.
  negatives_outside = array_ops.reshape(
      masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
  negatives_outside = array_ops.transpose(negatives_outside)

  # negatives_inside: largest D_an.
  negatives_inside = array_ops.tile(
      masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
  semi_hard_negatives = array_ops.where(
      mask_final, negatives_outside, negatives_inside)

  loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # In lifted-struct, the authors multiply by 0.5 for the upper triangular
  #   part; in semihard, all positive pairs except the diagonal are taken.
  num_positives = math_ops.reduce_sum(mask_positives)

  triplet_loss = math_ops.truediv(
      math_ops.reduce_sum(
          math_ops.maximum(
              math_ops.multiply(loss_mat, mask_positives), 0.0)),
      num_positives,
      name='triplet_semihard_loss')

  return triplet_loss
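
A minimal sketch of the semi-hard negative selection implemented above via `negatives_outside` / `negatives_inside`: for each (anchor, positive) pair, prefer the smallest negative distance that is still larger than the positive distance, and fall back to the largest negative distance if no such negative exists. The distances below are made up for illustration.

import numpy as np

def semi_hard_negative(d_ap, negative_distances):
    # negatives_outside branch: smallest D_an with D_an > D_ap.
    harder = negative_distances[negative_distances > d_ap]
    if harder.size:
        return harder.min()
    # negatives_inside branch: no such negative, use the largest D_an.
    return negative_distances.max()

neg = np.array([0.3, 0.9, 1.4])
print(semi_hard_negative(0.5, neg))  # 0.9: closest negative beyond d_ap
print(semi_hard_negative(2.0, neg))  # 1.4: fallback, no negative beyond d_ap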
Example #42
 def update_estimate_and_tile(num_examples_per_class_seen, c):
     updated_examples_per_class_seen, dist = _estimate_data_distribution(
         c, num_examples_per_class_seen)
     tiled_dist = array_ops.tile(array_ops.expand_dims(dist, 0),
                                 [dist_estimation_batch_size, 1])
     return updated_examples_per_class_seen, tiled_dist
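
A one-line NumPy analogue of the tiling step above: the estimated per-class distribution is repeated once per example in the estimation batch. The numbers are assumptions for illustration.

import numpy as np

dist = np.array([0.25, 0.75], dtype=np.float32)   # assumed class distribution
dist_estimation_batch_size = 4
tiled_dist = np.tile(dist[None, :], (dist_estimation_batch_size, 1))
print(tiled_dist.shape)   # (4, 2); every row is the same distribution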
Example #43
def _SumGrad(op, grad):
    """Gradient for Sum."""
    _, new_output_shape, input_shape = _ReductionGradAssist(op)
    tile_scaling = input_shape // new_output_shape
    grad = array_ops.reshape(grad, new_output_shape)
    return [array_ops.tile(grad, tile_scaling), None]
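
A NumPy sketch of the Sum gradient above: because every input element contributes exactly once to the reduced sum, the incoming gradient is reshaped so the reduced axis has size 1 and then tiled back out to the input shape. The shapes below are assumptions for illustration.

import numpy as np

input_shape = np.array([2, 3])          # shape of the tensor that was summed
new_output_shape = np.array([2, 1])     # reduced axis kept with size 1
grad = np.array([4.0, 5.0])             # gradient w.r.t. the axis-1 sums

tile_scaling = input_shape // new_output_shape        # [1, 3]
input_grad = np.tile(grad.reshape(new_output_shape), tile_scaling)
print(input_grad)   # [[4. 4. 4.]
                    #  [5. 5. 5.]]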
Example #44
    def __init__(self,
                 cell,
                 embedding,
                 start_tokens,
                 end_token,
                 initial_state,
                 beam_width,
                 output_layer=None,
                 length_penalty_weight=0.0):
        """Initialize the BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_layer` is not an instance of `tf.layers.Layer`.
      ValueError: If `start_tokens` is not a vector or
        `end_token` is not a scalar.
    """
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("cell must be an RNNCell, received: %s" %
                            type(cell))
        if (output_layer is not None
                and not isinstance(output_layer, layers_base.Layer)):
            raise TypeError("output_layer must be a Layer, received: %s" %
                            type(output_layer))
        self._cell = cell
        self._output_layer = output_layer

        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._start_tokens = ops.convert_to_tensor(start_tokens,
                                                   dtype=dtypes.int32,
                                                   name="start_tokens")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._end_token = ops.convert_to_tensor(end_token,
                                                dtype=dtypes.int32,
                                                name="end_token")
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        self._batch_size = array_ops.size(start_tokens)
        self._beam_width = beam_width
        self._length_penalty_weight = length_penalty_weight
        self._initial_cell_state = nest.map_structure(
            self._maybe_split_batch_beams, initial_state,
            self._cell.state_size)
        self._start_tokens = array_ops.tile(
            array_ops.expand_dims(self._start_tokens, 1),
            [1, self._beam_width])
        self._start_inputs = self._embedding_fn(self._start_tokens)
        self._finished = array_ops.zeros([self._batch_size, self._beam_width],
                                         dtype=dtypes.bool)
Example #45
 def initialize(self, name=None):
   finished = array_ops.tile([False], [self._batch_size])
   print('finished', finished, 'and shape', finished.shape)
   return (finished, self._start_inputs)
Example #46
  def training_graph(self, input_data, input_labels, random_seed,
                     data_spec, epoch=None):
    """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      epoch: A tensor or placeholder for the epoch the training data comes from.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = [0] if epoch is None else epoch

    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, ops.SparseTensor):
      sparse_indices = input_data.indices
      sparse_values = input_data.values
      sparse_shape = input_data.shape
      input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums,
     splits_squares, totals_indices, totals_sums,
     totals_squares, input_leaves) = (
         self.training_ops.count_extremely_random_stats(
             input_data, sparse_indices, sparse_values, sparse_shape,
             data_spec, input_labels, self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             self.variables.start_epoch, epoch,
             num_classes=self.params.num_output_columns,
             regression=self.params.regression))
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(self.training_ops.scatter_add_ndim(
        self.variables.candidate_split_sums,
        splits_indices, splits_sums))
    splits_update_ops.append(self.training_ops.scatter_add_ndim(
        self.variables.accumulator_sums, totals_indices,
        totals_sums))

    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(self.training_ops.scatter_add_ndim(
          self.variables.candidate_split_squares,
          splits_indices, splits_squares))
      splits_update_ops.append(self.training_ops.scatter_add_ndim(
          self.variables.accumulator_squares, totals_indices,
          totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        self.training_ops.sample_inputs(
            input_data, sparse_indices, sparse_values, sparse_shape,
            self.variables.node_to_accumulator_map,
            input_leaves, self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
      children = array_ops.squeeze(array_ops.slice(
          self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
      is_leaf = math_ops.equal(constants.LEAF_NODE, children)
      leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                                   squeeze_dims=[1]))
      finished, stale = self.training_ops.finished_nodes(
          leaves, self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch, epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples)

    # Update leaf scores.
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = self.training_ops.best_splits(
          finished, self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op]):
      (tree_update_indices, tree_children_updates,
       tree_threshold_updates, tree_depth_updates, new_eot) = (
           self.training_ops.grow_tree(
               self.variables.end_of_tree, self.variables.tree_depths,
               self.variables.node_to_accumulator_map, finished, split_indices,
               self.variables.candidate_split_features,
               self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      depth_update_op = state_ops.scatter_update(
          self.variables.tree_depths, tree_update_indices, tree_depth_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      new_epoch_updates = epoch * array_ops.ones_like(tree_depth_updates)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([depth_update_op]):
      (node_map_updates, accumulators_cleared, accumulators_allocated) = (
          self.training_ops.update_fertile_slots(
              finished, non_fertile_leaves,
              non_fertile_leaf_scores,
              self.variables.end_of_tree, self.variables.tree_depths,
              self.variables.accumulator_sums,
              self.variables.node_to_accumulator_map,
              stale,
              max_depth=self.params.max_depth,
              regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple([new_eot],
                                            control_inputs=[node_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    updates.append(state_ops.scatter_update(
        self.variables.node_to_accumulator_map,
        array_ops.squeeze(array_ops.slice(node_map_updates, [0, 0], [1, -1]),
                          squeeze_dims=[0]),
        array_ops.squeeze(array_ops.slice(node_map_updates, [1, 0], [1, -1]),
                          squeeze_dims=[0])))

    cleared_and_allocated_accumulators = array_ops.concat(
        0, [accumulators_cleared, accumulators_allocated])
    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(accumulators_cleared,
                                             dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
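
A NumPy sketch of the reset block built above for the candidate-split sums: the cleared-and-allocated accumulator ids are expanded into an all-zero block of shape `[num_accumulators, num_splits_to_consider, num_output_columns]`, ready to be scattered into the variable. The sizes are assumptions for illustration.

import numpy as np

cleared_and_allocated_accumulators = np.array([3, 7, 9])
num_splits_to_consider, num_output_columns = 2, 5     # assumed params

split_values = np.tile(
    np.zeros_like(cleared_and_allocated_accumulators,
                  dtype=np.float32)[:, None, None],
    (1, num_splits_to_consider, num_output_columns))
print(split_values.shape)   # (3, 2, 5): one zeroed block per accumulator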
Example #47
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using
  `tf.compat.v1.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float `Tensor`s or values representing
      partitioned embedding `Tensor`s.  Alternatively, a `PartitionedVariable`
      created by partitioning along dimension 0.  The total unpartitioned shape
      should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size
      and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights are
      assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
      default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy. Currently
      `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm before
      combining.

  Returns:
    Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      w if (isinstance(w, resource_variable_ops.ResourceVariable)
            and dtype in (None, w.dtype))
      else ops.convert_to_tensor(w, dtype=dtype)
      for w in embedding_weights
  ]

  with ops.name_scope(name, "embedding_lookup", embedding_weights +
                      [sparse_ids, sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim is None else original_rank_dim)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate(
                result.get_shape()[1:]))
    return final_result
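
A NumPy sketch of the empty-row handling above when `default_id` is `None`: the per-row emptiness flag is tiled across the embedding dimension so a `where` can replace whole rows with zeros. The lookup result below is a made-up placeholder.

import numpy as np

result = np.arange(6, dtype=np.float32).reshape(3, 2)   # fake lookup result
is_row_empty = np.array([False, True, False])

is_row_empty_tiled = np.tile(is_row_empty[:, None], (1, result.shape[1]))
masked = np.where(is_row_empty_tiled, np.zeros_like(result), result)
print(masked)   # the embedding for the empty middle row is zeroed out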
Example #48
    def __init__(self,
                 cell,
                 embedding,
                 start_tokens,
                 end_token,
                 initial_state,
                 beam_width,
                 output_layer=None,
                 length_penalty_weight=0.0,
                 coverage_penalty_weight=0.0,
                 reorder_tensor_arrays=True):
        """Initialize the BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of source
        sentence. Disabled with 0.0.
      reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
        state will be reordered according to the beam search path. If the
        `TensorArray` can be reordered, the stacked form will be returned.
        Otherwise, the `TensorArray` will be returned as is. Set this flag to
        `False` if the cell state contains `TensorArray`s that are not amenable
        to reordering.

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_layer` is not an instance of `tf.layers.Layer`.
      ValueError: If `start_tokens` is not a vector or
        `end_token` is not a scalar.
    """
        rnn_cell_impl.assert_like_rnncell("cell", cell)  # pylint: disable=protected-access
        if (output_layer is not None
                and not isinstance(output_layer, layers_base.Layer)):
            raise TypeError("output_layer must be a Layer, received: %s" %
                            type(output_layer))
        self._cell = cell
        self._output_layer = output_layer
        self._reorder_tensor_arrays = reorder_tensor_arrays

        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._start_tokens = ops.convert_to_tensor(start_tokens,
                                                   dtype=dtypes.int32,
                                                   name="start_tokens")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._end_token = ops.convert_to_tensor(end_token,
                                                dtype=dtypes.int32,
                                                name="end_token")
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        self._batch_size = array_ops.size(start_tokens)
        self._beam_width = beam_width
        self._length_penalty_weight = length_penalty_weight
        self._coverage_penalty_weight = coverage_penalty_weight
        self._initial_cell_state = nest.map_structure(
            self._maybe_split_batch_beams, initial_state,
            self._cell.state_size)
        self._start_tokens = array_ops.tile(
            array_ops.expand_dims(self._start_tokens, 1),
            [1, self._beam_width])
        self._start_inputs = self._embedding_fn(self._start_tokens)

        self._finished = array_ops.one_hot(array_ops.zeros([self._batch_size],
                                                           dtype=dtypes.int32),
                                           depth=self._beam_width,
                                           on_value=False,
                                           off_value=True,
                                           dtype=dtypes.bool)
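
A NumPy sketch of what the `finished` initialization above evaluates to: the one-hot over index 0 with `on_value=False` and `off_value=True` leaves only beam 0 marked as unfinished, so only the first beam is expanded at the first step. The sizes are assumptions for illustration.

import numpy as np

batch_size, beam_width = 2, 4    # assumed sizes
finished = np.full((batch_size, beam_width), True)
finished[:, 0] = False           # beam 0 is the only active beam initially
print(finished)
# [[False  True  True  True]
#  [False  True  True  True]]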
Example #49
    def training_graph(self,
                       input_data,
                       input_labels,
                       random_seed,
                       data_spec,
                       sparse_features=None,
                       input_weights=None):
        """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A data_ops.TensorForestDataSpec object specifying the
        original feature/columns of the data.
      sparse_features: A tf.SparseTensor for sparse input data.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
        epoch = math_ops.to_int32(get_epoch_variable())

        serialized_input_spec = data_spec.SerializeToString()

        if input_weights is None:
            input_weights = []

        if input_data is None:
            input_data = []

        sparse_indices = []
        sparse_values = []
        sparse_shape = []
        if sparse_features is not None:
            sparse_indices = sparse_features.indices
            sparse_values = sparse_features.values
            sparse_shape = sparse_features.dense_shape

        # Count extremely random stats.
        (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
         totals_indices, totals_sums, totals_squares,
         input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
             input_data,
             sparse_indices,
             sparse_values,
             sparse_shape,
             input_labels,
             input_weights,
             self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             self.variables.start_epoch,
             epoch,
             input_spec=serialized_input_spec,
             num_classes=self.params.num_output_columns,
             regression=self.params.regression))
        node_update_ops = []
        node_update_ops.append(
            state_ops.assign_add(self.variables.node_sums, node_sums))

        splits_update_ops = []
        splits_update_ops.append(
            tensor_forest_ops.scatter_add_ndim(
                self.variables.candidate_split_sums, splits_indices,
                splits_sums))
        splits_update_ops.append(
            tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                               totals_indices, totals_sums))

        if self.params.regression:
            node_update_ops.append(
                state_ops.assign_add(self.variables.node_squares,
                                     node_squares))
            splits_update_ops.append(
                tensor_forest_ops.scatter_add_ndim(
                    self.variables.candidate_split_squares, splits_indices,
                    splits_squares))
            splits_update_ops.append(
                tensor_forest_ops.scatter_add_ndim(
                    self.variables.accumulator_squares, totals_indices,
                    totals_squares))

        # Sample inputs.
        update_indices, feature_updates, threshold_updates = (
            tensor_forest_ops.sample_inputs(
                input_data,
                sparse_indices,
                sparse_values,
                sparse_shape,
                input_weights,
                self.variables.node_to_accumulator_map,
                input_leaves,
                self.variables.candidate_split_features,
                self.variables.candidate_split_thresholds,
                input_spec=serialized_input_spec,
                split_initializations_per_input=(
                    self.params.split_initializations_per_input),
                split_sampling_random_seed=random_seed))
        update_features_op = state_ops.scatter_update(
            self.variables.candidate_split_features, update_indices,
            feature_updates)
        update_thresholds_op = state_ops.scatter_update(
            self.variables.candidate_split_thresholds, update_indices,
            threshold_updates)

        # Calculate finished nodes.
        with ops.control_dependencies(splits_update_ops):
            # Passing input_leaves to finished nodes here means that nodes that
            # have become stale won't be deallocated until an input reaches them,
            # because we're trying to avoid considering every fertile node for
            # performance reasons.
            finished, stale = tensor_forest_ops.finished_nodes(
                input_leaves,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                self.variables.start_epoch,
                epoch,
                num_split_after_samples=self.params.split_after_samples,
                min_split_samples=self.params.min_split_samples,
                dominate_method=self.params.dominate_method,
                dominate_fraction=self.params.dominate_fraction)

        # Update leaf scores.
        # TODO(thomaswc): Store the leaf scores in a TopN and only update the
        # scores of the leaves that were touched by this batch of input.
        children = array_ops.squeeze(array_ops.slice(self.variables.tree,
                                                     [0, 0], [-1, 1]),
                                     squeeze_dims=[1])
        is_leaf = math_ops.equal(constants.LEAF_NODE, children)
        leaves = math_ops.to_int32(
            array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
        non_fertile_leaves = array_ops.boolean_mask(
            leaves,
            math_ops.less(
                array_ops.gather(self.variables.node_to_accumulator_map,
                                 leaves), 0))

        # TODO(gilberth): It should be possible to limit the number of non
        # fertile leaves we calculate scores for, especially since we can only take
        # at most array_ops.shape(finished)[0] of them.
        with ops.control_dependencies(node_update_ops):
            sums = array_ops.gather(self.variables.node_sums,
                                    non_fertile_leaves)
            if self.params.regression:
                squares = array_ops.gather(self.variables.node_squares,
                                           non_fertile_leaves)
                non_fertile_leaf_scores = self._variance(sums, squares)
            else:
                non_fertile_leaf_scores = self._weighted_gini(sums)

        # Calculate best splits.
        with ops.control_dependencies(splits_update_ops):
            split_indices = tensor_forest_ops.best_splits(
                finished,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                regression=self.params.regression)

        # Grow tree.
        with ops.control_dependencies(
            [update_features_op, update_thresholds_op]):
            (tree_update_indices, tree_children_updates,
             tree_threshold_updates, new_eot) = (tensor_forest_ops.grow_tree(
                 self.variables.end_of_tree,
                 self.variables.node_to_accumulator_map, finished,
                 split_indices, self.variables.candidate_split_features,
                 self.variables.candidate_split_thresholds))
            tree_update_op = state_ops.scatter_update(self.variables.tree,
                                                      tree_update_indices,
                                                      tree_children_updates)
            thresholds_update_op = state_ops.scatter_update(
                self.variables.tree_thresholds, tree_update_indices,
                tree_threshold_updates)
            # TODO(thomaswc): Only update the epoch on the new leaves.
            new_epoch_updates = epoch * array_ops.ones_like(
                tree_threshold_updates, dtype=dtypes.int32)
            epoch_update_op = state_ops.scatter_update(
                self.variables.start_epoch, tree_update_indices,
                new_epoch_updates)

        # Update fertile slots.
        with ops.control_dependencies([tree_update_op]):
            (n2a_map_updates, a2n_map_updates, accumulators_cleared,
             accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
                 finished,
                 non_fertile_leaves,
                 non_fertile_leaf_scores,
                 self.variables.end_of_tree,
                 self.variables.accumulator_sums,
                 self.variables.node_to_accumulator_map,
                 stale,
                 self.variables.node_sums,
                 regression=self.params.regression))

        # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
        # used it to calculate new leaves.
        gated_new_eot, = control_flow_ops.tuple(
            [new_eot], control_inputs=[n2a_map_updates])
        eot_update_op = state_ops.assign(self.variables.end_of_tree,
                                         gated_new_eot)

        updates = []
        updates.append(eot_update_op)
        updates.append(tree_update_op)
        updates.append(thresholds_update_op)
        updates.append(epoch_update_op)

        updates.append(
            state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                     n2a_map_updates[0], n2a_map_updates[1]))

        updates.append(
            state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                     a2n_map_updates[0], a2n_map_updates[1]))

        cleared_and_allocated_accumulators = array_ops.concat_v2(
            [accumulators_cleared, accumulators_allocated], 0)

        # Calculate values to put into scatter update for candidate counts.
        # Candidate split counts are always reset back to 0 for both cleared
        # and allocated accumulators. This means some accumulators might be doubly
        # reset to 0 if they were released and not allocated, then later allocated.
        split_values = array_ops.tile(
            array_ops.expand_dims(
                array_ops.expand_dims(
                    array_ops.zeros_like(cleared_and_allocated_accumulators,
                                         dtype=dtypes.float32), 1), 2),
            [
                1, self.params.num_splits_to_consider,
                self.params.num_output_columns
            ])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_sums,
                                     cleared_and_allocated_accumulators,
                                     split_values))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(
                    self.variables.candidate_split_squares,
                    cleared_and_allocated_accumulators, split_values))

        # Calculate values to put into scatter update for total counts.
        total_cleared = array_ops.tile(
            array_ops.expand_dims(
                math_ops.negative(
                    array_ops.ones_like(accumulators_cleared,
                                        dtype=dtypes.float32)), 1),
            [1, self.params.num_output_columns])
        total_reset = array_ops.tile(
            array_ops.expand_dims(
                array_ops.zeros_like(accumulators_allocated,
                                     dtype=dtypes.float32), 1),
            [1, self.params.num_output_columns])
        accumulator_updates = array_ops.concat_v2([total_cleared, total_reset],
                                                  0)
        updates.append(
            state_ops.scatter_update(self.variables.accumulator_sums,
                                     cleared_and_allocated_accumulators,
                                     accumulator_updates))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(self.variables.accumulator_squares,
                                         cleared_and_allocated_accumulators,
                                         accumulator_updates))

        # Calculate values to put into scatter update for candidate splits.
        split_features_updates = array_ops.tile(
            array_ops.expand_dims(
                math_ops.negative(
                    array_ops.ones_like(cleared_and_allocated_accumulators)),
                1), [1, self.params.num_splits_to_consider])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_features,
                                     cleared_and_allocated_accumulators,
                                     split_features_updates))

        updates += self.finish_iteration()

        return control_flow_ops.group(*updates)
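
A NumPy sketch of the accumulator reset above: cleared accumulators are written back as rows of -1 and newly allocated ones as rows of 0, concatenated in the same order as `cleared_and_allocated_accumulators`. The ids and column count are assumptions for illustration.

import numpy as np

accumulators_cleared = np.array([1, 4])
accumulators_allocated = np.array([6])
num_output_columns = 3           # assumed params.num_output_columns

total_cleared = np.tile(
    -np.ones_like(accumulators_cleared, dtype=np.float32)[:, None],
    (1, num_output_columns))
total_reset = np.tile(
    np.zeros_like(accumulators_allocated, dtype=np.float32)[:, None],
    (1, num_output_columns))
accumulator_updates = np.concatenate([total_cleared, total_reset], axis=0)
print(accumulator_updates)   # two rows of -1 followed by one row of 0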
Example #50
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value, float tensor, python list, or tuple of float
      thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    if variables_to_update is None:
        return
    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = math_ops.cast(y_true, dtype=variable_dtype)
    y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
    thresholds = ops.convert_to_tensor_v2_with_dispatch(thresholds,
                                                        dtype=variable_dtype)
    num_thresholds = thresholds.shape[0]
    if multi_label:
        one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32),
                                    array_ops.rank(thresholds),
                                    name='one_set_of_thresholds_cond')
    else:
        [y_pred, y_true
         ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
        one_thresh = math_ops.cast(True, dtype=dtypes.bool)

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        if sample_weight is None:
            y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true)
        else:
            sample_weight = math_ops.cast(sample_weight, dtype=variable_dtype)
            y_pred, y_true, sample_weight = (
                losses_utils.squeeze_or_expand_dimensions(
                    y_pred, y_true, sample_weight=sample_weight))
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    pred_shape = array_ops.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
    thresh_label_tile = control_flow_ops.cond(
        one_thresh, lambda: num_labels,
        lambda: math_ops.cast(1, dtype=dtypes.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
        labels_extra_dim = array_ops.expand_dims(
            math_ops.cast(y_true, dtype=dtypes.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
        labels_extra_dim = array_ops.reshape(
            math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = array_ops.tile(
        array_ops.reshape(thresholds, thresh_pretile_shape),
        array_ops.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = array_ops.expand_dims(label_weights, 0)
        label_weights = weights_broadcast_ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = array_ops.tile(
            array_ops.reshape(label_weights, thresh_tiles), data_tiles)
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = math_ops.multiply(weights_tiled,
                                              label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=var.dtype)
        if weights is not None:
            label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
        return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():

        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return control_flow_ops.group(update_ops)
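The tiling above reduces to comparing every prediction against every threshold and then summing the weighted matches per threshold. A minimal NumPy sketch of that tile-and-compare trick (illustrative data, not the Keras implementation):

```python
import numpy as np

y_true = np.array([1, 0, 1, 1, 0], dtype=bool)
y_pred = np.array([0.9, 0.6, 0.4, 0.8, 0.1])
thresholds = np.array([0.3, 0.5, 0.7])

# Tile predictions per threshold and thresholds per prediction.
preds_tiled = np.tile(y_pred, (len(thresholds), 1))            # [3, 5]
thresh_tiled = np.tile(thresholds[:, None], (1, len(y_pred)))  # [3, 5]

pred_is_pos = preds_tiled > thresh_tiled
tp = np.logical_and(y_true, pred_is_pos).sum(axis=1)   # TP count per threshold
fp = np.logical_and(~y_true, pred_is_pos).sum(axis=1)  # FP count per threshold
```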
Example #51
0
    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              train_op_fn=None):
        """See `Head`."""
        with ops.name_scope('head'):
            logits = _check_logits(logits, self.logits_dimension)

            # Predict.
            pred_keys = prediction_keys.PredictionKeys
            with ops.name_scope(None, 'predictions', (logits, )):
                # class_ids's shape is [batch_size]
                class_ids = math_ops.argmax(logits,
                                            1,
                                            name=pred_keys.CLASS_IDS)
                class_ids = array_ops.expand_dims(class_ids, axis=(1, ))
                if self._label_vocabulary:
                    table = lookup_ops.index_to_string_table_from_tensor(
                        vocabulary_list=self._label_vocabulary,
                        name='class_string_lookup')
                    classes = table.lookup(class_ids)
                else:
                    classes = string_ops.as_string(class_ids,
                                                   name='str_classes')

                probabilities = nn.softmax(logits,
                                           name=pred_keys.PROBABILITIES)
                predictions = {
                    pred_keys.LOGITS: logits,
                    pred_keys.PROBABILITIES: probabilities,
                    # Expand to [batch_size, 1]
                    pred_keys.CLASS_IDS: class_ids,
                    pred_keys.CLASSES: classes,
                }
            if mode == model_fn.ModeKeys.PREDICT:
                batch_size = array_ops.shape(probabilities)[0]
                export_class_list = self._label_vocabulary
                if not export_class_list:
                    export_class_list = string_ops.as_string(
                        math_ops.range(self._n_classes))
                export_output_classes = array_ops.tile(
                    input=array_ops.expand_dims(input=export_class_list,
                                                axis=0),
                    multiples=[batch_size, 1])
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.PREDICT,
                    predictions=predictions,
                    export_outputs={
                        '':
                        export_output.ClassificationOutput(
                            scores=probabilities,
                            # `ClassificationOutput` requires string classes.
                            classes=export_output_classes)
                    })

            # Eval.
            unweighted_loss, label_ids = self.create_loss(features=features,
                                                          mode=mode,
                                                          logits=logits,
                                                          labels=labels)
            weights = _weights(features, self._weight_column)
            training_loss = losses.compute_weighted_loss(
                unweighted_loss,
                weights=weights,
                reduction=losses.Reduction.SUM)
            if mode == model_fn.ModeKeys.EVAL:
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.EVAL,
                    predictions=predictions,
                    loss=training_loss,
                    eval_metric_ops=self._eval_metric_ops(
                        labels=label_ids,
                        probabilities=probabilities,
                        logits=logits,
                        class_ids=class_ids,
                        unweighted_loss=unweighted_loss,
                        weights=weights))

            # Train.
            if train_op_fn is None:
                raise ValueError('train_op_fn can not be None.')
        with ops.name_scope(''):
            summary.scalar(
                _summary_key(self._head_name, metric_keys.MetricKeys.LOSS),
                training_loss)
            summary.scalar(
                _summary_key(self._head_name,
                             metric_keys.MetricKeys.LOSS_MEAN),
                losses.compute_weighted_loss(unweighted_loss,
                                             weights=weights,
                                             reduction=losses.Reduction.MEAN))
        return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN,
                                      predictions=predictions,
                                      loss=training_loss,
                                      train_op=train_op_fn(training_loss))
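The PREDICT branch above tiles a single class-name vector across the batch so `ClassificationOutput` receives one row of string classes per example. A small sketch of just that tile, with hypothetical class names and batch size:

```python
import tensorflow as tf

class_list = tf.constant(["cat", "dog", "bird"])          # [n_classes]
batch_size = 4
export_classes = tf.tile(tf.expand_dims(class_list, 0),   # [1, n_classes]
                         [batch_size, 1])                 # -> [batch_size, n_classes]
```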
def _MatrixSquareRootGrad(op, grad):
    """Gradient for MatrixSquareRoot."""

    # Let A be an m x m square matrix (or batch of matrices)
    # Let R = sqrtm(A)
    # By definition, A = RR
    # Take the differential: dA = d(RR) = RdR + dRR
    # Solve the resulting Sylvester equation for dR

    # Used to find Kronecker products within the Sylvester equation
    def _KroneckerProduct(b1, b2):
        """Computes the Kronecker product of two batches of square matrices"""
        b1_shape = array_ops.shape(b1)
        b2_shape = array_ops.shape(b2)
        b1_order = b1_shape[-1]
        b2_order = b2_shape[-1]

        shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
        shape_slice = array_ops.slice(
            b1_shape, [0], shape_slice_size)  # Same for both batches
        b1_reshape_shape = array_ops.concat(
            [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
        b2_reshape_shape = array_ops.concat(
            [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

        b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
        b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

        order_prod = b1_order * b2_order
        kprod_shape = array_ops.concat(
            [shape_slice, [order_prod], [order_prod]], 0)
        return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)

    sqrtm = op.outputs[0]  # R
    shape = array_ops.shape(sqrtm)
    order = shape[-1]  # m
    matrix_count = math_ops.reduce_prod(shape[0:-2])

    # Get batch of m x m identity matrices
    eye = linalg_ops.eye(order, dtype=sqrtm.dtype)  # m x m identity matrix
    eye_flat = array_ops.reshape(eye, [-1])
    eye_tiled = array_ops.tile(eye_flat, [matrix_count])
    eye_batch = array_ops.reshape(eye_tiled, shape)

    # The transpose of R is taken in the k1 term instead of k2 in
    # order to prevent redundant transposition of R (i.e. (R')' = R)
    sqrtm_transpose = array_ops.matrix_transpose(sqrtm)
    k1 = _KroneckerProduct(eye_batch, sqrtm_transpose)
    k2 = _KroneckerProduct(sqrtm, eye_batch)
    ksum = math_ops.add(k1, k2)

    # Vectorize dA
    shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)]
    shape_slice = array_ops.slice(shape, [0], shape_slice_size)
    shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0)
    vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da)

    # Solve for vec(dR)
    vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da)

    # Solve for dR by inverse vectorizing vec(dR)
    dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape)
    return array_ops.matrix_transpose(dsqrtm_transpose)
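The comments above reduce the gradient to the Sylvester equation R·dR + dR·R = dA, solved through Kronecker products (the TF code's kron ordering differs because it works with row-major reshapes and transposes). A small NumPy/SciPy sketch, using column-major `vec` and random data, checking that the Kronecker formulation agrees with a direct Sylvester solve:

```python
import numpy as np
from scipy.linalg import solve_sylvester

m = 3
rng = np.random.default_rng(0)
R = rng.standard_normal((m, m)) + m * np.eye(m)   # well-conditioned stand-in for sqrtm(A)
dA = rng.standard_normal((m, m))

# For column-major vec: vec(R dR) = (I kron R) vec(dR), vec(dR R) = (R^T kron I) vec(dR).
ksum = np.kron(np.eye(m), R) + np.kron(R.T, np.eye(m))
dR_kron = np.linalg.solve(ksum, dA.ravel(order="F")).reshape(m, m, order="F")

dR_sylvester = solve_sylvester(R, R, dA)          # solves R X + X R = dA
assert np.allclose(dR_kron, dR_sylvester)
```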
def safe_embedding_lookup_sparse(
    embedding_weights,
    sparse_ids,
    sparse_weights=None,
    combiner="mean",
    default_id=None,
    name="safe_embedding_lookup_sparse",
    partition_strategy=None,  # not used
    max_norm=None,
    return_trainable=False,
):
    """Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`.

    Lookup embedding results, accounting for empty features and invalid weights.

    Any IDs will be treated as valid, including non-positive IDs.
    Invalid weights (<= 0) are pruned from input weights, as well as any IDs
    with non-positive weight. For an entry with no features, the embedding vector
    for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

    The ids and weights may be multi-dimensional. Embeddings are always aggregated
    along the last dimension.

    Args:
      embedding_weights: A single `dynamic_embedding.Variable` instance
        representing the complete embedding tensor.
      sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
      sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
      combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
        default.
      default_id: The id to use for an entry with no features.
      name: A name for this operation (optional).
      partition_strategy: A string specifying the partitioning strategy. Not
        used by this dynamic version; kept for API compatibility.
      max_norm: If not `None`, all embeddings are l2-normalized to max_norm before
        combining.

    Returns:
      combined_embeddings:
        A dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
      trainable_wrap:
        A TrainableWrapper object used to fill the Optimizer's `var_list`.
        Only provided if `return_trainable` is True.

    Raises:
      ValueError: if `embedding_weights` is empty.
  """
    if embedding_weights is None:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    if embedding_weights.key_dtype != sparse_ids.dtype:
        raise TypeError(
            "embedding_weights.key_dtype should be same with sparse_ids.dtype: "
            "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype))

    weights_dtype = sparse_weights.dtype if sparse_weights is not None else None
    if weights_dtype and embedding_weights.value_dtype != weights_dtype:
        raise TypeError(
            "embedding_weights.value_dtype should be same with sparse_weights.dtype"
            ": {} vs. {}".format(embedding_weights.value_dtype, weights_dtype))

    scope = variable_scope.get_variable_scope()
    full_name = scope.name + "/" + name if scope.name else name
    with ops.name_scope(full_name + "/"):
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.dense_shape
        original_rank_dim = tensor_shape.dimension_value(
            sparse_ids.dense_shape.get_shape()[0])
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim is None else original_rank_dim)
        sparse_ids = sparse_ops.sparse_reshape(
            sparse_ids,
            [
                math_ops.reduce_prod(
                    array_ops.slice(original_shape, [0], [original_rank - 1])),
                array_ops.gather(original_shape, original_rank - 1),
            ],
        )
        if sparse_weights is not None:
            sparse_weights = sparse_tensor.SparseTensor(
                sparse_ids.indices, sparse_weights.values,
                sparse_ids.dense_shape)

        # Prune invalid weights.
        if combiner != "sum":
            sparse_ids, sparse_weights = _prune_invalid_weights(
                sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result, trainable_ = embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=name + "/embedding_lookup_sparse",
            max_norm=max_norm,
            return_trainable=True,
        )

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.stack([1, array_ops.shape(result)[1]]),
            )

            result = array_ops.where(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name="where")

        # Reshape back from linear ids back into higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat(
                [
                    array_ops.slice(
                        math_ops.cast(original_shape, dtypes.int32),
                        [0],
                        [original_rank - 1],
                    ),
                    array_ops.slice(array_ops.shape(result), [1], [-1]),
                ],
                0,
            ),
        )
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (tensor_shape.Dimension(original_rank_dim) -
                 1).value).concatenate(result.get_shape()[1:]))
        return (final_result, trainable_) if return_trainable else final_result
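The empty-row handling above hinges on `sparse_fill_empty_rows` plus the tiled `is_row_empty` mask. A standalone sketch of that part of the pattern using the public `tf.sparse` API and a stand-in lookup result (not the `dynamic_embedding` variable itself):

```python
import tensorflow as tf

# Row 1 has no features at all.
sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [2, 0], [2, 1]], values=[3, 1, 4], dense_shape=[3, 2])

filled_ids, is_row_empty = tf.sparse.fill_empty_rows(sparse_ids, default_value=0)

result = tf.random.normal([3, 5])  # stand-in for the embedding lookup output
mask = tf.tile(tf.reshape(is_row_empty, [-1, 1]),
               tf.stack([1, tf.shape(result)[1]]))
result = tf.where(mask, tf.zeros_like(result), result)  # zero out the empty row
```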
Example #54
0
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   return array_ops.tile(x1, [i, 1])
Example #55
0
def batch_matrix_pow(matrices, powers):
    """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A).

  Uses exponentiation by squaring, with O(log(p)) matrix multiplications to
  compute A^p.

  Args:
    matrices: [batch size x N x N]
    powers: Which integer power to raise each matrix to [batch size]
  Returns:
    The matrices raised to their respective powers, same dimensions as the
    "matrices" argument.
  """
    def terminate_when_all_zero(current_argument, residual_powers,
                                accumulator):
        del current_argument, accumulator  # not used for condition
        do_exit = math_ops.reduce_any(
            math_ops.greater(residual_powers,
                             array_ops.ones_like(residual_powers)))
        return do_exit

    def do_iteration(current_argument, residual_powers, accumulator):
        """Compute one step of iterative exponentiation by squaring.

    The recursive form is:
      power(A, p) = { power(matmul(A, A), p / 2) for even p
                    { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p
      power(A, 0) = I

    The power(A, 0) = I case is handled by starting with the accumulator set to the
    identity matrix; matrices with zero residual powers are passed through
    unchanged.

    Args:
      current_argument: On this step, what is the first argument (A^2..^2) to
          the (unrolled) recursive function? [batch size x N x N]
      residual_powers: On this step, what is the second argument (residual p)?
          [batch_size]
      accumulator: Accumulates the exterior multiplications from the odd
          powers (initially the identity matrix). [batch_size x N x N]
    Returns:
      Updated versions of each argument for one step of the unrolled
      computation. Does not change parts of the batch which have a residual
      power of zero.
    """
        is_even = math_ops.equal(
            residual_powers % 2,
            array_ops.zeros(array_ops.shape(residual_powers),
                            dtype=dtypes.int32))
        new_accumulator = array_ops.where(
            is_even, accumulator, math_ops.matmul(accumulator,
                                                  current_argument))
        new_argument = math_ops.matmul(current_argument, current_argument)
        do_update = math_ops.greater(residual_powers, 1)
        new_residual_powers = residual_powers - residual_powers % 2
        new_residual_powers //= 2
        # Stop updating if we've reached our base case; some batch elements may
        # finish sooner than others
        accumulator = array_ops.where(do_update, new_accumulator, accumulator)
        current_argument = array_ops.where(do_update, new_argument,
                                           current_argument)
        residual_powers = array_ops.where(do_update, new_residual_powers,
                                          residual_powers)
        return (current_argument, residual_powers, accumulator)

    matrices = ops.convert_to_tensor(matrices)
    powers = math_ops.cast(powers, dtype=dtypes.int32)
    ident = array_ops.expand_dims(
        array_ops.diag(
            array_ops.ones([array_ops.shape(matrices)[1]],
                           dtype=matrices.dtype)), 0)
    ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1])
    (final_argument,
     final_residual_power, final_accumulator) = control_flow_ops.while_loop(
         terminate_when_all_zero, do_iteration,
         [matrices, powers, ident_tiled])
    return array_ops.where(
        math_ops.equal(
            final_residual_power,
            array_ops.zeros_like(final_residual_power, dtype=dtypes.int32)),
        ident_tiled, math_ops.matmul(final_argument, final_accumulator))
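The `while_loop` above is batched exponentiation by squaring; the same recursion for a single matrix is easier to see in plain NumPy (a sketch, not the batched TF version):

```python
import numpy as np

def matrix_power_by_squaring(a, p):
    """Compute a^p with O(log p) matrix multiplications (integer p >= 0)."""
    result = np.eye(a.shape[0])
    while p > 0:
        if p % 2 == 1:      # odd power: fold one factor into the accumulator
            result = result @ a
        a = a @ a           # square the running argument
        p //= 2
    return result

a = np.array([[1.0, 1.0], [0.0, 1.0]])
assert np.allclose(matrix_power_by_squaring(a, 5), np.linalg.matrix_power(a, 5))
```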
Example #56
0
    def _sample_n(self, n, seed=None):
        with ops.control_dependencies(self._assertions):
            n = ops.convert_to_tensor(n, name="n")
            static_n = tensor_util.constant_value(n)
            n = int(static_n) if static_n is not None else n
            cat_samples = self.cat.sample_n(n, seed=seed)

            static_samples_shape = cat_samples.get_shape()
            if static_samples_shape.is_fully_defined():
                samples_shape = static_samples_shape.as_list()
                samples_size = static_samples_shape.num_elements()
            else:
                samples_shape = array_ops.shape(cat_samples)
                samples_size = array_ops.size(cat_samples)
            static_batch_shape = self.get_batch_shape()
            if static_batch_shape.is_fully_defined():
                batch_shape = static_batch_shape.as_list()
                batch_size = static_batch_shape.num_elements()
            else:
                batch_shape = self.batch_shape()
                batch_size = math_ops.reduce_prod(batch_shape)
            static_event_shape = self.get_event_shape()
            if static_event_shape.is_fully_defined():
                event_shape = np.array(static_event_shape.as_list(),
                                       dtype=np.int32)
            else:
                event_shape = self.event_shape()

            # Get indices into the raw cat sampling tensor.  We will
            # need these to stitch sample values back out after sampling
            # within the component partitions.
            samples_raw_indices = array_ops.reshape(
                math_ops.range(0, samples_size), samples_shape)

            # Partition the raw indices so that we can use
            # dynamic_stitch later to reconstruct the samples from the
            # known partitions.
            partitioned_samples_indices = data_flow_ops.dynamic_partition(
                data=samples_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)

            # Copy the batch indices n times, as we will need to know
            # these to pull out the appropriate rows within the
            # component partitions.
            batch_raw_indices = array_ops.reshape(
                array_ops.tile(math_ops.range(0, batch_size), [n]),
                samples_shape)

            # Explanation of the dynamic partitioning below:
            #   batch indices are, e.g., [0, 1, 0, 1, 0, 1]
            # Suppose partitions are:
            #     [1 1 0 0 1 1]
            # After partitioning, batch indices are cut as:
            #     [batch_indices[x] for x in 2, 3]
            #     [batch_indices[x] for x in 0, 1, 4, 5]
            # i.e.
            #     [1 1] and [0 0 0 0]
            # Now we sample n=2 from part 0 and n=4 from part 1.
            # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
            # and for part 1 we want samples from batch entries 0, 0, 0, 0
            #   (samples 0, 1, 2, 3).
            partitioned_batch_indices = data_flow_ops.dynamic_partition(
                data=batch_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)
            samples_class = [None for _ in range(self.num_components)]

            for c in range(self.num_components):
                n_class = array_ops.size(partitioned_samples_indices[c])
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples_class_c = self.components[c].sample_n(n_class,
                                                              seed=seed)

                # Pull out the correct batch entries from each index.
                # To do this, we may have to flatten the batch shape.

                # For sample s, batch element b of component c, we get the
                # partitioned batch indices from
                # partitioned_batch_indices[c]; and shift each element by
                # the sample index.  The final lookup can be thought of as
                # a matrix gather along locations (s, b) in
                # samples_class_c where the n_class rows correspond to
                # samples within this component and the batch_size columns
                # correspond to batch elements within the component.
                #
                # Thus the lookup index is
                #   lookup[c, i] = batch_size * s[i] + b[c, i]
                # for i = 0 ... n_class[c] - 1.
                lookup_partitioned_batch_indices = (
                    batch_size * math_ops.range(n_class) +
                    partitioned_batch_indices[c])
                samples_class_c = array_ops.reshape(
                    samples_class_c,
                    array_ops.concat_v2(([n_class * batch_size], event_shape),
                                        0))
                samples_class_c = array_ops.gather(
                    samples_class_c,
                    lookup_partitioned_batch_indices,
                    name="samples_class_c_gather")
                samples_class[c] = samples_class_c

            # Stitch back together the samples across the components.
            lhs_flat_ret = data_flow_ops.dynamic_stitch(
                indices=partitioned_samples_indices, data=samples_class)
            # Reshape back to proper sample, batch, and event shape.
            ret = array_ops.reshape(
                lhs_flat_ret,
                array_ops.concat_v2((samples_shape, self.event_shape()), 0))
            ret.set_shape(
                tensor_shape.TensorShape(static_samples_shape).concatenate(
                    self.get_event_shape()))
            return ret
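The sampling above leans on `dynamic_partition` to split per-draw indices by mixture component and `dynamic_stitch` to scatter the component samples back into the original draw order. A tiny round-trip sketch of that pair with toy data (the component id stands in for the sampled value):

```python
import tensorflow as tf

cat_samples = tf.constant([1, 1, 0, 0, 1, 1])        # chosen component per draw
raw_indices = tf.range(tf.size(cat_samples))         # [0, 1, 2, 3, 4, 5]

parts = tf.dynamic_partition(raw_indices, cat_samples, num_partitions=2)
# parts[0] == [2, 3], parts[1] == [0, 1, 4, 5]

# "Sample" each component (stand-ins), then stitch back into draw order.
samples = [tf.fill(tf.shape(p), float(c)) for c, p in enumerate(parts)]
stitched = tf.dynamic_stitch(parts, samples)         # [1., 1., 0., 0., 1., 1.]
```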
Example #57
0
def matrix_to_powers(matrix, powers):
    """Raise a single matrix to multiple powers."""
    matrix_tiled = array_ops.tile(array_ops.expand_dims(matrix, 0),
                                  [array_ops.size(powers), 1, 1])
    return batch_matrix_pow(matrix_tiled, powers)
Example #58
0
 def initialize(self, name=None):
     with ops.name_scope(name, "TrainingHelperInitialize"):
         finished = array_ops.tile([False], [self._batch_size])
         all_finished = math_ops.reduce_all(finished)
         next_inputs = self._embedding_fn(self._input_tas.read(0))
         return (finished, next_inputs)
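For completeness, the `tile` here only broadcasts a single `False` flag into a per-batch vector; a one-line sketch with an assumed batch size:

```python
import tensorflow as tf

batch_size = 4
finished = tf.tile([False], [batch_size])   # -> [False, False, False, False]
```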
Example #59
0
def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids,
                                      candidate_ids, margin_multiplier,
                                      margin_type):
  """Find the next centroid that maximizes the loss augmented inference.

  This function is a subroutine called from compute_augmented_facility_locations

  Args:
    pairwise_distances: 2-D Tensor of pairwise distances.
    labels: 1-D Tensor of ground truth cluster assignment.
    chosen_ids: 1-D Tensor of current centroid indices.
    candidate_ids: 1-D Tensor of candidate indices.
    margin_multiplier: multiplication constant.
    margin_type: Type of structured margin to use. Default is nmi.

  Returns:
    integer index.
  """
  num_candidates = array_ops.shape(candidate_ids)[0]

  pairwise_distances_chosen = array_ops.gather(pairwise_distances, chosen_ids)
  pairwise_distances_candidate = array_ops.gather(
      pairwise_distances, candidate_ids)
  pairwise_distances_chosen_tile = array_ops.tile(
      pairwise_distances_chosen, [1, num_candidates])

  candidate_scores = -1.0 * math_ops.reduce_sum(
      array_ops.reshape(
          math_ops.reduce_min(
              array_ops.concat([
                  pairwise_distances_chosen_tile,
                  array_ops.reshape(pairwise_distances_candidate, [1, -1])
              ], 0),
              axis=0,
              keep_dims=True), [num_candidates, -1]),
      axis=1)

  nmi_scores = array_ops.zeros([num_candidates])
  iteration = array_ops.constant(0)

  def func_cond(iteration, nmi_scores):
    del nmi_scores  # Unused in func_cond()
    return iteration < num_candidates

  def func_body(iteration, nmi_scores):
    predictions = get_cluster_assignment(
        pairwise_distances,
        array_ops.concat([chosen_ids, [candidate_ids[iteration]]], 0))
    nmi_score_i = compute_clustering_score(labels, predictions, margin_type)
    pad_before = array_ops.zeros([iteration])
    pad_after = array_ops.zeros([num_candidates - 1 - iteration])
    # return 1 - NMI score as the structured loss.
    #   because NMI is higher the better [0,1].
    return iteration + 1, nmi_scores + array_ops.concat(
        [pad_before, [1.0 - nmi_score_i], pad_after], 0)

  _, nmi_scores = control_flow_ops.while_loop(
      func_cond, func_body, [iteration, nmi_scores])

  candidate_scores = math_ops.add(
      candidate_scores, margin_multiplier * nmi_scores)

  argmax_index = math_ops.to_int32(
      math_ops.argmax(candidate_scores, dimension=0))

  return candidate_ids[argmax_index]
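The tile/concat/reduce_min block above scores each candidate by the total distance of every point to its nearest facility among the already-chosen centroids plus that candidate. An equivalent, unvectorized NumPy sketch of just that score, with hypothetical small inputs (the NMI margin term is then added on top, as in the loop above):

```python
import numpy as np

rng = np.random.default_rng(0)
pairwise = rng.random((6, 6))            # [num_points, num_points] distances
chosen_ids, candidate_ids = [0], [2, 4, 5]

scores = []
for c in candidate_ids:
    # Distance of every point to its nearest facility among chosen + candidate.
    stacked = np.vstack([pairwise[chosen_ids], pairwise[c][None, :]])
    scores.append(-stacked.min(axis=0).sum())
# np.argmax(scores) is the facility-location part of the candidate ranking.
```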
Example #60
0
def triplet_loss_adapted_from_tf(y_true, y_pred):
    del y_true
    margin = 1.
    labels = y_pred[:, :1]

    labels = tf.cast(labels, dtype='int32')

    embeddings = y_pred[:, 1:]

    ### Code from Tensorflow function [tf.contrib.losses.metric_learning.triplet_semihard_loss] starts here:

    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    # lshape=array_ops.shape(labels)
    # assert lshape.shape == 1
    # labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    # global batch_size
    batch_size = array_ops.size(labels)  # scalar batch size

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0)),
        num_positives,
        name='triplet_semihard_loss')

    ### Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance
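This adapted loss expects `y_pred` to carry the integer label in column 0 and the embedding in the remaining columns, since Keras losses only receive `y_true`/`y_pred`. A minimal call sketch, assuming `pairwise_distance`, `masked_minimum`, and `masked_maximum` from the original TF triplet-loss implementation are in scope along with the usual `tf`/`math_ops`/`array_ops` imports:

```python
import tensorflow as tf

labels = tf.constant([[0.], [0.], [1.], [1.]])    # [batch, 1]; concatenated as float
embeddings = tf.math.l2_normalize(tf.random.normal([4, 8]), axis=1)

y_pred = tf.concat([labels, embeddings], axis=1)  # column 0 = label, rest = embedding
loss = triplet_loss_adapted_from_tf(y_true=None, y_pred=y_pred)
```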