Example #1
def _get_examples(file_name_queue, reader, num_threads, read_batch_size,
                  filter_fn, parse_fn):
  with ops.name_scope('read'):
    example_list = []
    for _ in range(num_threads):
      if read_batch_size > 1:
        keys, examples_proto = reader().read_up_to(file_name_queue,
                                                   read_batch_size)
      else:
        keys, examples_proto = reader().read(file_name_queue)
      if filter_fn:
        mask = filter_fn(keys, examples_proto)
        keys = array_ops.boolean_mask(keys, mask)
        examples_proto = array_ops.boolean_mask(examples_proto, mask)
      if parse_fn:
        parsed_examples = parse_fn(examples_proto)
        # Map keys into example map because batch_join doesn't support
        # tuple of Tensor + dict.
        if isinstance(parsed_examples, dict):
          parsed_examples[KEY_FEATURE_NAME] = keys
          example_list.append(parsed_examples)
        else:
          example_list.append((keys, parsed_examples))
      else:
        example_list.append((keys, examples_proto))
    return example_list
Example #2
def dense_labels_to_sparse(dense, length):
  """Convert dense labels with sequence lengths to sparse tensor.

  Args:
    dense: tensor of shape [batch, max_length]
    length: int tensor of shape [batch]
      The length of each sequence in dense.

  Returns:
    tf.SparseTensor with values only for the valid elements of sequences.
  """

  flat_values = array_ops.reshape(dense, [-1])
  flat_indices = math_ops.range(
      array_ops.shape(flat_values, out_type=dtypes.int64)[0])
  mask = array_ops.sequence_mask(length, maxlen=array_ops.shape(dense)[1])
  flat_mask = array_ops.reshape(mask, [-1])
  indices = array_ops.expand_dims(
      array_ops.boolean_mask(flat_indices, flat_mask), 1)
  values = array_ops.boolean_mask(flat_values, flat_mask)
  sparse = sparse_tensor.SparseTensor(
      indices=indices, values=math_ops.cast(values, dtypes.int32),
      dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
  reshaped = sparse_ops.sparse_reshape(sparse, array_ops.shape(dense))
  max_length = math_ops.reduce_max(length)
  return sparse_tensor.SparseTensor(
      indices=reshaped.indices,
      values=reshaped.values,
      dense_shape=[
          math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
          math_ops.cast(max_length, dtypes.int64)])
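A minimal usage sketch for the helper above (values are illustrative; assumes TF 1.x graph mode and that the module-level imports used by `dense_labels_to_sparse` are in scope):

import tensorflow as tf

dense = tf.constant([[1, 2, 0], [3, 0, 0]], dtype=tf.int32)   # padded label batch
length = tf.constant([2, 1], dtype=tf.int32)                  # valid length per row
labels_sp = dense_labels_to_sparse(dense, length)
# labels_sp.indices     -> [[0, 0], [0, 1], [1, 0]]
# labels_sp.values      -> [1, 2, 3]
# labels_sp.dense_shape -> [2, 2]   (batch, longest valid sequence)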
Example #3
def collapse_repeated(labels, seq_length, name=None):
  """Merge repeated labels into single labels.

  Args:
    labels: Tensor of shape [batch, max value in seq_length]
    seq_length: Tensor of shape [batch], sequence length of each batch element.
    name: A name for this `Op`. Defaults to "collapse_repeated_labels".

  Returns:
    A tuple `(collapsed_labels, new_seq_length)` where

    collapsed_labels: Tensor of shape [batch, max_seq_length] with repeated
    labels collapsed and padded to max_seq_length, eg:
    `[[A, A, B, B, A], [A, B, C, D, E]] => [[A, B, A, 0, 0], [A, B, C, D, E]]`

    new_seq_length: int tensor of shape [batch] with new sequence lengths.
  """

  with ops.name_scope(name, "collapse_repeated_labels", [labels, seq_length]):
    labels = ops.convert_to_tensor(labels, name="labels")
    seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

    # Mask labels that don't equal previous label.
    label_mask = array_ops.concat([
        array_ops.ones_like(labels[:, :1], dtypes.bool),
        math_ops.not_equal(labels[:, 1:], labels[:, :-1])
    ], axis=1)

    # Filter labels that aren't in the original sequence.
    maxlen = _get_dim(labels, 1)
    seq_mask = array_ops.sequence_mask(seq_length, maxlen=maxlen)
    label_mask = math_ops.logical_and(label_mask, seq_mask)

    # Count masks for new sequence lengths.
    new_seq_len = math_ops.reduce_sum(
        math_ops.cast(label_mask, dtypes.int32), axis=1)

    # Mask indexes based on sequence length mask.
    new_maxlen = math_ops.reduce_max(new_seq_len)
    idx_mask = array_ops.sequence_mask(new_seq_len, maxlen=new_maxlen)

    # Flatten everything; label_mask selects the labels to keep and idx_mask
    # selects their target (scatter) indices.
    flat_labels = array_ops.reshape(labels, [-1])
    flat_label_mask = array_ops.reshape(label_mask, [-1])
    flat_idx_mask = array_ops.reshape(idx_mask, [-1])
    idx = math_ops.range(_get_dim(flat_idx_mask, 0))

    # Scatter to flat shape.
    flat = array_ops.scatter_nd(
        indices=array_ops.expand_dims(
            array_ops.boolean_mask(idx, flat_idx_mask), axis=1),
        updates=array_ops.boolean_mask(flat_labels, flat_label_mask),
        shape=array_ops.shape(flat_idx_mask))

    # Reshape back to square batch.
    batch_size = _get_dim(labels, 0)
    new_shape = [batch_size, new_maxlen]
    return (array_ops.reshape(flat, new_shape),
            math_ops.cast(new_seq_len, seq_length.dtype))
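A small usage sketch reproducing the docstring example (illustrative values; assumes the internal helpers such as `_get_dim` used above are in scope):

import tensorflow as tf

labels = tf.constant([[1, 1, 2, 2, 1],
                      [1, 2, 3, 4, 5]], dtype=tf.int32)
seq_length = tf.constant([5, 5], dtype=tf.int32)
collapsed, new_len = collapse_repeated(labels, seq_length)
# collapsed -> [[1, 2, 1, 0, 0],
#               [1, 2, 3, 4, 5]]
# new_len   -> [3, 5]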
def _filter_input(input_tensor, vocab_freq_table, vocab_min_count,
                  vocab_subsampling, corpus_size, seed):
  """Filters input tensor based on vocab freq, threshold, and subsampling."""
  if vocab_freq_table is None:
    return input_tensor

  if not isinstance(vocab_freq_table, lookup.InitializableLookupTableBase):
    raise ValueError(
        "vocab_freq_table must be a subclass of "
        "InitializableLookupTableBase (such as HashTable) instead of type "
        "{}.".format(type(vocab_freq_table)))

  with ops.name_scope(
      "filter_vocab", values=[vocab_freq_table, input_tensor, vocab_min_count]):
    freq = vocab_freq_table.lookup(input_tensor)
    # Filters out elements in input_tensor that are not found in
    # vocab_freq_table (table returns a default value of -1 specified above when
    # an element is not found).
    mask = math_ops.not_equal(freq, vocab_freq_table.default_value)

    # Filters out elements whose vocab frequencies are less than the threshold.
    if vocab_min_count is not None:
      cast_threshold = math_ops.cast(vocab_min_count, freq.dtype)
      mask = math_ops.logical_and(mask,
                                  math_ops.greater_equal(freq, cast_threshold))

    input_tensor = array_ops.boolean_mask(input_tensor, mask)
    freq = array_ops.boolean_mask(freq, mask)

  if not vocab_subsampling:
    return input_tensor

  if vocab_subsampling < 0 or vocab_subsampling > 1:
    raise ValueError(
        "Invalid vocab_subsampling={} - it should be within range [0, 1].".
        format(vocab_subsampling))

  # Subsamples the input tokens based on vocabulary frequency and
  # vocab_subsampling threshold (i.e., randomly discard commonly appearing
  # tokens).
  with ops.name_scope(
      "subsample_vocab", values=[input_tensor, freq, vocab_subsampling]):
    corpus_size = math_ops.cast(corpus_size, dtypes.float64)
    freq = math_ops.cast(freq, dtypes.float64)
    vocab_subsampling = math_ops.cast(vocab_subsampling, dtypes.float64)

    # From tensorflow_models/tutorials/embedding/word2vec_kernels.cc, which is
    # supposed to correlate with Eq. 5 in http://arxiv.org/abs/1310.4546.
    keep_prob = ((math_ops.sqrt(freq /
                                (vocab_subsampling * corpus_size)) + 1.0) *
                 (vocab_subsampling * corpus_size / freq))
    random_prob = random_ops.random_uniform(
        array_ops.shape(freq),
        minval=0,
        maxval=1,
        dtype=dtypes.float64,
        seed=seed)

    mask = math_ops.less_equal(random_prob, keep_prob)
    return array_ops.boolean_mask(input_tensor, mask)
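A hedged sketch of calling `_filter_input` with a frequency table (made-up values, TF 1.x; it assumes `tf.contrib.lookup.HashTable` satisfies the `InitializableLookupTableBase` check above):

import tensorflow as tf

keys = tf.constant(["the", "quick", "fox"])
freqs = tf.constant([100, 10, 2], dtype=tf.int64)
vocab_freq_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(keys, freqs), default_value=-1)

tokens = tf.constant(["the", "quick", "brown", "fox"])
filtered = _filter_input(tokens, vocab_freq_table, vocab_min_count=5,
                         vocab_subsampling=None, corpus_size=None, seed=None)
# "brown" is dropped (not in the table) and "fox" is dropped (freq 2 < 5),
# so `filtered` evaluates to ["the", "quick"].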
Example #5
  def _apply_transform(self, input_tensors, **kwargs):
    """Applies the transformation to the `transform_input`.

    Args:
      input_tensors: a list of Tensors representing the input to
        the Transform.
      **kwargs: Additional keyword arguments, unused here.

    Returns:
        A namedtuple of Tensors representing the transformed output.
    """
    d = input_tensors[0]

    if self.strip_value is np.nan:
      strip_hot = math_ops.is_nan(d)
    else:
      strip_hot = math_ops.equal(d,
                                 array_ops.constant([self.strip_value],
                                                    dtype=d.dtype))
    keep_hot = math_ops.logical_not(strip_hot)

    length = array_ops.reshape(array_ops.shape(d), [])
    indices = array_ops.boolean_mask(math_ops.range(length), keep_hot)
    values = array_ops.boolean_mask(d, keep_hot)

    sparse_indices = array_ops.reshape(
        math_ops.cast(indices, dtypes.int64), [-1, 1])
    shape = math_ops.cast(array_ops.shape(d), dtypes.int64)

    # pylint: disable=not-callable
    return self.return_type(ops.SparseTensor(sparse_indices, values, shape))
def mask_activations_and_labels(activations, labels, sequence_lengths):
  """Remove entries outside `sequence_lengths` and returned flattened results.

  Args:
    activations: Output of the RNN, shape `[batch_size, padded_length, k]`.
    labels: Label values, shape `[batch_size, padded_length]`.
    sequence_lengths: A `Tensor` of shape `[batch_size]` with the unpadded
      length of each sequence. If `None`, then each sequence is unpadded.

  Returns:
    activations_masked: `logit` values with those beyond `sequence_lengths`
      removed for each batch. Batches are then concatenated. Shape
      `[tf.sum(sequence_lengths), k]` if `sequence_lengths` is not `None` and
      shape `[batch_size * padded_length, k]` otherwise.
    labels_masked: Label values after removing unneeded entries. Shape
      `[tf.sum(sequence_lengths)]` if `sequence_lengths` is not `None` and shape
      `[batch_size * padded_length]` otherwise.
  """
  with ops.name_scope('mask_activations_and_labels',
                      values=[activations, labels, sequence_lengths]):
    labels_shape = array_ops.shape(labels)
    batch_size = labels_shape[0]
    padded_length = labels_shape[1]
    if sequence_lengths is None:
      flattened_dimension = padded_length * batch_size
      activations_masked = array_ops.reshape(activations,
                                             [flattened_dimension, -1])
      labels_masked = array_ops.reshape(labels, [flattened_dimension])
    else:
      mask = array_ops.sequence_mask(sequence_lengths, padded_length)
      activations_masked = array_ops.boolean_mask(activations, mask)
      labels_masked = array_ops.boolean_mask(labels, mask)
    return activations_masked, labels_masked
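A minimal sketch of the masking behaviour above (illustrative shapes, TF 1.x graph mode assumed):

import tensorflow as tf

activations = tf.reshape(tf.range(12, dtype=tf.float32), [2, 3, 2])  # [batch, padded, k]
labels = tf.constant([[1, 2, 0],
                      [3, 4, 5]])
sequence_lengths = tf.constant([2, 3])
acts, labs = mask_activations_and_labels(activations, labels, sequence_lengths)
# acts has shape [5, 2]: the 2 valid steps of row 0 followed by the 3 of row 1.
# labs -> [1, 2, 3, 4, 5]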
Example #7
 def testMaskDimensionsSetToNoneRaises(self):
   # The rank of the mask tensor must be specified. This is explained
   # in the docstring as well.
   with self.test_session():
     tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
     mask = array_ops.placeholder(dtypes.bool, shape=None)
     with self.assertRaisesRegexp(ValueError, "dimensions must be specified"):
       array_ops.boolean_mask(tensor, mask)
 def testMaskDimensionsSetToNoneRaises(self):
   # The rank of the mask tensor must be specified. This is explained
   # in the docstring as well.
   with self.test_session():
     tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
     mask = array_ops.placeholder(dtypes.bool, shape=None)
     with self.assertRaisesRegexp(ValueError, "dimensions must be specified"):
       array_ops.boolean_mask(tensor, mask)
Example #9
 def shortlist_insert():
   larger_ids = array_ops.boolean_mask(
       math_ops.to_int64(ids), larger_scores)
   larger_score_values = array_ops.boolean_mask(scores, larger_scores)
   shortlist_ids, new_ids, new_scores = tensor_forest_ops.top_n_insert(
       self.sl_ids, self.sl_scores, larger_ids, larger_score_values)
   u1 = state_ops.scatter_update(self.sl_ids, shortlist_ids, new_ids)
   u2 = state_ops.scatter_update(self.sl_scores, shortlist_ids, new_scores)
   return control_flow_ops.group(u1, u2)
Example #10
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
  """Returns the names of all uninitialized resources in resource_list.

  If the returned tensor is empty then all resources have been initialized.

  Args:
   resource_list: resources to check. If None, will use shared_resources() +
    local_resources().
   name: name for the resource-checking op.

  Returns:
   Tensor containing names of the handles of all resources which have not
   yet been initialized.

  """
  if resource_list is None:
    resource_list = shared_resources() + local_resources()
  with ops.name_scope(name):
    # Run all operations on CPU
    with ops.device("/cpu:0"):
      if not resource_list:
        # Return an empty tensor so we only need to check for returned tensor
        # size being 0 as an indication of model ready.
        return array_ops.constant([], dtype=dtypes.string)
      # Get a 1-D boolean tensor listing whether each resource is initialized.
      variables_mask = math_ops.logical_not(
          array_ops.stack([r.is_initialized for r in resource_list]))
      # Get a 1-D string tensor containing all the resource names.
      variable_names_tensor = array_ops.constant(
          [s.handle.name for s in resource_list])
      # Return a 1-D tensor containing all the names of uninitialized resources.
      return array_ops.boolean_mask(variable_names_tensor, variables_mask)
Example #11
 def test(self):
   mask = core.LabeledTensor(math_ops.range(7) > 3, [self.a0])
   masked_lt = ops.boolean_mask(self.original_lt, mask)
   golden_lt = core.LabeledTensor(
       array_ops.boolean_mask(self.original_lt.tensor, mask.tensor),
       ['x', self.a1, self.a2, self.a3])
   self.assertLabeledTensorsEqual(masked_lt, golden_lt)
Example #12
def boolean_mask(labeled_tensor, mask, name=None):
  """Apply a boolean mask to a labeled tensor.

  Unlike `tf.boolean_mask`, this currently only works on 1-dimensional masks.
  The mask is applied to the first axis of `labeled_tensor`. Labels on the first
  axis are removed, because True indices in `mask` may not be known dynamically.

  Args:
    labeled_tensor: The input tensor.
    mask: A 1-D boolean `LabeledTensor` whose single axis matches the first
      axis of `labeled_tensor`.
    name: Optional op name.

  Returns:
    The masked labeled tensor.

  Raises:
    ValueError: if the first axis of the mask does not match the first axis of
      the labeled tensor.
  """
  with ops.name_scope(name, 'lt_boolean_mask', [labeled_tensor, mask]) as scope:
    labeled_tensor = core.convert_to_labeled_tensor(labeled_tensor)
    mask = core.convert_to_labeled_tensor(mask)

    if len(mask.axes) > 1:
      raise NotImplementedError(
          "LabeledTensor's boolean_mask currently only supports 1D masks")
    mask_axis = list(mask.axes.values())[0]
    lt_axis = list(labeled_tensor.axes.values())[0]
    if mask_axis != lt_axis:
      raise ValueError('the first axis of the labeled tensor and the mask '
                       'are not equal:\n%r\n%r' % (lt_axis, mask_axis))
    op = array_ops.boolean_mask(labeled_tensor.tensor, mask.tensor, name=scope)
    # TODO(shoyer): attempt to infer labels for the masked values, by calling
    # tf.contrib.util.constant_value on the mask?
    axes = [lt_axis.name] + list(labeled_tensor.axes.values())[1:]
    return core.LabeledTensor(op, axes)
  def pack_uint8_r2_to_uint32(self, test_input):
    num_rows, num_columns = test_input.get_shape().as_list()
    num_output_columns = int(math.ceil(num_columns / 4.0))
    padding_input = array_ops.pad(
        math_ops.cast(test_input, dtype=dtypes.uint8),
        constant_op.constant([[
            0,
            0,
        ], [0, num_output_columns * 4 - num_columns]]))
    output = array_ops.zeros([num_rows, num_output_columns],
                             dtype=dtypes.uint32)
    num_elements_per_pack = 4
    shift_bits = 8

    iota_r1 = math_ops.range(num_output_columns * num_elements_per_pack)

    for p in range(num_elements_per_pack):
      selected_index = math_ops.equal(
          math_ops.mod(iota_r1, num_elements_per_pack), p)
      gather_index = array_ops.boolean_mask(iota_r1, selected_index)
      gathered_input = array_ops.gather(padding_input, gather_index, axis=1)
      total_shift_bits = shift_bits * (num_elements_per_pack - p - 1)
      left_shift_input = bitwise_ops.left_shift(
          math_ops.cast(gathered_input, dtype=dtypes.uint32), total_shift_bits)
      output = bitwise_ops.bitwise_or(output, left_shift_input)
    return output
Example #14
def report_uninitialized_variables(var_list=None, name="report_uninitialized_variables"):
    """Adds ops to list the names of uninitialized variables.

  When run, it returns a 1-D tensor containing the names of uninitialized
  variables if there are any, or an empty array if there are none.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `all_variables() + local_variables()`
    name: Optional name of the `Operation`.

  Returns:
    A 1-D tensor containing names of the uninitialized variables, or an empty 1-D
    tensor if there are no variables or no uninitialized variables.
  """
    if var_list is None:
        var_list = all_variables() + local_variables()
    # Backwards compatibility for old-style variables. TODO(touts): remove.
    if not var_list:
        var_list = []
        for op in ops.get_default_graph().get_operations():
            if op.type in ["Variable", "AutoReloadVariable"]:
                var_list.append(op.outputs[0])
    if not var_list:
        # Return an empty tensor so we only need to check for returned tensor
        # size being 0 as an indication of model ready.
        return array_ops.constant([], dtype=dtypes.string, name=name)
    else:
        # Get a 1-D boolean tensor listing whether each variable is initialized.
        variables_mask = math_ops.logical_not(array_ops.pack([state_ops.is_variable_initialized(v) for v in var_list]))
        # Get a 1-D string tensor containing all the variable names.
        variable_names_tensor = array_ops.constant([s.op.name for s in var_list])
        # Return a 1-D tensor containing all the names of uninitialized variables.
        return array_ops.boolean_mask(variable_names_tensor, variables_mask, name=name)
 def testEmptyInput1D(self):
   mask = np.array([]).astype(bool)
   arr = np.array([]).astype(np.float32)
   numpy_result = arr[mask]
   tf_result = array_ops.boolean_mask(arr, mask)
   self.assertAllEqual(numpy_result.shape[1:], tf_result.get_shape()[1:])
   with self.test_session():
     self.assertAllClose(numpy_result, tf_result.eval())
Example #16
def _get_examples(file_name_queue, reader, num_threads, read_batch_size,
                  filter_fn, parse_fn):
  """Get example filenames matching.

  Args:
    file_name_queue: A queue implementation that dequeues elements in
      first-in first-out order.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    num_threads: The number of threads enqueuing examples.
    read_batch_size: An int or scalar `Tensor` specifying the number of
      records to read at once.
    filter_fn: Filtering function, takes both keys as well as an `Example`
      Tensors and returns a boolean mask of the same shape as the input Tensors
      to be applied for filtering. If `None`, no filtering is done.
    parse_fn: Parsing function, takes `Example` Tensor returns parsed
      representation. If `None`, no parsing is done.

  Returns:
    List with one entry per reader thread; each entry is either a
    `(keys, examples)` tuple or, when `parse_fn` returns a dict, a dict of
    parsed features with the keys stored under `KEY_FEATURE_NAME`.
  """
  with ops.name_scope('read'):
    example_list = []
    for _ in range(num_threads):
      keys, examples_proto = utils.smart_cond(
          read_batch_size > 1,
          lambda: reader().read_up_to(file_name_queue, read_batch_size),
          lambda: reader().read(file_name_queue))

      if filter_fn:
        mask = filter_fn(keys, examples_proto)
        keys = array_ops.boolean_mask(keys, mask)
        examples_proto = array_ops.boolean_mask(examples_proto, mask)
      if parse_fn:
        parsed_examples = parse_fn(examples_proto)
        # Map keys into example map because batch_join doesn't support
        # tuple of Tensor + dict.
        if isinstance(parsed_examples, dict):
          parsed_examples[KEY_FEATURE_NAME] = keys
          example_list.append(parsed_examples)
        else:
          example_list.append((keys, parsed_examples))
      else:
        example_list.append((keys, examples_proto))
    return example_list
def _make_auc_histograms(boolean_labels, scores, score_range, nbins):
  """Create histogram tensors from one batch of labels/scores."""

  with variable_scope.variable_op_scope(
      [boolean_labels, scores, nbins], None, 'make_auc_histograms'):
    # Histogram of scores for records in this batch with True label.
    hist_true = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, boolean_labels),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_true')
    # Histogram of scores for records in this batch with False label.
    hist_false = histogram_ops.histogram_fixed_width(
        array_ops.boolean_mask(scores, math_ops.logical_not(boolean_labels)),
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_false')
    return hist_true, hist_false
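An illustrative call to the histogram helper above (hypothetical scores; five equal-width bins over [0, 1]):

import tensorflow as tf

boolean_labels = tf.constant([True, False, True, False])
scores = tf.constant([0.9, 0.2, 0.6, 0.8])
hist_true, hist_false = _make_auc_histograms(
    boolean_labels, scores, score_range=[0.0, 1.0], nbins=5)
# hist_true  -> [0, 0, 0, 1, 1]   (scores 0.6 and 0.9 of the True records)
# hist_false -> [0, 1, 0, 0, 1]   (scores 0.2 and 0.8 of the False records)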
  def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
    # The rank of the mask tensor must be specified. This is explained
    # in the docstring as well.
    with self.test_session() as sess:
      ph_tensor = array_ops.placeholder(dtypes.int32, shape=None)
      ph_mask = array_ops.placeholder(dtypes.bool, shape=[None])

      arr = np.array([[1, 2], [3, 4]])
      mask = np.array([False, True])

      masked_tensor = sess.run(array_ops.boolean_mask(ph_tensor, ph_mask),
                               feed_dict={ph_tensor: arr,
                                          ph_mask: mask})
      np.testing.assert_allclose(masked_tensor, arr[mask])
Example #19
  def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
    # The leading dimensions of tensor can be None, allowing for minibatch size
    # None.  This is explained in the docstring as well.
    with self.test_session() as sess:
      ph_tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
      ph_mask = array_ops.placeholder(dtypes.bool, shape=[None])

      arr = np.array([[1, 2], [3, 4]])
      mask = np.array([False, True])

      masked_tensor = sess.run(
          array_ops.boolean_mask(ph_tensor, ph_mask),
          feed_dict={ph_tensor: arr, ph_mask: mask})
      np.testing.assert_allclose(masked_tensor, arr[mask])
  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
    """Check equivalence between boolean_mask and numpy masking."""
    if make_mask is None:
      make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
    arr = np.random.rand(*arr_shape)
    mask = make_mask(arr_shape[:ndims_mask])
    masked_arr = arr[mask]
    with self.test_session():
      masked_tensor = array_ops.boolean_mask(arr, mask)

      # Leading dimension size of masked_tensor is always unknown until runtime
      # since we don't know how many elements will be kept.
      self.assertAllEqual(masked_tensor.get_shape()[1:], masked_arr.shape[1:])

      self.assertAllClose(masked_arr, masked_tensor.eval())
Example #21
 def CheckVersusNumpy(self, ndims_mask, arr_shape):
   """Check equivalence between boolean_mask and numpy masking."""
   arr_size = arr_shape.prod()
   arr = np.random.rand(arr_size).reshape(arr_shape)
   mask_shape = arr_shape[: ndims_mask]
   mask_size = mask_shape.prod()
   mask = np.random.randint(
       0, 2, size=mask_size).reshape(mask_shape).astype(bool)
   masked_arr = arr[mask]
   with self.test_session():
     masked_tensor = array_ops.boolean_mask(arr, mask)
     np.testing.assert_allclose(
         masked_arr,
         masked_tensor.eval(),
         err_msg="masked_arr:\n%s\n\nmasked_tensor:\n%s" % (
             masked_arr, masked_tensor.eval()))
Example #22
 def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
   """Check equivalence between boolean_mask and numpy masking."""
   if make_mask is None:
     make_mask = lambda shape: np.random.randint(0, 2, size=shape).astype(bool)
   arr = np.random.rand(*arr_shape)
   mask = make_mask(arr_shape[: ndims_mask])
   masked_arr = arr[mask]
   with self.test_session():
     masked_tensor = array_ops.boolean_mask(arr, mask)
     np.testing.assert_allclose(
         masked_arr,
         masked_tensor.eval(),
         err_msg="masked_arr:\n%s\n\nmasked_tensor:\n%s" % (
             masked_arr, masked_tensor.eval()))
     masked_tensor.get_shape().assert_is_compatible_with(masked_arr.shape)
     self.assertSequenceEqual(
         masked_tensor.get_shape()[1:].as_list(),
         masked_arr.shape[1:],
         msg="shape information lost %s -> %s" % (
             masked_arr.shape, masked_tensor.get_shape()))
Example #23
def report_uninitialized_variables(var_list=None,
                                   name="report_uninitialized_variables"):
  """Adds ops to list the names of uninitialized variables.

  When run, it returns a 1-D tensor containing the names of uninitialized
  variables if there are any, or an empty array if there are none.

  Args:
    var_list: List of `Variable` objects to check. Defaults to the
      value of `global_variables() + local_variables()`
    name: Optional name of the `Operation`.

  Returns:
    A 1-D tensor containing names of the uninitialized variables, or an empty
    1-D tensor if there are no variables or no uninitialized variables.
  """
  if var_list is None:
    var_list = global_variables() + local_variables()
    # Backwards compatibility for old-style variables. TODO(touts): remove.
    if not var_list:
      var_list = []
      for op in ops.get_default_graph().get_operations():
        if op.type in ["Variable", "VariableV2", "AutoReloadVariable"]:
          var_list.append(op.outputs[0])
  with ops.name_scope(name):
    if not var_list:
      # Return an empty tensor so we only need to check for returned tensor
      # size being 0 as an indication of model ready.
      return array_ops.constant([], dtype=dtypes.string)
    else:
      # Get a 1-D boolean tensor listing whether each variable is initialized.
      variables_mask = math_ops.logical_not(
          array_ops.stack(
              [state_ops.is_variable_initialized(v) for v in var_list]))
      # Get a 1-D string tensor containing all the variable names.
      variable_names_tensor = array_ops.constant([s.op.name for s in var_list])
      # Return a 1-D tensor containing all the names of uninitialized variables.
      return array_ops.boolean_mask(variable_names_tensor, variables_mask)
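A quick sketch of what this op reports at run time, using the public `tf.report_uninitialized_variables` alias (TF 1.x graph mode assumed):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

v = tf.Variable([1.0], name="v")
w = tf.Variable([2.0], name="w")
with tf.Session() as sess:
  print(sess.run(tf.report_uninitialized_variables()))  # [b'v' b'w']
  sess.run(v.initializer)
  print(sess.run(tf.report_uninitialized_variables()))  # [b'w']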
Example #24
def _mean_squared_loss(labels,
                       logits,
                       weights=None,
                       reduction=core_losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
                       name=None):
    """Computes the mean squared loss for a list.

  Given the labels of graded relevance l_i and the logits s_i, we calculate
  the squared error for each ith position and aggregate the per position
  losses.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1] for list-wise
      weights, or a `Tensor` with shape [batch_size, list_size] for item-wise
      weights.
    reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
      reduce training loss over batch.
    name: A string used as the name for this loss.

  Returns:
    An op for the mean squared error as a loss.
  """
    with ops.name_scope(name, 'mean_squared_loss', (labels, logits, weights)):
        is_label_valid = array_ops.reshape(utils.is_label_valid(labels), [-1])
        weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
        weights = array_ops.ones_like(labels) * weights
        label_vector, logit_vector, weight_vector = [
            array_ops.boolean_mask(array_ops.reshape(x, [-1]), is_label_valid)
            for x in [labels, logits, weights]
        ]
        return core_losses.mean_squared_error(label_vector,
                                              logit_vector,
                                              weights=weight_vector,
                                              reduction=reduction)
Example #25
def copy_lc_weights_2_to_1(lc_layer_2_from, lc_layer_1_to):
    lc_2_kernel, lc_2_bias = lc_layer_2_from.weights
    lc_2_kernel_masked = lc_2_kernel * lc_layer_2_from.kernel_mask

    data_format = lc_layer_2_from.data_format

    if data_format == 'channels_first':
        if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D):
            permutation = (3, 0, 1, 2)
        elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D):
            permutation = (4, 5, 0, 1, 2, 3)
        else:
            raise NotImplementedError(lc_layer_2_from)

    elif data_format == 'channels_last':
        if isinstance(lc_layer_2_from, keras.layers.LocallyConnected1D):
            permutation = (2, 0, 1, 3)
        elif isinstance(lc_layer_2_from, keras.layers.LocallyConnected2D):
            permutation = (3, 4, 0, 1, 2, 5)
        else:
            raise NotImplementedError(lc_layer_2_from)

    else:
        raise NotImplementedError(data_format)

    lc_2_kernel_masked = keras.backend.permute_dimensions(
        lc_2_kernel_masked, permutation)

    lc_2_kernel_mask = math_ops.not_equal(lc_2_kernel_masked, 0)
    lc_2_kernel_flat = array_ops.boolean_mask(lc_2_kernel_masked,
                                              lc_2_kernel_mask)
    lc_2_kernel_reshaped = keras.backend.reshape(lc_2_kernel_flat,
                                                 lc_layer_1_to.kernel.shape)

    lc_2_kernel_reshaped = keras.backend.get_value(lc_2_kernel_reshaped)
    lc_2_bias = keras.backend.get_value(lc_2_bias)

    lc_layer_1_to.set_weights([lc_2_kernel_reshaped, lc_2_bias])
Example #26
def gen_crossentropy(y_true, y_pred, q=0.7, k=-1.0):
    # Keep only the predicted probabilities at positions where y_true == 1.
    y_ok = array_ops.boolean_mask(y_pred, gen_math_ops.equal(y_true, 1))
    # Convert constants to float64 for consistent TensorFlow operations.
    um = np.float64(1.)
    q = np.float64(q)

    if k == -1:  # cross entropy loss
        # mean[ (1-y_ok^q)/q ]
        return K.mean(math_ops.divide(
            math_ops.subtract(um, math_ops.pow(y_ok, q)), q),
                      axis=-1)
    else:  # truncated cross entropy loss

        k = np.float64(k)
        # Truncated loss: where y_ok <= k use (1 - k^q)/q, else (1 - y_ok^q)/q.
        # fill() is used because Where() does not broadcast scalar operands.
        vfunct = array_ops.where(
            gen_math_ops.less_equal(y_ok, k),
            gen_array_ops.fill(array_ops.shape(y_ok), (um - k**q) / q),
            math_ops.divide(math_ops.subtract(um, math_ops.pow(y_ok, q)), q))
        return K.mean(vfunct, axis=-1)  # mean [ above values ]
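One plausible way to use the generalized cross-entropy above as a Keras loss (the model and hyper-parameters are purely illustrative; float64 is selected so the tensors match the numpy float64 constants inside the loss, and `y_true` is expected to be one-hot):

from tensorflow import keras

keras.backend.set_floatx("float64")   # keep everything in float64, matching the loss
model = keras.Sequential([
    keras.layers.Dense(64, activation="relu", input_shape=(20,)),
    keras.layers.Dense(10, activation="softmax"),
])
model.compile(
    optimizer="adam",
    loss=lambda y_true, y_pred: gen_crossentropy(y_true, y_pred, q=0.7, k=-1.0),
    metrics=["accuracy"])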
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
    """Returns the names of all uninitialized resources in resource_list.

  If the returned tensor is empty then all resources have been initialized.

  Args:
   resource_list: resources to check. If None, will use shared_resources() +
    local_resources().
   name: name for the resource-checking op.

  Returns:
   Tensor containing names of the handles of all resources which have not
   yet been initialized.

  """
    if resource_list is None:
        resource_list = shared_resources() + local_resources()
    with ops.name_scope(name):
        # Run all operations on CPU
        local_device = os.environ.get(
            "TF_DEVICE_FOR_UNINITIALIZED_VARIABLE_REPORTING", "/cpu:0")
        with ops.device(local_device):
            if not resource_list:
                # Return an empty tensor so we only need to check for returned tensor
                # size being 0 as an indication of model ready.
                return array_ops.constant([], dtype=dtypes.string)
            # Get a 1-D boolean tensor listing whether each resource is initialized.
            variables_mask = math_ops.logical_not(
                array_ops.stack([r.is_initialized for r in resource_list]))
            # Get a 1-D string tensor containing all the resource names.
            variable_names_tensor = array_ops.constant(
                [s.handle.name for s in resource_list])
            # Return a 1-D tensor containing all the names of uninitialized resources.
            return array_ops.boolean_mask(variable_names_tensor,
                                          variables_mask)
Example #28
  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None):
    """Check equivalence between boolean_mask and numpy masking."""
    if make_mask is None:
      make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
    arr = np.random.rand(*arr_shape)
    mask = make_mask(arr_shape[:ndims_mask])
    if axis is not None:
      mask = make_mask(arr_shape[axis:ndims_mask+axis])
    if axis is None or axis == 0:
      masked_arr = arr[mask]
    elif axis == 1:
      masked_arr = arr[:,mask]
    elif axis == 2:
      masked_arr = arr[:,:,mask]
    with self.test_session() as sess:
      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)

      # Leading dimension size of masked_tensor is always unknown until runtime
      # since we don't know how many elements will be kept.
      leading = 1 if axis is None else axis + 1
      self.assertAllEqual(masked_tensor.get_shape()[leading:],
          masked_arr.shape[leading:])

      self.assertAllClose(masked_arr, masked_tensor.eval())
  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None, axis=None):
    """Check equivalence between boolean_mask and numpy masking."""
    if make_mask is None:
      make_mask = lambda shape: self.rng.randint(0, 2, size=shape).astype(bool)
    arr = np.random.rand(*arr_shape)
    mask = make_mask(arr_shape[:ndims_mask])
    if axis is not None:
      mask = make_mask(arr_shape[axis:ndims_mask + axis])
    if axis is None or axis == 0:
      masked_arr = arr[mask]
    elif axis == 1:
      masked_arr = arr[:, mask]
    elif axis == 2:
      masked_arr = arr[:, :, mask]
    with self.test_session():
      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)

      # Leading dimension size of masked_tensor is always unknown until runtime
      # since we don't know how many elements will be kept.
      leading = 1 if axis is None else axis + 1
      self.assertAllEqual(masked_tensor.get_shape()[leading:],
                          masked_arr.shape[leading:])

      self.assertAllClose(masked_arr, masked_tensor.eval())
 def testMaskIsScalarRaises(self):
   mask = True
   tensor = 1
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, "mask.*scalar"):
       array_ops.boolean_mask(tensor, mask).eval()
 def testMaskHasMoreDimsThanTensorRaises(self):
   mask = [[True, True], [False, False]]
   tensor = [1, 2, 3, 4]
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, "incompatible"):
       array_ops.boolean_mask(tensor, mask).eval()
Example #32
def collapse_repeated(labels, seq_length, name=None):
    """Merge repeated labels into single labels.

  Args:
    labels: Tensor of shape [batch, max value in seq_length]
    seq_length: Tensor of shape [batch], sequence length of each batch element.
    name: A name for this `Op`. Defaults to "collapse_repeated_labels".

  Returns:
    A tuple `(collapsed_labels, new_seq_length)` where

    collapsed_labels: Tensor of shape [batch, max_seq_length] with repeated
    labels collapsed and padded to max_seq_length, eg:
    `[[A, A, B, B, A], [A, B, C, D, E]] => [[A, B, A, 0, 0], [A, B, C, D, E]]`

    new_seq_length: int tensor of shape [batch] with new sequence lengths.
  """

    with ops.name_scope(name, "collapse_repeated_labels",
                        [labels, seq_length]):
        labels = ops.convert_to_tensor(labels, name="labels")
        seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

        # Mask labels that don't equal previous label.
        label_mask = array_ops.concat([
            array_ops.ones_like(labels[:, :1], dtypes.bool),
            math_ops.not_equal(labels[:, 1:], labels[:, :-1])
        ], axis=1)

        # Filter labels that aren't in the original sequence.
        maxlen = _get_dim(labels, 1)
        seq_mask = array_ops.sequence_mask(seq_length, maxlen=maxlen)
        label_mask = math_ops.logical_and(label_mask, seq_mask)

        # Count masks for new sequence lengths.
        new_seq_len = math_ops.reduce_sum(
            math_ops.cast(label_mask, dtypes.int32), axis=1)

        # Mask indexes based on sequence length mask.
        new_maxlen = math_ops.reduce_max(new_seq_len)
        idx_mask = array_ops.sequence_mask(new_seq_len, maxlen=new_maxlen)

        # Flatten everything; label_mask selects the labels to keep and idx_mask
        # selects their target (scatter) indices.
        flat_labels = array_ops.reshape(labels, [-1])
        flat_label_mask = array_ops.reshape(label_mask, [-1])
        flat_idx_mask = array_ops.reshape(idx_mask, [-1])
        idx = math_ops.range(_get_dim(flat_idx_mask, 0))

        # Scatter to flat shape.
        flat = array_ops.scatter_nd(
            indices=array_ops.expand_dims(
                array_ops.boolean_mask(idx, flat_idx_mask), axis=1),
            updates=array_ops.boolean_mask(flat_labels, flat_label_mask),
            shape=array_ops.shape(flat_idx_mask))

        # Reshape back to square batch.
        batch_size = _get_dim(labels, 0)
        new_shape = [batch_size, new_maxlen]
        return (array_ops.reshape(flat, new_shape),
                math_ops.cast(new_seq_len, seq_length.dtype))
def boolean_mask(data, mask, name=None):
  """Applies a boolean mask to `data` without flattening the mask dimensions.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  Note that `output` preserves the mask dimensions `a1...aA`; this differs
  from `tf.boolean_mask`, which flattens those dimensions.

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:

  >>> # Aliases for True & False so data and mask line up.
  >>> T, F = (True, False)

  >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.
  ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  ...     mask=[[T, F, T], [F, F, F], [T, F, F]]).to_list()
  [[1, 3], [], [7]]

  >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([[F, F, T], [F], [T, T]])).to_list()
  [[3], [], [5, 6]]

  >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
  ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
  ...     tf.ragged.constant([True, False, True])).to_list()
  [[1, 2, 3], [5, 6]]
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data,
            ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged.  Keep the splits
        # that we strip off in `splits`, so we can add them back on after
        # we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask)

        # Add the ragged `splits` back to the result.
        masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
            masked_values, splits, validate=False)

        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask)

      return ragged_tensor.RaggedTensor.from_row_splits(
          masked_values, masked_splits, validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask,
          ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2:
        # Add the innermost ragged dimension.  For each innermost cell, get the
        # number of values it contains.  Then flatten that to get a list of
        # cell lengths, and convert it to splits.  Finally, combine the splits
        # and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(
            mask, axis=-1, dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
Example #34
        def _training_examples_and_variables():
            """Returns dictionaries for training examples and variables."""
            batch_size = targets.get_shape()[0]

            # Iterate over all feature columns and create appropriate lists for dense
            # and sparse features as well as dense and sparse weights (variables) for
            # SDCA.
            # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
            # dict as 1-dimensional tensors.
            dense_features, sparse_features, sparse_feature_with_values = [], [], []
            dense_feature_weights = []
            sparse_feature_weights, sparse_feature_with_values_weights = [], []
            for column in sorted(columns_to_variables.keys(),
                                 key=lambda x: x.key):
                transformed_tensor = features[column]
                if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
                    # A real-valued column corresponds to a dense feature in SDCA. A
                    # transformed tensor corresponding to a RealValuedColumn should have
                    # rank at most 2. In order to be passed to SDCA, its rank needs to be
                    # exactly 2 (i.e., its shape should be [batch_size, column.dim]).
                    check_rank_op = control_flow_ops.Assert(
                        math_ops.less_equal(array_ops.rank(transformed_tensor),
                                            2),
                        ['transformed_tensor should have rank at most 2.'])
                    # Reshape to [batch_size, dense_column_dimension].
                    with ops.control_dependencies([check_rank_op]):
                        transformed_tensor = array_ops.reshape(
                            transformed_tensor,
                            [array_ops.shape(transformed_tensor)[0], -1])

                    dense_features.append(transformed_tensor)
                    # For real valued columns, the variables list contains exactly one
                    # element.
                    dense_feature_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
                    # A bucketized column corresponds to a sparse feature in SDCA. The
                    # bucketized feature is "sparsified" for SDCA by converting it to a
                    # SparseFeatureColumn representing the one-hot encoding of the
                    # bucketized feature.
                    #
                    # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
                    # bucketized feature column to a dense feature in SDCA. This will
                    # likely depend on the number of buckets.
                    dense_bucket_tensor = column._to_dnn_input_layer(
                        transformed_tensor)  # pylint: disable=protected-access
                    sparse_feature_column = _dense_tensor_to_sparse_feature_column(
                        dense_bucket_tensor)
                    sparse_feature_with_values.append(sparse_feature_column)
                    # For bucketized columns, the variables list contains exactly one
                    # element.
                    sparse_feature_with_values_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(
                        column,
                    (
                        layers.feature_column._WeightedSparseColumn,  # pylint: disable=protected-access
                        layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
                        layers.feature_column._SparseColumn)):  # pylint: disable=protected-access

                    if isinstance(column,
                                  layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
                        id_tensor = column.id_tensor(transformed_tensor)
                        weight_tensor = array_ops.reshape(
                            column.weight_tensor(transformed_tensor).values,
                            [-1])
                    else:
                        id_tensor = transformed_tensor
                        weight_tensor = array_ops.ones(
                            [array_ops.shape(id_tensor.indices)[0]],
                            dtypes.float32)

                    example_ids = array_ops.reshape(id_tensor.indices[:, 0],
                                                    [-1])

                    flat_ids = array_ops.reshape(id_tensor.values, [-1])
                    # Prune invalid IDs (< 0) from the flat_ids, example_ids, and
                    # weight_tensor.  These can come from looking up an OOV entry in the
                    # vocabulary (default value being -1).
                    is_id_valid = math_ops.greater_equal(flat_ids, 0)
                    flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid)
                    example_ids = array_ops.boolean_mask(
                        example_ids, is_id_valid)
                    weight_tensor = array_ops.boolean_mask(
                        weight_tensor, is_id_valid)

                    projection_length = math_ops.reduce_max(flat_ids) + 1
                    # project ids based on example ids so that we can dedup ids that
                    # occur multiple times for a single example.
                    projected_ids = projection_length * example_ids + flat_ids

                    # Remove any redundant ids.
                    ids, idx = array_ops.unique(projected_ids)
                    # Keep only one example id per duplicated ids.
                    example_ids_filtered = math_ops.unsorted_segment_min(
                        example_ids, idx,
                        array_ops.shape(ids)[0])

                    # Reproject ids back to the feature id space.
                    reproject_ids = (ids -
                                     projection_length * example_ids_filtered)

                    weights = array_ops.reshape(
                        math_ops.unsorted_segment_sum(weight_tensor, idx,
                                                      array_ops.shape(ids)[0]),
                        [-1])
                    sparse_feature_with_values.append(
                        SparseFeatureColumn(example_ids_filtered,
                                            reproject_ids, weights))
                    sparse_feature_with_values_weights.append(
                        columns_to_variables[column][0])
                else:
                    raise ValueError(
                        'SDCAOptimizer does not support column type %s.' %
                        type(column).__name__)

            if weight_column_name:
                example_weights = array_ops.reshape(
                    features[weight_column_name], shape=[-1])
            else:
                example_weights = array_ops.ones([batch_size])
            example_ids = features[self._example_id_column]
            sparse_feature_with_values.extend(sparse_features)
            sparse_feature_with_values_weights.extend(sparse_feature_weights)
            examples = dict(sparse_features=sparse_feature_with_values,
                            dense_features=dense_features,
                            example_labels=math_ops.to_float(
                                array_ops.reshape(targets, shape=[-1])),
                            example_weights=example_weights,
                            example_ids=example_ids)
            sdca_variables = dict(
                sparse_features_weights=sparse_feature_with_values_weights,
                dense_features_weights=dense_feature_weights)
            return examples, sdca_variables
Example #35
 def testMaskIsScalarRaises(self):
     mask = True
     tensor = 1
     with self.test_session():
         with self.assertRaisesRegexp(ValueError, "mask.*scalar"):
             array_ops.boolean_mask(tensor, mask).eval()
Example #36
def adaptive_softmax_loss(inputs,
                          labels,
                          cutoff,
                          project_factor=4,
                          initializer=None,
                          name=None):
  """Computes and returns the adaptive softmax loss (a improvement of 
  hierarchical softmax).
    
  See [Efficient softmax approximation for GPUs](https://arxiv.org/pdf/1609.04309v2.pdf).
        
  This is a faster way to train a softmax classifier over a huge number of 
  classes, and can be used for **both training and prediction**. For example, it 
  can be used for training a Language Model with a very huge vocabulary, and 
  the trained languaed model can be used in speech recognition, text generation, 
  and machine translation very efficiently.
  
  Args:
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
      activations of the input network.
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
      `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
    cutoff: A list indicating the limits of the different clusters.
    project_factor: A floating point value greater than or equal to 1.0. The
      projection factor between two neighboring clusters.
    initializer: Initializer for adaptive softmax variables (optional).
    name: A name for the operation (optional).
  Returns:
    loss: A `batch_size` 1-D tensor of the adaptive softmax cross entropy loss.
    training_losses: A list of 1-D tensors of adaptive softmax loss for each 
      cluster, which can be used for calculating the gradients and back 
      propagation when training.
  """
  input_dim = int(inputs.get_shape()[1])
  sample_num = int(inputs.get_shape()[0])
  cluster_num = len(cutoff) - 1
  with ops.name_scope(name, "AdaptiveSoftmax"):
    if initializer is None:
      stdv = math.sqrt(1. / input_dim)
      initializer = init_ops.random_uniform_initializer(-stdv * 0.8, stdv * 0.8)

    head_dim = cutoff[0] + cluster_num
    head_w = variable_scope.get_variable("adaptive_softmax_head_w", 
                             [input_dim, head_dim], initializer=initializer)

    tail_project_factor = project_factor
    tail_w = []
    for i in range(cluster_num):
      project_dim = max(1, input_dim // tail_project_factor)
      tail_dim = cutoff[i + 1] - cutoff[i]
      tail_w.append([
        variable_scope.get_variable("adaptive_softmax_tail{}_proj_w".format(i+1), 
                        [input_dim, project_dim], initializer=initializer),
        variable_scope.get_variable("adaptive_softmax_tail{}_w".format(i+1), 
                        [project_dim, tail_dim], initializer=initializer)
      ])
      tail_project_factor *= project_factor

    # Get tail masks and update head labels
    training_losses = []
    loss = array_ops.zeros([sample_num], dtype=dtypes.float32)
    head_labels = labels
    for i in range(cluster_num):
      mask = math_ops.logical_and(math_ops.greater_equal(labels, cutoff[i]), 
                                  math_ops.less(labels, cutoff[i + 1]))
      
      # Update head labels
      head_labels = array_ops.where(
          mask, array_ops.constant([cutoff[0] + i] * sample_num), head_labels)

      # Compute tail loss
      tail_inputs = array_ops.boolean_mask(inputs, mask)
      tail_logits = math_ops.matmul(math_ops.matmul(tail_inputs, tail_w[i][0]), 
                                    tail_w[i][1])
      tail_labels = array_ops.boolean_mask(labels - cutoff[i], mask)
      tail_loss = nn.sparse_softmax_cross_entropy_with_logits(labels=tail_labels, logits=tail_logits)
      training_losses.append(tail_loss)
      # `where` on a 1-D mask already yields [N, 1] indices, which is the
      # layout SparseTensor expects.
      aligned_tail_loss = sparse_tensor.SparseTensor(
        array_ops.where(mask), tail_loss, [sample_num])
      loss += sparse_ops.sparse_tensor_to_dense(aligned_tail_loss)

    # Compute head loss
    head_logits = math_ops.matmul(inputs, head_w)
    head_loss = nn.sparse_softmax_cross_entropy_with_logits(logits=head_logits, labels=head_labels)
    loss += head_loss
    training_losses.append(head_loss)

    return loss, training_losses
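A minimal usage sketch for the function above, assuming TF 1.x public APIs (tf.placeholder, tf.train.AdagradOptimizer) and made-up shapes; `hidden` and `word_ids` are illustrative names, not part of the example.

import tensorflow as tf

batch_size, dim, vocab_size = 32, 512, 100000
cutoff = [2000, 10000, vocab_size]  # head of 2000 frequent words + two tail clusters

hidden = tf.placeholder(tf.float32, [batch_size, dim])  # e.g. top RNN layer outputs
word_ids = tf.placeholder(tf.int64, [batch_size])       # target ids in [0, vocab_size)

loss, training_losses = adaptive_softmax_loss(hidden, word_ids, cutoff)

# Train on the per-cluster losses so each step only touches the head and the
# tail cluster(s) that actually received labels in this batch.
optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
train_op = tf.group(
    *[optimizer.minimize(tf.reduce_mean(l)) for l in training_losses])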
Example #37
0
def _slice_helper(tensor, slice_spec, var=None):
    """Copied from array_ops._slice_helper, will be merged back later."""
    if (isinstance(slice_spec, bool)
            or (isinstance(slice_spec, ops.Tensor)
                and slice_spec.dtype == dtypes.bool)
            or (isinstance(slice_spec, np.ndarray)
                and slice_spec.dtype == bool)):
        return array_ops.boolean_mask(tensor=tensor, mask=slice_spec)

    if not isinstance(slice_spec, (list, tuple)):
        slice_spec = [slice_spec]

    begin, end, strides = [], [], []
    index = 0

    new_axis_mask, shrink_axis_mask = 0, 0
    begin_mask, end_mask = 0, 0
    ellipsis_mask = 0
    for s in slice_spec:
        if isinstance(s, slice):
            if s.start is not None and not _is_undefined_dimension(s.start):
                _check_index(s.start)
                begin.append(s.start)
            else:
                begin.append(0)
                begin_mask |= (1 << index)
            if s.stop is not None and not _is_undefined_dimension(s.stop):
                _check_index(s.stop)
                end.append(s.stop)
            else:
                end.append(0)
                end_mask |= (1 << index)
            if s.step is not None and not _is_undefined_dimension(s.step):
                _check_index(s.step)
                strides.append(s.step)
            else:
                strides.append(1)
        elif s is Ellipsis:
            begin.append(0)
            end.append(0)
            strides.append(1)
            ellipsis_mask |= (1 << index)
        elif s is array_ops.newaxis:
            begin.append(0)
            end.append(0)
            strides.append(1)
            new_axis_mask |= (1 << index)
        else:
            _check_index(s)
            begin.append(s)
            end.append(s + 1)
            strides.append(1)
            shrink_axis_mask |= (1 << index)
        index += 1

    # `stack` below may involve no tensors, so use name_scope to pick the
    # correct graph.
    with ops.name_scope(None,
                        'strided_slice', [tensor] + begin + end + strides,
                        skip_on_eager=False) as name:
        if begin:
            packed_begin, packed_end, packed_strides = (
                array_ops.stack(begin), array_ops.stack(end),
                array_ops.stack(strides))
            if (packed_begin.dtype == dtypes.int64
                    or packed_end.dtype == dtypes.int64
                    or packed_strides.dtype == dtypes.int64):
                if packed_begin.dtype != dtypes.int64:
                    packed_begin = math_ops.cast(packed_begin, dtypes.int64)
                if packed_end.dtype != dtypes.int64:
                    packed_end = math_ops.cast(packed_end, dtypes.int64)
                if packed_strides.dtype != dtypes.int64:
                    packed_strides = math_ops.cast(packed_strides,
                                                   dtypes.int64)
        else:
            var_empty = constant_op.constant([], dtype=dtypes.int32)
            packed_begin = packed_end = packed_strides = var_empty
        return array_ops.strided_slice(tensor,
                                       packed_begin,
                                       packed_end,
                                       packed_strides,
                                       begin_mask=begin_mask,
                                       end_mask=end_mask,
                                       shrink_axis_mask=shrink_axis_mask,
                                       new_axis_mask=new_axis_mask,
                                       ellipsis_mask=ellipsis_mask,
                                       var=var,
                                       name=name)
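A small sketch of what the helper above produces, using the public TF API with a made-up tensor `x`: a Python slice spec is packed into begin/end/strides plus bitmasks for strided_slice, while a boolean spec short-circuits into boolean_mask.

import numpy as np
import tensorflow as tf

x = tf.constant(np.arange(12).reshape(3, 4))

# What the helper builds for a spec like (slice(1, 3), Ellipsis, tf.newaxis):
# index 0 contributes begin/end, index 1 sets bit 1 of ellipsis_mask,
# index 2 sets bit 2 of new_axis_mask.  Result shape: (2, 4, 1).
y = tf.strided_slice(x, begin=[1, 0, 0], end=[3, 0, 0], strides=[1, 1, 1],
                     ellipsis_mask=0b010, new_axis_mask=0b100)

# The boolean short-circuit at the top of the helper: a bool spec becomes
# boolean_mask, here keeping rows 0 and 2.
z = tf.boolean_mask(x, tf.constant([True, False, True]))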
Example #38
0
    def filter(self, input_tensor=None, **kwargs):
        """Filter new feature keys by probability before training.

        Prevents unpopular features from affecting training.

        Args:
          input_tensor: `SparseTensor` or dense `Tensor` holding the feature
            keys to filter.
          **kwargs: keyword arguments, including
            probability: float. Probability of keeping a new feature key;
              existing keys are always kept.

        Returns:
          A tensor containing the feature keys kept for training.
        """

        if input_tensor is None:
            raise KeyError("filter method expects parameter `input_tensor`.")
        elif isinstance(input_tensor, ops.Tensor):
            input_type = "DenseTensor"
            values = input_tensor
        elif isinstance(input_tensor, sparse_tensor.SparseTensor):
            input_type = "SparseTensor"
            values = input_tensor.values
            indices = input_tensor.indices
        else:
            raise TypeError("input_tensor must be " \
                      "either a SparseTensor or dense Tensor.")

        if 'probability' in kwargs:
            probability = kwargs['probability']
        else:
            raise KeyError("filter method expects parameter `probability`.")
        if not isinstance(probability, float):
            raise TypeError("probability must be a float.")
        if probability < 0.0 or probability > 1.0:
            raise ValueError("probability value must be in [0.0, 1.0].")

        idx = 0
        status_values = array_ops.reshape(values, (-1, ))
        partition_index = \
            self.var.partition_fn(status_values, self.var.shard_num)
        partitioned_values_list, partitioned_indices_list = \
            dynamic_embedding_ops._partition(status_values,
                                             partition_index,
                                             self.var.shard_num)

        fv_list = []
        for idx, dev in enumerate(self.tstp_var.devices):
            with ops.device(dev):
                feature_status = \
                    self.tstp_var.tables[idx].lookup(
                        partitioned_values_list[idx],
                        dynamic_default_values=self.default_tstp,
                    )

                sub_fv = array_ops.reshape(feature_status, (-1, ))
                fv_list.append(sub_fv)

        total_fv = dynamic_embedding_ops._stitch(fv_list,
                                                 partitioned_indices_list)
        value_size = array_ops.size(values)
        old_prob = array_ops.ones(value_size)
        new_prob = array_ops.fill([value_size], probability)
        random_prob = random_ops.random_uniform([value_size], maxval=1.0)

        condition = math_ops.greater(total_fv, self.default_tstp)
        total_prob = array_ops.where(condition, old_prob, new_prob)

        total_mask = math_ops.greater_equal(total_prob, random_prob)
        filter_values = array_ops.boolean_mask(values, total_mask)

        if input_type == "DenseTensor":
            filter_tensor = filter_values
        elif input_type == "SparseTensor":
            filter_indices = array_ops.boolean_mask(indices, total_mask)
            filter_tensor = sparse_tensor.SparseTensor(
                indices=filter_indices,
                values=filter_values,
                dense_shape=input_tensor.dense_shape)

        return filter_tensor
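The keep/drop decision above, sketched with plain TF ops and made-up inputs (no partitioned timestamp tables): keys that have been seen before keep probability 1.0, brand-new keys keep only the user-supplied probability.

import tensorflow as tf

keys = tf.constant([11, 42, 7, 99], dtype=tf.int64)
seen_before = tf.constant([True, False, True, False])  # stand-in for the table lookup
probability = 0.01

keep_prob = tf.where(seen_before,
                     tf.ones_like(keys, dtype=tf.float32),
                     tf.fill(tf.shape(keys), probability))
keep_mask = tf.greater_equal(keep_prob, tf.random_uniform(tf.shape(keys)))
filtered_keys = tf.boolean_mask(keys, keep_mask)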
Example #39
0
def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
    """Assert the condition `x == y` holds element-wise.

  Example of adding a dependency to an operation:

  ```python
  with tf.control_dependencies([tf.assert_equal(x, y)]):
    output = tf.reduce_sum(x)
  ```

  This condition holds if for every pair of (possibly broadcast) elements
  `x[i]`, `y[i]`, we have `x[i] == y[i]`.
  If both `x` and `y` are empty, this is trivially satisfied.

  Args:
    x:  Numeric `Tensor`.
    y:  Numeric `Tensor`, same dtype as and broadcastable to `x`.
    data:  The tensors to print out if the condition is False.  Defaults to
      error message and first few entries of `x`, `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).  Defaults to "assert_equal".

  Returns:
    Op that raises `InvalidArgumentError` if `x == y` is False.
    @compatibility(eager)
    returns None
    @end_compatibility

  Raises:
    InvalidArgumentError: if the check can be performed immediately and
      `x == y` is False. The check can be performed immediately during eager
      execution or if `x` and `y` are statically known.
  """
    message = message or ''
    with ops.name_scope(name, 'assert_equal', [x, y, data]):
        x = ops.convert_to_tensor(x, name='x')
        y = ops.convert_to_tensor(y, name='y')

        if context.executing_eagerly():
            eq = math_ops.equal(x, y)
            condition = math_ops.reduce_all(eq)
            if not condition:
                # Prepare a message with first elements of x and y.
                summary_msg = ''
                # Default to printing 3 elements like control_flow_ops.Assert (used
                # by graph mode) does.
                summarize = 3 if summarize is None else summarize
                if summarize:
                    # reshape((-1,)) is the fastest way to get a flat array view.
                    x_np = x.numpy().reshape((-1, ))
                    y_np = y.numpy().reshape((-1, ))
                    x_sum = min(x_np.size, summarize)
                    y_sum = min(y_np.size, summarize)
                    summary_msg = ('First %d elements of x:\n%s\n'
                                   'First %d elements of y:\n%s\n' %
                                   (x_sum, x_np[:x_sum], y_sum, y_np[:y_sum]))

                index_and_values_str = ''
                if x.shape == y.shape and x.shape.as_list():
                    # If the shapes of x and y are the same (and not scalars),
                    # get the values that actually differed and their indices.
                    # If the shapes differ, this information is more confusing
                    # than useful.
                    mask = math_ops.logical_not(eq)
                    indices = array_ops.where(mask)
                    indices_np = indices.numpy()
                    x_vals = array_ops.boolean_mask(x, mask)
                    y_vals = array_ops.boolean_mask(y, mask)
                    summarize = min(summarize, indices_np.shape[0])
                    index_and_values_str = (
                        'Indices of first %s different values:\n%s\n'
                        'Corresponding x values:\n%s\n'
                        'Corresponding y values:\n%s\n' %
                        (summarize, indices_np[:summarize],
                         x_vals.numpy().reshape(
                             (-1, ))[:summarize], y_vals.numpy().reshape(
                                 (-1, ))[:summarize]))

                raise errors.InvalidArgumentError(
                    node_def=None,
                    op=None,
                    message=(
                        '%s\nCondition x == y did not hold.\n%s%s' %
                        (message or '', index_and_values_str, summary_msg)))
            return

        if data is None:
            data = [
                message, 'Condition x == y did not hold element-wise:',
                'x (%s) = ' % x.name, x,
                'y (%s) = ' % y.name, y
            ]
        condition = math_ops.reduce_all(math_ops.equal(x, y))
        x_static = tensor_util.constant_value(x)
        y_static = tensor_util.constant_value(y)
        if x_static is not None and y_static is not None:
            condition_static = (x_static == y_static).all()
            _assert_static(condition_static, data)
        return control_flow_ops.Assert(condition, data, summarize=summarize)
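A quick illustration of how the eager branch above builds its mismatch report, with made-up tensors: `where` locates the differing positions and `boolean_mask` pulls out the offending values from each side.

import tensorflow as tf

x = tf.constant([1, 2, 3, 4])
y = tf.constant([1, 0, 3, 9])

mask = tf.logical_not(tf.equal(x, y))  # True where the elements differ
bad_indices = tf.where(mask)           # [[1], [3]]
x_vals = tf.boolean_mask(x, mask)      # [2, 4]
y_vals = tf.boolean_mask(y, mask)      # [0, 9]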
Example #40
0
    def filter(self, input_tensor=None, **kwargs):
        """Filter feature keys below a frequency threshold before training.

        Prevents unpopular feature keys from affecting training.

        Args:
          input_tensor: `SparseTensor` or dense `Tensor` holding the feature
            keys to filter.
          **kwargs: keyword arguments, including
            frequency_threshold: int. Feature keys whose frequency counts are
              less than the threshold are filtered out.

        Returns:
          A tensor containing the feature keys kept for training.
        """

        if input_tensor is None:
            raise KeyError("filter method expects parameter `input_tensor`.")
        elif isinstance(input_tensor, ops.Tensor):
            input_type = "DenseTensor"
            values = input_tensor
        elif isinstance(input_tensor, sparse_tensor.SparseTensor):
            input_type = "SparseTensor"
            values = input_tensor.values
            indices = input_tensor.indices
        else:
            raise TypeError("input_tensor must be " \
                      "either a SparseTensor or dense Tensor.")

        if 'frequency_threshold' in kwargs:
            frequency_threshold = kwargs['frequency_threshold']
        else:
            raise KeyError(
                "filter method expects parameter `frequency_threshold`.")
        if not isinstance(frequency_threshold, int):
            raise TypeError("frequency_threshold must be an integer.")
        if frequency_threshold < 0:
            raise ValueError(
                "frequency_threshold must be greater or equal to zero.")

        idx = 0
        status_values = array_ops.reshape(values, (-1, ))
        partition_index = \
            self.var.partition_fn(status_values, self.var.shard_num)
        partitioned_values_list, partitioned_indices_list = \
            dynamic_embedding_ops._partition(status_values,
                                             partition_index,
                                             self.var.shard_num)

        mask = []
        for idx, dev in enumerate(self.freq_var.devices):
            with ops.device(dev):
                feature_status = \
                    self.freq_var.tables[idx].lookup(
                        partitioned_values_list[idx],
                        dynamic_default_values=self.default_count,
                    )

                feature_counts = array_ops.slice(feature_status, [0, 0],
                                                 [-1, 1])
                sub_fv = array_ops.reshape(feature_counts, (-1, ))
                partitioned_mask = math_ops.greater_equal(
                    sub_fv, frequency_threshold)
                mask.append(partitioned_mask)

        total_mask = dynamic_embedding_ops._stitch(mask,
                                                   partitioned_indices_list)
        filter_values = array_ops.boolean_mask(values, total_mask)
        if input_type == "DenseTensor":
            filter_tensor = filter_values
        elif input_type == "SparseTensor":
            filter_indices = array_ops.boolean_mask(indices, total_mask)
            filter_tensor = sparse_tensor.SparseTensor(
                indices=filter_indices,
                values=filter_values,
                dense_shape=input_tensor.dense_shape)

        return filter_tensor
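The same decision as above without the partitioned frequency tables, using made-up counts: only keys whose looked-up count clears the threshold survive.

import tensorflow as tf

keys = tf.constant([11, 42, 7, 99], dtype=tf.int64)
counts = tf.constant([120, 3, 57, 0], dtype=tf.int64)  # stand-in for the table lookup
frequency_threshold = 10

keep_mask = tf.greater_equal(counts, tf.cast(frequency_threshold, counts.dtype))
filtered_keys = tf.boolean_mask(keys, keep_mask)  # -> [11, 7]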
Example #41
0
    def training_graph(self,
                       input_data,
                       input_labels,
                       random_seed,
                       data_spec,
                       epoch=None):
        """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      epoch: A tensor or placeholder for the epoch the training data comes from.

    Returns:
      The last op in the random tree training graph.
    """
        epoch = [0] if epoch is None else epoch

        sparse_indices = []
        sparse_values = []
        sparse_shape = []
        if isinstance(input_data, ops.SparseTensor):
            sparse_indices = input_data.indices
            sparse_values = input_data.values
            sparse_shape = input_data.shape
            input_data = []

        # Count extremely random stats.
        (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
         totals_indices, totals_sums, totals_squares,
         input_leaves) = (self.training_ops.count_extremely_random_stats(
             input_data,
             sparse_indices,
             sparse_values,
             sparse_shape,
             data_spec,
             input_labels,
             self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             self.variables.start_epoch,
             epoch,
             num_classes=self.params.num_output_columns,
             regression=self.params.regression))
        node_update_ops = []
        node_update_ops.append(
            state_ops.assign_add(self.variables.node_sums, node_sums))

        splits_update_ops = []
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(
                self.variables.candidate_split_sums, splits_indices,
                splits_sums))
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                               totals_indices, totals_sums))

        if self.params.regression:
            node_update_ops.append(
                state_ops.assign_add(self.variables.node_squares,
                                     node_squares))
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(
                    self.variables.candidate_split_squares, splits_indices,
                    splits_squares))
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(
                    self.variables.accumulator_squares, totals_indices,
                    totals_squares))

        # Sample inputs.
        update_indices, feature_updates, threshold_updates = (
            self.training_ops.sample_inputs(
                input_data,
                sparse_indices,
                sparse_values,
                sparse_shape,
                self.variables.node_to_accumulator_map,
                input_leaves,
                self.variables.candidate_split_features,
                self.variables.candidate_split_thresholds,
                split_initializations_per_input=(
                    self.params.split_initializations_per_input),
                split_sampling_random_seed=random_seed))
        update_features_op = state_ops.scatter_update(
            self.variables.candidate_split_features, update_indices,
            feature_updates)
        update_thresholds_op = state_ops.scatter_update(
            self.variables.candidate_split_thresholds, update_indices,
            threshold_updates)

        # Calculate finished nodes.
        with ops.control_dependencies(splits_update_ops):
            children = array_ops.squeeze(array_ops.slice(
                self.variables.tree, [0, 0], [-1, 1]),
                                         squeeze_dims=[1])
            is_leaf = math_ops.equal(constants.LEAF_NODE, children)
            leaves = math_ops.to_int32(
                array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
            finished, stale = self.training_ops.finished_nodes(
                leaves,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                self.variables.start_epoch,
                epoch,
                num_split_after_samples=self.params.split_after_samples,
                min_split_samples=self.params.min_split_samples)

        # Update leaf scores.
        non_fertile_leaves = array_ops.boolean_mask(
            leaves,
            math_ops.less(
                array_ops.gather(self.variables.node_to_accumulator_map,
                                 leaves), 0))

        # TODO(gilberth): It should be possible to limit the number of non
        # fertile leaves we calculate scores for, especially since we can only take
        # at most array_ops.shape(finished)[0] of them.
        with ops.control_dependencies(node_update_ops):
            sums = array_ops.gather(self.variables.node_sums,
                                    non_fertile_leaves)
            if self.params.regression:
                squares = array_ops.gather(self.variables.node_squares,
                                           non_fertile_leaves)
                non_fertile_leaf_scores = self._variance(sums, squares)
            else:
                non_fertile_leaf_scores = self._weighted_gini(sums)

        # Calculate best splits.
        with ops.control_dependencies(splits_update_ops):
            split_indices = self.training_ops.best_splits(
                finished,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                regression=self.params.regression)

        # Grow tree.
        with ops.control_dependencies(
            [update_features_op, update_thresholds_op]):
            (tree_update_indices, tree_children_updates,
             tree_threshold_updates, new_eot) = (self.training_ops.grow_tree(
                 self.variables.end_of_tree,
                 self.variables.node_to_accumulator_map, finished,
                 split_indices, self.variables.candidate_split_features,
                 self.variables.candidate_split_thresholds))
            tree_update_op = state_ops.scatter_update(self.variables.tree,
                                                      tree_update_indices,
                                                      tree_children_updates)
            thresholds_update_op = state_ops.scatter_update(
                self.variables.tree_thresholds, tree_update_indices,
                tree_threshold_updates)
            # TODO(thomaswc): Only update the epoch on the new leaves.
            new_epoch_updates = epoch * array_ops.ones_like(
                tree_threshold_updates, dtype=dtypes.int32)
            epoch_update_op = state_ops.scatter_update(
                self.variables.start_epoch, tree_update_indices,
                new_epoch_updates)

        # Update fertile slots.
        with ops.control_dependencies([tree_update_op]):
            (node_map_updates, accumulators_cleared,
             accumulators_allocated) = (self.training_ops.update_fertile_slots(
                 finished,
                 non_fertile_leaves,
                 non_fertile_leaf_scores,
                 self.variables.end_of_tree,
                 self.variables.accumulator_sums,
                 self.variables.node_to_accumulator_map,
                 stale,
                 regression=self.params.regression))

        # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
        # used it to calculate new leaves.
        gated_new_eot, = control_flow_ops.tuple(
            [new_eot], control_inputs=[node_map_updates])
        eot_update_op = state_ops.assign(self.variables.end_of_tree,
                                         gated_new_eot)

        updates = []
        updates.append(eot_update_op)
        updates.append(tree_update_op)
        updates.append(thresholds_update_op)
        updates.append(epoch_update_op)

        updates.append(
            state_ops.scatter_update(
                self.variables.node_to_accumulator_map,
                array_ops.squeeze(array_ops.slice(node_map_updates, [0, 0],
                                                  [1, -1]),
                                  squeeze_dims=[0]),
                array_ops.squeeze(array_ops.slice(node_map_updates, [1, 0],
                                                  [1, -1]),
                                  squeeze_dims=[0])))

        cleared_and_allocated_accumulators = array_ops.concat(
            0, [accumulators_cleared, accumulators_allocated])
        # Calculate values to put into scatter update for candidate counts.
        # Candidate split counts are always reset back to 0 for both cleared
        # and allocated accumulators. This means some accumulators might be doubly
        # reset to 0 if they were released and not allocated, then later allocated.
        split_values = array_ops.tile(
            array_ops.expand_dims(
                array_ops.expand_dims(
                    array_ops.zeros_like(cleared_and_allocated_accumulators,
                                         dtype=dtypes.float32), 1), 2),
            [
                1, self.params.num_splits_to_consider,
                self.params.num_output_columns
            ])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_sums,
                                     cleared_and_allocated_accumulators,
                                     split_values))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(
                    self.variables.candidate_split_squares,
                    cleared_and_allocated_accumulators, split_values))

        # Calculate values to put into scatter update for total counts.
        total_cleared = array_ops.tile(
            array_ops.expand_dims(
                math_ops.neg(
                    array_ops.ones_like(accumulators_cleared,
                                        dtype=dtypes.float32)), 1),
            [1, self.params.num_output_columns])
        total_reset = array_ops.tile(
            array_ops.expand_dims(
                array_ops.zeros_like(accumulators_allocated,
                                     dtype=dtypes.float32), 1),
            [1, self.params.num_output_columns])
        accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
        updates.append(
            state_ops.scatter_update(self.variables.accumulator_sums,
                                     cleared_and_allocated_accumulators,
                                     accumulator_updates))
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(self.variables.accumulator_squares,
                                         cleared_and_allocated_accumulators,
                                         accumulator_updates))

        # Calculate values to put into scatter update for candidate splits.
        split_features_updates = array_ops.tile(
            array_ops.expand_dims(
                math_ops.neg(
                    array_ops.ones_like(cleared_and_allocated_accumulators)),
                1), [1, self.params.num_splits_to_consider])
        updates.append(
            state_ops.scatter_update(self.variables.candidate_split_features,
                                     cleared_and_allocated_accumulators,
                                     split_features_updates))

        updates += self.finish_iteration()

        return control_flow_ops.group(*updates)
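The leaf bookkeeping above in isolation, with toy tensors and TF 1.x-style ops (assuming `constants.LEAF_NODE == -1`): rows of `tree` whose child pointer is LEAF_NODE are leaves, and leaves mapped to a negative accumulator are the non-fertile ones whose scores get recomputed.

import tensorflow as tf

LEAF_NODE = -1  # assumed value of constants.LEAF_NODE
tree = tf.constant([[3], [-1], [-1], [-1]])          # child pointer column only
node_to_accumulator = tf.constant([-1, 0, -1, -1])   # -1 means "no accumulator"

children = tf.squeeze(tf.slice(tree, [0, 0], [-1, 1]), axis=[1])
is_leaf = tf.equal(LEAF_NODE, children)
leaves = tf.to_int32(tf.squeeze(tf.where(is_leaf), axis=[1]))     # [1, 2, 3]
non_fertile = tf.boolean_mask(
    leaves, tf.less(tf.gather(node_to_accumulator, leaves), 0))   # [2, 3]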
Example #42
0
  def build_controller(self):
    """RL optimization interface.

    Returns:
      ops: A dictionary holding handles of the model used for training.
    """

    self._global_step = training_util.get_or_create_global_step()
    ops = {}
    ops["loss"] = 0

    failing_signal = self.compute_reward(self.hparams.failing_signal)

    ctr = {}

    with tf_ops.name_scope("controller_{}".format(self.ctrl_id)):
      with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)):
        ctr["reward"] = {"value": [], "ph": [], "update": []}
        ctr["ready"] = {"value": [], "ph": [], "update": []}
        ctr["best_reward"] = {"value": [], "update": []}
        for i in range(self.hparams.num_children):
          reward_value = variable_scope.get_local_variable(
              "reward_{}".format(i),
              initializer=0.0,
              dtype=dtypes.float32,
              trainable=False)
          reward_ph = array_ops.placeholder(
              dtypes.float32, shape=(), name="reward_ph_{}".format(i))
          reward_update = state_ops.assign(
              reward_value, reward_ph, use_locking=True)
          ctr["reward"]["value"].append(reward_value)
          ctr["reward"]["ph"].append(reward_ph)
          ctr["reward"]["update"].append(reward_update)
          best_reward = variable_scope.get_local_variable(
              "best_reward_{}".format(i),
              initializer=failing_signal,
              dtype=dtypes.float32,
              trainable=False)
          ctr["best_reward"]["value"].append(best_reward)
          ctr["best_reward"]["update"].append(
              state_ops.assign(best_reward,
                               math_ops.minimum(best_reward, reward_update)))

          ready_value = variable_scope.get_local_variable(
              "ready_{}".format(i),
              initializer=True,
              dtype=dtypes.bool,
              trainable=False)
          ready_ph = array_ops.placeholder(
              dtypes.bool, shape=(), name="ready_ph_{}".format(i))
          ready_update = state_ops.assign(
              ready_value, ready_ph, use_locking=True)
          ctr["ready"]["value"].append(ready_value)
          ctr["ready"]["ph"].append(ready_ph)
          ctr["ready"]["update"].append(ready_update)

      ctr["grouping_y_preds"], ctr["grouping_log_probs"] = self.get_groupings()
      summary.histogram(
          "grouping_actions",
          array_ops.slice(ctr["grouping_y_preds"]["sample"], [0, 0],
                          [1, array_ops.shape(self.op_embeddings)[0]]))

      with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)):
        ctr["baseline"] = variable_scope.get_local_variable(
            "baseline",
            initializer=failing_signal
            if self.hparams.start_with_failing_signal else 0.0,
            dtype=dtypes.float32,
            trainable=False)

      new_baseline = self.hparams.bl_dec * ctr["baseline"] + (
          1 - self.hparams.bl_dec) * math_ops.reduce_mean(
              ctr["reward"]["value"])
      if not self.hparams.always_update_baseline:
        baseline_mask = math_ops.less(ctr["reward"]["value"], failing_signal)
        selected_reward = array_ops.boolean_mask(ctr["reward"]["value"],
                                                 baseline_mask)
        selected_baseline = control_flow_ops.cond(
            math_ops.reduce_any(baseline_mask),
            lambda: math_ops.reduce_mean(selected_reward),
            lambda: constant_op.constant(0, dtype=dtypes.float32))
        ctr["pos_reward"] = selected_baseline
        pos_ = math_ops.less(
            constant_op.constant(0, dtype=dtypes.float32), selected_baseline)
        selected_baseline = self.hparams.bl_dec * ctr["baseline"] + (
            1 - self.hparams.bl_dec) * selected_baseline
        selected_baseline = control_flow_ops.cond(
            pos_, lambda: selected_baseline, lambda: ctr["baseline"])
        new_baseline = control_flow_ops.cond(
            math_ops.less(self.global_step,
                          self.hparams.stop_updating_after_steps),
            lambda: new_baseline, lambda: selected_baseline)
      ctr["baseline_update"] = state_ops.assign(
          ctr["baseline"], new_baseline, use_locking=True)

      ctr["y_preds"], ctr["log_probs"] = self.get_placements()
      summary.histogram("actions", ctr["y_preds"]["sample"])
      mask = math_ops.less(ctr["reward"]["value"], failing_signal)
      ctr["loss"] = ctr["reward"]["value"] - ctr["baseline"]
      ctr["loss"] *= (
          ctr["log_probs"]["sample"] + ctr["grouping_log_probs"]["sample"])

      selected_loss = array_ops.boolean_mask(ctr["loss"], mask)
      selected_loss = control_flow_ops.cond(
          math_ops.reduce_any(mask),
          lambda: math_ops.reduce_mean(-selected_loss),
          lambda: constant_op.constant(0, dtype=dtypes.float32))

      ctr["loss"] = control_flow_ops.cond(
          math_ops.less(self.global_step,
                        self.hparams.stop_updating_after_steps),
          lambda: math_ops.reduce_mean(-ctr["loss"]), lambda: selected_loss)

      ctr["reward_s"] = math_ops.reduce_mean(ctr["reward"]["value"])
      summary.scalar("loss", ctr["loss"])
      summary.scalar("avg_reward", ctr["reward_s"])
      summary.scalar("best_reward_so_far", best_reward)
      summary.scalar(
          "advantage",
          math_ops.reduce_mean(ctr["reward"]["value"] - ctr["baseline"]))

    with variable_scope.variable_scope(
        "optimizer", reuse=variable_scope.AUTO_REUSE):
      (ctr["train_op"], ctr["lr"], ctr["grad_norm"],
       ctr["grad_norms"]) = self._get_train_ops(
           ctr["loss"],
           tf_ops.get_collection(tf_ops.GraphKeys.TRAINABLE_VARIABLES),
           self.global_step,
           grad_bound=self.hparams.grad_bound,
           lr_init=self.hparams.lr,
           lr_dec=self.hparams.lr_dec,
           start_decay_step=self.hparams.start_decay_step,
           decay_steps=self.hparams.decay_steps,
           optimizer_type=self.hparams.optimizer_type)

    summary.scalar("gradnorm", ctr["grad_norm"])
    summary.scalar("lr", ctr["lr"])
    ctr["summary"] = summary.merge_all()
    ops["controller"] = ctr

    self.ops = ops
    return ops
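The reward masking used for the baseline above, sketched with constants: only children whose reward beats the failing signal contribute, and `cond` guards against masking away everything.

import tensorflow as tf

rewards = tf.constant([0.8, 2.5, 1.1])
failing_signal = 2.0

mask = tf.less(rewards, failing_signal)            # lower reward = better here
selected = tf.boolean_mask(rewards, mask)          # [0.8, 1.1]
selected_mean = tf.cond(tf.reduce_any(mask),
                        lambda: tf.reduce_mean(selected),
                        lambda: tf.constant(0.0))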
Example #43
0
  def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self):
    mask = [True, True, True]
    tensor = [[1, 2], [3, 4]]
    with self.test_session():
      with self.assertRaisesRegexp(ValueError, "incompatible"):
        array_ops.boolean_mask(tensor, mask).eval()
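For contrast with the failing case above, a mask whose shape matches the leading dimension of the tensor is accepted; a length-2 mask selects rows of a 2x2 tensor.

import tensorflow as tf

tensor = tf.constant([[1, 2], [3, 4]])
rows = tf.boolean_mask(tensor, tf.constant([True, False]))  # -> [[1, 2]]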
Example #44
0
    def training_graph(self, input_data, input_labels, random_seed, data_spec, epoch=None):

        """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      epoch: A tensor or placeholder for the epoch the training data comes from.

    Returns:
      The last op in the random tree training graph.
    """
        epoch = [0] if epoch is None else epoch

        sparse_indices = []
        sparse_values = []
        sparse_shape = []
        if isinstance(input_data, ops.SparseTensor):
            sparse_indices = input_data.indices
            sparse_values = input_data.values
            sparse_shape = input_data.shape
            input_data = []

        # Count extremely random stats.
        (
            node_sums,
            node_squares,
            splits_indices,
            splits_sums,
            splits_squares,
            totals_indices,
            totals_sums,
            totals_squares,
            input_leaves,
        ) = self.training_ops.count_extremely_random_stats(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            data_spec,
            input_labels,
            self.variables.tree,
            self.variables.tree_thresholds,
            self.variables.node_to_accumulator_map,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            self.variables.start_epoch,
            epoch,
            num_classes=self.params.num_output_columns,
            regression=self.params.regression,
        )
        node_update_ops = []
        node_update_ops.append(state_ops.assign_add(self.variables.node_sums, node_sums))

        splits_update_ops = []
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(self.variables.candidate_split_sums, splits_indices, splits_sums)
        )
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(self.variables.accumulator_sums, totals_indices, totals_sums)
        )

        if self.params.regression:
            node_update_ops.append(state_ops.assign_add(self.variables.node_squares, node_squares))
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(
                    self.variables.candidate_split_squares, splits_indices, splits_squares
                )
            )
            splits_update_ops.append(
                self.training_ops.scatter_add_ndim(self.variables.accumulator_squares, totals_indices, totals_squares)
            )

        # Sample inputs.
        update_indices, feature_updates, threshold_updates = self.training_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed,
        )
        update_features_op = state_ops.scatter_update(
            self.variables.candidate_split_features, update_indices, feature_updates
        )
        update_thresholds_op = state_ops.scatter_update(
            self.variables.candidate_split_thresholds, update_indices, threshold_updates
        )

        # Calculate finished nodes.
        with ops.control_dependencies(splits_update_ops):
            children = array_ops.squeeze(array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
            is_leaf = math_ops.equal(constants.LEAF_NODE, children)
            leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
            finished, stale = self.training_ops.finished_nodes(
                leaves,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                self.variables.start_epoch,
                epoch,
                num_split_after_samples=self.params.split_after_samples,
                min_split_samples=self.params.min_split_samples,
            )

        # Update leaf scores.
        non_fertile_leaves = array_ops.boolean_mask(
            leaves, math_ops.less(array_ops.gather(self.variables.node_to_accumulator_map, leaves), 0)
        )

        # TODO(gilberth): It should be possible to limit the number of non
        # fertile leaves we calculate scores for, especially since we can only take
        # at most array_ops.shape(finished)[0] of them.
        with ops.control_dependencies(node_update_ops):
            sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
            if self.params.regression:
                squares = array_ops.gather(self.variables.node_squares, non_fertile_leaves)
                non_fertile_leaf_scores = self._variance(sums, squares)
            else:
                non_fertile_leaf_scores = self._weighted_gini(sums)

        # Calculate best splits.
        with ops.control_dependencies(splits_update_ops):
            split_indices = self.training_ops.best_splits(
                finished,
                self.variables.node_to_accumulator_map,
                self.variables.candidate_split_sums,
                self.variables.candidate_split_squares,
                self.variables.accumulator_sums,
                self.variables.accumulator_squares,
                regression=self.params.regression,
            )

        # Grow tree.
        with ops.control_dependencies([update_features_op, update_thresholds_op]):
            (
                tree_update_indices,
                tree_children_updates,
                tree_threshold_updates,
                tree_depth_updates,
                new_eot,
            ) = self.training_ops.grow_tree(
                self.variables.end_of_tree,
                self.variables.tree_depths,
                self.variables.node_to_accumulator_map,
                finished,
                split_indices,
                self.variables.candidate_split_features,
                self.variables.candidate_split_thresholds,
            )
            tree_update_op = state_ops.scatter_update(self.variables.tree, tree_update_indices, tree_children_updates)
            thresholds_update_op = state_ops.scatter_update(
                self.variables.tree_thresholds, tree_update_indices, tree_threshold_updates
            )
            depth_update_op = state_ops.scatter_update(
                self.variables.tree_depths, tree_update_indices, tree_depth_updates
            )
            # TODO(thomaswc): Only update the epoch on the new leaves.
            new_epoch_updates = epoch * array_ops.ones_like(tree_depth_updates)
            epoch_update_op = state_ops.scatter_update(
                self.variables.start_epoch, tree_update_indices, new_epoch_updates
            )

        # Update fertile slots.
        with ops.control_dependencies([depth_update_op]):
            (node_map_updates, accumulators_cleared, accumulators_allocated) = self.training_ops.update_fertile_slots(
                finished,
                non_fertile_leaves,
                non_fertile_leaf_scores,
                self.variables.end_of_tree,
                self.variables.tree_depths,
                self.variables.accumulator_sums,
                self.variables.node_to_accumulator_map,
                stale,
                max_depth=self.params.max_depth,
                regression=self.params.regression,
            )

        # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
        # used it to calculate new leaves.
        gated_new_eot, = control_flow_ops.tuple([new_eot], control_inputs=[node_map_updates])
        eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

        updates = []
        updates.append(eot_update_op)
        updates.append(tree_update_op)
        updates.append(thresholds_update_op)
        updates.append(epoch_update_op)

        updates.append(
            state_ops.scatter_update(
                self.variables.node_to_accumulator_map,
                array_ops.squeeze(array_ops.slice(node_map_updates, [0, 0], [1, -1]), squeeze_dims=[0]),
                array_ops.squeeze(array_ops.slice(node_map_updates, [1, 0], [1, -1]), squeeze_dims=[0]),
            )
        )

        cleared_and_allocated_accumulators = array_ops.concat(0, [accumulators_cleared, accumulators_allocated])
        # Calculate values to put into scatter update for candidate counts.
        # Candidate split counts are always reset back to 0 for both cleared
        # and allocated accumulators. This means some accumulators might be doubly
        # reset to 0 if they were released and not allocated, then later allocated.
        split_values = array_ops.tile(
            array_ops.expand_dims(
                array_ops.expand_dims(
                    array_ops.zeros_like(cleared_and_allocated_accumulators, dtype=dtypes.float32), 1
                ),
                2,
            ),
            [1, self.params.num_splits_to_consider, self.params.num_output_columns],
        )
        updates.append(
            state_ops.scatter_update(
                self.variables.candidate_split_sums, cleared_and_allocated_accumulators, split_values
            )
        )
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(
                    self.variables.candidate_split_squares, cleared_and_allocated_accumulators, split_values
                )
            )

        # Calculate values to put into scatter update for total counts.
        total_cleared = array_ops.tile(
            array_ops.expand_dims(math_ops.neg(array_ops.ones_like(accumulators_cleared, dtype=dtypes.float32)), 1),
            [1, self.params.num_output_columns],
        )
        total_reset = array_ops.tile(
            array_ops.expand_dims(array_ops.zeros_like(accumulators_allocated, dtype=dtypes.float32), 1),
            [1, self.params.num_output_columns],
        )
        accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
        updates.append(
            state_ops.scatter_update(
                self.variables.accumulator_sums, cleared_and_allocated_accumulators, accumulator_updates
            )
        )
        if self.params.regression:
            updates.append(
                state_ops.scatter_update(
                    self.variables.accumulator_squares, cleared_and_allocated_accumulators, accumulator_updates
                )
            )

        # Calculate values to put into scatter update for candidate splits.
        split_features_updates = array_ops.tile(
            array_ops.expand_dims(math_ops.neg(array_ops.ones_like(cleared_and_allocated_accumulators)), 1),
            [1, self.params.num_splits_to_consider],
        )
        updates.append(
            state_ops.scatter_update(
                self.variables.candidate_split_features, cleared_and_allocated_accumulators, split_features_updates
            )
        )

        updates += self.finish_iteration()

        return control_flow_ops.group(*updates)
Example #45
0
def _filter_input(input_tensor, vocab_freq_table, vocab_min_count,
                  vocab_subsampling, corpus_size, seed):
    """Filters input tensor based on vocab freq, threshold, and subsampling."""
    if vocab_freq_table is None:
        return input_tensor

    if not isinstance(vocab_freq_table,
                      lookup_ops.InitializableLookupTableBase):
        raise ValueError(
            "vocab_freq_table must be a subclass of "
            "InitializableLookupTableBase (such as HashTable) instead of type "
            "{}.".format(type(vocab_freq_table)))

    with ops.name_scope(
            "filter_vocab",
            values=[vocab_freq_table, input_tensor, vocab_min_count]):
        freq = vocab_freq_table.lookup(input_tensor)
        # Filters out elements in input_tensor that are not found in
        # vocab_freq_table (the table returns its default value, e.g. -1, when
        # an element is not found).
        mask = math_ops.not_equal(freq, vocab_freq_table.default_value)

        # Filters out elements whose vocab frequencies are less than the threshold.
        if vocab_min_count is not None:
            cast_threshold = math_ops.cast(vocab_min_count, freq.dtype)
            mask = math_ops.logical_and(
                mask, math_ops.greater_equal(freq, cast_threshold))

        input_tensor = array_ops.boolean_mask(input_tensor, mask)
        freq = array_ops.boolean_mask(freq, mask)

    if not vocab_subsampling:
        return input_tensor

    if vocab_subsampling < 0 or vocab_subsampling > 1:
        raise ValueError(
            "Invalid vocab_subsampling={} - it should be within range [0, 1].".
            format(vocab_subsampling))

    # Subsamples the input tokens based on vocabulary frequency and the
    # vocab_subsampling threshold (i.e. randomly discard commonly appearing
    # tokens).
    with ops.name_scope("subsample_vocab",
                        values=[input_tensor, freq, vocab_subsampling]):
        corpus_size = math_ops.cast(corpus_size, dtypes.float64)
        freq = math_ops.cast(freq, dtypes.float64)
        vocab_subsampling = math_ops.cast(vocab_subsampling, dtypes.float64)

        # From tensorflow_models/tutorials/embedding/word2vec_kernels.cc, which is
        # supposed to correlate with Eq. 5 in http://arxiv.org/abs/1310.4546.
        keep_prob = ((math_ops.sqrt(freq /
                                    (vocab_subsampling * corpus_size)) + 1.0) *
                     (vocab_subsampling * corpus_size / freq))
        random_prob = random_ops.random_uniform(array_ops.shape(freq),
                                                minval=0,
                                                maxval=1,
                                                dtype=dtypes.float64,
                                                seed=seed)

        mask = math_ops.less_equal(random_prob, keep_prob)
        return array_ops.boolean_mask(input_tensor, mask)
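Plugging toy numbers into the subsampling formula above (a corpus of 1e6 tokens and a subsampling threshold of 1e-3, both made up): very frequent tokens get a keep probability well below 1, while rarer tokens get a value >= 1 and are always kept.

corpus_size = 1e6
t = 1e-3  # vocab_subsampling

for freq in (50000.0, 500.0):
    ratio = t * corpus_size / freq
    keep_prob = ((freq / (t * corpus_size)) ** 0.5 + 1.0) * ratio
    print(freq, keep_prob)  # 50000.0 -> ~0.16, 500.0 -> ~3.41 (always kept)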
Example #46
0
  def training_graph(self,
                     input_data,
                     input_labels,
                     random_seed,
                     data_spec,
                     sparse_features=None,
                     input_weights=None):

    """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A data_ops.TensorForestDataSpec object specifying the
        original feature/columns of the data.
      sparse_features: A tf.SparseTensor for sparse input data.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = math_ops.to_int32(get_epoch_variable())

    serialized_input_spec = data_spec.SerializeToString()

    if input_weights is None:
      input_weights = []

    if input_data is None:
      input_data = []

    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if sparse_features is not None:
      sparse_indices = sparse_features.indices
      sparse_values = sparse_features.values
      sparse_shape = sparse_features.dense_shape

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         input_spec=serialized_input_spec,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                           splits_indices, splits_sums))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices, totals_sums))

    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(
              self.variables.candidate_split_squares, splits_indices,
              splits_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                             totals_indices, totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        tensor_forest_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            input_spec=serialized_input_spec,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
      # Passing input_leaves to finished nodes here means that nodes that
      # have become stale won't be deallocated until an input reaches them,
      # because we're trying to avoid considering every fertile node for
      # performance reasons.
      finished, stale = tensor_forest_ops.finished_nodes(
          input_leaves,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch,
          epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples,
          dominate_method=self.params.dominate_method,
          dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    children = array_ops.squeeze(
        array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(
            array_ops.where(is_leaf), squeeze_dims=[1]))
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = tensor_forest_ops.best_splits(
          finished,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op,
                                   non_fertile_leaves.op]):
      (tree_update_indices, tree_children_updates, tree_threshold_updates,
       new_eot) = (tensor_forest_ops.grow_tree(
           self.variables.end_of_tree, self.variables.node_to_accumulator_map,
           finished, split_indices, self.variables.candidate_split_features,
           self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                      dtype=dtypes.int32)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
      (n2a_map_updates, a2n_map_updates, accumulators_cleared,
       accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
           finished,
           non_fertile_leaves,
           non_fertile_leaf_scores,
           self.variables.end_of_tree,
           self.variables.accumulator_sums,
           self.variables.node_to_accumulator_map,
           stale,
           self.variables.node_sums,
           regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    with ops.control_dependencies([n2a_map_updates.op]):
      eot_update_op = state_ops.assign(self.variables.end_of_tree, new_eot)

    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat(
        [accumulators_cleared, accumulators_allocated], 0)

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.negative(array_ops.ones_like(accumulators_cleared,
                                                  dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat([total_cleared, total_reset], 0)
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.negative(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
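The non-fertile-leaf selection above is just a gather followed by a boolean_mask: leaves whose entry in the node-to-accumulator map is negative have no accumulator and are therefore non-fertile. A minimal standalone sketch of that pattern with the public `tf` API and made-up values (all names below are hypothetical, not part of the code above):

import tensorflow as tf

# Hypothetical leaf ids and node->accumulator map; entries < 0 mean
# "no accumulator assigned", i.e. the leaf is non-fertile.
leaves = tf.constant([2, 5, 7, 9])
node_to_accumulator = tf.constant([-1, -1, 3, -1, -1, -1, -1, -1, 0, -1])

is_non_fertile = tf.less(tf.gather(node_to_accumulator, leaves), 0)
non_fertile_leaves = tf.boolean_mask(leaves, is_non_fertile)  # -> [5, 7, 9]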
Exemple #47
0
 def testMaskHasMoreDimsThanTensorRaises(self):
     mask = [[True, True], [False, False]]
     tensor = [1, 2, 3, 4]
     with self.test_session():
         with self.assertRaisesRegexp(ValueError, "incompatible"):
             array_ops.boolean_mask(tensor, mask).eval()
Exemple #48
0
    def build_controller(self):
        """RL optimization interface.

    Returns:
      ops: A dictionary holding handles of the model used for training.
    """

        self._global_step = training_util.get_or_create_global_step()
        ops = {}
        ops["loss"] = 0

        failing_signal = self.compute_reward(self.hparams.failing_signal)

        ctr = {}

        with tf_ops.name_scope("controller_{}".format(self.ctrl_id)):
            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                ctr["reward"] = {"value": [], "ph": [], "update": []}
                ctr["ready"] = {"value": [], "ph": [], "update": []}
                ctr["best_reward"] = {"value": [], "update": []}
                for i in range(self.hparams.num_children):
                    reward_value = variable_scope.get_local_variable(
                        "reward_{}".format(i),
                        initializer=0.0,
                        dtype=dtypes.float32,
                        trainable=False)
                    reward_ph = array_ops.placeholder(
                        dtypes.float32,
                        shape=(),
                        name="reward_ph_{}".format(i))
                    reward_update = state_ops.assign(reward_value,
                                                     reward_ph,
                                                     use_locking=True)
                    ctr["reward"]["value"].append(reward_value)
                    ctr["reward"]["ph"].append(reward_ph)
                    ctr["reward"]["update"].append(reward_update)
                    best_reward = variable_scope.get_local_variable(
                        "best_reward_{}".format(i),
                        initializer=failing_signal,
                        dtype=dtypes.float32,
                        trainable=False)
                    ctr["best_reward"]["value"].append(best_reward)
                    ctr["best_reward"]["update"].append(
                        state_ops.assign(
                            best_reward,
                            math_ops.minimum(best_reward, reward_update)))

                    ready_value = variable_scope.get_local_variable(
                        "ready_{}".format(i),
                        initializer=True,
                        dtype=dtypes.bool,
                        trainable=False)
                    ready_ph = array_ops.placeholder(
                        dtypes.bool, shape=(), name="ready_ph_{}".format(i))
                    ready_update = state_ops.assign(ready_value,
                                                    ready_ph,
                                                    use_locking=True)
                    ctr["ready"]["value"].append(ready_value)
                    ctr["ready"]["ph"].append(ready_ph)
                    ctr["ready"]["update"].append(ready_update)

            ctr["grouping_y_preds"], ctr[
                "grouping_log_probs"] = self.get_groupings()
            summary.histogram(
                "grouping_actions",
                array_ops.slice(ctr["grouping_y_preds"]["sample"], [0, 0],
                                [1, array_ops.shape(self.op_embeddings)[0]]))

            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                ctr["baseline"] = variable_scope.get_local_variable(
                    "baseline",
                    initializer=failing_signal
                    if self.hparams.start_with_failing_signal else 0.0,
                    dtype=dtypes.float32,
                    trainable=False)

            new_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                1 - self.hparams.bl_dec) * math_ops.reduce_mean(
                    ctr["reward"]["value"])
            if not self.hparams.always_update_baseline:
                baseline_mask = math_ops.less(ctr["reward"]["value"],
                                              failing_signal)
                selected_reward = array_ops.boolean_mask(
                    ctr["reward"]["value"], baseline_mask)
                selected_baseline = control_flow_ops.cond(
                    math_ops.reduce_any(baseline_mask),
                    lambda: math_ops.reduce_mean(selected_reward),
                    lambda: constant_op.constant(0, dtype=dtypes.float32))
                ctr["pos_reward"] = selected_baseline
                pos_ = math_ops.less(
                    constant_op.constant(0, dtype=dtypes.float32),
                    selected_baseline)
                selected_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                    1 - self.hparams.bl_dec) * selected_baseline
                selected_baseline = control_flow_ops.cond(
                    pos_, lambda: selected_baseline, lambda: ctr["baseline"])
                new_baseline = control_flow_ops.cond(
                    math_ops.less(self.global_step,
                                  self.hparams.stop_updating_after_steps),
                    lambda: new_baseline, lambda: selected_baseline)
            ctr["baseline_update"] = state_ops.assign(ctr["baseline"],
                                                      new_baseline,
                                                      use_locking=True)

            ctr["y_preds"], ctr["log_probs"] = self.get_placements()
            summary.histogram("actions", ctr["y_preds"]["sample"])
            mask = math_ops.less(ctr["reward"]["value"], failing_signal)
            ctr["loss"] = ctr["reward"]["value"] - ctr["baseline"]
            ctr["loss"] *= (ctr["log_probs"]["sample"] +
                            ctr["grouping_log_probs"]["sample"])

            selected_loss = array_ops.boolean_mask(ctr["loss"], mask)
            selected_loss = control_flow_ops.cond(
                math_ops.reduce_any(mask),
                lambda: math_ops.reduce_mean(-selected_loss),
                lambda: constant_op.constant(0, dtype=dtypes.float32))

            ctr["loss"] = control_flow_ops.cond(
                math_ops.less(self.global_step,
                              self.hparams.stop_updating_after_steps),
                lambda: math_ops.reduce_mean(-ctr["loss"]),
                lambda: selected_loss)

            ctr["reward_s"] = math_ops.reduce_mean(ctr["reward"]["value"])
            summary.scalar("loss", ctr["loss"])
            summary.scalar("avg_reward", ctr["reward_s"])
            summary.scalar("best_reward_so_far", best_reward)
            summary.scalar(
                "advantage",
                math_ops.reduce_mean(ctr["reward"]["value"] - ctr["baseline"]))

        with variable_scope.variable_scope("optimizer",
                                           reuse=variable_scope.AUTO_REUSE):
            (ctr["train_op"], ctr["lr"], ctr["grad_norm"],
             ctr["grad_norms"]) = self._get_train_ops(
                 ctr["loss"],
                 tf_ops.get_collection(tf_ops.GraphKeys.TRAINABLE_VARIABLES),
                 self.global_step,
                 grad_bound=self.hparams.grad_bound,
                 lr_init=self.hparams.lr,
                 lr_dec=self.hparams.lr_dec,
                 start_decay_step=self.hparams.start_decay_step,
                 decay_steps=self.hparams.decay_steps,
                 optimizer_type=self.hparams.optimizer_type)

        summary.scalar("gradnorm", ctr["grad_norm"])
        summary.scalar("lr", ctr["lr"])
        ctr["summary"] = summary.merge_all()
        ops["controller"] = ctr

        self.ops = ops
        return ops
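The baseline update above filters rewards with boolean_mask and guards the mean with reduce_any, so an all-False mask can never feed the mean of an empty tensor back into the baseline. A minimal sketch of that guard with made-up values (lower reward is better here, as in the controller above):

import tensorflow as tf

rewards = tf.constant([2.1, 0.7, 3.5])    # hypothetical per-child rewards
failing_signal = tf.constant(1.5)

mask = tf.less(rewards, failing_signal)    # rewards that beat the failing signal
selected = tf.boolean_mask(rewards, mask)
# Only average when at least one reward passed the filter; otherwise fall
# back to 0.0 instead of taking the mean of an empty tensor.
selected_mean = tf.cond(tf.reduce_any(mask),
                        lambda: tf.reduce_mean(selected),
                        lambda: tf.constant(0.0))  # -> 0.7 for these values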
Exemple #49
0
 def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self):
     mask = [True, True, True]
     tensor = [[1, 2], [3, 4]]
     with self.test_session():
         with self.assertRaisesRegexp(ValueError, "incompatible"):
             array_ops.boolean_mask(tensor, mask).eval()
Exemple #50
0
def repeat(data, repeats, axis, name=None):
    """Repeats elements of `data`.

  Args:
    data: An `N`-dimensional tensor.
    repeats: A 1-D integer tensor specifying how many times each element in
      `axis` should be repeated.  `len(repeats)` must equal `data.shape[axis]`.
      Supports broadcasting from a scalar value.
    axis: `int`.  The axis along which to repeat values.  Must be less than
      `max(N, 1)`.
    name: A name for the operation.

  Returns:
    A tensor with `max(N, 1)` dimensions.  Has the same shape as `data`,
    except that dimension `axis` has size `sum(repeats)`.

  #### Examples:
    ```python
    >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0)
    ['a', 'a', 'a', 'c', 'c']
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)
    [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)
    [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]]
    ```
  """
    if not isinstance(axis, int):
        raise TypeError("axis must be an int; got %s" % type(axis).__name__)

    with ops.name_scope(name, "Repeat", [data, repeats]):
        data = ops.convert_to_tensor(data, name="data")
        repeats = convert_to_int_tensor(repeats, name="repeats")
        repeats.shape.with_rank_at_most(1)

        # If `data` is a scalar, then upgrade it to a vector.
        data = _with_nonzero_rank(data)
        data_shape = array_ops.shape(data)

        # If `axis` is negative, then convert it to a positive value.
        axis = get_positive_axis(axis, data.shape.ndims)

        # Check data Tensor shapes.
        if repeats.shape.ndims == 1:
            data.shape.dims[axis].assert_is_compatible_with(repeats.shape[0])

        # If we know that `repeats` is a scalar, then we can just tile & reshape.
        if repeats.shape.ndims == 0:
            expanded = array_ops.expand_dims(data, axis + 1)
            tiled = tile_one_dimension(expanded, axis + 1, repeats)
            result_shape = array_ops.concat(
                [data_shape[:axis], [-1], data_shape[axis + 1:]], axis=0)
            return array_ops.reshape(tiled, result_shape)

        # Broadcast the `repeats` tensor so rank(repeats) == axis + 1.
        if repeats.shape.ndims != axis + 1:
            repeats_shape = array_ops.shape(repeats)
            repeats_ndims = array_ops.rank(repeats)
            broadcast_shape = array_ops.concat(
                [data_shape[:axis + 1 - repeats_ndims], repeats_shape], axis=0)
            repeats = array_ops.broadcast_to(repeats, broadcast_shape)
            repeats.set_shape([None] * (axis + 1))

        # Create a "sequence mask" based on `repeats`, where slices across `axis`
        # contain one `True` value for each repetition.  E.g., if
        # `repeats = [3, 1, 2]`, then `mask = [[1, 1, 1], [1, 0, 0], [1, 1, 0]]`.
        max_repeat = math_ops.maximum(0, math_ops.reduce_max(repeats))
        mask = array_ops.sequence_mask(repeats, max_repeat)

        # Add a new dimension around each value that needs to be repeated, and
        # then tile that new dimension to match the maximum number of repetitions.
        expanded = array_ops.expand_dims(data, axis + 1)
        tiled = tile_one_dimension(expanded, axis + 1, max_repeat)

        # Use `boolean_mask` to discard the extra repeated values.  This also
        # flattens all dimensions up through `axis`.
        masked = array_ops.boolean_mask(tiled, mask)

        # Reshape the output tensor to add the outer dimensions back.
        if axis == 0:
            result = masked
        else:
            result_shape = array_ops.concat(
                [data_shape[:axis], [-1], data_shape[axis + 1:]], axis=0)
            result = array_ops.reshape(masked, result_shape)

        # Preserve shape information.
        if data.shape.ndims is not None:
            new_axis_size = 0 if repeats.shape[0] == 0 else None
            result.set_shape(data.shape[:axis].concatenate(
                [new_axis_size]).concatenate(data.shape[axis + 1:]))

        return result
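The core trick in `repeat` above is to tile every element up to the maximum repeat count and then drop the surplus copies with a sequence mask. A minimal sketch of that idea for the 1-D case, using the public `tf` API and the docstring's first example:

import tensorflow as tf

data = tf.constant(['a', 'b', 'c'])
repeats = tf.constant([3, 0, 2])

max_repeat = tf.reduce_max(repeats)                    # 3
mask = tf.sequence_mask(repeats, max_repeat)           # [[T,T,T],[F,F,F],[T,T,F]]
tiled = tf.tile(tf.expand_dims(data, 1), tf.stack([1, max_repeat]))
result = tf.boolean_mask(tiled, mask)                  # [b'a', b'a', b'a', b'c', b'c']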
Exemple #51
0
    def _reshape_helper(self, x, event_shape_in, event_shape_out):
        """Reshape only the event_shape of an input `Tensor`."""

        event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
        event_ndims_in = _ndims_from_shape(event_shape_in)
        x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)

        assertions = []

        # Ensure x.event_shape is compatible with event_shape_in.
        if (event_ndims_in_ is not None and x_ndims_ is not None
                and x.shape.with_rank_at_least(event_ndims_in_)
            [x_ndims_ - event_ndims_in_:].is_fully_defined()):
            x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
                np.int32(x.shape[x_ndims_ - event_ndims_in_:])
            ] * 2
        else:
            x_event_shape_, x_event_shape = (
                None, array_ops.shape(x)[x_ndims - event_ndims_in:])

        event_shape_in_ = tensor_util.constant_value(event_shape_in)

        if x_event_shape_ is not None and event_shape_in_ is not None:
            # Compare the shape dimensions that are fully specified in the
            # input (i.e., for which event_shape_in is not -1). If x_event_shape
            # matches along all of these dimensions, it is compatible with
            # the desired input shape and any further mismatches (i.e.,
            # incompatibility with the desired *output* shape) will be
            # caught inside of array_ops.reshape() below.
            x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
            event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
            if not np.equal(x_event_shape_specified_,
                            event_shape_in_specified_).all():
                raise ValueError(
                    "Input `event_shape` does not match `event_shape_in` ({} vs {})."
                    .format(x_event_shape_, event_shape_in_))
        elif self.validate_args:
            # Similarly to the static case, we compare the shape dimensions
            # that are fully specified in the input. We extract these
            # dimensions using boolean_mask(), which requires that the mask
            # have known ndims. We can assume that shape Tensors always have
            # ndims==1 (this assumption is verified inside of
            # _maybe_check_valid_shape), so the reshape operation is just a
            # no-op that formally encodes this fact to make boolean_mask()
            # happy.
            event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
            x_event_shape_specified = array_ops.boolean_mask(
                x_event_shape, event_shape_mask)
            event_shape_in_specified = array_ops.boolean_mask(
                event_shape_in, event_shape_mask)
            assertions.append(
                check_ops.assert_equal(
                    x_event_shape_specified,
                    event_shape_in_specified,
                    message=
                    "Input `event_shape` does not match `event_shape_in`."))

        if assertions:
            x = control_flow_ops.with_dependencies(assertions, x)

        # get the parts of shape(x) that will not change
        sample_and_batch_shape = array_ops.shape(x)

        ndims = (x.shape.ndims
                 if x.shape.ndims is not None else array_ops.rank(x))
        sample_and_batch_shape = sample_and_batch_shape[:(
            ndims - math_ops.abs(event_ndims_in))]

        if (event_ndims_in_ is not None and x_ndims_ is not None
                and event_ndims_in_ == x_ndims_):
            # Hack to allow forward/inverse_event_shape to do shape
            # inference by calling this helper method with a dummy Tensor of
            # shape event_shape_in. In this special case,
            # sample_and_batch_shape will be empty so we can preserve static
            # shape information by avoiding the concat operation below
            # (which would be a no-op).
            new_shape = event_shape_out
        else:
            new_shape = array_ops.concat(
                [sample_and_batch_shape, event_shape_out], axis=0)

        return array_ops.reshape(x, new_shape)
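The `validate_args` branch above compares only the dimensions that are fully specified in `event_shape_in` by masking out the -1 entries. A minimal sketch of that comparison with made-up shapes:

import tensorflow as tf

x_event_shape = tf.constant([4, 3, 2])
event_shape_in = tf.constant([4, -1, 2])   # -1 marks an unspecified dimension

specified = event_shape_in >= 0
x_specified = tf.boolean_mask(x_event_shape, specified)     # [4, 2]
in_specified = tf.boolean_mask(event_shape_in, specified)   # [4, 2]
check = tf.debugging.assert_equal(
    x_specified, in_specified,
    message="Input `event_shape` does not match `event_shape_in`.")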
Exemple #52
0
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
    """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
      `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.
  Raises:
    ValueError: If both `lengths` and `padding` are specified.
  """
    if lengths is not None and padding is not None:
        raise ValueError('Specify lengths or padding, but not both')
    if not isinstance(ragged_rank, int):
        raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
    if ragged_rank <= 0:
        raise ValueError('ragged_rank must be greater than 0; got %s' %
                         ragged_rank)

    with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
        tensor = ops.convert_to_tensor(tensor, name='tensor')
        tensor.shape.with_rank_at_least(ragged_rank + 1)
        input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
        ncols = input_shape[1]

        # Handle ragged_rank>1 via recursion:
        # If the output should have multiple ragged dimensions, then first
        # flatten the tensor to eliminate all but the last ragged dimension,
        # and recursively convert that flattened tensor.  Then add on the splits
        # for the dimensions that we flattened out.
        if ragged_rank > 1:
            # Flatten `tensor` to eliminate all but the last ragged dimension.
            new_shape = array_ops.concat([
                constant_op.constant([-1], dtypes.int64),
                input_shape[ragged_rank:]
            ],
                                         axis=0)
            flattened = array_ops.reshape(tensor, new_shape)
            # Recursively convert the flattened tensor.
            values = from_tensor(flattened, lengths, padding)
            # The total number of elements in each dimension.  E.g., if
            # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
            dim_size = math_ops.cumprod(input_shape)
            # Construct splits tensors for the dimensions that were flattened.
            new_splits = [
                math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
                for dim in range(1, ragged_rank)
            ]
            return ragged_factory_ops.from_nested_row_splits(
                values, new_splits)

        # If padding was specified, then use it to find row lengths.
        if padding is not None:
            padding = ops.convert_to_tensor(padding,
                                            name='padding',
                                            dtype=tensor.dtype)
            padding.shape.assert_is_compatible_with(tensor.shape[2:])

            # Find places where the padding is equal to the tensor.  (This will
            # broadcast `padding` across the outermost 2 dimensions of `tensor`,
            # so `has_default_value.shape = tensor.shape`.)
            has_default_value = math_ops.equal(padding, tensor)

            # If the padding isn't a scalar, then require that all values in the
            # padding match each item in the tensor.  After this block of code,
            # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
            # use reduce_all for both cases, because when you pass an empty `axis`
            # list to reduce_all, it reduces all axes; but we want it to reduce no
            # axes -- i.e., to be a no-op.)
            tensor_rank = array_ops.rank(tensor)
            reduce_axis = math_ops.range(2, tensor_rank)
            has_default = control_flow_ops.cond(
                tensor_rank > 2, lambda: math_ops.reduce_all(has_default_value,
                                                             axis=reduce_axis),
                lambda: has_default_value)
            has_default.set_shape(tensor_shape.TensorShape([None, None]))
            has_default.set_shape(tensor.shape[:2])

            # Use has_default to find the length of each row: for each non-default
            # item in a row, calculate the length that the row needs to have to
            # include that item; and then take the max of those values (across each
            # row).
            has_nondefault = math_ops.logical_not(has_default)
            has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
            length_for_nondefault_value = (
                has_nondefault *
                array_ops.expand_dims(math_ops.range(1, ncols + 1), 0))
            lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)

        # If we have lengths (either directly supplied, or computed from paddings),
        # then use those to construct splits; and then use masking to get the
        # corresponding values.
        if lengths is not None:
            lengths = ragged_util.convert_to_int_tensor(
                lengths, 'lengths', dtypes.int64)
            lengths.shape.assert_has_rank(1)
            lengths = math_ops.minimum(lengths, ncols)
            lengths = math_ops.maximum(lengths, 0)
            limits = math_ops.cumsum(lengths)
            splits = array_ops.concat(
                [array_ops.zeros([1], dtypes.int64), limits], axis=0)
            mask = array_ops.sequence_mask(lengths, maxlen=ncols)
            values = array_ops.boolean_mask(tensor, mask)
            return ragged_factory_ops.from_row_splits(values, splits)

        # If neither padding nor lengths were specified, then create a splits
        # vector that contains no default values, and reshape the input tensor
        # to form the values for the RaggedTensor.
        nrows = input_shape[0]
        nvals = nrows * ncols
        splits = math_ops.range(nrows + 1) * ncols
        values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
        values = array_ops.reshape(tensor, values_shape)
        return ragged_factory_ops.from_row_splits(values, splits)
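The `lengths` branch above is another sequence_mask plus boolean_mask combination: row lengths become row splits, and the mask selects exactly the ragged values. A minimal sketch with the public `tf` API, using `tf.RaggedTensor.from_row_splits` in place of the internal factory and the `lengths=[2, 0, 3]` case from the docstring:

import tensorflow as tf

dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
lengths = tf.constant([2, 0, 3], dtype=tf.int64)

ncols = tf.shape(dt, out_type=tf.int64)[1]
lengths = tf.minimum(tf.maximum(lengths, 0), ncols)   # clamp to [0, ncols]
splits = tf.concat([tf.zeros([1], tf.int64), tf.cumsum(lengths)], axis=0)  # [0, 2, 2, 5]
mask = tf.sequence_mask(lengths, maxlen=ncols)
values = tf.boolean_mask(dt, mask)                     # [5, 7, 6, 0, 0]
ragged = tf.RaggedTensor.from_row_splits(values, splits)  # [[5, 7], [], [6, 0, 0]]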
Exemple #53
0
  def _reshape_helper(self, x, event_shape_in, event_shape_out):
    """Reshape only the event_shape of an input `Tensor`."""

    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
    event_ndims_in = _ndims_from_shape(event_shape_in)
    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)

    assertions = []

    # Ensure x.event_shape is compatible with event_shape_in.
    if (event_ndims_in_ is not None
        and x_ndims_ is not None
        and x.shape.with_rank_at_least(event_ndims_in_)[
            x_ndims_-event_ndims_in_:].is_fully_defined()):
      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
    else:
      x_event_shape_, x_event_shape = (
          None, array_ops.shape(x)[x_ndims-event_ndims_in:])

    event_shape_in_ = tensor_util.constant_value(event_shape_in)

    if x_event_shape_ is not None and event_shape_in_ is not None:
      # Compare the shape dimensions that are fully specified in the
      # input (i.e., for which event_shape_in is not -1). If x_event_shape
      # matches along all of these dimensions, it is compatible with
      # the desired input shape and any further mismatches (i.e.,
      # incompatibility with the desired *output* shape) will be
      # caught inside of array_ops.reshape() below.
      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
      if not np.equal(x_event_shape_specified_,
                      event_shape_in_specified_).all():
        raise ValueError(
            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
            format(x_event_shape_, event_shape_in_))
    elif self.validate_args:
      # Similarly to the static case, we compare the shape dimensions
      # that are fully specified in the input. We extract these
      # dimensions using boolean_mask(), which requires that the mask
      # have known ndims. We can assume that shape Tensors always have
      # ndims==1 (this assumption is verified inside of
      # _maybe_check_valid_shape), so the reshape operation is just a
      # no-op that formally encodes this fact to make boolean_mask()
      # happy.
      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
                                                       event_shape_mask)
      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
                                                        event_shape_mask)
      assertions.append(check_ops.assert_equal(
          x_event_shape_specified, event_shape_in_specified,
          message="Input `event_shape` does not match `event_shape_in`."))

    if assertions:
      x = control_flow_ops.with_dependencies(assertions, x)

    # get the parts of shape(x) that will not change
    sample_and_batch_shape = array_ops.shape(x)

    ndims = (x.shape.ndims if x.shape.ndims is not None
             else array_ops.rank(x))
    sample_and_batch_shape = sample_and_batch_shape[
        :(ndims - math_ops.abs(event_ndims_in))]

    if (event_ndims_in_ is not None
        and x_ndims_ is not None
        and event_ndims_in_ == x_ndims_):
      # Hack to allow forward/inverse_event_shape to do shape
      # inference by calling this helper method with a dummy Tensor of
      # shape event_shape_in. In this special case,
      # sample_and_batch_shape will be empty so we can preserve static
      # shape information by avoiding the concat operation below
      # (which would be a no-op).
      new_shape = event_shape_out
    else:
      new_shape = array_ops.concat(
          [sample_and_batch_shape, event_shape_out], axis=0)

    return array_ops.reshape(x, new_shape)
Exemple #54
0
    def __getitem__(self, slice_spec):
        """Extracts the specified region as a Tensor from the sharded variable.

    The API contract is identical to `Tensor.__getitem__`. Assignment to the
    sliced range is not yet supported.

    Args:
      slice_spec: The arguments to __getitem__, specifying the global slicing of
        the sharded variable.

    Returns:
      The appropriate slice of tensor based on `slice_spec`.

    Raises:
      IndexError: If a slice index is out of bound.
      TypeError: If `slice_spec` contains Tensor.
    """

        # TODO(b/177482728): Support tensor input.
        # TODO(b/177482728): Support slice assign, similar to variable slice assign.

        if (isinstance(slice_spec, bool)
                or (isinstance(slice_spec, ops.Tensor)
                    and slice_spec.dtype == dtypes.bool) or
            (isinstance(slice_spec, np.ndarray) and slice_spec.dtype == bool)):
            tensor = _var_to_tensor(self)
            return array_ops.boolean_mask(tensor=tensor, mask=slice_spec)

        if not isinstance(slice_spec, (list, tuple)):
            slice_spec = (slice_spec, )

        s = slice_spec[0]
        if isinstance(s, slice):
            first_dim_slice_specs = self._decompose_slice_spec(s)
            values = []
            for i, var in enumerate(self._variables):
                if first_dim_slice_specs[i] is not None:
                    all_dim_slice_spec = (
                        first_dim_slice_specs[i], ) + slice_spec[1:]
                    values.append(var[all_dim_slice_spec])
            if s.step is not None and s.step < 0:
                values.reverse()
            if not values:
                return constant_op.constant([],
                                            dtype=self._dtype,
                                            shape=((0, ) + self._shape[1:]))
            return array_ops.concat(values, axis=0)
        elif s is Ellipsis:
            return array_ops.concat(
                [var[slice_spec] for var in self._variables], axis=0)
        elif s is array_ops.newaxis:
            return array_ops.concat(
                [var[slice_spec[1:]] for var in self._variables],
                axis=0)[array_ops.newaxis]
        else:
            if isinstance(s, ops.Tensor):
                raise TypeError(
                    'ShardedVariable: using Tensor for indexing is not allowed.'
                )
            if s < 0:
                s += self._shape[0]
            if s < 0 or s >= self._shape[0]:
                raise IndexError(
                    f'ShardedVariable: slice index {s} of dimension 0 out of bounds.'
                )
            for i in range(len(self._variables)):
                if i == len(self._variables) - 1 or (
                        s > self._var_offsets[i][0]
                        and s < self._var_offsets[i + 1][0]):
                    return self._variables[i][(s - self._var_offsets[i][0], ) +
                                              slice_spec[1:]]
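The first branch of `__getitem__` above means that boolean indexing on a sharded variable behaves like `tf.boolean_mask` on the concatenated value. A minimal sketch of the equivalent operation on an ordinary tensor (the sharded-variable setup itself is elided; the values are made up):

import tensorflow as tf

value = tf.constant([[1, 2], [3, 4], [5, 6]])   # stand-in for the variable's value
row_mask = tf.constant([True, False, True])

# Boolean indexing keeps only the rows where the mask is True.
selected = tf.boolean_mask(value, row_mask)      # [[1, 2], [5, 6]]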