def testNegativeBinomialSample(self):
    with self.cached_session() as sess:
      probs = [.3, .9]
      total_count = [4., 11.]
      n = int(100e3)
      negbinom = negative_binomial.NegativeBinomial(
          total_count=total_count, probs=probs)

      samples = negbinom.sample(n, seed=12345)
      self.assertEqual([n, 2], samples.get_shape())

      sample_mean = math_ops.reduce_mean(samples, axis=0)
      sample_var = math_ops.reduce_mean(
          (samples - sample_mean[array_ops.newaxis, ...])**2., axis=0)
      sample_min = math_ops.reduce_min(samples)
      [sample_mean_, sample_var_, sample_min_] = sess.run([
          sample_mean, sample_var, sample_min])
      self.assertAllEqual(np.ones(sample_min_.shape, dtype=np.bool),
                          sample_min_ >= 0.0)
      for i in range(2):
        self.assertAllClose(sample_mean_[i],
                            stats.nbinom.mean(total_count[i], 1 - probs[i]),
                            atol=0.,
                            rtol=.02)
        self.assertAllClose(sample_var_[i],
                            stats.nbinom.var(total_count[i], 1 - probs[i]),
                            atol=0.,
                            rtol=.02)
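
# Hedged numpy-only check (not part of the original test): scipy's
# nbinom(n, p) has mean n * (1 - p) / p, so passing p = 1 - probs gives
# total_count * probs / (1 - probs), matching the distribution the test
# samples from -- hence the `1 - probs[i]` in the assertions above.
import numpy as np
from scipy import stats

total_count, probs = 4.0, 0.3
assert np.isclose(stats.nbinom.mean(total_count, 1 - probs),
                  total_count * probs / (1 - probs))
assert np.isclose(stats.nbinom.var(total_count, 1 - probs),
                  total_count * probs / (1 - probs) ** 2)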
Example #2
 def grow_tree_from_stats_summaries(stats_summary_list):
   """Updates ensemble based on the best gains from stats summaries."""
   (node_ids_per_feature, gains_list, thresholds_list,
    left_node_contribs_list, right_node_contribs_list) = (
        boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=array_ops.stack([
                math_ops.reduce_min(node_ids),
                math_ops.reduce_max(node_ids)
            ]),
            stats_summary_list=stats_summary_list,
            l1=tree_hparams.l1,
            l2=tree_hparams.l2,
            tree_complexity=tree_hparams.tree_complexity,
            max_splits=max_splits))
   grow_op = boosted_trees_ops.update_ensemble(
       # Confirm if local_tree_ensemble or tree_ensemble should be used.
       tree_ensemble.resource_handle,
       feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
       node_ids=node_ids_per_feature,
       gains=gains_list,
       thresholds=thresholds_list,
       left_node_contribs=left_node_contribs_list,
       right_node_contribs=right_node_contribs_list,
       learning_rate=tree_hparams.learning_rate,
       max_depth=tree_hparams.max_depth,
       pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
   return grow_op
 def testGradient4(self):
   s = [2, 3, 4, 2]
   x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
   with self.test_session():
     t = ops.convert_to_tensor(x)
     su = math_ops.reduce_min(t)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, s, su, [1], x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
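
# Hedged sketch (assumes a TF 1.x-compatible runtime, as in these tests):
# the gradient of reduce_min flows only to the element(s) attaining the
# minimum, which is the behavior the Jacobian check above exercises.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant([3.0, 1.0, 2.0])
g = tf.gradients(tf.reduce_min(x), x)[0]
with tf.Session() as sess:
  print(sess.run(g))  # [0. 1. 0.] -- only the argmin receives gradient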
Example #4
def my_rnn(alphabetEnc, cell, inputs, initial_state=None, dtype=None,
        sequence_length=None, scope=None):

  if not isinstance(cell, rnn_cell.RNNCell):
    raise TypeError("cell must be an instance of RNNCell")
  if not isinstance(inputs, list):
    raise TypeError("inputs must be a list")
  if not inputs:
    raise ValueError("inputs must not be empty")

  outputs = []
  with vs.variable_scope(scope or "RNN"):
    fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]
    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(inputs[0])[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, dtype must be.")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)

    if sequence_length is not None:  # Prepare variables
      zero_output = array_ops.zeros(
          array_ops.pack([batch_size, cell.output_size]), inputs[0].dtype)
      zero_output.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value, cell.output_size]))

      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(inputs):
      if time > 0: vs.get_variable_scope().reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell([input_, alphabetEnc[time]], state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        (output, state) = _rnn_step(
            time, sequence_length, min_sequence_length, max_sequence_length,
            zero_output, state, call_cell)
      else:
        (output, state) = call_cell()

      outputs.append(output)

    return (outputs, state)
Example #5
  def initialize_graph(self, features, update_statistics=True):
    """Create any ops needed to provide input statistics.

    Should be called before statistics are requested.

    Args:
      features: A dictionary, the output of a `TimeSeriesInputFn` (with keys
          TrainEvalFeatures.TIMES and TrainEvalFeatures.VALUES).
      update_statistics: Whether `features` should be used to update adaptive
          statistics. Typically True for training and False for evaluation.
    Returns:
      An InputStatistics object composed of Variables, which will be updated
      based on mini-batches of data if requested.
    """
    if (TrainEvalFeatures.TIMES in features
        and TrainEvalFeatures.VALUES in features):
      times = features[TrainEvalFeatures.TIMES]
      values = features[TrainEvalFeatures.VALUES]
    else:
      # times and values may not be available, for example during prediction. We
      # still need to retrieve our variables so that they can be read from, even
      # if we're not going to update them.
      times = None
      values = None
    # Create/retrieve variables representing input statistics, initialized
    # without data to avoid deadlocking if variables are initialized before
    # queue runners are started.
    with variable_scope.variable_scope("input_statistics", use_resource=True):
      statistics = self._create_variable_statistics_object()
    with variable_scope.variable_scope(
        "input_statistics_auxiliary", use_resource=True):
      # Secondary statistics, necessary for the incremental computation of the
      # primary statistics (e.g. counts and sums for computing a mean
      # incrementally).
      auxiliary_variables = self._AdaptiveInputAuxiliaryStatistics(
          num_features=self._num_features, dtype=self._dtype)
    if update_statistics and times is not None and values is not None:
      # If we have times and values from mini-batch input, create update ops to
      # take the new data into account.
      assign_op = self._update_statistics_from_mini_batch(
          statistics, auxiliary_variables, times, values)
      with ops.control_dependencies([assign_op]):
        stat_variables = nest.pack_sequence_as(statistics, [
            array_ops.identity(tensor) for tensor in nest.flatten(statistics)
        ])
        # Since start time updates have a race condition, ensure that the
        # reported start time is at least as low as the lowest time in this
        # mini-batch. The start time should converge on the correct value
        # eventually even with the race condition, but for example state space
        # models have an assertion which could fail without this
        # post-processing.
        return stat_variables._replace(start_time=gen_math_ops.minimum(
            stat_variables.start_time, math_ops.reduce_min(times)))
    else:
      return statistics
 def testLargeFeed(self):
   server = self._cached_server
   with session.Session(server.target, config=self._useRPCConfig()) as sess:
     feed_val = np.empty([10000, 3000], dtype=np.float32)
     feed_val.fill(0.5)
     p = array_ops.placeholder(dtypes.float32, shape=[10000, 3000])
     min_t = math_ops.reduce_min(p)
     max_t = math_ops.reduce_max(p)
     min_val, max_val = sess.run([min_t, max_t], feed_dict={p: feed_val})
     self.assertEqual(0.5, min_val)
     self.assertEqual(0.5, max_val)
  def _ones_diag(self):
    """Returns the diagonal of this operator as all ones."""
    if self.shape.is_fully_defined():
      d_shape = self.batch_shape.concatenate(
          [min(self.domain_dimension.value, self.range_dimension.value)])
    else:
      d_shape = array_ops.concat(
          [self.batch_shape_tensor(),
           [math_ops.reduce_min(self.shape_tensor()[-2:])]], axis=0)

    return array_ops.ones(shape=d_shape, dtype=self.dtype)
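
# Hedged numpy analogue of the shape logic above: the diagonal of a batch of
# [..., M, N] operators has length min(M, N), which is exactly what
# reduce_min over the trailing two entries of the shape tensor computes.
import numpy as np

shape_tensor = np.array([2, 3, 5])              # batch of 2, 3x5 matrices
d_shape = list(shape_tensor[:-2]) + [np.min(shape_tensor[-2:])]
print(np.ones(d_shape).shape)                   # (2, 3)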
Example #8
 def refresh_shortlist():
   """Update the shortlist with the highest scores in id_to_score."""
   new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
   smallest_new_score = math_ops.reduce_min(new_scores)
   new_length = math_ops.reduce_sum(
       math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
   u1 = self.sl_ids.assign(
       math_ops.to_int64(array_ops.concat([[new_length], new_ids], 0)))
   u2 = self.sl_scores.assign(
       array_ops.concat([[smallest_new_score], new_scores], 0))
   self.last_ops = [u1, u2]
   return control_flow_ops.group(u1, u2)
Example #9
    def element_to_bucket_id(*args):
      """Return int64 id of the length bucket for this element."""
      seq_length = element_length_func(*args)

      boundaries = list(bucket_boundaries)
      buckets_min = [np.iinfo(np.int32).min] + boundaries
      buckets_max = boundaries + [np.iinfo(np.int32).max]
      conditions_c = math_ops.logical_and(
          math_ops.less_equal(buckets_min, seq_length),
          math_ops.less(seq_length, buckets_max))
      bucket_id = math_ops.reduce_min(array_ops.where(conditions_c))

      return bucket_id
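
# Hedged numpy illustration (values are hypothetical): the element's bucket
# is the single half-open interval [min, b0), [b0, b1), ..., [bN-1, max)
# containing its length; reduce_min(where(conditions)) extracts its index.
import numpy as np

boundaries = [3, 5]
buckets_min = [np.iinfo(np.int32).min] + boundaries
buckets_max = boundaries + [np.iinfo(np.int32).max]
seq_length = 4
conditions = np.logical_and(np.less_equal(buckets_min, seq_length),
                            np.less(seq_length, buckets_max))
print(np.min(np.where(conditions)))  # 1, since 3 <= 4 < 5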
 def func_body(iteration, gt_cluster_score):
   """Per each cluster, compute the average travel distance."""
   mask = math_ops.equal(labels, unique_class_ids[iteration])
   this_cluster_ids = array_ops.where(mask)
   pairwise_distances_subset = array_ops.transpose(
       array_ops.gather(
           array_ops.transpose(
               array_ops.gather(pairwise_distances, this_cluster_ids)),
           this_cluster_ids))
   this_cluster_score = -1.0 * math_ops.reduce_min(
       math_ops.reduce_sum(
           pairwise_distances_subset, axis=0))
   return iteration + 1, gt_cluster_score + this_cluster_score
def compute_facility_energy(pairwise_distances, centroid_ids):
  """Compute the average travel distance to the assigned centroid.

  Args:
    pairwise_distances: 2-D Tensor of pairwise distances.
    centroid_ids: 1-D Tensor of indices.

  Returns:
    facility_energy: dtypes.float32 scalar.
  """
  return -1.0 * math_ops.reduce_sum(
      math_ops.reduce_min(
          array_ops.gather(pairwise_distances, centroid_ids), axis=0))
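
# Hedged numpy sketch of the same computation: each point's distance to its
# nearest chosen centroid, summed over points and negated.
import numpy as np

pairwise = np.array([[0., 2., 5.],
                     [2., 0., 4.],
                     [5., 4., 0.]])
centroid_ids = [0]
print(-np.sum(np.min(pairwise[centroid_ids], axis=0)))  # -(0 + 2 + 5) = -7.0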
 def _compare(self, x, reduction_axes, keep_dims, use_gpu=False):
   np_ans = x
   if reduction_axes is None:
     np_ans = np.amin(np_ans, keepdims=keep_dims)
   else:
     for ra in reduction_axes[::-1]:
       np_ans = np.amin(np_ans, axis=ra, keepdims=keep_dims)
   with self.test_session(use_gpu=use_gpu):
     if reduction_axes is not None:
       reduction_axes = np.array(reduction_axes).astype(np.int32)
     tf_ans = math_ops.reduce_min(x, reduction_axes, keep_dims)
     out = tf_ans.eval()
   self.assertAllClose(np_ans, out)
   self.assertShapeEqual(np_ans, tf_ans)
Example #13
  def grow_tree(self, stats_summaries_list, feature_ids_list,
                last_layer_nodes_range):
    # For the not-in-memory situation, we need to accumulate enough batches
    # first before proceeding with building a tree layer.
    max_splits = _get_max_splits(self._tree_hparams)

    # Prepare accumulators.
    accumulators = []
    dependencies = []
    for i, feature_ids in enumerate(feature_ids_list):
      stats_summaries = stats_summaries_list[i]
      accumulator = data_flow_ops.ConditionalAccumulator(
          dtype=dtypes.float32,
          # The stats consist of grads and hessians (the last dimension).
          shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2],
          shared_name='numeric_stats_summary_accumulator_' + str(i))
      accumulators.append(accumulator)

      apply_grad = accumulator.apply_grad(
          array_ops.stack(stats_summaries, axis=0), self._stamp_token)
      dependencies.append(apply_grad)

    # Grow the tree if enough batches are accumulated.
    with ops.control_dependencies(dependencies):
      if not self._is_chief:
        return control_flow_ops.no_op()

      min_accumulated = math_ops.reduce_min(
          array_ops.stack([acc.num_accumulated() for acc in accumulators]))

      def grow_tree_from_accumulated_summaries_fn():
        """Updates tree with the best layer from accumulated summaries."""
        # Take out the accumulated summaries from the accumulator and grow.
        stats_summaries_list = [
            array_ops.unstack(accumulator.take_grad(1), axis=0)
            for accumulator in accumulators
        ]
        grow_op = self._grow_tree_from_stats_summaries(
            stats_summaries_list, feature_ids_list, last_layer_nodes_range)
        return grow_op

      grow_model = control_flow_ops.cond(
          math_ops.greater_equal(min_accumulated, self._n_batches_per_layer),
          grow_tree_from_accumulated_summaries_fn,
          control_flow_ops.no_op,
          name='wait_until_n_batches_accumulated')
      return grow_model
 def _assert_non_singular(self):
   """Private default implementation of _assert_non_singular."""
   logging.warn(
       "Using (possibly slow) default implementation of assert_non_singular."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   if self._can_use_cholesky():
     return self.assert_positive_definite()
   else:
     singular_values = linalg_ops.svd(self.to_dense(), compute_uv=False)
     # TODO(langmore) Add .eig and .cond as methods.
     cond = (math_ops.reduce_max(singular_values, axis=-1) /
             math_ops.reduce_min(singular_values, axis=-1))
     return check_ops.assert_less(
         cond,
         self._max_condition_number_to_be_non_singular(),
         message="Singular matrix up to precision epsilon.")
Example #15
        def grow_not_in_mem():
          """Accumulates the data and grows a layer when ready."""

          accumulators = []
          dependencies = []
          for i, feature_ids in enumerate(feature_ids_list):
            stats_summaries = stats_summaries_list[i]
            accumulator = data_flow_ops.ConditionalAccumulator(
                dtype=dtypes.float32,
                # The stats consist of grads and hessians (the last dimension).
                shape=[len(feature_ids), max_splits, bucket_size_list[i], 2],
                shared_name='numeric_stats_summary_accumulator_' + str(i))
            accumulators.append(accumulator)

            apply_grad = accumulator.apply_grad(
                array_ops.stack(stats_summaries, axis=0), stamp_token)
            dependencies.append(apply_grad)

          def grow_tree_from_accumulated_summaries_fn():
            """Updates tree with the best layer from accumulated summaries."""
            # Take out the accumulated summaries from the accumulator and grow.
            stats_summaries_list = [
                array_ops.unstack(accumulator.take_grad(1), axis=0)
                for accumulator in accumulators
            ]

            grow_op = grow_tree_from_stats_summaries(stats_summaries_list,
                                                     feature_ids_list)
            return grow_op

          with ops.control_dependencies(dependencies):
            if config.is_chief:
              min_accumulated = math_ops.reduce_min(
                  array_ops.stack(
                      [acc.num_accumulated() for acc in accumulators]))

              grow_model = control_flow_ops.cond(
                  math_ops.greater_equal(min_accumulated, n_batches_per_layer),
                  grow_tree_from_accumulated_summaries_fn,
                  control_flow_ops.no_op,
                  name='wait_until_n_batches_accumulated')

              return grow_model
            else:
              return control_flow_ops.no_op()
Example #16
def masked_maximum(data, mask, dim=1):
    """Computes the axis wise maximum over chosen elements.

  Args:
    data: 2-D float `Tensor` of size [n, m].
    mask: 2-D Boolean `Tensor` of size [n, m].
    dim: The dimension over which to compute the maximum.

  Returns:
    masked_maximums: N-D `Tensor`.
      The maximized dimension is of size 1 after the operation.
  """
    axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
    masked_maximums = math_ops.reduce_max(
        math_ops.multiply(data - axis_minimums,
                          mask), dim, keepdims=True) + axis_minimums
    return masked_maximums
def masked_minimum(data, mask, dim=1):
  """Computes the axis wise minimum over chosen elements.

  Args:
    data: 2-D float `Tensor` of size [n, m].
    mask: 2-D Boolean `Tensor` of size [n, m].
    dim: The dimension over which to compute the minimum.

  Returns:
    masked_minimums: N-D `Tensor`.
      The minimized dimension is of size 1 after the operation.
  """
  axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
  masked_minimums = math_ops.reduce_min(
      math_ops.multiply(data - axis_maximums, mask), dim,
      keepdims=True) + axis_maximums
  return masked_minimums
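
# Hedged numpy illustration of the masked min/max trick used by both helpers
# above: shifting by the row max (resp. min) before multiplying by the mask
# guarantees masked-out entries can never win the reduction.
import numpy as np

data = np.array([[1., 5., 3.]])
mask = np.array([[1., 0., 1.]])        # exclude the 5.0
axis_max = data.max(axis=1, keepdims=True)
masked_min = ((data - axis_max) * mask).min(axis=1, keepdims=True) + axis_max
print(masked_min)                      # [[1.]] -- min over unmasked entries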
Example #18
 def _assert_non_singular(self):
   """Private default implementation of _assert_non_singular."""
   logging.warn(
       "Using (possibly slow) default implementation of assert_non_singular."
       "  Requires conversion to a dense matrix and O(N^3) operations.")
   if self._can_use_cholesky():
     return self.assert_positive_definite()
   else:
     singular_values = linalg_ops.svd(
         self._get_cached_dense_matrix(), compute_uv=False)
     # TODO(langmore) Add .eig and .cond as methods.
     cond = (math_ops.reduce_max(singular_values, axis=-1) /
             math_ops.reduce_min(singular_values, axis=-1))
     return check_ops.assert_less(
         cond,
         self._max_condition_number_to_be_non_singular(),
         message="Singular matrix up to precision epsilon.")
Example #20
    def test_cond(self):
        with self.test_session(graph=ops.Graph()) as sess:
            # svd does not work with zero dimensional matrices, so we'll
            # skip
            if 0 in shapes_info.shape[-2:]:
                return

            # ROCm platform does not yet support complex types
            if test.is_built_with_rocm() and \
               ((dtype == dtypes.complex64) or (dtype == dtypes.complex128)):
                return

            sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
            # Ensure self-adjoint and PD so we get finite condition numbers.
            operator, mat = self.operator_and_matrix(
                shapes_info,
                dtype,
                use_placeholder=use_placeholder,
                ensure_self_adjoint_and_pd=True)
            # Eigenvalues are real, so we'll cast these to float64 and sort
            # for comparison.
            op_cond = operator.cond()
            s = math_ops.abs(linalg_ops.svd(mat, compute_uv=False))
            mat_cond = math_ops.reduce_max(s, axis=-1) / math_ops.reduce_min(
                s, axis=-1)
            op_cond_v, mat_cond_v = sess.run([op_cond, mat_cond])

            atol_override = {
                dtypes.float16: 1e-2,
                dtypes.float32: 1e-3,
                dtypes.float64: 1e-6,
                dtypes.complex64: 1e-3,
                dtypes.complex128: 1e-6,
            }
            rtol_override = {
                dtypes.float16: 1e-2,
                dtypes.float32: 1e-3,
                dtypes.float64: 1e-4,
                dtypes.complex64: 1e-3,
                dtypes.complex128: 1e-6,
            }
            atol = atol_override[dtype]
            rtol = rtol_override[dtype]
            self.assertAllClose(op_cond_v, mat_cond_v, atol=atol, rtol=rtol)
def _minimum_mean(samples, envelope, low, name=None):
  """Returns a stochastic lower bound on the mean of a scalar distribution.

  The idea is that if the true CDF is within an `eps`-envelope of the
  empirical CDF of the samples, and the support is bounded below, then
  the mean is bounded below as well.  In symbols,

  ```none
  sup_x(|F_n(x) - F(x)|) < eps
  ```

  The 0th dimension of `samples` is interpreted as independent and
  identically distributed samples.  The remaining dimensions are
  broadcast together with `envelope` and `low`, and operated on
  separately.

  Args:
    samples: Floating-point tensor of samples from the distribution(s)
      of interest.  Entries are assumed IID across the 0th dimension.
      The other dimensions must broadcast with `envelope` and `low`.
    envelope: Floating-point tensor of sizes of admissible CDF
      envelopes (i.e., the `eps` above).
    low: Floating-point tensor of lower bounds on the distributions'
      supports.
    name: A name for this operation (optional).

  Returns:
    bound: Floating-point tensor of lower bounds on the true means.

  Raises:
    InvalidArgumentError: If some `sample` is found to be smaller than
      the corresponding `low`.
  """
  with ops.name_scope(name, "minimum_mean", [samples, envelope, low]):
    samples = ops.convert_to_tensor(samples, name="samples")
    envelope = ops.convert_to_tensor(envelope, name="envelope")
    low = ops.convert_to_tensor(low, name="low")

    xmin = math_ops.reduce_min(samples, axis=[-1])
    msg = "Given sample minimum value falls below expectations"
    check_op = check_ops.assert_greater_equal(xmin, low, message=msg)
    with ops.control_dependencies([check_op]):
      return - _do_maximum_mean(-samples, envelope, -low)
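
# Crude numpy illustration of the envelope idea only (not the actual
# _do_maximum_mean computation, which is not shown here): an adversary within
# an eps CDF-envelope can move at most eps of probability mass from the
# largest samples down to `low`, which lower-bounds the achievable mean.
import numpy as np

samples = np.sort(np.array([0.5, 1.0, 2.0, 4.0]))
eps, low = 0.25, 0.0
k = int(np.ceil(eps * len(samples)))
pessimistic = np.concatenate([samples[:-k], np.full(k, low)])
print(pessimistic.mean(), "<=", samples.mean())  # 0.875 <= 1.875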
Example #24
    def histogram(self, x, value_range=None, nbins=None, name=None):
        """Return histogram of values.

    Given the tensor `values`, this operation returns a rank 1 histogram
    counting the number of entries in `values` that fell into every bin. The
    bins are equal width and determined by the arguments `value_range` and
    `nbins`.

    Args:
      x: 1D numeric `Tensor` of items to count.
      value_range:  Shape [2] `Tensor`. `new_values <= value_range[0]` will be
        mapped to `hist[0]`, `values >= value_range[1]` will be mapped to
        `hist[-1]`. Must be same dtype as `x`.
      nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
      name: Python `str` name prefixed to Ops created by this class.

    Returns:
      counts: 1D `Tensor` of counts, i.e.,
        `counts[i] = sum{ edges[i-1] <= values[j] < edges[i] : j }`.
      edges: 1D `Tensor` characterizing intervals used for counting.
    """
        with ops.name_scope(name, "histogram", [x]):
            x = ops.convert_to_tensor(x, name="x")
            if value_range is None:
                value_range = [
                    math_ops.reduce_min(x), 1 + math_ops.reduce_max(x)
                ]
            value_range = ops.convert_to_tensor(value_range,
                                                name="value_range")
            lo = value_range[0]
            hi = value_range[1]
            if nbins is None:
                nbins = math_ops.to_int32(hi - lo)
            delta = (hi - lo) / math_ops.cast(
                nbins, dtype=value_range.dtype.base_dtype)
            edges = math_ops.range(start=lo,
                                   limit=hi,
                                   delta=delta,
                                   dtype=x.dtype.base_dtype)
            counts = histogram_ops.histogram_fixed_width(
                x, value_range=value_range, nbins=nbins)
            return counts, edges
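
# Hedged usage sketch (assumes a TF 1.x-compatible runtime): the counting op
# underlying the helper above clamps out-of-range values into the edge bins.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant([0.0, 1.0, 1.5, 2.0, 9.0])
counts = tf.histogram_fixed_width(x, value_range=[0.0, 3.0], nbins=3)
with tf.Session() as sess:
  print(sess.run(counts))  # [1 2 2] -- the 9.0 is clamped into the last bin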
Example #25
def _rolling_window_indices(size, rw_size, num_valid_entries):
    """Returns the rolling windows indices and mask for valid ones.

  When size = 3, rw_size = 2, returns [[0, 1], [1, 2], [2, 0]]. When size = 2,
  rw_size = 3, returns [[0, 1, 0], [1, 0, 1]].

  When num_valid_entries = 2, the first returned tensor is [[0, 1], [1, 0],
  [0, 1]], and only the first 2 rows are valid, with mask [True, True, False].

  Args:
    size: A scalar int `Tensor` for the size.
    rw_size: A scalar int `Tensor` for the rw_size.
    num_valid_entries: A 1-D `Tensor` with shape [batch_size] representing the
      number of valid entries for each instance in a batch.

  Returns:
    A tuple of Tensors (batch_rw_indices, batch_indices_mask). The first has
    shape [batch_size, size, rw_size] and the second has shape [batch_size,
    size].
  """
    with ops.name_scope(None, 'rolling_window_indices',
                        (size, rw_size, num_valid_entries)):
        # shape = [size, rw_size] with value [[0, 1, ...], [1, 2, ...], ...].
        rw_indices = array_ops.expand_dims(math_ops.range(rw_size),
                                           0) + array_ops.expand_dims(
                                               math_ops.range(size), 1)
        # shape = [batch_size, size, rw_size]. Make batch_size copies.
        batch_rw_indices = array_ops.gather(
            array_ops.expand_dims(rw_indices, 0),
            array_ops.zeros_like(num_valid_entries),
            axis=0)
        # Mark the first n indices as valid where n = num_valid_entries.
        batch_indices_mask = math_ops.less(
            math_ops.reduce_min(batch_rw_indices, axis=2),
            array_ops.reshape(num_valid_entries, [-1, 1]))
        # Mod the indices to the range of num_valid_entries.
        num_valid_entries = array_ops.where(
            math_ops.less(num_valid_entries, 1),
            array_ops.ones_like(num_valid_entries), num_valid_entries)
        batch_rw_indices = math_ops.mod(
            batch_rw_indices, array_ops.reshape(num_valid_entries, [-1, 1, 1]))
        return batch_rw_indices, batch_indices_mask
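
# Hedged numpy sketch of the rolling-window index construction (hypothetical
# sizes), matching the first docstring example when all entries are valid:
import numpy as np

size, rw_size = 3, 2
rw_indices = np.arange(rw_size)[None, :] + np.arange(size)[:, None]
print(np.mod(rw_indices, size))  # [[0 1] [1 2] [2 0]]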
Example #26
def _MatrixSetDiagGrad(op, grad):
  input_shape = op.inputs[0].get_shape().merge_with(grad.get_shape())
  diag_shape = op.inputs[1].get_shape()
  batch_shape = input_shape[:-2].merge_with(diag_shape[:-1])
  matrix_shape = input_shape[-2:]
  if batch_shape.is_fully_defined() and matrix_shape.is_fully_defined():
    diag_shape = batch_shape.as_list() + [min(matrix_shape.as_list())]
  else:
    with ops.colocate_with(grad):
      grad_shape = array_ops.shape(grad)
      grad_rank = array_ops.rank(grad)
      batch_shape = array_ops.slice(grad_shape, [0], [grad_rank - 2])
      matrix_shape = array_ops.slice(grad_shape, [grad_rank - 2], [2])
      min_dim = math_ops.reduce_min(matrix_shape)
      diag_shape = array_ops.concat([batch_shape, [min_dim]], 0)
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(
          diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
Example #27
def normalize_for_graph_lstm(tensor):
    """Normalizes Tensor to range [-0.5, 0.5].

    Scales a Tensor uniformly to fit within [-0.5, 0.5]^n. Additionally,
      each dimension is shifted to be centred around [0]^n i.e. the origin,
      in a way that data extends the same distance in positive and negative
      direction. In other words, the mean between maximum and minimum value
      of each dimension is shifted to zero. The undo_scaling op undoes
      scaling, but does not undo shifting. The unnormalize op does both,
      but is currently unused.

    Returns: The normalized Tensor, and an op to undo normalization.

    Example usage:
    ```
    normalized_tensor, undo_scaling = normalize_for_graph_lstm(input_tensor)
    normalized_output_tensor = some_op(normalized_tensor)
    output_tensor = undo_scaling(normalized_output_tensor)
    ```
    """
    # tensor is normalized to range[-0.5, 0.5]
    # this function assumes tensors with shape [ batch_size, number_of_nodes, output_size ]
    assert (len(tensor.shape) == 3)
    # compute maximum and minimum joint position value in each dimension
    max_dim = math_ops.reduce_max(tensor, axis=1, keepdims=True)
    min_dim = math_ops.reduce_min(tensor, axis=1, keepdims=True)
    diff_dim = math_ops.subtract(max_dim, min_dim)
    # get normalizing factor as maximum difference within all dimensions
    max_diff = math_ops.reduce_max(diff_dim, axis=2, keepdims=True)
    normalized_tensor = math_ops.divide(tensor - min_dim - diff_dim / 2,
                                        max_diff)

    # return output rescaled and shifted to original position
    def unnormalize(tensor):
        return math_ops.multiply(tensor, max_diff) + diff_dim / 2 + min_dim

    # return output only rescaled, centered around 0
    def undo_scaling(tensor):
        return math_ops.multiply(tensor, max_diff)

    return normalized_tensor, undo_scaling
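
# Hedged numpy check of the normalization math (hypothetical input): shift
# each dimension to its min/max midpoint, scale uniformly by the largest
# per-dimension range, landing everything in [-0.5, 0.5].
import numpy as np

t = np.array([[[0.0, 10.0], [4.0, 30.0]]])   # [batch, nodes, dims]
max_d = t.max(axis=1, keepdims=True)
min_d = t.min(axis=1, keepdims=True)
diff = max_d - min_d
max_diff = diff.max(axis=2, keepdims=True)   # 20.0
normalized = (t - min_d - diff / 2) / max_diff
print(normalized.min(), normalized.max())    # -0.5 0.5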
Example #28
  def histogram(self, x, value_range=None, nbins=None, name=None):
    """Return histogram of values.

    Given the tensor `values`, this operation returns a rank 1 histogram
    counting the number of entries in `values` that fell into every bin. The
    bins are equal width and determined by the arguments `value_range` and
    `nbins`.

    Args:
      x: 1D numeric `Tensor` of items to count.
      value_range:  Shape [2] `Tensor`. `new_values <= value_range[0]` will be
        mapped to `hist[0]`, `values >= value_range[1]` will be mapped to
        `hist[-1]`. Must be same dtype as `x`.
      nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
      name: Python `str` name prefixed to Ops created by this class.

    Returns:
      counts: 1D `Tensor` of counts, i.e.,
        `counts[i] = sum{ edges[i-1] <= values[j] < edges[i] : j }`.
      edges: 1D `Tensor` characterizing intervals used for counting.
    """
    with ops.name_scope(name, "histogram", [x]):
      x = ops.convert_to_tensor(x, name="x")
      if value_range is None:
        value_range = [math_ops.reduce_min(x), 1 + math_ops.reduce_max(x)]
      value_range = ops.convert_to_tensor(value_range, name="value_range")
      lo = value_range[0]
      hi = value_range[1]
      if nbins is None:
        nbins = math_ops.cast(hi - lo, dtypes.int32)
      delta = (hi - lo) / math_ops.cast(
          nbins, dtype=value_range.dtype.base_dtype)
      edges = math_ops.range(
          start=lo, limit=hi, delta=delta, dtype=x.dtype.base_dtype)
      counts = histogram_ops.histogram_fixed_width(
          x, value_range=value_range, nbins=nbins)
      return counts, edges
Example #29
def _prepare_and_validate_params(labels, predictions, weights=None, topn=None):
    """Prepares and validates the parameters.

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
        relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.

    Returns:
      (labels, predictions, weights, topn) ready to be used for metric
      calculation.
    """
    labels = ops.convert_to_tensor(labels)
    predictions = ops.convert_to_tensor(predictions)
    weights = 1.0 if weights is None else ops.convert_to_tensor(weights)
    example_weights = array_ops.ones_like(labels) * weights
    predictions.get_shape().assert_is_compatible_with(example_weights.get_shape())
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions.get_shape().assert_has_rank(2)
    if topn is None:
        topn = array_ops.shape(predictions)[1]

    # All labels should be >= 0. Invalid entries are reset.
    is_label_valid = utils.is_label_valid(labels)
    labels = array_ops.where(
        is_label_valid,
        labels,
        array_ops.zeros_like(labels))
    predictions = array_ops.where(
        is_label_valid, predictions,
        -1e-6 * array_ops.ones_like(predictions) + math_ops.reduce_min(
            predictions, axis=1, keepdims=True))
    return labels, predictions, example_weights, topn
Example #30
def hardest_negative_dist(labels, embeddings, margin=1.2, squared=False):
    """Build the triplet loss over a batch of embeddings.
    For each anchor, we get the hardest positive and hardest negative to form a triplet.
    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: tensor of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                 If false, output is the pairwise euclidean distance matrix.
    Returns:
        triplet_loss: scalar tensor containing the triplet loss
    """
    # Get the pairwise distance matrix
    pairwise_dist = _pairwise_distances(embeddings, squared=squared)

    mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
    mask_anchor_negative = tf.to_float(mask_anchor_negative)

    maximum = math_ops.reduce_max(pairwise_dist, keepdims=True)
    result = math_ops.reduce_min(math_ops.multiply(pairwise_dist - maximum,
                                                   mask_anchor_negative),
                                 keepdims=True) + maximum

    return result
Example #31
    def do_filter(self, estimated_state, estimated_state_covariance,
                  predicted_observation, predicted_observation_covariance,
                  observation, observation_model, observation_noise):
        """Convenience function for scoring predictions.

    Scores a prediction against an observation, and computes the updated
    posterior over states.

    Shapes given below for arguments are for single-model Kalman filtering
    (e.g. KalmanFilter). For ensembles, prior_state and prior_state_var are
    same-length tuples of values corresponding to each model.

    Args:
      estimated_state: A prior mean over states [batch size x state dimension]
      estimated_state_covariance: Covariance of state prior [batch size x D x
          D], with D depending on the Kalman filter implementation (typically
          the state dimension).
      predicted_observation: A prediction for the observed value, such as that
          returned by observed_from_state. A [batch size x num features] Tensor.
      predicted_observation_covariance: A covariance matrix corresponding to
          `predicted_observation`, a [batch size x num features x num features]
          Tensor.
      observation: The observed value corresponding to the predictions
          given [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      observation_noise: A [batch size x observation dimension x observation
          dimension] Tensor or [observation dimension x observation dimension]
          Tensor with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      posterior_state, posterior_state_var: Posterior mean and
          covariance, updated versions of prior_state and
          prior_state_var.
      log_prediction_prob: Log probability of the observations under
          the priors, suitable for optimization (should be maximized).

    """
        symmetrized_observation_covariance = 0.5 * (
            predicted_observation_covariance +
            array_ops.matrix_transpose(predicted_observation_covariance))
        instability_message = (
            "This may occur due to numerically unstable filtering when there is "
            "a large difference in posterior variances, or when inferences are "
            "near-deterministic. Considering tuning the "
            "'filtering_maximum_posterior_variance_ratio' or "
            "'filtering_minimum_posterior_variance' parameters in your "
            "StateSpaceModelConfiguration, or tuning the transition matrix.")
        symmetrized_observation_covariance = numerics.verify_tensor_all_finite(
            symmetrized_observation_covariance,
            "Predicted observation covariance was not finite. {}".format(
                instability_message))
        diag = array_ops.matrix_diag_part(symmetrized_observation_covariance)
        min_diag = math_ops.reduce_min(diag)
        non_negative_assert = control_flow_ops.Assert(
            min_diag >= 0.,
            [("The predicted observation covariance "
              "has a negative diagonal entry. {}").format(instability_message),
             min_diag])
        with ops.control_dependencies([non_negative_assert]):
            observation_covariance_cholesky = linalg_ops.cholesky(
                symmetrized_observation_covariance)
        log_prediction_prob = math_utils.mvn_tril_log_prob(
            loc=predicted_observation,
            scale_tril=observation_covariance_cholesky,
            x=observation)
        (posterior_state,
         posterior_state_var) = self.posterior_from_prior_state(
             prior_state=estimated_state,
             prior_state_var=estimated_state_covariance,
             observation=observation,
             observation_model=observation_model,
             predicted_observations=(predicted_observation,
                                     predicted_observation_covariance),
             observation_noise=observation_noise)
        return (posterior_state, posterior_state_var, log_prediction_prob)
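
# Hedged numpy note on the symmetrization step above: averaging a covariance
# with its transpose removes small numerical asymmetries before Cholesky.
import numpy as np

p = np.array([[2.0, 1.0000001], [0.9999999, 3.0]])
sym = 0.5 * (p + p.T)
print(np.allclose(sym, sym.T), np.linalg.cholesky(sym) is not None)  # True True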
Example #32
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      vars_collection=None,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False,
                      symmetric=False):
  """Adds a layer that collects quantization ranges as last input ranges.

  LastValueQuantize creates variables called 'min' and 'max', representing the
  interval used for quantization and clamping.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    symmetric: If true, use symmetric quantization limits instead of training
      the minimum and maximum of each quantization range separately.
  Returns:
    a tensor containing quantized values.
  """
  with variable_scope.variable_scope(
      None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
    scope.set_partitioner(None)
    input_shape = inputs.get_shape()
    input_dim = len(input_shape)
    if per_channel:
      # Only support quantizing 1-, 2- and 4-dimensional tensors.
      assert input_dim in [1, 2, 4], ('Expected 1D, 2D or 4D input, was: %s in '
                                      ' scope: %s' % (input_shape, name_prefix))
      min_max_shape = [input_shape[-1]]
    else:
      min_max_shape = []

    vars_collections = [vars_collection] if vars_collection else []
    min_var = _ModelVariable(
        'min',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(init_min),
        collections=vars_collections,
        trainable=False)
    max_var = _ModelVariable(
        'max',
        shape=min_max_shape,
        initializer=init_ops.constant_initializer(init_max),
        collections=vars_collections,
        trainable=False)
    if not is_training:
      return _FakeQuantWithMinMaxVars(
          inputs,
          min_var,
          max_var,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range)

    if per_channel:
      if input_dim == 2:
        reduce_dims = [0]
      elif input_dim == 4:
        reduce_dims = [0, 1, 2]

    if per_channel:
      if input_dim >= 2:
        batch_min = math_ops.reduce_min(
            inputs, reduction_indices=reduce_dims, name='BatchMin')
      else:
        batch_min = inputs
    else:
      batch_min = math_ops.reduce_min(inputs, name='BatchMin')

    if per_channel:
      if input_dim >= 2:
        batch_max = math_ops.reduce_max(
            inputs, reduction_indices=reduce_dims, name='BatchMax')
      else:
        batch_max = inputs
    else:
      batch_max = math_ops.reduce_max(inputs, name='BatchMax')

    if symmetric:
      if narrow_range:
        min_max_ratio = -1
      else:
        # In two's complement notation, the negative range is slightly larger
        # than the positive range.
        min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)

      # TFLite requires that 0.0 is always in the [min; max] range. Because
      # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
      range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio)
      range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio)
    else:
      # TFLite requires that 0.0 is always in the [min; max] range.
      range_min = math_ops.minimum(batch_min, 0.0)
      range_max = math_ops.maximum(batch_max, 0.0)

    assign_min = state_ops.assign(min_var, range_min, name='AssignMinLast')
    assign_max = state_ops.assign(max_var, range_max, name='AssignMaxLast')

    return _FakeQuantWithMinMaxVars(
        inputs,
        assign_min,
        assign_max,
        per_channel=per_channel,
        num_bits=num_bits,
        narrow_range=narrow_range)
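
# Hedged numeric check of the symmetric-range arithmetic above: for
# num_bits=8 and narrow_range=False the min/max ratio is -(2**8 - 2) / 2**8.
num_bits = 8
min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)   # -0.9921875
batch_min, batch_max = -1.0, 2.0
range_min = min(batch_min, batch_max / min_max_ratio)      # ~ -2.01575
range_max = max(batch_max, batch_min * min_max_ratio)      # 2.0
assert range_min <= 0.0 <= range_max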
def bucket_by_sequence_length(input_length,
                              tensors,
                              batch_size,
                              bucket_boundaries,
                              num_threads=1,
                              capacity=32,
                              shapes=None,
                              dynamic_pad=False,
                              allow_smaller_final_batch=False,
                              keep_input=None,
                              shared_name=None,
                              name=None):
    """Lazy bucketing of inputs according to their length.

  This method calls `tf.contrib.training.bucket` under the hood, after first
  subdividing the bucket boundaries into separate buckets and identifying which
  bucket the given `input_length` belongs to.  See the documentation for
  `which_bucket` for details of the other arguments.

  Args:
    input_length: `int32` scalar `Tensor`, the sequence length of tensors.
    tensors: The list or dictionary of tensors, representing a single element,
      to bucket.  Nested lists are not supported.
    batch_size: The new batch size pulled from the queue
      (python int or int32 scalar).
    bucket_boundaries: int list, increasing non-negative numbers.
      The edges of the buckets to use when bucketing tensors.  Two extra buckets
      are created, one for `input_length < bucket_boundaries[0]` and
      one for `input_length >= bucket_boundaries[-1]`.
    num_threads: An integer.  The number of threads enqueuing `tensors`.
    capacity: An integer. The maximum number of minibatches in the top queue,
      and also the maximum number of elements within each bucket.
    shapes: (Optional) The shapes for each example.  Defaults to the
      inferred shapes for `tensors`.
    dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
      The given dimensions are padded upon dequeue so that tensors within a
      batch have the same shapes.
    allow_smaller_final_batch: (Optional) Boolean. If `True`, allow the final
      batches to be smaller if there are insufficient items left in the queues.
    keep_input: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
      controls whether the input is added to the queue or not.  If it evaluates
      `True`, then `tensors` are added to the bucket; otherwise they are
      dropped.  This tensor essentially acts as a filtering mechanism.
      The default behavior is to assume `keep_input=True`.
    shared_name: (Optional). If set, the queues will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A tuple `(sequence_length, outputs)` where `sequence_length` is
    a 1-D `Tensor` of size `batch_size` and `outputs` is a list or dictionary
    of batched, bucketed, outputs corresponding to elements of `tensors`.

  Raises:
    TypeError: if `bucket_boundaries` is not a list of python integers.
    ValueError: if `bucket_boundaries` is empty or contains non-increasing
      values.
  """
    tensor_list = _as_tensor_list(tensors)
    if not isinstance(bucket_boundaries, (list, tuple)):
        raise TypeError(
            "bucket_boundaries must be a list or tuple, but received: %s" %
            bucket_boundaries)
    if not bucket_boundaries:
        raise ValueError("bucket_boundaries must not be empty")
    for (s, e) in zip(bucket_boundaries[:-1], bucket_boundaries[1:]):
        if not isinstance(s, int) or not isinstance(e, int):
            raise TypeError(
                "bucket boundaries must be integers, but saw: %s and %s" %
                (s, e))
        if s >= e:
            raise ValueError(
                "Buckets must contain sequential increasing lengths, but saw: "
                "%d before %d" % (s, e))

    with ops.name_scope(name, "bucket_by_sequence_length",
                        [input_length] + tensor_list) as name:
        input_length = ops.convert_to_tensor(input_length,
                                             dtype=dtypes.int32,
                                             name="input_length")
        # Bucketing conditions are:
        #   l < b[0]
        #   b[0] <= l < b[1]
        #   b[1] <= l < b[2]
        #   ...
        #   b[N-2] <= l < b[N-1]
        #   b[N-1] <= l
        # Equivalent to:
        #   [-inf, b[0], b[1], ..., b[N-1]] <= l < [b[0], b[1], ..., b[N-1], inf]
        buckets_min = [np.iinfo(np.int32).min] + list(bucket_boundaries)
        buckets_max = list(bucket_boundaries) + [np.iinfo(np.int32).max]
        conditions_c = math_ops.logical_and(
            math_ops.less_equal(buckets_min, input_length),
            math_ops.less(input_length, buckets_max))
        which_bucket = math_ops.reduce_min(array_ops.where(conditions_c))
        which_bucket = math_ops.to_int32(which_bucket)

        if shapes is not None:
            shapes = [tensor_shape.scalar()] + shapes

        _, dequeued = bucket(
            tensors=[input_length] + tensor_list,
            which_bucket=which_bucket,
            batch_size=batch_size,
            num_buckets=len(bucket_boundaries) + 1,
            num_threads=num_threads,
            capacity=capacity,
            shapes=shapes,
            dynamic_pad=dynamic_pad,
            allow_smaller_final_batch=allow_smaller_final_batch,
            keep_input=keep_input,
            shared_name=shared_name)

        return (dequeued[0], _as_original_type(tensors, dequeued[1:]))
Example #34
def _dynamic_rnn_loop(cell,
                      inputs,
                      initial_state,
                      parallel_iterations,
                      swap_memory,
                      sequence_length=None,
                      dtype=None):
    """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
      tuple of such elements.
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
    dtype: (optional) Expected dtype of output. If not specified, inferred from
      initial_state.

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
      objects, then this returns a (possibly nested) tuple of Tensors matching
      the corresponding shapes.
    final_state:
      A `Tensor`, or possibly nested tuple of Tensors, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
    state = initial_state
    assert isinstance(parallel_iterations,
                      int), "parallel_iterations must be int"

    state_size = cell.state_size

    flat_input = nest.flatten(inputs)
    flat_output_size = nest.flatten(cell.output_size)

    # Construct an initial output
    input_shape = array_ops.shape(flat_input[0])
    time_steps = input_shape[0]
    batch_size = input_shape[1]

    inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                             for input_ in flat_input)

    const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

    for shape in inputs_got_shape:
        if not shape[2:].is_fully_defined():
            raise ValueError(
                "Input size (depth of inputs) must be accessible via shape inference,"
                " but saw value None.")
        got_time_steps = shape[0].value
        got_batch_size = shape[1].value
        if const_time_steps != got_time_steps:
            raise ValueError(
                "Time steps is not the same for all the elements in the input in a "
                "batch.")
        if const_batch_size != got_batch_size:
            raise ValueError(
                "Batch_size is not the same for all the elements in the input."
            )

    # Prepare dynamic conditional copying of state & output
    def _create_zero_arrays(size):
        size = _state_size_with_prefix(size, prefix=[batch_size])
        return array_ops.zeros(array_ops.stack(size),
                               rnn._infer_state_dtype(dtype, state))

    flat_zero_output = tuple(
        _create_zero_arrays(output) for output in flat_output_size)
    zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                        flat_sequence=flat_zero_output)

    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn") as scope:
        base_name = scope

    def _create_ta(name, dtype):
        return tensor_array_ops.TensorArray(dtype=dtype,
                                            size=time_steps,
                                            tensor_array_name=base_name + name,
                                            clear_after_read=False)

    output_ta = tuple(
        _create_ta("output_%d" % i, rnn._infer_state_dtype(dtype, state))
        for i in range(len(flat_output_size)))
    input_ta = tuple(
        _create_ta("input_%d" % i, flat_input[0].dtype)
        for i in range(len(flat_input)))

    input_ta = tuple(
        ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input))

    def _time_step(time, output_ta_t, state):
        """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: List of `TensorArray`s that represent the output.
      state: nested tuple of vector tensors that represent the state.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow, new_state).
    """

        input_t = tuple(ta.read(time) for ta in input_ta)
        # Restore some shape information
        for input_, shape in zip(input_t, inputs_got_shape):
            input_.set_shape(shape[1:])

        input_t = nest.pack_sequence_as(structure=inputs,
                                        flat_sequence=input_t)
        call_cell = lambda: cell(input_t, state)

        def f1():
            return zero_output

        def f2():
            # Re-read the previous step's output (the output TensorArrays are
            # created with clear_after_read=False to make this read legal).
            return tuple(ta.read(time - 1) for ta in output_ta_t)

        cur_zero_output = control_flow_ops.cond(math_ops.less(time, 1), f1, f2)

        if sequence_length is not None:
            (output, new_state) = rnn._rnn_step(
                time=time,
                sequence_length=sequence_length,
                min_sequence_length=min_sequence_length,
                max_sequence_length=max_sequence_length,
                zero_output=cur_zero_output,  # TODO
                state=state,
                call_cell=call_cell,
                state_size=state_size,
                skip_conditionals=True)
        else:
            (output, new_state) = call_cell()

        # Pack state if using state tuples
        output = nest.flatten(output)

        output_ta_t = tuple(
            ta.write(time, out) for ta, out in zip(output_ta_t, output))

        return (time + 1, output_ta_t, new_state)

    _, output_final_ta, final_state = control_flow_ops.while_loop(
        cond=lambda time, *_: time < time_steps,
        body=_time_step,
        loop_vars=(time, output_ta, state),
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    # Unpack final output if not using output tuples.
    final_outputs = tuple(ta.stack() for ta in output_final_ta)

    # Restore some shape information
    for output, output_size in zip(final_outputs, flat_output_size):
        shape = _state_size_with_prefix(
            output_size, prefix=[const_time_steps, const_batch_size])
        output.set_shape(shape)

    final_outputs = nest.pack_sequence_as(structure=cell.output_size,
                                          flat_sequence=final_outputs)

    return (final_outputs, final_state)
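
# A self-contained sketch of the TensorArray + while_loop pattern the loop
# above is built on, written against the public TF 2.x API instead of the
# internal modules; `simple_rnn_loop` and `cell_fn` are illustrative names,
# not from the original source.
import tensorflow as tf

def simple_rnn_loop(inputs, state0, cell_fn):
  # inputs are time-major, [time, batch, depth], as in the loop above.
  time_steps = tf.shape(inputs)[0]
  input_ta = tf.TensorArray(inputs.dtype, size=time_steps).unstack(inputs)
  output_ta = tf.TensorArray(inputs.dtype, size=time_steps)

  def step(t, out_ta, state):
    output, new_state = cell_fn(input_ta.read(t), state)
    return t + 1, out_ta.write(t, output), new_state

  _, out_ta, final_state = tf.while_loop(
      cond=lambda t, *_: t < time_steps,
      body=step,
      loop_vars=(tf.constant(0), output_ta, state0))
  return out_ta.stack(), final_state  # outputs: [time, batch, depth]

# e.g. with a trivial "cell":
#   outs, s = simple_rnn_loop(tf.ones([5, 2, 3]), tf.zeros([2, 3]),
#                             lambda x, s: (tf.tanh(x + s), tf.tanh(x + s)))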
 def testEmptyGradients(self):
   with self.test_session():
     x = array_ops.zeros([0, 3])
     y = math_ops.reduce_min(x, [1])
     error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
     self.assertEqual(error, 0)
Beispiel #36
def rnn_mem(cell, inputs, question, state, m_input_size, m_size, initial_state=None, dtype=None,
		sequence_length=None, scope=None):

	if not isinstance(cell, rnn_cell.RNNCell):
		raise TypeError("cell must be an instance of RNNCell")
	if not isinstance(inputs, list):
		raise TypeError("inputs must be a list")
	if not inputs:
		raise ValueError("inputs must not be empty")

	outputs = []
	# Create a new scope in which the caching device is either
	# determined by the parent scope, or is set to place the cached
	# Variable using the same placement as for the rest of the RNN.
	with vs.variable_scope(scope or "RNN") as varscope:
		if varscope.caching_device is None:
			varscope.set_caching_device(lambda op: op.device)

		# Temporarily avoid EmbeddingWrapper and seq2seq badness
		# TODO(lukaszkaiser): remove EmbeddingWrapper
		if inputs[0].get_shape().ndims != 1:
			(fixed_batch_size, input_size) = inputs[0].get_shape().with_rank(2)
			if input_size.value is None:
				raise ValueError(
						"Input size (second dimension of inputs[0]) must be accessible via "
						"shape inference, but saw value None.")
		else:
			fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]

		if fixed_batch_size.value:
			batch_size = fixed_batch_size.value
		else:
			batch_size = array_ops.shape(inputs[0])[0]
		if initial_state is not None:
			state = initial_state
		else:
			if not dtype:
				raise ValueError("If no initial_state is provided, dtype must be.")
			state = cell.zero_state(batch_size, dtype)
		if sequence_length is not None:	# Prepare variables
			sequence_length = math_ops.to_int32(sequence_length)
			zero_output = array_ops.zeros(
					array_ops.pack([batch_size, cell.output_size]), inputs[0].dtype)
			zero_output.set_shape(
					tensor_shape.TensorShape([fixed_batch_size.value, cell.output_size]))
			min_sequence_length = math_ops.reduce_min(sequence_length)
			max_sequence_length = math_ops.reduce_max(sequence_length)

		for time, input_ in enumerate(inputs):
			if time > 0: vs.get_variable_scope().reuse_variables()
			# pylint: disable=cell-var-from-loop
			call_cell = lambda: cell(input_, question, state, m_input_size, m_size)
			# pylint: enable=cell-var-from-loop
			if sequence_length is not None:
				(output, state) = rnn._rnn_step(
						time, sequence_length, min_sequence_length, max_sequence_length,
						zero_output, state, call_cell)
			else:
				(output, state) = call_cell()

			outputs.append(output)

		return (outputs, state)
Beispiel #37
 def test(self):
   result_lt = ops.reduce_min(self.original_lt, {'channel'})
   golden_lt = core.LabeledTensor(
       math_ops.reduce_min(self.original_lt.tensor, 1),
       [self.a0, self.a2, self.a3])
   self.assertLabeledTensorsEqual(result_lt, golden_lt)
Beispiel #38
def rnn(cell, inputs, initial_state=None, dtype=None, sequence_length=None, scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

  The simplest form of RNN network generated is:
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)

  However, a few other options are available:

  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.

  The dynamic calculation performed is, at time t for batch row b,
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))

  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, input_size].
    initial_state: (optional) An initial state for the RNN.
      If `cell.state_size` is an integer, this must be
      a tensor of appropriate type and shape `[batch_size x cell.state_size]`.
      If `cell.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    dtype: (optional) The data type for the initial state.  Required if
      initial_state is not provided.
    sequence_length: Specifies the length of each sequence in inputs.
      An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
    scope: VariableScope for the created subgraph; defaults to "RNN".

  Returns:
    A pair (outputs, state) where:
      - outputs is a length T list of outputs (one for each input)
      - state is the final state

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the input depth
      (column size) cannot be inferred from inputs via shape inference.
  """

    if not isinstance(cell, rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not isinstance(inputs, list):
        raise TypeError("inputs must be a list")
    if not inputs:
        raise ValueError("inputs must not be empty")

    outputs = []
    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "RNN") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        # Temporarily avoid EmbeddingWrapper and seq2seq badness
        # TODO(lukaszkaiser): remove EmbeddingWrapper
        if inputs[0].get_shape().ndims != 1:
            input_shape = inputs[0].get_shape().with_rank_at_least(2)
            input_shape[1:].assert_is_fully_defined()
            (fixed_batch_size, input_size) = input_shape[0], input_shape[1:]
            if input_size[0].value is None:
                raise ValueError(
                    "Input size (second dimension of inputs[0]) must be accessible via "
                    "shape inference, but saw value None."
                )
        else:
            fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]

        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(inputs[0])[0]
        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError("If no initial_state is provided, " "dtype must be specified")
            state = cell.zero_state(batch_size, dtype)

        if sequence_length is not None:  # Prepare variables
            sequence_length = math_ops.to_int32(sequence_length)
            # convert int to TensorShape if necessary
            output_size = _state_size_with_prefix(cell.output_size, prefix=[batch_size])
            zero_output = array_ops.zeros(array_ops.pack(output_size), inputs[0].dtype)
            zero_output_shape = _state_size_with_prefix(cell.output_size, prefix=[fixed_batch_size.value])
            zero_output.set_shape(tensor_shape.TensorShape(zero_output_shape))
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(inputs):
            if time > 0:
                vs.get_variable_scope().reuse_variables()
            # pylint: disable=cell-var-from-loop
            call_cell = lambda: cell(input_, state)
            # pylint: enable=cell-var-from-loop
            if sequence_length is not None:
                (output, state) = _rnn_step(
                    time=time,
                    sequence_length=sequence_length,
                    min_sequence_length=min_sequence_length,
                    max_sequence_length=max_sequence_length,
                    zero_output=zero_output,
                    state=state,
                    call_cell=call_cell,
                    state_size=cell.state_size,
                )
            else:
                (output, state) = call_cell()

            outputs.append(output)

        return (outputs, state)
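
# The docstring's "dynamic calculation" above, isolated: once t reaches
# sequence_length(b), row b's output is zeroed and its state stops updating.
# A minimal sketch with public TF 2.x ops; all names are illustrative.
import tensorflow as tf

def masked_step(t, seq_len, new_output, prev_state, new_state):
  done = t >= seq_len  # [batch] bool; True for rows already past their length
  output = tf.where(done[:, None], tf.zeros_like(new_output), new_output)
  state = tf.where(done[:, None], prev_state, new_state)
  return output, state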
Beispiel #40
    def all_reduce_indexed_slices(
        self,
        input_slices: indexed_slices.IndexedSlices,
        options: Optional[collective_util.Options] = None
    ) -> indexed_slices.IndexedSlices:
        """All-reduce an IndexedSlices.

    This method must be called inside a tf.function.

    Args:
      input_slices: an IndexedSlices.
      options: an optional tf.distribute.experimental.CommunicationOptions. If
        provided, it overrides the default options.

    Returns:
      The reduced IndexedSlices.

    Raises:
      RuntimeError: if called in eager mode.
    """
        if context.executing_eagerly():
            raise RuntimeError(
                'all_reduce_indexed_slices is not supported in eager mode.')

        # Current CollectiveAllGather implementations require input IndexedSlices to
        # have consistent length across the board, we handle the reduction of
        # IndexedSlices as follows:
        #   1. Gather the lengths of IndexedSlices from all participants.
        #   2. If they have consistent length, apply all_gather.
        #   3. Otherwise pad IndexedSlices to be the same length across all
        #      participants and apply all_gather.
        options = self._options.merge(options)
        with ops.device(self._device):

            def all_gather_indexed_slices(
                all_gather_fn: Callable[
                    [core.TensorLike, Optional[collective_util.Options]],
                    core.Tensor]
            ) -> indexed_slices.IndexedSlices:
                """Use all_gather_fn to aggregate `IndexedSlices`."""
                all_values = all_gather_fn(input_slices.values, options)
                # Add control dependency to order the all-gather.
                if (options.implementation ==
                        collective_util.CommunicationImplementation.NCCL):
                    control = [all_values]
                else:
                    control = []
                with ops.control_dependencies(control):
                    all_indices = all_gather_fn(input_slices.indices, options)
                return indexed_slices.IndexedSlices(
                    values=all_values,
                    indices=all_indices,
                    dense_shape=input_slices.dense_shape)

            length = array_ops.shape(input_slices.indices)
            all_lengths = self._all_gather(length, options)

            def all_gather_with_padding(
                    input_tensor: core.TensorLike,
                    options: Optional[collective_util.Options]) -> core.Tensor:
                """all_gather tensors of different sizes using padding."""
                max_length = math_ops.reduce_max(all_lengths)
                padded_tensor = _pad_util(input_tensor, max_length)
                all_padded_tensors = self._all_gather(padded_tensor, options)
                split_tensors = []
                for i in range(self._group_size):
                    start_pos = i * max_length
                    split_tensors.append(
                        all_padded_tensors[start_pos:start_pos +
                                           all_lengths[i]])
                return array_ops.concat(split_tensors, 0)

            return control_flow_ops.cond(
                math_ops.equal(math_ops.reduce_max(all_lengths),
                               math_ops.reduce_min(all_lengths)),
                lambda: all_gather_indexed_slices(self._all_gather),
                lambda: all_gather_indexed_slices(all_gather_with_padding))
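
# The padding trick used by all_gather_with_padding above, in isolation: pad
# each participant's tensor to the max length, "gather", then slice each
# segment back to its true length. A stand-alone sketch with plain TF 2.x ops
# and no collectives, so the gather is simulated by a concat and the source's
# _pad_util is replaced by an explicit tf.pad.
import tensorflow as tf

def pad_gather_unpad(tensors):
  lengths = tf.stack([tf.shape(t)[0] for t in tensors])
  max_length = tf.reduce_max(lengths)
  padded = [tf.pad(t, [[0, max_length - tf.shape(t)[0]]]) for t in tensors]
  gathered = tf.concat(padded, axis=0)  # stand-in for the all-gather
  pieces = [gathered[i * max_length:i * max_length + lengths[i]]
            for i in range(len(tensors))]
  return tf.concat(pieces, axis=0)

# pad_gather_unpad([tf.constant([1, 2]), tf.constant([3])])  # -> [1 2 3]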
    def all_reduce_indexed_slices(self,
                                  input_slices,
                                  communication_hint='AUTO',
                                  timeout=0):
        """All-reduce an IndexedSlices.

    This method must be called inside a tf.function.

    Args:
      input_slices: an IndexedSlices.
      communication_hint: string providing hint to runtime for choosing
        collective implementation.
      timeout: a float. The timeout in seconds.

    Returns:
      The reduced IndexedSlices.

    Raises:
      RuntimeError: if called in eager mode.
    """
        if context.executing_eagerly():
            raise RuntimeError(
                'all_reduce_indexed_slices in eager mode is not supported')

        # Current CollectiveAllGather implementations require input IndexedSlices to
        # have consistent length across the board, we handle the reduction of
        # IndexedSlices as follows:
        #   1. Gather the lengths of IndexedSlices from all participants.
        #   2. If they have consistent length, apply all_gather.
        #   3. Otherwise convert IndexedSlices to dense tensors and apply
        #      all_reduce.
        with ops.device(self._device):

            def all_gather():
                """Use all_gather to aggregate `IndexedSlices`."""
                all_values = self._all_gather(input_slices.values,
                                              communication_hint,
                                              timeout=timeout)
                # Add control dependency to order the all-gather.
                control = [all_values] if communication_hint == 'NCCL' else []
                with ops.control_dependencies(control):
                    all_indices = self._all_gather(input_slices.indices,
                                                   communication_hint,
                                                   timeout=timeout)
                return ops.IndexedSlices(values=all_values,
                                         indices=all_indices,
                                         dense_shape=input_slices.dense_shape)

            def densify_and_all_reduce():
                """Use all_reduce to aggregate `IndexedSlices`."""
                densified = ops.convert_to_tensor(input_slices)
                reduced = self.all_reduce(
                    densified,
                    communication_hint=communication_hint,
                    timeout=timeout)
                # We have to convert dense grad to IndexedSlice because all_reduce()
                # and all_gather() must have the same return type as required by
                # control_flow_ops.cond.
                return ops.IndexedSlices(values=reduced,
                                         indices=math_ops.range(
                                             array_ops.shape(reduced)[0]),
                                         dense_shape=input_slices.dense_shape)

            length = array_ops.shape(input_slices.indices)
            all_lengths = self._all_gather(length,
                                           communication_hint,
                                           timeout=timeout)
            return control_flow_ops.cond(
                math_ops.equal(math_ops.reduce_max(all_lengths),
                               math_ops.reduce_min(all_lengths)), all_gather,
                densify_and_all_reduce)
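
# The densify fallback above, in isolation: convert the IndexedSlices to a
# dense tensor, reduce it, and re-wrap it with a full index range so both
# cond branches return the same type. A sketch with public TF 2.x ops;
# `reduce_fn` is an illustrative stand-in for the collective all_reduce, and
# the input is assumed to carry a dense_shape.
import tensorflow as tf

def densify_then_rewrap(slices, reduce_fn):
  dense = tf.convert_to_tensor(slices)  # scatters the values into dense form
  reduced = reduce_fn(dense)
  return tf.IndexedSlices(values=reduced,
                          indices=tf.range(tf.shape(reduced)[0]),
                          dense_shape=slices.dense_shape)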
Beispiel #42
    def call(self, inputs, count_weights=None):
        if isinstance(inputs, (list, np.ndarray)):
            inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
        if inputs.shape.rank == 1:
            inputs = array_ops.expand_dims(inputs, 1)

        if count_weights is not None and self._output_mode != COUNT:
            raise ValueError(
                "count_weights is not used in `output_mode='tf-idf'`, "
                "or `output_mode='binary'`. Please pass a single input.")
        self._called = True
        if self._max_tokens is None:
            raise RuntimeError(
                "If you construct a `CategoryEncoding` layer with "
                "`max_tokens=None`, you need to call `adapt()` "
                "on it before using it")
        else:
            out_depth = self._max_tokens

        if self._output_mode == TFIDF:
            # If the input is a sparse tensor, we densify it with the default value of
            # -1. Because -1 is ignored by one_hot, this effectively drops the non-set
            # positions from the output encoding.
            if self._sparse:
                raise ValueError("`sparse=True` with `output_mode=tfidf` "
                                 "is not supported.")
            if isinstance(inputs, sparse_tensor.SparseTensor):
                inputs = sparse_ops.sparse_tensor_to_dense(inputs,
                                                           default_value=-1)
            one_hot_data = array_ops.one_hot(inputs, depth=out_depth)
            counts = math_ops.reduce_sum(one_hot_data, axis=1)
            tf_idf_data = math_ops.multiply(counts, self.tf_idf_weights)
            tf_idf_data.set_shape(tensor_shape.TensorShape((None, out_depth)))
            return tf_idf_data

        binary_output = (self._output_mode == BINARY)
        if isinstance(inputs, sparse_tensor.SparseTensor):
            max_value = math_ops.reduce_max(inputs.values)
            min_value = math_ops.reduce_min(inputs.values)
        else:
            max_value = math_ops.reduce_max(inputs)
            min_value = math_ops.reduce_min(inputs)
        condition = math_ops.logical_and(
            math_ops.greater_equal(math_ops.cast(out_depth, max_value.dtype),
                                   max_value),
            math_ops.greater_equal(min_value,
                                   math_ops.cast(0, min_value.dtype)))
        control_flow_ops.Assert(condition, [
            "Input values must be in the range 0 <= values < max_tokens"
            " with max_tokens={}".format(out_depth)
        ])
        if self._sparse:
            result = bincount_ops.sparse_bincount(inputs,
                                                  weights=count_weights,
                                                  minlength=out_depth,
                                                  maxlength=out_depth,
                                                  axis=-1,
                                                  binary_output=binary_output)
            result = math_ops.cast(result, K.floatx())
            batch_size = array_ops.shape(result)[0]
            result = sparse_tensor.SparseTensor(
                indices=result.indices,
                values=result.values,
                dense_shape=[batch_size, out_depth])
            return result
        else:
            result = bincount_ops.bincount(inputs,
                                           weights=count_weights,
                                           minlength=out_depth,
                                           maxlength=out_depth,
                                           dtype=K.floatx(),
                                           axis=-1,
                                           binary_output=binary_output)
            result.set_shape(tensor_shape.TensorShape((None, out_depth)))
            return result
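
# In isolation: the count branch above is a per-row bincount clipped to
# out_depth, and the tf-idf branch relies on one_hot mapping -1 to an all-zero
# row. Quick demo with public TF 2.x ops (tf.math.bincount with `axis=-1`
# assumed available, i.e. TF >= 2.3):
import tensorflow as tf

x = tf.constant([[1, 2, 2], [0, 3, 3]])
tf.math.bincount(x, minlength=4, maxlength=4, axis=-1)
# -> [[0 1 2 0]
#     [1 0 0 2]]

tf.one_hot(tf.constant([1, -1]), depth=3)
# -> [[0 1 0]
#     [0 0 0]]   # -1 (the densified default) contributes nothing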
Beispiel #43
 def testEmptyGradients(self):
     with self.cached_session():
         x = array_ops.zeros([0, 3])
         y = math_ops.reduce_min(x, [1])
         error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
         self.assertEqual(error, 0)
Beispiel #44
def _dynamic_rnn_loop(cell,
                      inputs,
                      initial_state,
                      parallel_iterations,
                      swap_memory,
                      sequence_length=None,
                      dtype=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
      tuple of such elements.
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
    dtype: (optional) Expected dtype of output. If not specified, inferred from
      initial_state.

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
      objects, then this returns a (possibly nested) tuple of Tensors matching
      the corresponding shapes.
    final_state:
      A `Tensor`, or possibly nested tuple of Tensors, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  state_size = cell.state_size

  flat_input = nest.flatten(inputs)
  flat_output_size = nest.flatten(cell.output_size)

  # Construct an initial output
  input_shape = array_ops.shape(flat_input[0])
  time_steps = input_shape[0]
  batch_size = input_shape[1]

  inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                           for input_ in flat_input)

  const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

  for shape in inputs_got_shape:
    if not shape[2:].is_fully_defined():
      raise ValueError(
          "Input size (depth of inputs) must be accessible via shape inference,"
          " but saw value None.")
    got_time_steps = shape[0].value
    got_batch_size = shape[1].value
    if const_time_steps != got_time_steps:
      raise ValueError(
          "Time steps is not the same for all the elements in the input in a "
          "batch.")
    if const_batch_size != got_batch_size:
      raise ValueError(
          "Batch_size is not the same for all the elements in the input.")

  # Prepare dynamic conditional copying of state & output
  def _create_zero_arrays(size):
    size = _state_size_with_prefix(size, prefix=[batch_size])
    return array_ops.zeros(
        array_ops.stack(size), _infer_state_dtype(dtype, state))

  flat_zero_output = tuple(_create_zero_arrays(output)
                           for output in flat_output_size)
  zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                      flat_sequence=flat_zero_output)

  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  with ops.name_scope("dynamic_rnn") as scope:
    base_name = scope

  def _create_ta(name, dtype):
    return tensor_array_ops.TensorArray(dtype=dtype,
                                        size=time_steps,
                                        tensor_array_name=base_name + name)

  output_ta = tuple(_create_ta("output_%d" % i,
                               _infer_state_dtype(dtype, state))
                    for i in range(len(flat_output_size)))
  input_ta = tuple(_create_ta("input_%d" % i, flat_input[0].dtype)
                   for i in range(len(flat_input)))

  input_ta = tuple(ta.unstack(input_)
                   for ta, input_ in zip(input_ta, flat_input))

  def _time_step(time, output_ta_t, state):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: List of `TensorArray`s that represent the output.
      state: nested tuple of vector tensors that represent the state.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow, new_state).
    """

    input_t = tuple(ta.read(time) for ta in input_ta)
    # Restore some shape information
    for input_, shape in zip(input_t, inputs_got_shape):
      input_.set_shape(shape[1:])

    input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          state_size=state_size,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    # Pack state if using state tuples
    output = nest.flatten(output)

    output_ta_t = tuple(
        ta.write(time, out) for ta, out in zip(output_ta_t, output))

    return (time + 1, output_ta_t, new_state)

  _, output_final_ta, final_state = control_flow_ops.while_loop(
      cond=lambda time, *_: time < time_steps,
      body=_time_step,
      loop_vars=(time, output_ta, state),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  # Unpack final output if not using output tuples.
  final_outputs = tuple(ta.stack() for ta in output_final_ta)

  # Restore some shape information
  for output, output_size in zip(final_outputs, flat_output_size):
    shape = _state_size_with_prefix(
        output_size, prefix=[const_time_steps, const_batch_size])
    output.set_shape(shape)

  final_outputs = nest.pack_sequence_as(
      structure=cell.output_size, flat_sequence=final_outputs)

  return (final_outputs, final_state)
Beispiel #45
 def _update_statistics_from_mini_batch(
     self, statistics, auxiliary_variables, times, values):
   """Given mini-batch input, update `statistics` and `auxiliary_variables`."""
   values = math_ops.cast(values, self._dtype)
   # The density (measured in times per observation) that we see in each part
   # of the mini-batch.
   batch_inter_observation_duration = (math_ops.cast(
       math_ops.reduce_max(times, axis=1) - math_ops.reduce_min(times, axis=1),
       self._dtype) / math_ops.cast(
           array_ops.shape(times)[1] - 1, self._dtype))
   # Co-locate updates with their variables to minimize race conditions when
   # updating statistics.
   with ops.colocate_with(auxiliary_variables.max_time_seen):
     # There is a race condition if this value is being updated from multiple
     # workers. However, it should eventually reach the correct value if the
     # last chunk is presented enough times.
     max_time_seen_assign = state_ops.assign(
         auxiliary_variables.max_time_seen,
         gen_math_ops.maximum(auxiliary_variables.max_time_seen,
                              math_ops.reduce_max(times)))
   with ops.colocate_with(auxiliary_variables.chunk_count):
     chunk_count_assign = state_ops.assign_add(auxiliary_variables.chunk_count,
                                               array_ops.shape(
                                                   times,
                                                   out_type=dtypes.int64)[0])
   with ops.colocate_with(auxiliary_variables.inter_observation_duration_sum):
     inter_observation_duration_assign = state_ops.assign_add(
         auxiliary_variables.inter_observation_duration_sum,
         math_ops.reduce_sum(batch_inter_observation_duration))
   with ops.colocate_with(auxiliary_variables.example_count):
     example_count_assign = state_ops.assign_add(
         auxiliary_variables.example_count,
         array_ops.size(times, out_type=dtypes.int64))
   # Note: These mean/variance updates assume that all points are equally
   # likely, which is not true if _chunks_ are sampled uniformly from the space
   # of all possible contiguous chunks, since points at the start and end of
   # the series are then members of fewer chunks. For series which are much
   # longer than the chunk size (the usual/expected case), this effect becomes
   # irrelevant.
   with ops.colocate_with(auxiliary_variables.overall_feature_sum):
     overall_feature_sum_assign = state_ops.assign_add(
         auxiliary_variables.overall_feature_sum,
         math_ops.reduce_sum(values, axis=[0, 1]))
   with ops.colocate_with(auxiliary_variables.overall_feature_sum_of_squares):
     overall_feature_sum_of_squares_assign = state_ops.assign_add(
         auxiliary_variables.overall_feature_sum_of_squares,
         math_ops.reduce_sum(values**2, axis=[0, 1]))
   per_chunk_aux_updates = control_flow_ops.group(
       max_time_seen_assign, chunk_count_assign,
       inter_observation_duration_assign, example_count_assign,
       overall_feature_sum_assign, overall_feature_sum_of_squares_assign)
   with ops.control_dependencies([per_chunk_aux_updates]):
     example_count_float = math_ops.cast(auxiliary_variables.example_count,
                                         self._dtype)
     new_feature_mean = (auxiliary_variables.overall_feature_sum /
                         example_count_float)
     overall_feature_mean_update = state_ops.assign(
         statistics.overall_feature_moments.mean, new_feature_mean)
     overall_feature_var_update = state_ops.assign(
         statistics.overall_feature_moments.variance,
         # De-biased n / (n - 1) variance correction
         example_count_float / (example_count_float - 1.) *
         (auxiliary_variables.overall_feature_sum_of_squares /
          example_count_float - new_feature_mean**2))
     # TODO(b/35675805): Remove this cast
     min_time_batch = math_ops.cast(math_ops.argmin(times[:, 0]), dtypes.int32)
     def series_start_updates():
       # If this is the lowest-time chunk that we have seen so far, update
       # series start moments to reflect that. Note that these statistics are
       # "best effort", as there are race conditions in the update (however,
       # they should eventually converge if the start of the series is
       # presented enough times).
       mean, variance = nn.moments(
           values[min_time_batch, :self._starting_variance_window_size],
           axes=[0])
       return control_flow_ops.group(
           state_ops.assign(statistics.series_start_moments.mean, mean),
           state_ops.assign(statistics.series_start_moments.variance,
                            variance))
     with ops.colocate_with(statistics.start_time):
       series_start_update = control_flow_ops.cond(
           # Update moments whenever we even match the lowest time seen so far,
           # to ensure that series start statistics are eventually updated to
           # their correct values, despite race conditions (i.e. eventually
           # statistics.start_time will reflect the global lowest time, and
           # given that we will eventually update the series start moments to
           # their correct values).
           math_ops.less_equal(times[min_time_batch, 0],
                               statistics.start_time),
           series_start_updates,
           control_flow_ops.no_op)
       with ops.control_dependencies([series_start_update]):
         # There is a race condition if this update is performed in parallel on
         # multiple workers. Since models may be sensitive to being presented
         # with times before the putative start time, the value of this
         # variable is post-processed above to guarantee that each worker is
         # presented with a start time which is at least as low as the lowest
         # time in its current mini-batch.
         start_time_update = state_ops.assign(statistics.start_time,
                                              gen_math_ops.minimum(
                                                  statistics.start_time,
                                                  math_ops.reduce_min(times)))
     inter_observation_duration_estimate = (
         auxiliary_variables.inter_observation_duration_sum / math_ops.cast(
             auxiliary_variables.chunk_count, self._dtype))
     # Estimate the total number of observations as:
     #   (end time - start time + 1) * average intra-chunk time density
     total_observation_count_update = state_ops.assign(
         statistics.total_observation_count,
         math_ops.cast(
             gen_math_ops.round(
                 math_ops.cast(auxiliary_variables.max_time_seen -
                               statistics.start_time + 1, self._dtype) /
                 inter_observation_duration_estimate), dtypes.int64))
     per_chunk_stat_updates = control_flow_ops.group(
         overall_feature_mean_update, overall_feature_var_update,
         series_start_update, start_time_update,
         total_observation_count_update)
   return per_chunk_stat_updates
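
# A quick numeric check of the "de-biased n / (n - 1)" variance used above:
# n/(n-1) * (E[x^2] - mean^2) is exactly the sample variance with ddof=1.
import numpy as np

x = np.array([1.0, 2.0, 4.0, 7.0])
n = x.size
debiased = n / (n - 1.0) * ((x ** 2).mean() - x.mean() ** 2)
assert np.isclose(debiased, x.var(ddof=1))  # both equal 7.0 here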
Beispiel #46
    def _train_op_fn(loss):
      """Run one training iteration."""
      if training_state_cache:
        train_op.append(training_state_cache.insert(tree_ids, node_ids, logits))
      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]

      stats_summaries_list = []
      for i, feature_ids in enumerate(feature_ids_list):
        num_buckets = bucket_size_list[i]
        summaries = [
            array_ops.squeeze(
                boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                axis=0) for f in feature_ids
        ]
        stats_summaries_list.append(summaries)

      accumulators = []

      def grow_tree_from_stats_summaries(stats_summaries_list,
                                         feature_ids_list):
        """Updates ensemble based on the best gains from stats summaries."""
        node_ids_per_feature = []
        gains_list = []
        thresholds_list = []
        left_node_contribs_list = []
        right_node_contribs_list = []
        all_feature_ids = []

        assert len(stats_summaries_list) == len(feature_ids_list)

        for i, feature_ids in enumerate(feature_ids_list):
          (numeric_node_ids_per_feature, numeric_gains_list,
           numeric_thresholds_list, numeric_left_node_contribs_list,
           numeric_right_node_contribs_list) = (
               boosted_trees_ops.calculate_best_gains_per_feature(
                   node_id_range=last_layer_nodes_range,
                   stats_summary_list=stats_summaries_list[i],
                   l1=tree_hparams.l1,
                   l2=tree_hparams.l2,
                   tree_complexity=tree_hparams.tree_complexity,
                   min_node_weight=tree_hparams.min_node_weight,
                   max_splits=max_splits))

          all_feature_ids += feature_ids
          node_ids_per_feature += numeric_node_ids_per_feature
          gains_list += numeric_gains_list
          thresholds_list += numeric_thresholds_list
          left_node_contribs_list += numeric_left_node_contribs_list
          right_node_contribs_list += numeric_right_node_contribs_list

        grow_op = boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=all_feature_ids,
            node_ids=node_ids_per_feature,
            gains=gains_list,
            thresholds=thresholds_list,
            left_node_contribs=left_node_contribs_list,
            right_node_contribs=right_node_contribs_list,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
        return grow_op

      if train_in_memory and is_single_machine:
        train_op.append(distribute_lib.increment_var(global_step))
        train_op.append(
            grow_tree_from_stats_summaries(stats_summaries_list,
                                           feature_ids_list))
      else:
        dependencies = []

        for i, feature_ids in enumerate(feature_ids_list):
          stats_summaries = stats_summaries_list[i]
          accumulator = data_flow_ops.ConditionalAccumulator(
              dtype=dtypes.float32,
              # The stats consist of grads and hessians (the last dimension).
              shape=[len(feature_ids), max_splits, bucket_size_list[i], 2],
              shared_name='numeric_stats_summary_accumulator_' + str(i))
          accumulators.append(accumulator)

          apply_grad = accumulator.apply_grad(
              array_ops.stack(stats_summaries, axis=0), stamp_token)
          dependencies.append(apply_grad)

        def grow_tree_from_accumulated_summaries_fn():
          """Updates the tree with the best layer from accumulated summaries."""
          # Take out the accumulated summaries from the accumulator and grow.
          stats_summaries_list = []

          stats_summaries_list = [
              array_ops.unstack(accumulator.take_grad(1), axis=0)
              for accumulator in accumulators
          ]

          grow_op = grow_tree_from_stats_summaries(stats_summaries_list,
                                                   feature_ids_list)
          return grow_op

        with ops.control_dependencies(dependencies):
          train_op.append(distribute_lib.increment_var(global_step))
          if config.is_chief:
            min_accumulated = math_ops.reduce_min(
                array_ops.stack(
                    [acc.num_accumulated() for acc in accumulators]))

            train_op.append(
                control_flow_ops.cond(
                    math_ops.greater_equal(min_accumulated,
                                           n_batches_per_layer),
                    grow_tree_from_accumulated_summaries_fn,
                    control_flow_ops.no_op,
                    name='wait_until_n_batches_accumulated'))

      return control_flow_ops.group(train_op, name='train_op')
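
# The chief-only gating above, in isolation: grow only once every accumulator
# holds at least `n` batches, otherwise no-op. A graph-mode sketch mirroring
# the source's cond/no_op pattern; `num_accumulated` (a list of scalar int
# tensors) and `grow_fn` are illustrative names.
import tensorflow as tf

def maybe_grow(num_accumulated, n, grow_fn):
  min_accumulated = tf.reduce_min(tf.stack(num_accumulated))
  return tf.cond(min_accumulated >= n, grow_fn, tf.no_op)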
Beispiel #47
 def testMinGradient(self):
   inputs = constant_op.constant([1.0], dtype=dtypes.float32)
   outputs = math_ops.reduce_min(array_ops.concat([inputs, inputs], 0))
   with self.cached_session():
     error = gradient_checker.compute_gradient_error(inputs, [1], outputs, [])
     self.assertLess(error, 1e-4)
Beispiel #48
def rnn(cell,
        inputs,
        initial_state=None,
        dtype=None,
        sequence_length=None,
        scope=None):
    """Creates a recurrent neural network specified by RNNCell "cell".

  The simplest form of RNN network generated is:
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)

  However, a few other options are available:

  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.

  The dynamic calculation performed is, at time t for batch row b,
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))

  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, cell.input_size].
    initial_state: (optional) An initial state for the RNN.  This must be
      a tensor of appropriate type and shape [batch_size x cell.state_size].
    dtype: (optional) The data type for the initial state.  Required if
      initial_state is not provided.
    sequence_length: Specifies the length of each sequence in inputs.
      An int32 or int64 vector (tensor) size [batch_size].  Values in [0, T).
    scope: VariableScope for the created subgraph; defaults to "RNN".

  Returns:
    A pair (outputs, state) where:
      outputs is a length T list of outputs (one for each input)
      state is the final state

  Raises:
    TypeError: If "cell" is not an instance of RNNCell.
    ValueError: If inputs is None or an empty list, or if the input depth
      cannot be inferred from inputs via shape inference.
  """

    if not isinstance(cell, BaseCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not isinstance(inputs, list):
        raise TypeError("inputs must be a list")
    if not inputs:
        raise ValueError("inputs must not be empty")

    outputs = []
    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "RNN") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        # Temporarily avoid EmbeddingWrapper and seq2seq badness
        # TODO(lukaszkaiser): remove EmbeddingWrapper
        if inputs[0].get_shape().ndims != 1:
            (fixed_batch_size, input_size) = inputs[0].get_shape().with_rank(2)
            if input_size.value is None:
                raise ValueError(
                    "Input size (second dimension of inputs[0]) must be accessible via "
                    "shape inference, but saw value None.")
        else:
            fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]

        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(inputs[0])[0]
        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError(
                    "If no initial_state is provided, dtype must be.")
            state = cell.zero_state(batch_size, dtype)

        if sequence_length is not None:
            sequence_length = math_ops.to_int32(sequence_length)

        if sequence_length is not None:  # Prepare variables
            zero_output = array_ops.zeros(
                array_ops.pack([batch_size, cell.output_size]),
                inputs[0].dtype)
            zero_output.set_shape(
                tensor_shape.TensorShape(
                    [fixed_batch_size.value, cell.output_size]))
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(inputs):
            if time > 0: vs.get_variable_scope().reuse_variables()
            # pylint: disable=cell-var-from-loop
            call_cell = lambda: cell(input_, state)
            # pylint: enable=cell-var-from-loop
            if sequence_length is not None:
                (output, state) = _rnn_step(time, sequence_length,
                                            min_sequence_length,
                                            max_sequence_length, zero_output,
                                            state, call_cell)
            else:
                (output, state) = call_cell()

            outputs.append(output)

        return (outputs, state)
Beispiel #49
def attention_RNN(encoder_outputs, 
                  encoder_state,
                  num_decoder_symbols,
                  sequence_length,
                  num_heads=1,
                  dtype=dtypes.float32,
                  use_attention=True,
                  loop_function=None,
                  scope=None):
  if use_attention:
    print ('Use the attention RNN model')
    if num_heads < 1:
      raise ValueError("With less than 1 heads, use a non-attention decoder.")
  
    with variable_scope.variable_scope(scope or "attention_RNN"):
      output_size = encoder_outputs[0].get_shape()[1].value
      top_states = [array_ops.reshape(e, [-1, 1, output_size])
                  for e in encoder_outputs]
      attention_states = array_ops.concat(top_states, 1)
      if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                       % attention_states.get_shape())
  
      batch_size = array_ops.shape(top_states[0])[0]  # Needed for reshaping.
      attn_length = attention_states.get_shape()[1].value
      attn_size = attention_states.get_shape()[2].value
  
      # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
      hidden = array_ops.reshape(
          attention_states, [-1, attn_length, 1, attn_size])
      hidden_features = []
      v = []
      attention_vec_size = attn_size  # Size of query vectors for attention.
      for a in xrange(num_heads):
        k = variable_scope.get_variable("AttnW_%d" % a,
                                        [1, 1, attn_size, attention_vec_size])
        hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
        v.append(variable_scope.get_variable("AttnV_%d" % a,
                                             [attention_vec_size]))
  
      def attention(query):
        """Put attention masks on hidden using hidden_features and query."""
        attn_weights = []
        ds = []  # Results of attention reads will be stored here.
        for i in xrange(num_heads):
          with variable_scope.variable_scope("Attention_%d" % i):
            y = rnn_cell._linear(query, attention_vec_size, True)
            y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
            # Attention mask is a softmax of v^T * tanh(...).
            s = math_ops.reduce_sum(
                v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
            a = nn_ops.softmax(s)
            attn_weights.append(a)
            # Now calculate the attention-weighted vector d.
            d = math_ops.reduce_sum(
                array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                [1, 2])
            ds.append(array_ops.reshape(d, [-1, attn_size]))
        return attn_weights, ds
  
      batch_attn_size = array_ops.stack([batch_size, attn_size])
      attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
               for _ in xrange(num_heads)]
      for a in attns:  # Ensure the second shape of attention vectors is set.
        a.set_shape([None, attn_size])
  
      # loop through the encoder_outputs
      attention_encoder_outputs = list()
      sequence_attention_weights = list()
      for i in xrange(len(encoder_outputs)):
        if i > 0:
          variable_scope.get_variable_scope().reuse_variables()
        if i == 0:
          with variable_scope.variable_scope("Initial_Decoder_Attention"):
            initial_state = rnn_cell._linear(encoder_state, output_size, True)
          attn_weights, ds = attention(initial_state)
        else:
          attn_weights, ds = attention(encoder_outputs[i])
        output = array_ops.concat([ds[0], encoder_outputs[i]], 1) # NOTE: here we temporarily assume num_head = 1
        with variable_scope.variable_scope("AttnRnnOutputProjection"):
          logit = rnn_cell._linear(output, num_decoder_symbols, True)
        attention_encoder_outputs.append(logit) # NOTE: here we temporarily assume num_head = 1
        sequence_attention_weights.append(attn_weights[0]) # NOTE: here we temporarily assume num_head = 1
  else:
    print ('Use the NON attention RNN model')
    with variable_scope.variable_scope(scope or "non-attention_RNN"):
      attention_encoder_outputs = list()
      sequence_attention_weights = list()
      
      # copy over logits once out of sequence_length
      if encoder_outputs[0].get_shape().ndims != 1:
        (fixed_batch_size, output_size) = encoder_outputs[0].get_shape().with_rank(2)
      else:
        fixed_batch_size = encoder_outputs[0].get_shape().with_rank_at_least(1)[0]

      if fixed_batch_size.value: 
        batch_size = fixed_batch_size.value
      else:
        batch_size = array_ops.shape(encoder_outputs[0])[0]
      if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
      if sequence_length is not None:  # Prepare variables
        zero_logit = array_ops.zeros(
            array_ops.pack([batch_size, num_decoder_symbols]), encoder_outputs[0].dtype)
        zero_logit.set_shape(
            tensor_shape.TensorShape([fixed_batch_size.value, num_decoder_symbols]))
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)
    
      for time, input_ in enumerate(encoder_outputs):
        if time > 0: variable_scope.get_variable_scope().reuse_variables()
        # pylint: disable=cell-var-from-loop
        # call_cell = lambda: cell(input_, state)
        generate_logit = lambda: rnn_cell._linear(encoder_outputs[time], num_decoder_symbols, True)
        # pylint: enable=cell-var-from-loop
        if sequence_length is not None:
          logit = _step(
              time, sequence_length, min_sequence_length, max_sequence_length, zero_logit, generate_logit)
        else:
          logit = generate_logit()
        attention_encoder_outputs.append(logit)   
        
  return attention_encoder_outputs, sequence_attention_weights
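
# The scoring inside attention() above, reduced to its core: additive
# (Bahdanau-style) attention, softmax over v^T tanh(keys + W*query), followed
# by the attention-weighted sum. A sketch with public TF 2.x ops; `keys`
# stands for the already-projected hidden features (the 1x1 convolution is
# replaced by a plain matmul) and all names are illustrative.
import tensorflow as tf

def additive_attention(keys, values, query, v, w_q):
  # keys/values: [batch, length, size]; query: [batch, size]
  # v: [size]; w_q: [size, size]
  y = tf.matmul(query, w_q)                                   # [batch, size]
  scores = tf.reduce_sum(v * tf.tanh(keys + y[:, None, :]), axis=2)
  weights = tf.nn.softmax(scores, axis=1)                     # [batch, length]
  context = tf.reduce_sum(weights[:, :, None] * values, axis=1)
  return weights, context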
Beispiel #50
def _dynamic_rnn_loop(cell,
                      inputs,
                      initial_state,
                      ff_keep_prob,
                      recur_keep_prob,
                      parallel_iterations,
                      swap_memory,
                      sequence_length=None):
    """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, depth].
    initial_state: A `Tensor` of shape [batch_size, depth].
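    ff_keep_prob: Keep probability for dropout applied to the inputs.
    recur_keep_prob: Keep probability for dropout applied to the recurrent
      state (sampled once and reused at every time step).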
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple (final_outputs, final_state).
    final_outputs:
      A `Tensor` of shape [time, batch_size, depth]`.
    final_state:
      A `Tensor` of shape [batch_size, depth].

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
    state = initial_state
    assert isinstance(parallel_iterations,
                      int), "parallel_iterations must be int"

    # Construct an initial output
    input_shape = array_ops.shape(inputs)
    (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

    inputs_got_shape = inputs.get_shape().with_rank(3)
    (const_time_steps, const_batch_size,
     const_depth) = inputs_got_shape.as_list()

    if const_depth is None:
        raise ValueError(
            "Input size (depth of inputs) must be accessible via shape inference, "
            "but saw value None.")

    # Prepare dynamic conditional copying of state & output
    zero_output = array_ops.zeros(
        array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn", None, []) as scope:
        base_name = scope

    output_ta = tensor_array_ops.TensorArray(dtype=inputs.dtype,
                                             size=time_steps,
                                             tensor_array_name=base_name +
                                             "output")

    input_ta = tensor_array_ops.TensorArray(dtype=inputs.dtype,
                                            size=time_steps,
                                            tensor_array_name=base_name +
                                            "input")

    if isinstance(ff_keep_prob, ops.Tensor) or ff_keep_prob < 1:
        inputs = nn_ops.dropout(inputs,
                                ff_keep_prob,
                                noise_shape=array_ops.pack(
                                    [1, batch_size, const_depth]))
    input_ta = input_ta.unpack(inputs)

    if isinstance(recur_keep_prob, ops.Tensor) or recur_keep_prob < 1:
        ones = array_ops.ones(array_ops.pack([batch_size, cell.output_size]),
                              inputs.dtype)
        state_dropout = nn_ops.dropout(ones, recur_keep_prob)
        state_dropout = array_ops.concat(
            1, [ones] * (cell.state_size // cell.output_size - 1) +
            [state_dropout])
    else:
        state_dropout = 1.

    def _time_step(time, state, output_ta_t):
        """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      state: Vector.
      output_ta_t: `TensorArray`, the output with existing flow.

    Returns:
      The tuple (time + 1, new_state, output_ta_t with updated flow).
    """

        input_t = input_ta.read(time)
        # Restore some shape information
        input_t.set_shape([const_batch_size, const_depth])

        call_cell = lambda: cell(input_t, state * state_dropout)

        if sequence_length is not None:
            (output,
             new_state) = _rnn_step(time=time,
                                    sequence_length=sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    max_sequence_length=max_sequence_length,
                                    zero_output=zero_output,
                                    state=state,
                                    call_cell=call_cell,
                                    skip_conditionals=True)
        else:
            (output, new_state) = call_cell()

        output_ta_t = output_ta_t.write(time, output)

        return (time + 1, new_state, output_ta_t)

    (_, final_state, output_final_ta) = control_flow_ops.while_loop(
        cond=lambda time, _1, _2: time < time_steps,
        body=_time_step,
        loop_vars=(time, state, output_ta),
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    final_outputs = output_final_ta.pack()
    # Restore some shape information
    final_outputs.set_shape(
        [const_time_steps, const_batch_size, cell.output_size])

    return (final_outputs, final_state)
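# Note on the dropout above: noise_shape=[1, batch_size, depth] samples one
# dropout mask per sequence and reuses it at every time step (variational
# dropout) instead of resampling per step. A minimal standalone numpy sketch
# of that broadcasting behavior -- illustrative only, not part of the snippet:
import numpy as np

_rng = np.random.default_rng(0)
_time, _batch, _depth, _keep_prob = 5, 2, 3, 0.5
# One mask per (batch, depth) position, broadcast over the time axis.
_mask = (_rng.random((1, _batch, _depth)) < _keep_prob) / _keep_prob
_dropped = np.ones((_time, _batch, _depth)) * _mask
assert np.all(_dropped[0] == _dropped[-1])  # every step sees the same mask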
    def _min_matrix_dim_tensor(self):
        """Minimum of domain/range dimension, as a tensor."""
        return math_ops.reduce_min(self.shape_tensor()[-2:])
Beispiel #52
def masked_minimum(data, mask, dim=1):
    """Computes the minimum over masked-in entries along `dim`."""
    # Shift by the per-axis maximum so every entry is <= 0; masked-out entries
    # (multiplied by 0) then cannot undercut a masked-in entry. Shift back at
    # the end.
    axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
    masked_minimums = math_ops.reduce_min(
        math_ops.multiply(data - axis_maximums,
                          mask), dim, keepdims=True) + axis_maximums
    return masked_minimums
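# A hedged numpy check of the same max-shift trick (the helper name below is
# illustrative, not from the snippet):
import numpy as np

def _masked_minimum_np(data, mask, axis=1):
    axis_max = data.max(axis=axis, keepdims=True)
    return ((data - axis_max) * mask).min(axis=axis, keepdims=True) + axis_max

print(_masked_minimum_np(np.array([[1.0, 5.0, 3.0]]),
                         np.array([[0.0, 1.0, 1.0]])))  # [[3.]]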
    def tpu_fn(x, y):
        a = x + 7.0
        b = y * 2.0
        c, d, e = tpu.outside_compilation(outside_fn, a, b)
        return (math_ops.reduce_max(c) + math_ops.reduce_min(d) +
                math_ops.reduce_sum(e))
Beispiel #54
def static_rnn(cell,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

  The simplest form of RNN network generated is:

  ```python
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)
  ```
  However, a few other options are available:

  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.

  The dynamic calculation performed is, at time `t` for batch row `b`,

  ```python
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))
  ```

  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a `Tensor` of shape
      `[batch_size, input_size]`, or a nested tuple of such elements.
    initial_state: (optional) An initial state for the RNN.
      If `cell.state_size` is an integer, this must be
      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
      If `cell.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a heterogeneous
      dtype.
    sequence_length: Specifies the length of each sequence in inputs. An int32
      or int64 vector (tensor) of size `[batch_size]`, values in `[0, T)`.
    scope: VariableScope for the created subgraph; defaults to "rnn".

  Returns:
    A pair (outputs, state) where:

    - outputs is a length T list of outputs (one for each input), or a nested
      tuple of such elements.
    - state is the final state.

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the input depth
      (column size) cannot be inferred from inputs via shape inference.
  """

    if not isinstance(cell, RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not nest.is_sequence(inputs):
        raise TypeError("inputs must be a sequence")
    if not inputs:
        raise ValueError("inputs must not be empty")

    outputs = []
    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        # Obtain the first sequence of the input
        first_input = inputs
        while nest.is_sequence(first_input):
            first_input = first_input[0]

        # Temporarily avoid EmbeddingWrapper and seq2seq badness
        # TODO(lukaszkaiser): remove EmbeddingWrapper
        if first_input.get_shape().ndims != 1:

            input_shape = first_input.get_shape().with_rank_at_least(2)
            fixed_batch_size = input_shape[0]

            flat_inputs = nest.flatten(inputs)
            for flat_input in flat_inputs:
                input_shape = flat_input.get_shape().with_rank_at_least(2)
                batch_size, input_size = input_shape[0], input_shape[1:]
                fixed_batch_size.merge_with(batch_size)
                for i, size in enumerate(input_size):
                    if size.value is None:
                        raise ValueError(
                            "Input size (dimension %d of inputs) must be accessible via "
                            "shape inference, but saw value None." % i)
        else:
            fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]

        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(first_input)[0]
        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError("If no initial_state is provided, "
                                 "dtype must be specified")
            state = cell.zero_state(batch_size, dtype)

        if sequence_length is not None:  # Prepare variables
            sequence_length = ops.convert_to_tensor(sequence_length,
                                                    name="sequence_length")
            if sequence_length.get_shape().ndims not in (None, 1):
                raise ValueError(
                    "sequence_length must be a vector of length batch_size")

            def _create_zero_output(output_size):
                # convert int to TensorShape if necessary
                size = _state_size_with_prefix(output_size,
                                               prefix=[batch_size])
                output = array_ops.zeros(array_ops.stack(size),
                                         _infer_state_dtype(dtype, state))
                shape = _state_size_with_prefix(
                    output_size, prefix=[fixed_batch_size.value])
                output.set_shape(tensor_shape.TensorShape(shape))
                return output

            output_size = cell.output_size
            flat_output_size = nest.flatten(output_size)
            flat_zero_output = tuple(
                _create_zero_output(size) for size in flat_output_size)
            zero_output = nest.pack_sequence_as(structure=output_size,
                                                flat_sequence=flat_zero_output)

            sequence_length = math_ops.to_int32(sequence_length)
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(inputs):
            if time > 0: varscope.reuse_variables()
            # pylint: disable=cell-var-from-loop
            call_cell = lambda: cell(input_, state)
            # pylint: enable=cell-var-from-loop
            if sequence_length is not None:
                (output, state, wt, fi,
                 ti) = _rnn_step(time=time,
                                 sequence_length=sequence_length,
                                 min_sequence_length=min_sequence_length,
                                 max_sequence_length=max_sequence_length,
                                 zero_output=zero_output,
                                 state=state,
                                 call_cell=call_cell,
                                 state_size=cell.state_size)
            else:
                (output, state, wt, fi, ti) = call_cell()

            outputs.append(output)

        return (outputs, state, wt, fi, ti)
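# A numpy sketch of the copy-through rule in the docstring above: past an
# example's sequence length the output is zeroed and the state is frozen.
# Purely illustrative (the toy cell and names are assumptions, not from the
# snippet, and the extra wt/fi/ti values are left aside):
import numpy as np

def _toy_cell(x, s):
    return x + s, x + s  # output == new state, just for illustration

_lengths = np.array([2, 4])
_state = np.zeros(2)
_outputs = []
for _t in range(4):
    _out, _new_state = _toy_cell(np.ones(2), _state)
    _done = _t >= _lengths                        # per-example mask
    _outputs.append(np.where(_done, 0.0, _out))   # zero outputs past length
    _state = np.where(_done, _state, _new_state)  # freeze state past length

print(np.stack(_outputs)[:, 0])  # [1. 2. 0. 0.] -- example 0 stops at t=2
print(_state)                    # [2. 4.]       -- final state at each length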
Beispiel #55
    def _finish(self, state):
        var_dtype = self._variables[0].dtype.base_dtype
        # Update global step.
        global_step = self._get_global_step(state)
        update_global_step = state_ops.assign_add(global_step, 1.)

        # Update the first moment estimate.
        beta1 = state.get_hyper("beta1", dtype=var_dtype)
        moment1 = self._get_moment1(state)
        flat_grad = self._get_flat_grad(state)
        # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
        update_moment1 = moment1.assign(beta1 * moment1 +
                                        (1. - beta1) * flat_grad)

        # Update the gradient buffer.
        window = state.get_hyper("window")
        grad_buffer = self._get_grad_buffer(state)
        next_grad_index = math_ops.floormod(
            math_ops.to_int32(update_global_step - 1.), window)
        # grad_buffer[(t-1) % window] := moment1_t
        update_grad_buffer = state_ops.scatter_update(grad_buffer,
                                                      next_grad_index,
                                                      update_moment1)

        # Compute the update step.
        eps = state.get_hyper("eps", dtype=var_dtype)
        svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
        sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
        lr = state.get_hyper("lr", dtype=var_dtype)
        denom = math_ops.sqrt(
            math_ops.minimum(
                ops.convert_to_tensor(update_global_step),
                ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
        moment1_2d = array_ops.expand_dims(update_moment1, -1)

        # m = grad_buffer^T / sqrt(min(t, window))
        # m has shape [model dimension, window], where model dimension is the sum
        # of the dimensions of the flattened variables.
        m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

        # sigma, u, _ = SVD(m^Tm + I * svd_eps)
        mm = math_ops.matmul(m, m, transpose_a=True)
        damping = math_ops.cast(linalg_ops.eye(window),
                                dtype=var_dtype) * svd_eps
        sigma, u, _ = linalg_ops.svd(mm + damping)
        sigma_sqrt = math_ops.sqrt(sigma)
        sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

        # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
        # We add sigma_eps to alleviate numerical instability.
        # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
        sigma_sqrt_inv = math_ops.divide(
            math_ops.cast(1.0, dtype=var_dtype),
            math_ops.pow(sigma_sqrt + sigma_eps, 3))

        # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
        # inversion of a model dimension by model dimension matrix is needed. To
        # speed up this computation we calculate the following instead:
        # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
        new_step = array_ops.expand_dims(
            array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
        head = math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(sigma_sqrt_inv),
                    math_ops.matmul(u,
                                    math_ops.matmul(m,
                                                    moment1_2d,
                                                    transpose_a=True),
                                    transpose_a=True))))

        # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
        # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
        # Woodbury's identity.
        # For full derivation please see paper at
        # https://arxiv.org/pdf/1806.02958.pdf
        tail = moment1_2d - math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(
                        math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                        sigma)),
                    math_ops.matmul(u,
                                    math_ops.matmul(
                                        m, moment1_2d, transpose_a=True),
                                    transpose_a=True))))
        scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

        update_new_step = control_flow_ops.cond(
            sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
            lambda: math_ops.add(new_step, head))

        # Update each variable.
        update_step = []
        for var in self._variables:
            dim = self.shape_dict[var.name]
            start_index = self.index_dict[var.name]
            end_index = start_index + dim
            var_update_correct_shape = array_ops.reshape(
                update_new_step[start_index:end_index], var.get_shape())
            var_updated = state_ops.assign_sub(var,
                                               lr * var_update_correct_shape)
            update_step.append(var_updated)

        return control_flow_ops.group(update_step)
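# The head computation above relies on the identity
# m (m^T m)^(-3/2) m^T g == (m m^T)^(-1/2) g on the column space of m, which
# lets the code invert a window x window Gram matrix instead of a
# model-dimension one. A hedged numpy verification, ignoring the eps
# regularizers (illustrative only):
import numpy as np

_rng = np.random.default_rng(0)
_m = _rng.standard_normal((8, 3))   # [model dimension, window]
_g = _rng.standard_normal((8, 1))

_sigma, _u = np.linalg.eigh(_m.T @ _m)          # small window x window problem
_route1 = _m @ _u @ np.diag(_sigma ** -1.5) @ _u.T @ _m.T @ _g

_U, _S, _ = np.linalg.svd(_m, full_matrices=False)
_route2 = _U @ np.diag(1.0 / _S) @ _U.T @ _g    # (m m^T)^(-1/2) on col(m)

assert np.allclose(_route1, _route2)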
Beispiel #56
def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
  """Calculates the acceptance probabilities and mixing ratio.

  In this case, we assume that we can *either* sample from the original data
  distribution with probability `m`, or sample from a reshaped distribution
  that comes from rejection sampling on the original distribution. This
  rejection sampling is done on a per-class basis, with `a_i` representing the
  probability of accepting data from class `i`.

  This method is based on solving the following analysis for the reshaped
  distribution:

  Let F be the probability of a rejection (on any example).
  Let p_i be the proportion of examples in the data in class i (init_probs).
  Let a_i be the rate at which the rejection sampler should *accept* class i.
  Let t_i be the target proportion of class i in minibatches (target_probs).

  ```
  F = sum_i(p_i * (1-a_i))
    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
  ```

  An example of class `i` is emitted after some number `k` of rejections occur
  and then a class-`i` example is seen by the rejector and accepted. Summing
  over all `k`, this can be written as:

  ```
  t_i = sum_k=0^inf(F^k * p_i * a_i)
      = p_i * a_i / (1 - F)    using geometric series identity, since 0 <= F < 1
      = p_i * a_i / sum_j(p_j * a_j)        using F from above
  ```

  Note that the following constraints hold:
  ```
  0 <= p_i <= 1, sum_i(p_i) = 1
  0 <= a_i <= 1
  0 <= t_i <= 1, sum_i(t_i) = 1
  ```

  A solution for a_i in terms of the other variables, without mixing in the
  original distribution, is:

  ```
  a_i = (t_i / p_i) / max_i [ t_i / p_i ]
  ```

  If we also try to minimize the amount of data rejected, we get the following:

  ```
  M_max = max_i [ t_i / p_i ]
  M_min = min_i [ t_i / p_i ]
  ```

  The desired probability of accepting data if it comes from class `i`:

  ```
  a_i = (t_i / p_i - m) / (M_max - m)
  ```

  The desired probability of pulling a data element from the original dataset,
  rather than the filtered one:

  ```
  m = M_min
  ```

  Args:
    initial_probs: A Tensor of the initial probability distribution, given or
      estimated.
    target_probs: A Tensor of the target probability distribution over the
      same classes.

  Returns:
    (A 1D Tensor with the per-class acceptance probabilities, the desired
    probability of pull from the original distribution.)
  """
  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
  max_ratio = math_ops.reduce_max(ratio_l)
  min_ratio = math_ops.reduce_min(ratio_l)

  # Target prob to sample from original distribution.
  m = min_ratio

  # TODO(joelshor): Simplify fraction, if possible.
  a_i = (ratio_l - m) / (max_ratio - m)
  return a_i, m
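# A hedged numeric check of the acceptance/mixing formulas above (values are
# illustrative): mixing m of the original stream with (1 - m) of the
# rejection-sampled stream reproduces the target distribution exactly.
import numpy as np

_p = np.array([0.8, 0.2])                  # initial proportions p_i
_t = np.array([0.5, 0.5])                  # target proportions t_i
_ratio = _t / _p                           # [0.625, 2.5]
_m = _ratio.min()                          # prob of using the original data
_a = (_ratio - _m) / (_ratio.max() - _m)   # acceptance probs: [0., 1.]

_resampled = _p * _a / np.sum(_p * _a)     # distribution after rejection
print(_m * _p + (1 - _m) * _resampled)     # -> [0.5, 0.5] == target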
Beispiel #57
def _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations,
                      swap_memory, sequence_length=None):
    """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size].
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean.
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.
    final_state:
      A `Tensor` matrix, or tuple of such matrices, matching in length
      and shapes to `initial_state`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
    state = initial_state
    assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

    # Construct an initial output
    input_shape = array_ops.shape(inputs)
    time_steps = input_shape[0]
    batch_size = input_shape[1]

    inputs_got_shape = inputs.get_shape().with_rank_at_least(3).as_list()
    const_time_steps = inputs_got_shape[0]
    const_batch_size = inputs_got_shape[1]
    const_depth = inputs_got_shape[2:]

    # const_depth is a list of the (possibly nested) depth dimensions; reject
    # the inputs if any of them failed shape inference.
    if any(d is None for d in const_depth):
        raise ValueError(
            "Input size (depth of inputs) must be accessible via shape "
            "inference, but saw value None.")

    # Prepare dynamic conditional copying of state & output
    zeros_size = _state_size_with_prefix(cell.output_size, prefix=[batch_size])
    zero_output = array_ops.zeros(array_ops.pack(zeros_size), inputs.dtype)

    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    state_size = cell.state_size
    state_is_tuple = _is_sequence(state_size)

    state = _unpacked_state(state) if state_is_tuple else (state,)

    with ops.op_scope([], "dynamic_rnn") as scope:
        base_name = scope

    output_ta = tensor_array_ops.TensorArray(
        dtype=inputs.dtype, size=time_steps, tensor_array_name=base_name + "output"
    )

    input_ta = tensor_array_ops.TensorArray(dtype=inputs.dtype, size=time_steps, tensor_array_name=base_name + "input")

    input_ta = input_ta.unpack(inputs)

    def _time_step(time, output_ta_t, *state):
        """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      output_ta_t: `TensorArray`, the output with existing flow.
      *state: List of vector tensors.

    Returns:
      The tuple (time + 1, output_ta_t with updated flow) + new_state.
    """

        input_t = input_ta.read(time)
        # Restore some shape information
        input_t.set_shape([const_batch_size] + const_depth)

        # Pack state back up for use by cell
        state = _packed_state(structure=state_size, state=state) if state_is_tuple else state[0]

        call_cell = lambda: cell(input_t, state)

        if sequence_length is not None:
            (output, new_state) = _rnn_step(
                time=time,
                sequence_length=sequence_length,
                min_sequence_length=min_sequence_length,
                max_sequence_length=max_sequence_length,
                zero_output=zero_output,
                state=state,
                call_cell=call_cell,
                state_size=state_size,
                skip_conditionals=True,
            )
        else:
            (output, new_state) = call_cell()

        # Pack state if using state tuples
        new_state = tuple(_unpacked_state(new_state)) if state_is_tuple else (new_state,)

        output_ta_t = output_ta_t.write(time, output)

        return (time + 1, output_ta_t) + new_state

    final_loop_vars = control_flow_ops.while_loop(
        cond=lambda time, *_: time < time_steps,
        body=_time_step,
        loop_vars=(time, output_ta) + tuple(state),
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory,
    )

    (output_final_ta, final_state) = (final_loop_vars[1], final_loop_vars[2:])

    final_outputs = output_final_ta.pack()
    # Restore some shape information
    final_outputs_size = _state_size_with_prefix(cell.output_size, prefix=[const_time_steps, const_batch_size])
    final_outputs.set_shape(final_outputs_size)

    # Unpack final state if not using state tuples.
    final_state = _packed_state(structure=cell.state_size, state=final_state) if state_is_tuple else final_state[0]
    return (final_outputs, final_state)
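# while_loop carries a flat tuple of loop variables, which is why the tuple
# state is unpacked before the loop and repacked inside _time_step above. A
# tiny plain-Python illustration of that flatten/repack round trip (the helper
# names here are hypothetical, not the snippet's _unpacked_state/_packed_state):
def _unpack_state_py(state):
    # Flatten a (possibly nested) tuple state into a flat tuple of leaves.
    if isinstance(state, tuple):
        return tuple(x for s in state for x in _unpack_state_py(s))
    return (state,)

def _pack_state_py(structure, flat):
    # Rebuild the nested structure from a flat sequence of leaves.
    it = iter(flat)
    def _build(s):
        return tuple(_build(x) for x in s) if isinstance(s, tuple) else next(it)
    return _build(structure)

_s = ((1, 2), 3)
assert _pack_state_py(_s, _unpack_state_py(_s)) == _s  # round trip preserved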
Beispiel #58
def _dynamic_rnn_loop(
    cell, inputs, initial_state, parallel_iterations, swap_memory,
    sequence_length=None):
  """Internal implementation of Dynamic RNN.

  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, depth].
    initial_state: A `Tensor` of shape `[batch_size, depth]`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean.
    sequence_length: (optional) An `int32` `Tensor` of shape `[batch_size]`.

  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, depth]`.
    final_state:
      A `Tensor` of shape `[batch_size, depth]`.

  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
  state = initial_state
  assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

  # Construct an initial output
  input_shape = array_ops.shape(inputs)
  (time_steps, batch_size, _) = array_ops.unpack(input_shape, 3)

  inputs_got_shape = inputs.get_shape().with_rank(3)
  (const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()

  if const_depth is None:
    raise ValueError(
        "Input size (depth of inputs) must be accessible via shape inference, "
        "but saw value None.")

  # Prepare dynamic conditional copying of state & output
  zero_output = array_ops.zeros(
      array_ops.pack([batch_size, cell.output_size]), inputs.dtype)
  if sequence_length is not None:
    min_sequence_length = math_ops.reduce_min(sequence_length)
    max_sequence_length = math_ops.reduce_max(sequence_length)

  time = array_ops.constant(0, dtype=dtypes.int32, name="time")

  with ops.op_scope([], "dynamic_rnn") as scope:
    base_name = scope

  output_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "output")

  input_ta = tensor_array_ops.TensorArray(
      dtype=inputs.dtype, size=time_steps,
      tensor_array_name=base_name + "input")

  input_ta = input_ta.unpack(inputs)

  def _time_step(time, state, output_ta_t):
    """Take a time step of the dynamic RNN.

    Args:
      time: int32 scalar Tensor.
      state: Vector.
      output_ta_t: `TensorArray`, the output with existing flow.

    Returns:
      The tuple (time + 1, new_state, output_ta_t with updated flow).
    """

    input_t = input_ta.read(time)
    # Restore some shape information
    input_t.set_shape([const_batch_size, const_depth])

    call_cell = lambda: cell(input_t, state)

    if sequence_length is not None:
      (output, new_state) = _rnn_step(
          time=time,
          sequence_length=sequence_length,
          min_sequence_length=min_sequence_length,
          max_sequence_length=max_sequence_length,
          zero_output=zero_output,
          state=state,
          call_cell=call_cell,
          skip_conditionals=True)
    else:
      (output, new_state) = call_cell()

    output_ta_t = output_ta_t.write(time, output)

    return (time + 1, new_state, output_ta_t)

  (unused_final_time, final_state, output_final_ta) = control_flow_ops.While(
      cond=lambda time, _1, _2: time < time_steps,
      body=_time_step,
      loop_vars=(time, state, output_ta),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  final_outputs = output_final_ta.pack()
  # Restore some shape information
  final_outputs.set_shape([
      const_time_steps, const_batch_size, cell.output_size])

  return (final_outputs, final_state)