Example #1
def Counter(start=0, step=1, dtype=dtypes.int64):
  """Creates a `Dataset` that counts from `start` in steps of size `step`.

  For example:

  ```python
  Dataset.count() == [0, 1, 2, ...)
  Dataset.count(2) == [2, 3, ...)
  Dataset.count(2, 5) == [2, 7, 12, ...)
  Dataset.count(0, -1) == [0, -1, -2, ...)
  Dataset.count(10, -1) == [10, 9, ...)
  ```

  Args:
    start: (Optional.) The starting value for the counter. Defaults to 0.
    step: (Optional.) The step size for the counter. Defaults to 1.
    dtype: (Optional.) The data type for counter elements. Defaults to
      `tf.int64`.

  Returns:
    A `Dataset` of scalar `dtype` elements.
  """
  with ops.name_scope("counter"):
    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
        scan_ops.scan(start, lambda state, _: (state + step, state)))
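A minimal usage sketch, assuming a TF 1.x graph-mode environment where this function is available as `tf.contrib.data.Counter` (where it shipped):

```python
# A minimal usage sketch; assumes TF 1.x graph mode and that the Counter
# above is importable as tf.contrib.data.Counter.
import tensorflow as tf

counter = tf.contrib.data.Counter(start=2, step=5)  # yields 2, 7, 12, ...
iterator = counter.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
  print(sess.run(next_element))  # 2
  print(sess.run(next_element))  # 7
  print(sess.run(next_element))  # 12
```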
Example #2
  def testChangingStateShape(self):
    # Test the fixed-point shape invariant calculations: start with
    # initial values with known shapes, and use a scan function that
    # changes the size of the state on each element.
    def _scan_fn(state, input_value):
      # Statically known rank, but dynamic length.
      ret_longer_vector = array_ops.concat([state[0], state[0]], 0)
      # Statically unknown rank.
      ret_larger_rank = array_ops.expand_dims(state[1], 0)
      return (ret_longer_vector, ret_larger_rank), (state, input_value)

    dataset = dataset_ops.Dataset.from_tensors(0).repeat(5).apply(
        scan_ops.scan(([0], 1), _scan_fn))
    self.assertEqual([None], dataset.output_shapes[0][0].as_list())
    self.assertIs(None, dataset.output_shapes[0][1].ndims)
    self.assertEqual([], dataset.output_shapes[1].as_list())

    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      for i in range(5):
        (longer_vector_val, larger_rank_val), _ = sess.run(next_element)
        self.assertAllEqual([0] * (2**i), longer_vector_val)
        self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
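The fixed-point calculation the test's comment refers to can be sketched in plain Python (a hypothetical illustration of the merge rule, not TensorFlow's actual implementation): each inferred state shape is merged with the shape the scan function produces, and any dimension or rank that disagrees is relaxed to unknown.

```python
def relax(old, new):
  # Hypothetical sketch of the fixed-point merge: shapes are lists of
  # dimension sizes, None stands for an unknown dimension, and a wholly
  # unknown rank is represented by the value None itself.
  if old is None or new is None or len(old) != len(new):
    return None  # ranks disagree (or are already unknown): unknown rank
  return [o if o == n else None for o, n in zip(old, new)]

# state[0]: concat doubles the vector, so [1] merged with [2] gives [None].
print(relax([1], [2]))  # [None]
# state[1]: expand_dims raises the rank, so [] merged with [1] gives None.
print(relax([], [1]))   # None
```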
Example #3
def Counter(start=0, step=1, dtype=dtypes.int64):
  """Creates a `Dataset` of a `step`-separated count startin from `start`.

  For example:

  ```python
  Dataset.count() == [0, 1, 2, ...)
  Dataset.count(2) == [2, 3, ...)
  Dataset.count(2, 5) == [2, 7, 12, ...)
  Dataset.count(0, -1) == [0, -1, -2, ...)
  Dataset.count(10, -1) == [10, 9, ...)
  ```

  Args:
    start: starting value for count.
    step: step size.
    dtype: counter data type.

  Returns:
    A `Dataset` of scalar elements.
  """
  with ops.name_scope("counter"):
    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
        scan_ops.scan(start, lambda state, _: (state + step, state)))
Example #4
  def testChangingStateShape(self):
    # Test the fixed-point shape invariant calculations: start with
    # initial values with known shapes, and use a scan function that
    # changes the size of the state on each element.
    def _scan_fn(state, input_value):
      # Statically known rank, but dynamic length.
      ret_longer_vector = array_ops.concat([state[0], state[0]], 0)
      # Statically unknown rank.
      ret_larger_rank = array_ops.expand_dims(state[1], 0)
      return (ret_longer_vector, ret_larger_rank), (state, input_value)

    dataset = dataset_ops.Dataset.from_tensors(0).repeat(5).apply(
        scan_ops.scan(([0], 1), _scan_fn))
    self.assertEqual([None], dataset.output_shapes[0][0].as_list())
    self.assertIs(None, dataset.output_shapes[0][1].ndims)
    self.assertEqual([], dataset.output_shapes[1].as_list())

    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.cached_session() as sess:
      for i in range(5):
        (longer_vector_val, larger_rank_val), _ = sess.run(next_element)
        self.assertAllEqual([0] * (2**i), longer_vector_val)
        self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
Example #5
def Counter(start=0, step=1, dtype=dtypes.int64):
    """Creates a `Dataset` that counts from `start` in steps of size `step`.

  For example:

  ```python
  Dataset.count() == [0, 1, 2, ...)
  Dataset.count(2) == [2, 3, ...)
  Dataset.count(2, 5) == [2, 7, 12, ...)
  Dataset.count(0, -1) == [0, -1, -2, ...)
  Dataset.count(10, -1) == [10, 9, ...)
  ```

  Args:
    start: (Optional.) The starting value for the counter. Defaults to 0.
    step: (Optional.) The step size for the counter. Defaults to 1.
    dtype: (Optional.) The data type for counter elements. Defaults to
      `tf.int64`.

  Returns:
    A `Dataset` of scalar `dtype` elements.
  """
    with ops.name_scope("counter"):
        start = ops.convert_to_tensor(start, dtype=dtype, name="start")
        step = ops.convert_to_tensor(step, dtype=dtype, name="step")
        return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
            scan_ops.scan(start, lambda state, _: (state + step, state)))
Example #6
  def _apply_fn(dataset):
    """Function from `Dataset` to `Dataset` that applies the transformation."""
    dist_estimation_batch_size = 32
    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
    class_values_ds = dataset.map(class_func)
    if initial_dist is not None:
      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
      acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                    target_dist_t)
      initial_dist_ds = dataset_ops.Dataset.from_tensors(
          initial_dist_t).repeat()
      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
          acceptance_dist).repeat()
    else:
      num_classes = (target_dist_t.shape[0].value or
                     array_ops.shape(target_dist_t)[0])
      smoothing_constant = 10
      initial_examples_per_class_seen = array_ops.fill(
          [num_classes], np.int64(smoothing_constant))

      def update_estimate_and_tile(num_examples_per_class_seen, c):
        updated_examples_per_class_seen, dist = _estimate_data_distribution(
            c, num_examples_per_class_seen)
        tiled_dist = array_ops.tile(
            array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
        return updated_examples_per_class_seen, tiled_dist

      initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                         .apply(scan_ops.scan(initial_examples_per_class_seen,
                                              update_estimate_and_tile))
                         .apply(batching.unbatch()))
      acceptance_dist_ds = initial_dist_ds.map(
          lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

    def maybe_warn_on_large_rejection(accept_dist, initial_dist):
      proportion_rejected = math_ops.reduce_sum(
          (1 - accept_dist) * initial_dist)
      return control_flow_ops.cond(
          math_ops.less(proportion_rejected, .5),
          lambda: accept_dist,
          lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
              accept_dist, [proportion_rejected, initial_dist, accept_dist],
              message="Proportion of examples rejected by sampler is high: ",
              summarize=100,
              first_n=10))

    acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                   initial_dist_ds))
                          .map(maybe_warn_on_large_rejection))

    def _gather_and_copy(class_val, acceptance_prob, data):
      return (class_val, array_ops.gather(acceptance_prob, class_val), data)
    current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
        (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
    filtered_ds = (
        current_probabilities_and_class_and_data_ds
        .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
    return filtered_ds.map(lambda class_value, _, data: (class_value, data))
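`_calculate_acceptance_probs` itself is not shown in these snippets; below is a plausible NumPy sketch of the rejection-sampling formula it is assumed to implement, accepting class `i` with probability `(t_i / p_i) / max_j(t_j / p_j)` so the accepted stream follows the target distribution `t` while rejecting as few examples as possible.

```python
import numpy as np

def calculate_acceptance_probs(initial_dist, target_dist):
  # Hypothetical sketch of the math assumed to back
  # `_calculate_acceptance_probs`: per-class acceptance probability is the
  # target/initial ratio, normalized by its maximum over classes.
  ratio = np.asarray(target_dist, float) / np.asarray(initial_dist, float)
  return ratio / ratio.max()

# With a 90/10 input skew and a 50/50 target, the majority class is
# accepted roughly one time in nine.
print(calculate_acceptance_probs([0.9, 0.1], [0.5, 0.5]))  # [0.111..., 1.0]
```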
Example #7
  def _apply_fn(dataset):
    """Function from `Dataset` to `Dataset` that applies the transformation."""
    dist_estimation_batch_size = 32
    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
    class_values_ds = dataset.map(class_func)
    if initial_dist is not None:
      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
      acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                    target_dist_t)
      initial_dist_ds = dataset_ops.Dataset.from_tensors(
          initial_dist_t).repeat()
      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
          acceptance_dist).repeat()
    else:
      num_classes = (target_dist_t.shape[0].value or
                     array_ops.shape(target_dist_t)[0])
      smoothing_constant = 10
      initial_examples_per_class_seen = array_ops.fill(
          [num_classes], np.int64(smoothing_constant))

      def update_estimate_and_tile(num_examples_per_class_seen, c):
        updated_examples_per_class_seen, dist = _estimate_data_distribution(
            c, num_examples_per_class_seen)
        tiled_dist = array_ops.tile(
            array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
        return updated_examples_per_class_seen, tiled_dist

      initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                         .apply(scan_ops.scan(initial_examples_per_class_seen,
                                              update_estimate_and_tile))
                         .apply(batching.unbatch()))
      acceptance_dist_ds = initial_dist_ds.map(
          lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

    def maybe_warn_on_large_rejection(accept_dist, initial_dist):
      proportion_rejected = math_ops.reduce_sum(
          (1 - accept_dist) * initial_dist)
      return control_flow_ops.cond(
          math_ops.less(proportion_rejected, .5),
          lambda: accept_dist,
          lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
              accept_dist, [proportion_rejected, initial_dist, accept_dist],
              message="Proportion of examples rejected by sampler is high: ",
              summarize=100,
              first_n=10))

    acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                   initial_dist_ds))
                          .map(maybe_warn_on_large_rejection))

    current_probabilities_ds = dataset_ops.Dataset.zip(
        (acceptance_dist_ds, class_values_ds)).map(array_ops.gather)
    filtered_ds = (
        dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds,
                                 dataset))
        .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
    return filtered_ds.map(lambda class_value, _, data: (class_value, data))
Example #8
  def testIncorrectStateType(self):

    def _scan_fn(state, _):
      return constant_op.constant(1, dtype=dtypes.int64), state

    dataset = dataset_ops.Dataset.range(10)
    with self.assertRaisesRegexp(
        TypeError,
        "The element types for the new state must match the initial state."):
      dataset.apply(
          scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn))
Example #10
  def testIncorrectReturnType(self):

    def _scan_fn(unused_state, unused_input_value):
      return constant_op.constant(1, dtype=dtypes.int64)

    dataset = dataset_ops.Dataset.range(10)
    with self.assertRaisesRegexp(
        TypeError,
        "The scan function must return a pair comprising the new state and the "
        "output value."):
      dataset.apply(
          scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn))
Example #12
  def testFibonacci(self):
    iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
        scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]))
    ).make_one_shot_iterator()
    next_element = iterator.get_next()

    with self.test_session() as sess:
      self.assertEqual(1, sess.run(next_element))
      self.assertEqual(1, sess.run(next_element))
      self.assertEqual(2, sess.run(next_element))
      self.assertEqual(3, sess.run(next_element))
      self.assertEqual(5, sess.run(next_element))
      self.assertEqual(8, sess.run(next_element))
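The Fibonacci test makes the semantics of `scan_ops.scan` easy to state: the scan threads a state value through the function and emits one output per input element. A minimal pure-Python analogue (a hypothetical helper, not a TensorFlow API) of that behavior:

```python
def py_scan(initial_state, scan_fn, inputs):
  # Pure-Python analogue of scan_ops.scan: thread `state` through
  # `scan_fn`, yielding one output element per input element.
  state = initial_state
  for x in inputs:
    state, output = scan_fn(state, x)
    yield output

# Reproduces the Fibonacci test above on a plain iterable.
fib = py_scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]), range(6))
print(list(fib))  # [1, 1, 2, 3, 5, 8]
```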
Example #13
  def testFibonacci(self):
    iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
        scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]))
    ).make_one_shot_iterator()

    if context.executing_eagerly():
      next_element = iterator.get_next
    else:
      get_next = iterator.get_next()
      next_element = lambda: get_next

    self.assertEqual(1, self.evaluate(next_element()))
    self.assertEqual(1, self.evaluate(next_element()))
    self.assertEqual(2, self.evaluate(next_element()))
    self.assertEqual(3, self.evaluate(next_element()))
    self.assertEqual(5, self.evaluate(next_element()))
    self.assertEqual(8, self.evaluate(next_element()))
Example #15
def _estimate_initial_dist_ds(
    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
    smoothing_constant=10):
  num_classes = (target_dist_t.shape[0].value or
                 array_ops.shape(target_dist_t)[0])
  initial_examples_per_class_seen = array_ops.fill(
      [num_classes], np.int64(smoothing_constant))

  def update_estimate_and_tile(num_examples_per_class_seen, c):
    updated_examples_per_class_seen, dist = _estimate_data_distribution(
        c, num_examples_per_class_seen)
    tiled_dist = array_ops.tile(
        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
    return updated_examples_per_class_seen, tiled_dist

  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                     .apply(scan_ops.scan(initial_examples_per_class_seen,
                                          update_estimate_and_tile))
                     .apply(batching.unbatch()))

  return initial_dist_ds
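`_estimate_data_distribution` is likewise not shown in these snippets; below is a plausible NumPy sketch of the streaming estimate it is assumed to maintain (the helper and its exact behavior are assumptions based on the surrounding code).

```python
import numpy as np

def estimate_data_distribution(class_ids, num_examples_per_class_seen):
  # Hypothetical sketch: bump the per-class counts with the classes seen in
  # this batch, then return the updated counts together with their
  # normalization as the running estimate of the data distribution.
  num_classes = len(num_examples_per_class_seen)
  counts = num_examples_per_class_seen + np.bincount(
      class_ids, minlength=num_classes)
  return counts, counts / counts.sum()

# Counts start at the smoothing constant (10 above) so early estimates are
# never zero for an as-yet-unseen class.
counts = np.full([3], 10, dtype=np.int64)
counts, dist = estimate_data_distribution([0, 0, 2], counts)
print(counts)  # [12 10 11]
print(dist)    # [0.3636... 0.3030... 0.3333...]
```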
Example #16
  def _build_dataset(self, num_elements):
    return dataset_ops.Dataset.from_tensors(1).repeat(num_elements).apply(
        scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])))
Example #17
  def _counting_dataset(self, start, scan_fn):
    return dataset_ops.Dataset.from_tensors(0).repeat().apply(
        scan_ops.scan(start, scan_fn))
Example #18
  def _counting_dataset(self, start, scan_fn):
    return dataset_ops.Dataset.from_tensors(0).repeat().apply(
        scan_ops.scan(start, scan_fn))
Example #19
  def _count(self, start, step):
    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
        scan_ops.scan(start, lambda state, _: (state + step, state)))