def Counter(start=0, step=1, dtype=dtypes.int64):
  """Creates a `Dataset` that counts from `start` in steps of size `step`.

  For example:

  ```python
  Dataset.count() == [0, 1, 2, ...)
  Dataset.count(2) == [2, 3, ...)
  Dataset.count(2, 5) == [2, 7, 12, ...)
  Dataset.count(0, -1) == [0, -1, -2, ...)
  Dataset.count(10, -1) == [10, 9, ...)
  ```

  Args:
    start: (Optional.) The starting value for the counter. Defaults to 0.
    step: (Optional.) The step size for the counter. Defaults to 1.
    dtype: (Optional.) The data type for counter elements. Defaults to
      `tf.int64`.

  Returns:
    A `Dataset` of scalar `dtype` elements.
  """
  with ops.name_scope("counter"):
    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
        scan_ops.scan(start, lambda state, _: (state + step, state)))

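# A minimal usage sketch of `Counter`, assuming TF 1.x graph mode with
# `import tensorflow as tf` available; `take(5)` bounds the otherwise
# infinite dataset. The helper name is hypothetical.
def _counter_usage_sketch():
  dataset = Counter(start=2, step=3).take(5)
  iterator = dataset.make_one_shot_iterator()
  next_element = iterator.get_next()
  with tf.Session() as sess:
    for expected in [2, 5, 8, 11, 14]:
      assert sess.run(next_element) == expected
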
def testChangingStateShape(self):
  # Test the fixed-point shape invariant calculations: start with
  # initial values with known shapes, and use a scan function that
  # changes the size of the state on each element.
  def _scan_fn(state, input_value):
    # Statically known rank, but dynamic length.
    ret_longer_vector = array_ops.concat([state[0], state[0]], 0)
    # Statically unknown rank.
    ret_larger_rank = array_ops.expand_dims(state[1], 0)
    return (ret_longer_vector, ret_larger_rank), (state, input_value)

  dataset = dataset_ops.Dataset.from_tensors(0).repeat(5).apply(
      scan_ops.scan(([0], 1), _scan_fn))
  self.assertEqual([None], dataset.output_shapes[0][0].as_list())
  self.assertIs(None, dataset.output_shapes[0][1].ndims)
  self.assertEqual([], dataset.output_shapes[1].as_list())

  iterator = dataset.make_one_shot_iterator()
  next_element = iterator.get_next()

  with self.test_session() as sess:
    for i in range(5):
      (longer_vector_val, larger_rank_val), _ = sess.run(next_element)
      self.assertAllEqual([0] * (2**i), longer_vector_val)
      self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)

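# A hedged illustration of the relaxation the test above checks, assuming
# `import tensorflow as tf`: when successive scan steps produce state
# shapes that differ, the inferred invariant is the most specific shape
# compatible with all of them, so [1] vs. [2] relaxes to [None], and
# differing ranks relax to an unknown rank. The helper name is
# hypothetical.
def _shape_relaxation_sketch():
  step0 = tf.TensorShape([1])
  step1 = tf.TensorShape([2])
  print(step0.most_specific_compatible_shape(step1))  # (?,)
  rank0 = tf.TensorShape([])
  rank1 = tf.TensorShape([None])
  print(rank0.most_specific_compatible_shape(rank1).ndims)  # None
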
def Counter(start=0, step=1, dtype=dtypes.int64):
  """Creates a `Dataset` of a `step`-separated count starting from `start`.

  For example:

  ```python
  Dataset.count() == [0, 1, 2, ...)
  Dataset.count(2) == [2, 3, ...)
  Dataset.count(2, 5) == [2, 7, 12, ...)
  Dataset.count(0, -1) == [0, -1, -2, ...)
  Dataset.count(10, -1) == [10, 9, ...)
  ```

  Args:
    start: starting value for count.
    step: step size.
    dtype: counter data type.

  Returns:
    A `Dataset` of scalar elements.
  """
  with ops.name_scope("counter"):
    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
        scan_ops.scan(start, lambda state, _: (state + step, state)))

def testChangingStateShape(self):
  # Test the fixed-point shape invariant calculations: start with
  # initial values with known shapes, and use a scan function that
  # changes the size of the state on each element.
  def _scan_fn(state, input_value):
    # Statically known rank, but dynamic length.
    ret_longer_vector = array_ops.concat([state[0], state[0]], 0)
    # Statically unknown rank.
    ret_larger_rank = array_ops.expand_dims(state[1], 0)
    return (ret_longer_vector, ret_larger_rank), (state, input_value)

  dataset = dataset_ops.Dataset.from_tensors(0).repeat(5).apply(
      scan_ops.scan(([0], 1), _scan_fn))
  self.assertEqual([None], dataset.output_shapes[0][0].as_list())
  self.assertIs(None, dataset.output_shapes[0][1].ndims)
  self.assertEqual([], dataset.output_shapes[1].as_list())

  iterator = dataset.make_one_shot_iterator()
  next_element = iterator.get_next()

  with self.cached_session() as sess:
    for i in range(5):
      (longer_vector_val, larger_rank_val), _ = sess.run(next_element)
      self.assertAllEqual([0] * (2**i), longer_vector_val)
      self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)

def _apply_fn(dataset):
  """Function from `Dataset` to `Dataset` that applies the transformation."""
  dist_estimation_batch_size = 32
  target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
  class_values_ds = dataset.map(class_func)
  if initial_dist is not None:
    initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")

    acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                  target_dist_t)

    initial_dist_ds = dataset_ops.Dataset.from_tensors(
        initial_dist_t).repeat()
    acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
        acceptance_dist).repeat()
  else:
    num_classes = (target_dist_t.shape[0].value or
                   array_ops.shape(target_dist_t)[0])
    smoothing_constant = 10
    initial_examples_per_class_seen = array_ops.fill(
        [num_classes], np.int64(smoothing_constant))

    def update_estimate_and_tile(num_examples_per_class_seen, c):
      updated_examples_per_class_seen, dist = _estimate_data_distribution(
          c, num_examples_per_class_seen)
      tiled_dist = array_ops.tile(
          array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
      return updated_examples_per_class_seen, tiled_dist

    initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                       .apply(scan_ops.scan(initial_examples_per_class_seen,
                                            update_estimate_and_tile))
                       .apply(batching.unbatch()))
    acceptance_dist_ds = initial_dist_ds.map(
        lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
    proportion_rejected = math_ops.reduce_sum(
        (1 - accept_dist) * initial_dist)
    return control_flow_ops.cond(
        math_ops.less(proportion_rejected, .5),
        lambda: accept_dist,
        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
            accept_dist, [proportion_rejected, initial_dist, accept_dist],
            message="Proportion of examples rejected by sampler is high: ",
            summarize=100,
            first_n=10))

  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                 initial_dist_ds))
                        .map(maybe_warn_on_large_rejection))

  def _gather_and_copy(class_val, acceptance_prob, data):
    return (class_val, array_ops.gather(acceptance_prob, class_val), data)

  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
  filtered_ds = (
      current_probabilities_and_class_and_data_ds
      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
  return filtered_ds.map(lambda class_value, _, data: (class_value, data))

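# Hypothetical NumPy sketch of the math `_calculate_acceptance_probs`
# (not shown here) is assumed to implement: accept a class-c example with
# probability proportional to target_dist[c] / initial_dist[c], scaled so
# the class with the largest ratio is always accepted. The helper name is
# an illustration, not the library's API.
def _acceptance_probs_sketch(initial_dist, target_dist):
  ratio = np.asarray(target_dist) / np.asarray(initial_dist)
  return ratio / np.max(ratio)

# _acceptance_probs_sketch([0.5, 0.3, 0.2], [1 / 3, 1 / 3, 1 / 3])
# -> [0.4, 0.666..., 1.0]
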
def _apply_fn(dataset):
  """Function from `Dataset` to `Dataset` that applies the transformation."""
  dist_estimation_batch_size = 32
  target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
  class_values_ds = dataset.map(class_func)
  if initial_dist is not None:
    initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")

    acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
                                                  target_dist_t)

    initial_dist_ds = dataset_ops.Dataset.from_tensors(
        initial_dist_t).repeat()
    acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
        acceptance_dist).repeat()
  else:
    num_classes = (target_dist_t.shape[0].value or
                   array_ops.shape(target_dist_t)[0])
    smoothing_constant = 10
    initial_examples_per_class_seen = array_ops.fill(
        [num_classes], np.int64(smoothing_constant))

    def update_estimate_and_tile(num_examples_per_class_seen, c):
      updated_examples_per_class_seen, dist = _estimate_data_distribution(
          c, num_examples_per_class_seen)
      tiled_dist = array_ops.tile(
          array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
      return updated_examples_per_class_seen, tiled_dist

    initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                       .apply(scan_ops.scan(initial_examples_per_class_seen,
                                            update_estimate_and_tile))
                       .apply(batching.unbatch()))
    acceptance_dist_ds = initial_dist_ds.map(
        lambda initial: _calculate_acceptance_probs(initial, target_dist_t))

  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
    proportion_rejected = math_ops.reduce_sum(
        (1 - accept_dist) * initial_dist)
    return control_flow_ops.cond(
        math_ops.less(proportion_rejected, .5),
        lambda: accept_dist,
        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
            accept_dist, [proportion_rejected, initial_dist, accept_dist],
            message="Proportion of examples rejected by sampler is high: ",
            summarize=100,
            first_n=10))

  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
                                                 initial_dist_ds))
                        .map(maybe_warn_on_large_rejection))

  current_probabilities_ds = dataset_ops.Dataset.zip(
      (acceptance_dist_ds, class_values_ds)).map(array_ops.gather)
  filtered_ds = (
      dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds,
                               dataset))
      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
  return filtered_ds.map(lambda class_value, _, data: (class_value, data))

def testIncorrectStateType(self):

  def _scan_fn(state, _):
    return constant_op.constant(1, dtype=dtypes.int64), state

  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaisesRegexp(
      TypeError,
      "The element types for the new state must match the initial state."):
    dataset.apply(
        scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn))

def testIncorrectReturnType(self):

  def _scan_fn(unused_state, unused_input_value):
    return constant_op.constant(1, dtype=dtypes.int64)

  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaisesRegexp(
      TypeError,
      "The scan function must return a pair comprising the new state and the "
      "output value."):
    dataset.apply(
        scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn))

def testFibonacci(self):
  iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
      scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]))
  ).make_one_shot_iterator()
  next_element = iterator.get_next()
  with self.test_session() as sess:
    self.assertEqual(1, sess.run(next_element))
    self.assertEqual(1, sess.run(next_element))
    self.assertEqual(2, sess.run(next_element))
    self.assertEqual(3, sess.run(next_element))
    self.assertEqual(5, sess.run(next_element))
    self.assertEqual(8, sess.run(next_element))

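# Pure-Python reference for the scan semantics exercised above: thread a
# state through the input elements, emitting one output per element. The
# helper name is hypothetical.
def _scan_sketch(initial_state, scan_fn, elements):
  state = initial_state
  for element in elements:
    state, output = scan_fn(state, element)
    yield output

# list(_scan_sketch([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]),
#                   [1] * 6)) -> [1, 1, 2, 3, 5, 8]
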
def testFibonacci(self):
  iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
      scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]))
  ).make_one_shot_iterator()

  if context.executing_eagerly():
    next_element = iterator.get_next
  else:
    get_next = iterator.get_next()
    next_element = lambda: get_next

  self.assertEqual(1, self.evaluate(next_element()))
  self.assertEqual(1, self.evaluate(next_element()))
  self.assertEqual(2, self.evaluate(next_element()))
  self.assertEqual(3, self.evaluate(next_element()))
  self.assertEqual(5, self.evaluate(next_element()))
  self.assertEqual(8, self.evaluate(next_element()))

def _estimate_initial_dist_ds(
    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
    smoothing_constant=10):
  num_classes = (target_dist_t.shape[0].value or
                 array_ops.shape(target_dist_t)[0])
  initial_examples_per_class_seen = array_ops.fill(
      [num_classes], np.int64(smoothing_constant))

  def update_estimate_and_tile(num_examples_per_class_seen, c):
    updated_examples_per_class_seen, dist = _estimate_data_distribution(
        c, num_examples_per_class_seen)
    tiled_dist = array_ops.tile(
        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
    return updated_examples_per_class_seen, tiled_dist

  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
                     .apply(scan_ops.scan(initial_examples_per_class_seen,
                                          update_estimate_and_tile))
                     .apply(batching.unbatch()))

  return initial_dist_ds

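# Hedged NumPy sketch of the running estimate `_estimate_data_distribution`
# (not shown here) is assumed to maintain: per-class counts start at the
# smoothing constant, pulling early estimates toward uniform, and each
# observed class value increments its class count. The helper name is an
# illustration, not the library's API.
def _estimate_sketch(c, num_examples_per_class_seen):
  counts = num_examples_per_class_seen.copy()
  counts[c] += 1
  return counts, counts / counts.sum()

# counts = np.full(3, 10, dtype=np.int64)  # smoothing_constant = 10
# after observing classes [0, 0, 1]: counts -> [12, 11, 10],
# dist -> [12/33, 11/33, 10/33]
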
def _build_dataset(self, num_elements):
  return dataset_ops.Dataset.from_tensors(1).repeat(num_elements).apply(
      scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])))

def _counting_dataset(self, start, scan_fn):
  return dataset_ops.Dataset.from_tensors(0).repeat().apply(
      scan_ops.scan(start, scan_fn))

def _count(self, start, step):
  return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
      scan_ops.scan(start, lambda state, _: (state + step, state)))