def parse_encoded_spec_from_file(input_path):
    """Returns the tensor data spec stored at a path.

  Args:
    input_path: The path to the TFRecord file which contains the spec.

  Returns:
    `TensorSpec` nested structure parsed from the TFRecord file.
  Raises:
    IOError: File at input path does not exist.
  """
    if not tf.io.gfile.exists(input_path):
        raise IOError('Could not find spec file at %s.' % input_path)
    dataset = tf.data.TFRecordDataset(input_path, buffer_size=1)
    dataset_iterator = eager_utils.dataset_iterator(dataset)
    signature_proto_string = eager_utils.get_next(dataset_iterator)
    if tf.executing_eagerly():
        signature_proto = struct_pb2.StructuredValue.FromString(
            signature_proto_string.numpy())
    else:
        # In non-eager (graph) mode a session must be run to fetch the value.
        with tf.compat.v1.Session() as sess:
            signature_proto_string_value = sess.run(signature_proto_string)
        signature_proto = struct_pb2.StructuredValue.FromString(
            signature_proto_string_value)
    return tensor_spec.from_proto(signature_proto)
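
A minimal usage sketch, assuming a spec file previously written by a companion writer such as `example_encoding_dataset.encode_spec_to_file`; the path below is purely hypothetical.

# Hypothetical path; assumes a spec was serialized there beforehand
# (e.g. by example_encoding_dataset.encode_spec_to_file(spec_path, data_spec)).
spec_path = '/tmp/collect_data.tfrecord.spec'
loaded_spec = parse_encoded_spec_from_file(spec_path)
print(loaded_spec)  # Nested structure of `TensorSpec` objects.
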
    def testIteration(self):
        data = np.arange(100)
        ds = tf.data.Dataset.from_tensor_slices(data)
        itr = eager_utils.dataset_iterator(ds)
        for d in data:
            self.assertEqual(np.array([d]),
                             self.evaluate(eager_utils.get_next(itr)))
    def test_with_dynamic_step_driver(self):
        env = driver_test_utils.PyEnvironmentMock()
        tf_env = tf_py_environment.TFPyEnvironment(env)
        policy = driver_test_utils.TFPolicyMock(tf_env.time_step_spec(),
                                                tf_env.action_spec())

        trajectory_spec = trajectory.from_transition(tf_env.time_step_spec(),
                                                     policy.policy_step_spec,
                                                     tf_env.time_step_spec())

        tfrecord_observer = example_encoding_dataset.TFRecordObserver(
            self.dataset_path, trajectory_spec)
        driver = dynamic_step_driver.DynamicStepDriver(
            tf_env,
            policy,
            observers=[common.function(tfrecord_observer)],
            num_steps=10)
        self.evaluate(tf.compat.v1.global_variables_initializer())

        time_step = self.evaluate(tf_env.reset())
        initial_policy_state = policy.get_initial_state(batch_size=1)
        self.evaluate(
            common.function(driver.run)(time_step, initial_policy_state))
        tfrecord_observer.flush()
        tfrecord_observer.close()

        dataset = example_encoding_dataset.load_tfrecord_dataset(
            [self.dataset_path], buffer_size=2, as_trajectories=True)
        iterator = eager_utils.dataset_iterator(dataset)
        sample = self.evaluate(eager_utils.get_next(iterator))
        self.assertIsInstance(sample, trajectory.Trajectory)
    def test_with_py_driver(self):
        env = driver_test_utils.PyEnvironmentMock()
        policy = driver_test_utils.PyPolicyMock(env.time_step_spec(),
                                                env.action_spec())
        trajectory_spec = trajectory.from_transition(env.time_step_spec(),
                                                     policy.policy_step_spec,
                                                     env.time_step_spec())
        trajectory_spec = tensor_spec.from_spec(trajectory_spec)

        tfrecord_observer = example_encoding_dataset.TFRecordObserver(
            self.dataset_path, trajectory_spec, py_mode=True)

        driver = py_driver.PyDriver(env,
                                    policy, [tfrecord_observer],
                                    max_steps=10)
        time_step = env.reset()
        driver.run(time_step)
        tfrecord_observer.flush()
        tfrecord_observer.close()

        dataset = example_encoding_dataset.load_tfrecord_dataset(
            [self.dataset_path], buffer_size=2, as_trajectories=True)

        iterator = eager_utils.dataset_iterator(dataset)
        sample = self.evaluate(eager_utils.get_next(iterator))
        self.assertIsInstance(sample, trajectory.Trajectory)
    def __init__(self, dataset, reward_distribution, batch_size):
        """Initialize `ClassificationBanditEnvironment`.

    Args:
      dataset: a `tf.data.Dataset` consisting of two `Tensor`s, [inputs, labels]
        where inputs can be of any shape, while labels are integer class labels.
        The label tensor can be of any rank as long as it has 1 element.
      reward_distribution: a `tfd.Distribution` with event_shape
        `[num_classes, num_actions]`. Entry `[i, j]` is the reward for taking
        action `j` for an instance of class `i`.
      batch_size: if `dataset` is batched, this is the size of the batches.
    Raises:
      ValueError: if `reward_distribution` does not have an event shape with
        rank 2.
    """

        # Computing `action_spec`.
        event_shape = reward_distribution.event_shape
        if len(event_shape) != 2:
            raise ValueError(
                'reward_distribution must have event shape of rank 2; '
                'got event shape {}'.format(event_shape))
        _, num_actions = event_shape
        action_spec = tensor_spec.BoundedTensorSpec(shape=(),
                                                    dtype=tf.int32,
                                                    minimum=0,
                                                    maximum=num_actions - 1,
                                                    name='action')
        output_shapes = tf.compat.v1.data.get_output_shapes(dataset)

        # Computing `time_step_spec`.
        if len(output_shapes) != 2:
            raise ValueError(
                'Dataset must have exactly two outputs; got {}'.format(
                    len(output_shapes)))
        context_shape = output_shapes[0]
        context_dtype, lbl_dtype = tf.compat.v1.data.get_output_types(dataset)
        observation_spec = tensor_spec.TensorSpec(shape=context_shape,
                                                  dtype=context_dtype)
        time_step_spec = time_step.time_step_spec(observation_spec)

        super(ClassificationBanditEnvironment,
              self).__init__(action_spec=action_spec,
                             time_step_spec=time_step_spec,
                             batch_size=batch_size)

        self._data_iterator = eager_utils.dataset_iterator(
            dataset.batch(batch_size, drop_remainder=True))
        self._current_label = tf.compat.v2.Variable(
            tf.zeros(batch_size, dtype=lbl_dtype))
        self._previous_label = tf.compat.v2.Variable(
            tf.zeros(batch_size, dtype=lbl_dtype))
        self._reward_distribution = reward_distribution

        reward_means = self._reward_distribution.mean()
        self._optimal_action_table = tf.argmax(
            reward_means, axis=1, output_type=self._action_spec.dtype)
        self._optimal_reward_table = tf.reduce_max(reward_means, axis=1)
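
A short construction sketch for this environment, assuming `tensorflow_probability` is available and using illustrative shapes (two classes, three actions); none of the values below come from the snippet above.

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# Illustrative data: 4-dimensional contexts with binary class labels.
contexts = tf.random.normal([100, 4])
labels = tf.random.uniform([100], maxval=2, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((contexts, labels))

# Deterministic rewards with event_shape [num_classes=2, num_actions=3]:
# only the action equal to the class label is rewarded.
reward_distribution = tfd.Independent(
    tfd.Deterministic(loc=[[1.0, 0.0, 0.0],
                           [0.0, 1.0, 0.0]]),
    reinterpreted_batch_ndims=2)

env = ClassificationBanditEnvironment(
    dataset, reward_distribution, batch_size=8)
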
    def __init__(self,
                 dataset: tf.data.Dataset,
                 reward_distribution: types.Distribution,
                 batch_size: types.Int,
                 label_dtype_cast: Optional[tf.DType] = None,
                 shuffle_buffer_size: Optional[types.Int] = None,
                 repeat_dataset: Optional[bool] = True,
                 prefetch_size: Optional[types.Int] = None,
                 seed: Optional[types.Int] = None):
        """Initialize `ClassificationBanditEnvironment`.

    Args:
      dataset: a `tf.data.Dataset` consisting of two `Tensor`s, [inputs, labels]
        where inputs can be of any shape, while labels are integer class labels.
        The label tensor can be of any rank as long as it has 1 element.
      reward_distribution: a `tfd.Distribution` with event_shape
        `[num_classes, num_actions]`. Entry `[i, j]` is the reward for taking
        action `j` for an instance of class `i`.
      batch_size: if `dataset` is batched, this is the size of the batches.
      label_dtype_cast: if not None, casts dataset labels to this dtype.
      shuffle_buffer_size: If None, do not shuffle.  Otherwise, a shuffle buffer
        of the specified size is used in the environment's `dataset`.
      repeat_dataset: Makes the environment iterate on the `dataset` once
        avoiding `OutOfRangeError:  End of sequence` errors when the environment
        is stepped past the end of the `dataset`.
      prefetch_size: If None, do not prefetch.  Otherwise, a prefetch buffer
        of the specified size is used in the environment's `dataset`.
      seed: Used to make results deterministic.
    Raises:
      ValueError: if `reward_distribution` does not have an event shape with
        rank 2.
    """

        # Computing `action_spec`.
        event_shape = reward_distribution.event_shape
        if len(event_shape) != 2:
            raise ValueError(
                'reward_distribution must have event shape of rank 2; '
                'got event shape {}'.format(event_shape))
        _, num_actions = event_shape
        action_spec = tensor_spec.BoundedTensorSpec(shape=(),
                                                    dtype=tf.int32,
                                                    minimum=0,
                                                    maximum=num_actions - 1,
                                                    name='action')
        output_shapes = tf.compat.v1.data.get_output_shapes(dataset)

        # Computing `time_step_spec`.
        if len(output_shapes) != 2:
            raise ValueError(
                'Dataset must have exactly two outputs; got {}'.format(
                    len(output_shapes)))
        context_shape = output_shapes[0]
        context_dtype, lbl_dtype = tf.compat.v1.data.get_output_types(dataset)
        if label_dtype_cast:
            lbl_dtype = label_dtype_cast
        observation_spec = tensor_spec.TensorSpec(shape=context_shape,
                                                  dtype=context_dtype)
        time_step_spec = time_step.time_step_spec(observation_spec)

        super(ClassificationBanditEnvironment,
              self).__init__(action_spec=action_spec,
                             time_step_spec=time_step_spec,
                             batch_size=batch_size)

        if shuffle_buffer_size:
            dataset = dataset.shuffle(buffer_size=shuffle_buffer_size,
                                      seed=seed,
                                      reshuffle_each_iteration=True)
        if repeat_dataset:
            dataset = dataset.repeat()
        dataset = dataset.batch(batch_size, drop_remainder=True)
        if prefetch_size:
            dataset = dataset.prefetch(prefetch_size)
        self._data_iterator = eager_utils.dataset_iterator(dataset)
        self._current_label = tf.compat.v2.Variable(
            tf.zeros(batch_size, dtype=lbl_dtype))
        self._previous_label = tf.compat.v2.Variable(
            tf.zeros(batch_size, dtype=lbl_dtype))
        self._reward_distribution = reward_distribution
        self._label_dtype = lbl_dtype

        reward_means = self._reward_distribution.mean()
        self._optimal_action_table = tf.argmax(
            reward_means, axis=1, output_type=self._action_spec.dtype)
        self._optimal_reward_table = tf.reduce_max(reward_means, axis=1)
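
A follow-up sketch for this variant, reusing the illustrative `dataset` and `reward_distribution` from the sketch above to show the optional pipeline arguments; all values remain assumptions.

# Reuses `dataset` and `reward_distribution` from the earlier sketch.
env = ClassificationBanditEnvironment(
    dataset,
    reward_distribution,
    batch_size=8,
    label_dtype_cast=tf.int32,   # cast labels if the dataset stores another integer dtype
    shuffle_buffer_size=100,     # shuffle examples before batching
    repeat_dataset=True,         # loop the dataset to avoid end-of-sequence errors
    prefetch_size=2,
    seed=12345)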