Example no. 1
0
    def setUpClass(cls):
        """Compile the Navigation-v2 SCG model once and prepare reparameterization
        noise inputs shared by all tests in this case."""
        # fixture hyper-parameters
        cls.horizon = 20
        cls.batch_size = 16

        # stochastic-computation-graph compiler, switched to batch mode
        compiler = rddlgym.make('Navigation-v2', mode=rddlgym.SCG)
        compiler.batch_mode_on()
        cls.compiler = compiler

        # one (name, shape) reparameterization entry per CPF in the model
        cls.noise_shapes = get_cpfs_reparameterization(compiler.rddl)

        # noise variables must live in the compiler's own graph
        with compiler.graph.as_default():
            noise_vars = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.noise_variables = noise_vars
            cls.inputs, cls.encoding = utils.encode_noise_as_inputs(noise_vars)
Example no. 2
0
    def setUpClass(cls):
        """Build the full fixture for ReparameterizationCell tests: compiler,
        initial state, policy network, noise variables, RNN inputs, and cell.

        Everything is stored on the class so all tests share one compiled graph.
        """

        # hyper-parameters
        cls.horizon = 40
        cls.batch_size = 16

        # rddl: compile the Navigation-v2 AST model with a fixed batch size
        rddl = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        cls.compiler = rddl2tf.compilers.ReparameterizationCompiler(
            rddl, batch_size=cls.batch_size)
        cls.compiler.init()

        # initial state (batched tensors from the compiled model)
        cls.initial_state = cls.compiler.initial_state()

        # default action
        cls.default_action = cls.compiler.default_action()

        # policy: 2-layer feedforward net with input layer normalization
        cls.policy = FeedforwardPolicy(cls.compiler, {
            'layers': [64, 64],
            'activation': 'relu',
            'input_layer_norm': True
        })
        cls.policy.build()

        # all graph construction below must happen in the compiler's graph
        with cls.compiler.graph.as_default():

            # reparameterization: per-CPF noise shapes -> variables -> flat inputs
            cls.noise_shapes = cls.compiler.get_cpfs_reparameterization()
            cls.noise_variables = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(
                cls.noise_variables)

            # timestep: scalar horizon replicated to shape (batch_size, 1)
            cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
            cls.timestep = tf.expand_dims(cls.timestep, -1)
            cls.timestep = tf.stack([cls.timestep] * cls.batch_size)

            # inputs: timestep concatenated with the first-step noise slice
            # (noise_inputs is indexed [batch, time, features] here — the
            # [:, 0, :] slice takes timestep 0; confirm against utils)
            cls.inputs = tf.concat([cls.timestep, cls.noise_inputs[:, 0, :]],
                                   axis=1)

        # cell: the unit under test, configured with the noise encoding
        cls.config = {'encoding': cls.encoding}
        cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)
Example no. 3
0
    def __call__(
            self, initial_state: Sequence[tf.Tensor],
            horizon: int) -> Tuple[Trajectory, Sequence[tf.Tensor], tf.Tensor]:
        '''Samples a batch state-action-reward trajectory with given
        `initial_state` and `horizon`, and returns the corresponding total reward.

        Args:
            initial_state (Sequence[tf.Tensor]): The initial state tensors.
            horizon (int): The number of timesteps in each sampled trajectory.

        Returns:
            Tuple[Trajectory, Sequence[tf.Tensor], tf.Tensor]: A triple of (namedtuple, tensors, tensor)
            representing the trajectory, final state, and total reward.
        '''
        # batch size is taken from the leading dimension of the first state tensor
        batch_size = int(initial_state[0].shape[0])

        with self.graph.as_default():

            with tf.variable_scope('reparameterization'):
                # sample noise variables for every CPF and flatten them into
                # a single input tensor; the encoding tells the cell how to
                # slice the noise back out per fluent
                self.noise_map = utils.get_noise_variables(
                    self.reparameterization_map, batch_size, horizon)
                self.noise, encoding = utils.encode_noise_as_inputs(
                    self.noise_map)
                self.cell.config['encoding'] = encoding

            with tf.name_scope('inputs'):
                # BUG FIX: the original wrote
                #     self.timesteps = self.timesteps(horizon, batch_size)
                # which shadows the `timesteps` method with the resulting
                # tensor, so any second call of __call__ crashed with
                # "Tensor object is not callable". Looking the method up on
                # the class keeps repeated calls working, while the attribute
                # assignment is preserved for backward compatibility.
                self.timesteps = type(self).timesteps(self, horizon, batch_size)
                # inputs per step: [timestep, flattened noise], concatenated
                # along the feature axis (batch, time, features)
                self.inputs = tf.concat([self.timesteps, self.noise], axis=2)

            with tf.name_scope('trajectory'):
                # unroll the cell over the horizon
                outputs, final_state = tf.nn.dynamic_rnn(
                    self.cell,
                    self.inputs,
                    initial_state=initial_state,
                    dtype=tf.float32)

                # each fluent is a (tensor, ...) pair; keep only the tensor
                states = tuple(fluent[0] for fluent in outputs[0])
                actions = tuple(fluent[0] for fluent in outputs[1])
                interms = tuple(fluent[0] for fluent in outputs[2])
                rewards = outputs[3]
                trajectory = Trajectory(states, actions, interms, rewards)

            with tf.name_scope('total_reward'):
                # sum per-step rewards over the time axis -> (batch_size,)
                total_reward = tf.reduce_sum(tf.squeeze(trajectory.rewards),
                                             axis=1)

        return (trajectory, final_state, total_reward)
Example no. 4
0
    def setUpClass(cls):
        """Compile Navigation-v2 with a ReparameterizationCompiler and build
        the shared noise variables and encoded inputs for this test case."""
        # fixture hyper-parameters
        cls.batch_size = 16
        cls.horizon = 20

        # compile the AST model with a fixed batch size
        model = rddlgym.make('Navigation-v2', mode=rddlgym.AST)
        compiler = rddl2tf.compilers.ReparameterizationCompiler(
            model, batch_size=cls.batch_size)
        compiler.init()
        cls.compiler = compiler

        # per-CPF reparameterization (name, shape) entries
        cls.noise_shapes = compiler.get_cpfs_reparameterization()

        # noise variables are created inside the compiler's graph
        with compiler.graph.as_default():
            noise_vars = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.noise_variables = noise_vars
            cls.inputs, cls.encoding = utils.encode_noise_as_inputs(noise_vars)