Example #1
def _dummy_keras_model_fixture(number_input_tensor_specs):
    input_specs = [
        TensorSpec(shape=(4, ), dtype=tf.float64, name=f"input_spec_{i}")
        for i in range(number_input_tensor_specs)
    ]
    output_layer, input_tensors = create_concatenated_inputs(input_specs, "")
    return tf.keras.Model(inputs=input_tensors, outputs=output_layer)
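
A minimal usage sketch of the fixture above (hypothetical; it assumes create_concatenated_inputs builds one Keras Input per spec and concatenates them into a single output tensor):

# Hypothetical usage of the fixture above; number_input_tensor_specs is the
# number of (4,)-shaped float64 inputs the resulting model accepts.
model = _dummy_keras_model_fixture(number_input_tensor_specs=3)
assert len(model.inputs) == 3  # one Keras input per TensorSpec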
Example #2
def test_size(n_dims):
    shape = np.random.randint(1, 10, (n_dims, ))

    tensor = np.random.randint(0, 1, shape)
    tensor_spec = TensorSpec(shape)

    assert size(tensor_spec) == np.size(tensor)
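
For reference, the property being tested is that the size of a spec equals the product of its shape dimensions, which is exactly what np.size returns for any array of that shape. A small self-contained sketch of that relationship:

# The size implied by shape (2, 3, 4) is 2 * 3 * 4 = 24, which matches
# np.size of any array built with the same shape.
import numpy as np

spec_shape = (2, 3, 4)
assert int(np.prod(spec_shape)) == np.size(np.zeros(spec_shape))  # 24 == 24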
Example #3
def create_merlin_algorithm(env,
                            encoder_fc_layers=(3, ),
                            latent_dim=3,
                            lstm_size=(4, ),
                            memory_size=20,
                            learning_rate=1e-1):
    """Create a simple MerlinAlgorithm

    Args:
        env (TFEnvironment): A TFEnvironment
        encoder_fc_layers (list[int]): list of fc layer parameters for the encoder
        latent_dim (int): the dimension of the hidden representation of VAE.
        lstm_size (list[int]): size of lstm layers for MBP and MBA
        memory_size (int): number of memory slots
        learning_rate (float): learning rate for training
    """
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()

    encoder = EncodingNetwork(
        input_tensor_spec=observation_spec,
        fc_layer_params=encoder_fc_layers,
        activation_fn=None,
        name="ObsEncoder")

    decoder = DecodingAlgorithm(
        decoder=EncodingNetwork(
            input_tensor_spec=TensorSpec((latent_dim, ), dtype=tf.float32),
            fc_layer_params=encoder_fc_layers,
            activation_fn=None,
            name="ObsDecoder"),
        loss_weight=100.)

    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    algorithm = MerlinAlgorithm(
        observation_spec=observation_spec,
        action_spec=action_spec,
        encoders=encoder,
        decoders=decoder,
        latent_dim=latent_dim,
        lstm_size=lstm_size,
        memory_size=memory_size,
        optimizer=optimizer,
        debug_summaries=True)

    return algorithm
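
A hypothetical call to the factory above; it assumes env is an already-constructed TFEnvironment:

# Hypothetical usage; `env` must be a TFEnvironment created elsewhere.
algorithm = create_merlin_algorithm(env, latent_dim=3, memory_size=20)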
Example #4
def get_rnn_cell_state_spec(cell):
    """Get the state spec for RNN cell."""
    return tf.nest.map_structure(
        lambda size: TensorSpec(size, dtype=tf.float32), cell.state_size)
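
A small usage sketch for the helper above (assuming TensorFlow 2.x): a Keras LSTMCell reports its state_size as a nest of two sizes, so the returned spec is a matching nest of two float32 specs.

# Hypothetical usage: LSTMCell(64).state_size is a nest of two sizes, so the
# result is a matching nest of two float32 TensorSpecs of shape (64,).
cell = tf.keras.layers.LSTMCell(64)
state_spec = get_rnn_cell_state_spec(cell)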
Example #5
    def __init__(self,
                 observation_spec,
                 action_spec,
                 memory: MemoryWithUsage,
                 num_read_keys=1,
                 lstm_size=(256, 256),
                 latent_dim=200,
                 loss=None,
                 loss_weight=1.0,
                 name="mba"):
        """Create the policy module of MERLIN.

        Args:
            observation_spec (nested TensorSpec): representing the observations.
            action_spec (nested BoundedTensorSpec): representing the actions.
            memory (MemoryWithUsage): the memory module from MemoryBasedPredictor
            num_read_keys (int): number of keys for reading memory.
            latent_dim (int): the dimension of the hidden representation of VAE.
            lstm_size (list[int]): size of lstm layers
            loss (None|ActorCriticLoss): an object for calculating the loss
                for reinforcement learning. If None, a default ActorCriticLoss
                will be used.
            loss_weight (float): weight for the loss.
            name (str): name of the algorithm.
        """
        # This is different from the Merlin LSTM. This rnn only uses the output
        # from the last LSTM layer, while Merlin uses outputs from all LSTM
        # layers.
        rnn = make_lstm_cell(lstm_size, name=name + "/lstm")

        actor_input_dim = (latent_dim + lstm_size[-1] +
                           num_read_keys * memory.dim)

        actor_net = ActorDistributionNetwork(
            input_tensor_spec=TensorSpec((actor_input_dim, ),
                                         dtype=tf.float32),
            output_tensor_spec=action_spec,
            fc_layer_params=(200, ),
            activation_fn=tf.keras.activations.tanh,
            name=name + "/actor_net")

        super(MemoryBasedActor,
              self).__init__(observation_spec=observation_spec,
                             action_spec=action_spec,
                             train_state_spec=get_rnn_cell_state_spec(rnn),
                             name=name)

        self._loss = ActorCriticLoss(action_spec) if loss is None else loss
        self._loss_weight = loss_weight
        self._memory = memory

        self._key_net = tf.keras.layers.Dense(num_read_keys *
                                              (self._memory.dim + 1),
                                              name=name + "/key_net")

        # TODO: add log p(a_i) as input to value net
        value_input_dim = latent_dim
        self._value_net = ValueNetwork(input_tensor_spec=TensorSpec(
            (value_input_dim, ), dtype=tf.float32),
                                       fc_layer_params=(200, ),
                                       activation_fn=tf.keras.activations.tanh,
                                       name=name + "/value_net")

        self._rnn = rnn
        self._actor_net = actor_net
Example #6
    def __init__(self,
                 action_spec,
                 encoders,
                 decoders,
                 num_read_keys=3,
                 lstm_size=(256, 256),
                 latent_dim=200,
                 memory_size=1350,
                 loss_weight=1.0,
                 name="mbp"):
        """Create a MemoryBasedPredictor.

        Args:
            action_spec (nested BoundedTensorSpec): representing the actions.
            encoders (nested Network): the nest should match observation_spec
            decoders (nested Algorithm): the nest should match observation_spec
            num_read_keys (int): number of keys for reading memory.
            lstm_size (list[int]): size of lstm layers for MBP and MBA
            latent_dim (int): the dimension of the hidden representation of VAE.
            memory_size (int): number of memory slots
            loss_weight (float): weight for the loss
            name (str): name of the algorithm.
        """
        rnn = make_lstm_cell(lstm_size, name=name + "/lstm")
        memory = MemoryWithUsage(latent_dim,
                                 memory_size,
                                 name=name + "/memory")

        state_spec = MBPState(latent_vector=TensorSpec(shape=(latent_dim, ),
                                                       dtype=tf.float32),
                              mem_readout=TensorSpec(shape=(num_read_keys *
                                                            latent_dim, ),
                                                     dtype=tf.float32),
                              rnn_state=get_rnn_cell_state_spec(rnn),
                              memory=memory.state_spec)

        super(MemoryBasedPredictor, self).__init__(train_state_spec=state_spec,
                                                   name=name)

        self._encoders = encoders
        self._decoders = decoders
        self._action_encoder = SimpleActionEncoder(action_spec)

        # This is different from the Merlin LSTM. This rnn only uses the output from
        # the last LSTM layer, while Merlin uses outputs from all LSTM layers.
        self._rnn = rnn
        self._memory = memory

        self._key_net = tf.keras.layers.Dense(num_read_keys *
                                              (self._memory.dim + 1),
                                              name=name + "/key_net")

        prior_network = tf.keras.Sequential(
            name=name + "/prior_network",
            layers=[
                tf.keras.layers.Dense(2 * latent_dim, activation='tanh'),
                tf.keras.layers.Dense(2 * latent_dim, activation='tanh'),
                tf.keras.layers.Dense(2 * latent_dim),
                alf.layers.Split(2, axis=-1)
            ])

        self._vae = VariationalAutoEncoder(latent_dim,
                                           prior_network,
                                           name=name + "/vae")

        self._loss_weight = loss_weight
Example #7
    def __init__(
        self,
        time_step_spec: TimeStep,
        action_spec: NestedTensorSpec,
        transition_model: Union[Tuple[TrainableTransitionModel,
                                      TransitionModelTrainingSpec],
                                TransitionModel],
        reward_model: RewardModel,
        termination_model: TerminationModel,
        initial_state_distribution_model: InitialStateDistributionModel,
        policy: TFPolicy,
        collect_policy: TFPolicy,
        debug_summaries: bool = False,
        train_step_counter: Optional[tf.Variable] = None,
    ):
        """
        :param time_step_spec: A nest of tf.TypeSpec representing the time_steps.
        :param action_spec: A nest of BoundedTensorSpec representing the actions.
        :param transition_model: A component of the environment model that describes the
            transition dynamics. Either a tuple containing a trainable transition model together
            with training specs, or a pre-specified transition model.
        :param reward_model: A component of the environment model that describes the
            rewards. At the moment only pre-specified reward models are allowed, i.e. the agent
            assumes the reward function is known.
        :param termination_model: A component of the environment model that describes the
            termination of the episode. At the moment only pre-specified termination models are
            allowed, i.e. the agent assumes the termination function is known.
        :param initial_state_distribution_model: A component of the environment model that
            describes the initial state distribution (can be either deterministic or
            probabilistic). At the moment only pre-specified initial state distribution models
            are allowed, i.e. the agent assumes the initial state distribution is known.
        :param policy: An instance of `tf_policy.TFPolicy` representing the agent's current policy.
        :param collect_policy: An instance of `tf_policy.TFPolicy` representing the agent's current
            data collection policy (used to set `self.step_spec`).
        :param debug_summaries: A bool; if true, subclasses should gather debug summaries.
        :param train_step_counter: An optional counter to increment every time the train op is run.
            Defaults to the global_step.
        """

        assert isinstance(termination_model, ConstantFalseTermination
                          ), "Only constant false termination supported"

        # unpack and create a dictionary with trainable models
        self._trainable_components: Dict[Enum, Any] = dict()
        if isinstance(transition_model, tuple):
            self._transition_model, self._transition_model_spec = transition_model
            self._trainable_components[EnvironmentModelComponents.TRANSITION.
                                       value.numpy()] = transition_model
        else:
            self._transition_model = transition_model  # type: ignore
        self._reward_model = reward_model
        self._termination_model = termination_model
        self._initial_state_distribution_model = initial_state_distribution_model
        if not self._trainable_components:
            warn("No trainable model specified!", RuntimeWarning)

        # additional input for the _train method
        train_argspec = {
            TRAIN_ARGSPEC_COMPONENT_ID: TensorSpec(shape=(), dtype=tf.string)
        }

        super().__init__(
            time_step_spec,
            action_spec,
            policy,
            collect_policy,
            train_sequence_length=None,
            train_argspec=train_argspec,
            debug_summaries=debug_summaries,
            summarize_grads_and_vars=False,
            train_step_counter=train_step_counter,
            validate_args=True,
        )