def _dummy_keras_model_fixture(number_input_tensor_specs):
    """Build a small Keras model with `number_input_tensor_specs` inputs."""
    input_specs = [
        TensorSpec(shape=(4, ), dtype=tf.float64, name=f"input_spec_{i}")
        for i in range(number_input_tensor_specs)
    ]
    output_layer, input_tensors = create_concatenated_inputs(input_specs, "")
    return tf.keras.Model(inputs=input_tensors, outputs=output_layer)
def test_size(n_dims):
    """`size(spec)` should equal the number of elements of a tensor with that shape."""
    shape = np.random.randint(1, 10, (n_dims, ))
    # Only the shape matters here; the tensor values are irrelevant to the check.
    tensor = np.random.randint(0, 1, shape)
    tensor_spec = TensorSpec(shape)
    assert size(tensor_spec) == np.size(tensor)
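# A minimal illustration of the property checked above (not from the source;
# it assumes `size` returns the product of a spec's dimensions, which is
# exactly what the assertion verifies):
#
#     size(TensorSpec((2, 3))) == 6 == np.size(np.zeros((2, 3)))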
def create_merlin_algorithm(env,
                            encoder_fc_layers=(3, ),
                            latent_dim=3,
                            lstm_size=(4, ),
                            memory_size=20,
                            learning_rate=1e-1):
    """Create a simple MerlinAlgorithm.

    Args:
        env (TFEnvironment): A TFEnvironment.
        encoder_fc_layers (list[int]): sizes of the fc layers of the encoder.
        latent_dim (int): the dimension of the hidden representation of VAE.
        lstm_size (list[int]): sizes of the lstm layers for MBP and MBA.
        memory_size (int): number of memory slots.
        learning_rate (float): learning rate for training.

    Returns:
        MerlinAlgorithm: the constructed algorithm.
    """
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()

    encoder = EncodingNetwork(
        input_tensor_spec=observation_spec,
        fc_layer_params=encoder_fc_layers,
        activation_fn=None,
        name="ObsEncoder")
    decoder = DecodingAlgorithm(
        decoder=EncodingNetwork(
            input_tensor_spec=TensorSpec((latent_dim, ), dtype=tf.float32),
            fc_layer_params=encoder_fc_layers,
            activation_fn=None,
            name="ObsDecoder"),
        loss_weight=100.)

    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    algorithm = MerlinAlgorithm(
        observation_spec=observation_spec,
        action_spec=action_spec,
        encoders=encoder,
        decoders=decoder,
        latent_dim=latent_dim,
        lstm_size=lstm_size,
        memory_size=memory_size,
        optimizer=optimizer,
        debug_summaries=True)

    return algorithm
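# A minimal usage sketch (not from the source): it assumes `env` is an
# already-constructed TFEnvironment. The factory above wires encoder, decoder
# and optimizer together; the tiny default layer sizes keep the algorithm
# cheap enough for unit tests.
#
#     env = ...  # any TFEnvironment
#     algorithm = create_merlin_algorithm(env, latent_dim=3, memory_size=20)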
def get_rnn_cell_state_spec(cell):
    """Get the state spec for an RNN cell."""
    return tf.nest.map_structure(
        lambda size: TensorSpec(size, dtype=tf.float32), cell.state_size)
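# Example (a sketch, assuming a TF2 Keras RNN cell): `cell.state_size` is a
# nest of sizes, and the returned spec mirrors that nest. For a stacked cell
# built from LSTMCell(4) and LSTMCell(8), each layer contributes a pair of
# specs (hidden and cell state) with shapes (4,) and (8,) respectively.
#
#     cell = tf.keras.layers.StackedRNNCells(
#         [tf.keras.layers.LSTMCell(4), tf.keras.layers.LSTMCell(8)])
#     state_spec = get_rnn_cell_state_spec(cell)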
def __init__(self,
             observation_spec,
             action_spec,
             memory: MemoryWithUsage,
             num_read_keys=1,
             lstm_size=(256, 256),
             latent_dim=200,
             loss=None,
             loss_weight=1.0,
             name="mba"):
    """Create the policy module of MERLIN.

    Args:
        observation_spec (nested TensorSpec): representing the observations.
        action_spec (nested BoundedTensorSpec): representing the actions.
        memory (MemoryWithUsage): the memory module from MemoryBasedPredictor.
        num_read_keys (int): number of keys for reading memory.
        lstm_size (list[int]): sizes of the lstm layers.
        latent_dim (int): the dimension of the hidden representation of VAE.
        loss (None|ActorCriticLoss): an object for calculating the loss for
            reinforcement learning. If None, a default ActorCriticLoss will
            be used.
        loss_weight (float): weight for the loss.
        name (str): name of the algorithm.
    """
    # This is different from the Merlin LSTM. This rnn only uses the output
    # from the last LSTM layer, while Merlin uses outputs from all LSTM
    # layers.
    rnn = make_lstm_cell(lstm_size, name=name + "/lstm")

    actor_input_dim = (
        latent_dim + lstm_size[-1] + num_read_keys * memory.dim)
    actor_net = ActorDistributionNetwork(
        input_tensor_spec=TensorSpec((actor_input_dim, ), dtype=tf.float32),
        output_tensor_spec=action_spec,
        fc_layer_params=(200, ),
        activation_fn=tf.keras.activations.tanh,
        name=name + "/actor_net")

    super(MemoryBasedActor, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        train_state_spec=get_rnn_cell_state_spec(rnn),
        name=name)

    self._loss = ActorCriticLoss(action_spec) if loss is None else loss
    self._loss_weight = loss_weight
    self._memory = memory

    self._key_net = tf.keras.layers.Dense(
        num_read_keys * (self._memory.dim + 1), name=name + "/key_net")

    # TODO: add log p(a_i) as input to value net
    value_input_dim = latent_dim
    self._value_net = ValueNetwork(
        input_tensor_spec=TensorSpec((value_input_dim, ), dtype=tf.float32),
        fc_layer_params=(200, ),
        activation_fn=tf.keras.activations.tanh,
        name=name + "/value_net")

    self._rnn = rnn
    self._actor_net = actor_net
def __init__(self,
             action_spec,
             encoders,
             decoders,
             num_read_keys=3,
             lstm_size=(256, 256),
             latent_dim=200,
             memory_size=1350,
             loss_weight=1.0,
             name="mbp"):
    """Create a MemoryBasedPredictor.

    Args:
        action_spec (nested BoundedTensorSpec): representing the actions.
        encoders (nested Network): the nest should match observation_spec.
        decoders (nested Algorithm): the nest should match observation_spec.
        num_read_keys (int): number of keys for reading memory.
        lstm_size (list[int]): sizes of the lstm layers for MBP and MBA.
        latent_dim (int): the dimension of the hidden representation of VAE.
        memory_size (int): number of memory slots.
        loss_weight (float): weight for the loss.
        name (str): name of the algorithm.
    """
    rnn = make_lstm_cell(lstm_size, name=name + "/lstm")
    memory = MemoryWithUsage(
        latent_dim, memory_size, name=name + "/memory")

    state_spec = MBPState(
        latent_vector=TensorSpec(shape=(latent_dim, ), dtype=tf.float32),
        mem_readout=TensorSpec(
            shape=(num_read_keys * latent_dim, ), dtype=tf.float32),
        rnn_state=get_rnn_cell_state_spec(rnn),
        memory=memory.state_spec)

    super(MemoryBasedPredictor, self).__init__(
        train_state_spec=state_spec, name=name)

    self._encoders = encoders
    self._decoders = decoders
    self._action_encoder = SimpleActionEncoder(action_spec)

    # This is different from the Merlin LSTM. This rnn only uses the output
    # from the last LSTM layer, while Merlin uses outputs from all LSTM
    # layers.
    self._rnn = rnn
    self._memory = memory

    self._key_net = tf.keras.layers.Dense(
        num_read_keys * (self._memory.dim + 1), name=name + "/key_net")

    prior_network = tf.keras.Sequential(
        name=name + "/prior_network",
        layers=[
            tf.keras.layers.Dense(2 * latent_dim, activation='tanh'),
            tf.keras.layers.Dense(2 * latent_dim, activation='tanh'),
            tf.keras.layers.Dense(2 * latent_dim),
            alf.layers.Split(2, axis=-1)
        ])
    self._vae = VariationalAutoEncoder(
        latent_dim, prior_network, name=name + "/vae")

    self._loss_weight = loss_weight
def __init__(
    self,
    time_step_spec: TimeStep,
    action_spec: NestedTensorSpec,
    transition_model: Union[Tuple[TrainableTransitionModel,
                                  TransitionModelTrainingSpec],
                            TransitionModel],
    reward_model: RewardModel,
    termination_model: TerminationModel,
    initial_state_distribution_model: InitialStateDistributionModel,
    policy: TFPolicy,
    collect_policy: TFPolicy,
    debug_summaries: bool = False,
    train_step_counter: Optional[tf.Variable] = None,
):
    """
    :param time_step_spec: A nest of tf.TypeSpec representing the time_steps.
    :param action_spec: A nest of BoundedTensorSpec representing the actions.
    :param transition_model: A component of the environment model that describes the
        transition dynamics. Either a tuple containing a trainable transition model
        together with training specs, or a pre-specified transition model.
    :param reward_model: A component of the environment model that describes the rewards.
        At the moment only pre-specified reward models are allowed, i.e. the agent assumes
        the reward function is known.
    :param termination_model: A component of the environment model that describes the
        termination of the episode. At the moment only pre-specified termination models
        are allowed, i.e. the agent assumes the termination function is known.
    :param initial_state_distribution_model: A component of the environment model that
        describes the initial state distribution (can be either deterministic or
        probabilistic). At the moment only pre-specified initial state distribution
        models are allowed, i.e. the agent assumes the initial state distribution is
        known.
    :param policy: An instance of `tf_policy.TFPolicy` representing the agent's current
        policy.
    :param collect_policy: An instance of `tf_policy.TFPolicy` representing the agent's
        current data collection policy (used to set `self.step_spec`).
    :param debug_summaries: A bool; if true, subclasses should gather debug summaries.
    :param train_step_counter: An optional counter to increment every time the train op
        is run. Defaults to the global_step.
    """
    assert isinstance(
        termination_model, ConstantFalseTermination
    ), "Only constant false termination supported"

    # unpack and create a dictionary with trainable models
    self._trainable_components: Dict[Enum, Any] = dict()
    if isinstance(transition_model, tuple):
        self._transition_model, self._transition_model_spec = transition_model
        self._trainable_components[
            EnvironmentModelComponents.TRANSITION.value.numpy()] = transition_model
    else:
        self._transition_model = transition_model  # type: ignore

    self._reward_model = reward_model
    self._termination_model = termination_model
    self._initial_state_distribution_model = initial_state_distribution_model

    if not self._trainable_components:
        warn("No trainable model specified!", RuntimeWarning)

    # additional input for the _train method
    train_argspec = {
        TRAIN_ARGSPEC_COMPONENT_ID: TensorSpec(shape=(), dtype=tf.string)
    }

    super().__init__(
        time_step_spec,
        action_spec,
        policy,
        collect_policy,
        train_sequence_length=None,
        train_argspec=train_argspec,
        debug_summaries=debug_summaries,
        summarize_grads_and_vars=False,
        train_step_counter=train_step_counter,
        validate_args=True,
    )
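# Usage sketch (illustrative only; `MyModelBasedAgent` stands in for whatever
# class defines this constructor, and the model and policy objects are assumed
# to be constructed elsewhere): passing the transition model as a
# (model, training_spec) tuple registers it in `self._trainable_components`,
# while passing a plain TransitionModel leaves the agent with no trainable
# components and triggers the RuntimeWarning above.
#
#     agent = MyModelBasedAgent(
#         time_step_spec,
#         action_spec,
#         (trainable_transition_model, transition_model_training_spec),
#         reward_model,
#         constant_false_termination,   # must be a ConstantFalseTermination
#         initial_state_distribution_model,
#         policy,
#         collect_policy,
#     )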