Example 1
    def sample_actions_sequence(self, policy, obs, actions, next_obs, rewards):
        batch_size = tf.shape(obs)[0]
        seq_len = nn_utils.int_shape(obs)[1]

        z, jacob = self.encode(obs, actions, next_obs, rewards)

        act_samples = []
        act_probs = []

        self.gp_layer.reset()
        self.gp_layer.replicate_state(batch_size)

        for i in range(seq_len):
            posterior_mu, posterior_var = self.gp_layer.get_posterior_params()
            if self.use_posterior_var:
                policy_inputs = tf.concat(
                    [obs[:, i, :], posterior_mu, posterior_var], axis=-1)
            else:
                policy_inputs = tf.concat([obs[:, i, :], posterior_mu],
                                          axis=-1)

            _, act_sample, act_prob = policy(inputs=policy_inputs)
            act_samples.append(act_sample)
            act_probs.append(act_prob)

            self.gp_layer.update_distribution(z[:, i, :])

        act_samples = tf.stack(act_samples, axis=1)
        act_probs = tf.stack(act_probs, axis=1)

        return act_samples, act_probs
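Several of these snippets read static dimensions through an `int_shape` helper (here via `nn_utils.int_shape`); note that `seq_len` must be a plain Python int for the `range` loop to unroll at graph-construction time. A minimal sketch of what such a helper presumably looks like, mirroring `keras.backend.int_shape` (an assumption, not taken from the project):

def int_shape(x):
    # Static shape as a list of Python ints, with None for any
    # dimension unknown at graph-construction time.
    return x.get_shape().as_list()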
Example 2
def dense(x,
          num_units,
          name,
          nonlinearity=None,
          kernel_initializer=Orthogonal(),
          bias_initializer=tf.constant_initializer(0.),
          condition=None):
    with tf1.variable_scope(name):
        if condition is None:
            return tf1.layers.dense(x,
                                    units=num_units,
                                    activation=nonlinearity,
                                    kernel_initializer=kernel_initializer,
                                    bias_initializer=bias_initializer,
                                    name=name)
        else:
            ndim = int_shape(x)[-1]
            h = tf1.layers.dense(condition,
                                 units=ndim,
                                 activation=tf.nn.leaky_relu,
                                 bias_initializer=bias_initializer,
                                 kernel_initializer=Orthogonal(),
                                 name='label')
            o1 = tf.concat([h, x], axis=-1)
            output = tf1.layers.dense(o1,
                                      units=num_units,
                                      activation=None,
                                      bias_initializer=bias_initializer,
                                      kernel_initializer=kernel_initializer,
                                      name='dense')

            if nonlinearity is not None:
                output = nonlinearity(output)
            return output
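A hypothetical call pattern for the two branches, with `x` as a `[batch, d]` feature tensor and `labels` as a conditioning tensor (both placeholder names, not from the original code):

# Unconditional: a plain dense layer.
h = dense(x, num_units=256, name='fc1', nonlinearity=tf.nn.relu)
# Conditional: `labels` is first embedded with a leaky-ReLU layer,
# concatenated with x, and only then projected to num_units.
h_cond = dense(x, num_units=256, name='fc1_cond',
               nonlinearity=tf.nn.relu, condition=labels)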
Example 3
def dense_wn(x,
             num_units,
             name,
             activation=None,
             use_bias=True,
             condition=None,
             eps=1e-12):
    """
    Weight norm with initialization from Arpit et al., 2019
    """
    with tf1.variable_scope(name):
        if condition is None:
            fan_in = int(x.get_shape()[1])
            num_units = num_units if isinstance(num_units,
                                                int) else num_units.value
            V = tf1.get_variable(name='V',
                                 shape=[fan_in, num_units],
                                 dtype=tf.float32,
                                 initializer=Orthogonal(),
                                 trainable=True)
            g = tf1.get_variable(name='g',
                                 shape=[num_units],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(
                                     np.sqrt(2. * fan_in / num_units)),
                                 trainable=True)
            b = tf1.get_variable(name='b',
                                 shape=[num_units],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.),
                                 trainable=use_bias)

            x = tf.matmul(x, V)
            # Apply eps after the norm so zero columns cannot divide by zero.
            scaler = g / (tf.norm(V, axis=0) + eps)
            x = tf.reshape(scaler, [1, num_units]) * x + tf.reshape(
                b, [1, num_units])

            if activation is not None:
                x = activation(x)
            return x
        else:
            ndim = int_shape(x)[-1]
            h = dense_wn(condition,
                         num_units=ndim,
                         activation=tf.nn.relu,
                         use_bias=True,
                         name='label')

            o1 = tf.concat([h, x], axis=-1)
            output = dense_wn(o1,
                              num_units=num_units,
                              activation=None,
                              use_bias=True,
                              name='dense')

            if activation is not None:
                output = activation(output)
            return output
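For reference, `dense_wn` is the standard weight-norm reparameterization: the effective kernel is `V` rescaled per column so that column `j` has norm `g[j]`, with `g` initialized to `sqrt(2 * fan_in / num_units)` following Arpit et al., 2019. A NumPy sketch of the same forward computation (an assumed equivalent, not from the source):

import numpy as np

def dense_wn_numpy(x, V, g, b, eps=1e-12):
    # Rescale each column of V to have norm g[j], then apply the
    # usual affine transform.
    scaler = g / (np.linalg.norm(V, axis=0) + eps)
    return x @ V * scaler + b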
Example 4
def masked_dense(x,
                 num_units,
                 num_blocks,
                 exclusive_mask,
                 name,
                 activation=None,
                 kernel_initializer=Orthogonal(),
                 bias_initializer=tf.constant_initializer(0.),
                 condition=None):
    with tf1.variable_scope(name):

        input_dim = int_shape(x)[-1]
        mask = generate_mask(num_blocks, input_dim, num_units,
                             exclusive_mask).T

        def masked_initializer(shape, dtype=None, partition_info=None):
            return mask * kernel_initializer(shape, dtype, partition_info)

        if condition is None:
            output = tf1.layers.dense(x,
                                      units=num_units,
                                      activation=activation,
                                      kernel_initializer=masked_initializer,
                                      kernel_constraint=lambda w: mask * w,
                                      bias_initializer=bias_initializer,
                                      name='masked_dense')
            return output
        else:
            ndim_condition = int_shape(condition)[-1]
            o1 = tf.concat([condition, x], axis=-1)
            mask = np.concatenate([np.ones((ndim_condition, num_units)), mask],
                                  axis=0)
            output = tf1.layers.dense(o1,
                                      units=num_units,
                                      activation=None,
                                      bias_initializer=bias_initializer,
                                      kernel_initializer=masked_initializer,
                                      kernel_constraint=lambda w: mask * w,
                                      name='masked_dense')

            if activation is not None:
                output = activation(output)
            return output
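`generate_mask` (used here and in Example 8) is not shown in these snippets. A plausible MADE-style block mask, sketched under the assumption that with `exclusive` set a unit's block may only see strictly earlier input blocks (the project's actual construction may differ):

import numpy as np

def generate_mask(num_blocks, num_inputs, num_units, exclusive):
    # Hypothetical mask of shape [num_units, num_inputs]; the call site
    # transposes it to kernel shape [num_inputs, num_units]. Inputs and
    # units are assigned to num_blocks contiguous blocks, and unit j may
    # see input i iff block(i) < block(j) (exclusive) or
    # block(i) <= block(j) (inclusive).
    in_blocks = np.arange(num_inputs) * num_blocks // num_inputs
    out_blocks = np.arange(num_units) * num_blocks // num_units
    if exclusive:
        return (in_blocks[None, :] < out_blocks[:, None]).astype('float32')
    return (in_blocks[None, :] <= out_blocks[:, None]).astype('float32')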
Example 5
def value_nn(inputs, hidden_layers_sizes=(128,), scope_name='critic_nn'):
    input_shape = tf.shape(inputs)
    batch_size = input_shape[0]
    seq_len = input_shape[1]
    input_dim = nn_utils.int_shape(inputs)[-1]

    inputs = tf.reshape(inputs, (batch_size * seq_len, input_dim))

    with tf1.variable_scope(scope_name):
        for i, size in enumerate(hidden_layers_sizes):
            inputs = tf1.layers.dense(inputs, size,
                                      activation=tf.nn.leaky_relu,
                                      name=scope_name + '_l' + str(i))
        out = tf1.layers.dense(inputs, 1, activation=None, name=scope_name + '_l_out')

    out = tf.reshape(out, (batch_size, seq_len))
    return out
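A hypothetical usage, assuming observations arrive as a `[batch, seq_len, obs_dim]` placeholder (the names and the obs_dim of 17 are illustrative; only the last dimension needs to be static):

obs_ph = tf1.placeholder(tf.float32, shape=[None, None, 17], name='obs')
values = value_nn(obs_ph, hidden_layers_sizes=(128, 128))
# values: [batch, seq_len] per-step state-value estimates.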
Example 6
    def get_states_given_sequence(self, obs, actions, next_obs, rewards):
        batch_size = tf.shape(obs)[0]
        seq_len = nn_utils.int_shape(obs)[1]

        z, jacob = self.encode(obs, actions, next_obs, rewards)

        bruno_states_seq = []

        self.gp_layer.reset()
        self.gp_layer.replicate_state(batch_size)

        for i in range(seq_len):
            mu, var = self.gp_layer.get_posterior_params()
            bruno_states_seq.append(mu)
            self.gp_layer.update_distribution(z[:, i, :])

        bruno_states_seq = tf.stack(bruno_states_seq, axis=1)
        return bruno_states_seq
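Examples 1 and 6 drive the same `gp_layer` object. A hypothetical stub capturing just the interface the call sites imply (the actual recurrent GP implementation is not shown in these snippets):

class GPLayerInterface:
    def reset(self):
        """Reset the posterior over latent codes back to the prior."""
    def replicate_state(self, batch_size):
        """Tile the current state across a batch of size batch_size."""
    def get_posterior_params(self):
        """Return (mu, var) of the current predictive posterior."""
    def update_distribution(self, z_t):
        """Condition the posterior on one encoded timestep z_t."""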
Example 7
    def sample_oracle_actions_sequence(self, policy, obs, env_params):
        seq_len = nn_utils.int_shape(obs)[1]

        act_samples = []
        act_probs = []

        for i in range(seq_len):
            if self.no_info_policy:
                policy_inputs = obs[:, i, :]
            else:
                policy_inputs = tf.concat([obs[:, i, :], env_params[:, i, :]],
                                          axis=-1)
            _, act_sample, act_prob = policy(inputs=policy_inputs)
            act_samples.append(act_sample)
            act_probs.append(act_prob)

        act_samples = tf.stack(act_samples, axis=1)
        act_probs = tf.stack(act_probs, axis=1)

        return act_samples, act_probs
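Unlike `sample_actions_sequence` in Example 1, this oracle variant feeds the policy the ground-truth `env_params` at each step (or the raw observation alone when `no_info_policy` is set), so no GP belief state is created or updated.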
Example 8
def masked_dense_wn(x,
                    num_units,
                    num_blocks,
                    exclusive_mask,
                    name,
                    activation=None,
                    use_bias=True,
                    condition=None,
                    mask=None,
                    kernel_initializer=Orthogonal(),
                    eps=1e-12):
    """
    Weight norm with initialization from Arpit et al., 2019
    """
    with tf1.variable_scope(name):

        input_dim = int_shape(x)[-1]
        if mask is None:
            mask = generate_mask(num_blocks, input_dim, num_units,
                                 exclusive_mask).T

        def masked_initializer(shape, dtype=None, partition_info=None):
            return mask * kernel_initializer(shape, dtype, partition_info)

        if condition is None:
            fan_in = int(x.get_shape()[1])
            V = mask * tf1.get_variable(name='V',
                                        shape=[input_dim, num_units],
                                        dtype=tf.float32,
                                        initializer=masked_initializer,
                                        trainable=True)
            g = tf1.get_variable(name='g',
                                 shape=[num_units],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(
                                     np.sqrt(2. * fan_in / num_units)),
                                 trainable=True)
            b = tf1.get_variable(name='b',
                                 shape=[num_units],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.),
                                 trainable=use_bias)

            x = tf.matmul(x, V)
            # Apply eps after the norm so zero columns cannot divide by zero.
            scaler = g / (tf.norm(V, axis=0) + eps)
            x = tf.reshape(scaler, [1, num_units]) * x + tf.reshape(
                b, [1, num_units])

            if activation is not None:
                x = activation(x)
            return x
        else:
            ndim_condition = int_shape(condition)[-1]
            o1 = tf.concat([condition, x], axis=-1)
            mask = np.concatenate([np.ones((ndim_condition, num_units)), mask],
                                  axis=0)
            output = masked_dense_wn(o1,
                                     num_units=num_units,
                                     num_blocks=num_blocks,
                                     exclusive_mask=exclusive_mask,
                                     activation=None,
                                     use_bias=True,
                                     name='masked_dense_wn',
                                     mask=mask)

            if activation is not None:
                output = activation(output)
            return output
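Note the recursion in the conditional branch: the conditioning vector is concatenated in front of `x` and the mask is padded with a block of ones, so every output unit sees the full condition while the autoregressive masking over `x` itself is preserved.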