Example 1
    def __init__(self, config, name='discriminator'):
        super().__init__(name)
        config = config.copy()

        self._obs_action = config.pop('obs_action', True)
        self._g = mlp(
            **config, out_size=1, name='g'
        )  # the AIRL paper uses a linear model here; we use the same model for g and h for simplicity
        self._h = mlp(**config, out_size=1, name='h')
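Downstream, AIRL combines g and h into the shaped reward term f(s, a, s') = g(s, a) + γh(s') − h(s), and the discriminator's logit is f minus the policy's log-probability. A minimal sketch of that combination, following Fu et al. (2018); `gamma` and `log_pi` are assumptions, not inputs shown in the snippet:

# A minimal sketch of the standard AIRL combination (Fu et al., 2018).
# `gamma` and `log_pi` are assumed inputs, not taken from the snippet above.
def airl_logits(g, h, obs_action, obs, next_obs, gamma, log_pi):
    # f(s, a, s') = g(s, a) + gamma * h(s') - h(s)
    f = g(obs_action) + gamma * h(next_obs) - h(obs)
    # D = exp(f) / (exp(f) + pi(a|s)), so the discriminator logit is
    # f - log pi(a|s)
    return f - log_pi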
Example 2
File: nn.py Project: xlnwel/d2rl
def _add_layer(self, config):
    """ Network definition """
    if self._duel:
        self._v_layers = mlp(
            **config,
            out_size=1,
            name=self.name+'/v',
            out_dtype='float32')
    self._layers = mlp(
        **config,
        out_size=self.action_dim,
        name=self.name,
        out_dtype='float32')
Example 3
File: nn.py Project: xlnwel/d2rl
def _add_layer(self, config):
    """ Network definition """
    if getattr(self, '_duel', False):
        self._v_layers = mlp(
            **config,
            out_size=1,
            name=self.name+'/v',
            out_dtype='float32')
    # we do not define the phi net here to make it consistent with the CNN output size
    self._layers = mlp(
        **config,
        out_size=self.action_dim,
        name=self.name,
        out_dtype='float32')
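Both snippets above build the two heads of a dueling network. A minimal sketch of how the streams are usually combined (Wang et al., 2016), assuming v_layers and a_layers are the heads defined above:

import tensorflow as tf

# Standard dueling combination: subtract the mean advantage so that
# V and A are identifiable (Wang et al., 2016).
def dueling_q(v_layers, a_layers, x):
    v = v_layers(x)    # [batch, 1]
    a = a_layers(x)    # [batch, action_dim]
    return v + a - tf.reduce_mean(a, axis=-1, keepdims=True)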
Example 4
File: nn.py Project: xlnwel/d2rl
    def __init__(self, name='rssm'):
        super().__init__(name)

        self._embed_layer = layers.Dense(self._hidden_size,
                                         activation=self._activation,
                                         name='embed')
        self._cell = layers.GRUCell(self._deter_size)
        self._img_layers = mlp([self._hidden_size],
                               out_size=2 * self._stoch_size,
                               activation=self._activation,
                               name='img')
        self._obs_layers = mlp([self._hidden_size],
                               out_size=2 * self._stoch_size,
                               activation=self._activation,
                               name='obs')
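The img and obs heads output 2 * stoch_size units because, as in Dreamer's RSSM, the vector is split into the mean and std of a diagonal Gaussian over the stochastic state. A minimal sketch; the softplus shift of 0.1 is an assumption borrowed from Dreamer's reference implementation, not code from this snippet:

import tensorflow as tf

# Split a 2 * stoch_size output into Gaussian parameters; the 0.1 shift
# keeps std positive and bounded away from zero (an assumption based on
# Dreamer's reference implementation).
def gaussian_params(out):
    mean, std = tf.split(out, 2, axis=-1)
    std = tf.nn.softplus(std) + .1
    return mean, std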
Example 5
    def test_mlp(self):
        units_list = [10, 5]
        activation = 'relu'
        kernel_initializer = 'he_uniform'
        out_dim = 3
        layer_seed = 10

        tf.random.set_seed(0)
        x = tf.random.normal([1, 2])
        
        tf.random.set_seed(layer_seed)
        plain_layers = [tf.keras.layers.Dense(
            u, activation=activation, kernel_initializer=kernel_initializer)
            for u in units_list]
        plain_layers.append(tf.keras.layers.Dense(out_dim))
        plain_y = x
        for l in plain_layers:
            plain_y = l(plain_y)

        tf.random.set_seed(layer_seed)
        mlp_layers = mlp(units_list, out_dim, activation=activation,
                         kernel_initializer=kernel_initializer)
        mlp_y = mlp_layers(x)

        np.testing.assert_allclose(plain_y.numpy(), mlp_y.numpy())
        plain_vars = []
        for l in plain_layers:
            plain_vars += l.variables
        for pv, mv in zip(plain_vars, mlp_layers.variables):
            np.testing.assert_allclose(pv.numpy(), mv.numpy())
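This test pins down the interface: the first positional argument is the list of hidden units, the second the output size; activation and kernel_initializer apply to the hidden layers, and the output layer is a plain unactivated Dense. A hedged reconstruction of what mlp appears to do; the real nn.func.mlp in xlnwel/d2rl may differ in details such as out_dtype and out_gain handling:

import tensorflow as tf

# A hedged reconstruction of mlp, inferred from the test above; not the
# repo's actual definition.
def mlp_sketch(units_list, out_size=None, activation=None,
               kernel_initializer='glorot_uniform', name=None):
    layers = [tf.keras.layers.Dense(
        u, activation=activation, kernel_initializer=kernel_initializer)
        for u in units_list]
    if out_size is not None:
        layers.append(tf.keras.layers.Dense(out_size))
    return tf.keras.Sequential(layers, name=name)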
Example 6
File: nn.py Project: xlnwel/d2rl
    def __init__(self, config, n_agents, name='qmixer'):
        super().__init__(name=name)

        config = config.copy()
        self.n_agents = n_agents
        self.hidden_dim = config.pop('hidden_dim')
        self.w1 = mlp(**config,
                      out_size=n_agents * self.hidden_dim,
                      name=f'{self.name}/w1')
        self.w2 = mlp(**config,
                      out_size=self.hidden_dim,
                      name=f'{self.name}/w2')
        self.b = mlp([], self.hidden_dim, name=f'{self.name}/b')

        config['units_list'] = [self.hidden_dim]
        self.v = mlp(**config, out_size=1, name=f'{self.name}/v')
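These heads are the hypernetworks of a QMIX mixer. A minimal sketch of the mixing step they feed into (Rashid et al., 2018), assuming state is the hypernetwork input and qs holds per-agent utilities with shape [batch, n_agents]; taking the absolute value of w1 and w2 is what enforces monotonicity:

import tensorflow as tf

# A minimal sketch of the QMIX mixing step (Rashid et al., 2018); `state`
# and `qs` are assumed inputs. Absolute weights enforce monotonicity.
def mix(self, qs, state):
    qs = tf.reshape(qs, [-1, 1, self.n_agents])
    w1 = tf.abs(tf.reshape(self.w1(state), [-1, self.n_agents, self.hidden_dim]))
    b = tf.reshape(self.b(state), [-1, 1, self.hidden_dim])
    h = tf.nn.elu(tf.matmul(qs, w1) + b)       # [batch, 1, hidden_dim]
    w2 = tf.abs(tf.reshape(self.w2(state), [-1, self.hidden_dim, 1]))
    v = tf.reshape(self.v(state), [-1, 1, 1])
    q_tot = tf.matmul(h, w2) + v               # [batch, 1, 1]
    return tf.squeeze(q_tot, [1, 2])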
Example 7
def __init__(self, name='fqn'):
    super().__init__(name=name)
    kernel_initializer = tf.keras.initializers.VarianceScaling(
        1./np.sqrt(3.), distribution='uniform')
    self._layers = mlp(
        out_size=self.N,
        name=f'{self.name}/fpn',
        kernel_initializer=kernel_initializer)
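The 'fpn' name and out_size=self.N suggest the fraction proposal network of FQF (Yang et al., 2019), where the N logits become monotonically increasing quantile fractions via a softmax and cumulative sum. A sketch under that assumption:

import tensorflow as tf

# How fraction-proposal logits are usually turned into quantile fractions
# in FQF (Yang et al., 2019); an assumption based on the 'fpn' name, not
# code from the snippet.
def fractions(logits):
    probs = tf.nn.softmax(logits, axis=-1)      # positive, sums to 1
    taus = tf.cumsum(probs, axis=-1)            # monotone fractions in (0, 1]
    taus = tf.concat([tf.zeros_like(taus[..., :1]), taus], axis=-1)  # tau_0 = 0
    tau_hats = (taus[..., 1:] + taus[..., :-1]) / 2  # midpoints used to query quantiles
    return taus, tau_hats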
Example 8
File: nn.py Project: xlnwel/d2rl
def __init__(self, config, name='value'):
    super().__init__(name=name)
    config = config.copy()

    config.setdefault('out_gain', 1)
    self._layers = mlp(**config,
                       out_size=1,
                       out_dtype='float32',
                       name=name)
Example 9
File: nn.py Project: xlnwel/d2rl
    def __init__(self, name='encoder'):
        super().__init__(name=name)

        if getattr(self, '_has_cnn', True):
            self._layers = ConvEncoder(time_distributed=True)
        else:
            self._layers = mlp(self._units_list,
                               activation=self._activation,
                               name=name)
Example 10
File: nn.py Project: xlnwel/d2rl
    def __init__(self, action_dim, is_action_discrete, name='actor'):
        """ Network definition """
        super().__init__(name=name)
        out_size = action_dim if is_action_discrete else 2 * action_dim
        self._layers = mlp(self._units_list,
                           out_size=out_size,
                           activation=self._activation,
                           name=name)

        self._is_action_discrete = is_action_discrete
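out_size doubles for continuous actions because the head parameterizes both the mean and log-std of a Gaussian. A minimal sketch of the downstream split; the distribution choices are assumptions, not code from the repo:

import tensorflow as tf
import tensorflow_probability as tfp

# Assumed consumption of the doubled output: Categorical for discrete
# actions, diagonal Gaussian for continuous ones.
def action_dist(out, is_action_discrete):
    if is_action_discrete:
        return tfp.distributions.Categorical(logits=out)
    mean, logstd = tf.split(out, 2, axis=-1)
    return tfp.distributions.Normal(mean, tf.exp(logstd))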
Example 11
File: nn.py Project: xlnwel/d2rl
    def __init__(self, out_size=1, dist='normal', name='decoder'):
        super().__init__(name=name)

        self._dist = dist
        if getattr(self, '_has_cnn', None):
            self._layers = ConvDecoder(time_distributed=True)
        else:
            self._layers = mlp(self._units_list,
                               out_size=out_size,
                               activation=self._activation,
                               name=name)
Example 12
File: nn.py Project: xlnwel/d2rl
    def __init__(self, config, action_dim, name='actor'):
        super().__init__(name=name)
        config = config.copy()

        self._action_dim = action_dim
        self.LOG_STD_MIN = config.pop('LOG_STD_MIN', -20)
        self.LOG_STD_MAX = config.pop('LOG_STD_MAX', 2)
        self._tsallis_q = config.pop('tsallis_q', 1)

        out_size = 2*action_dim
        self._layers = mlp(**config, out_size=out_size, name=name)
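The LOG_STD_MIN/LOG_STD_MAX constants suggest SAC-style clamping of the log-std half of the output before sampling a tanh-squashed Gaussian. A minimal sketch under that assumption (the Tsallis-q generalization is omitted):

import tensorflow as tf

# Assumed SAC-style forward pass: clamp the log-std, sample with the
# reparameterization trick, squash with tanh.
def forward(self, x):
    out = self._layers(x)
    mean, logstd = tf.split(out, 2, axis=-1)
    logstd = tf.clip_by_value(logstd, self.LOG_STD_MIN, self.LOG_STD_MAX)
    std = tf.exp(logstd)
    raw_action = mean + std * tf.random.normal(tf.shape(mean))
    return tf.tanh(raw_action)    # squash into [-1, 1]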
Example 13
File: nn.py Project: xlnwel/d2rl
    def __init__(self, config, action_dim, name='actor'):
        super().__init__(name=name)
        config = config.copy()

        self._action_dim = action_dim
        prior = np.ones(action_dim, dtype=np.float32)
        prior /= np.sum(prior)
        self.prior = tf.Variable(prior, trainable=False, name='prior')
        self._epsilon_scaled_logits = config.pop('epsilon_scaled_logits',
                                                 False)

        self._layers = mlp(**config,
                           out_size=action_dim,
                           out_dtype='float32',
                           name=name)
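A hedged guess at the role of epsilon_scaled_logits together with the uniform prior: mix the policy distribution with the prior for exploration. This is inferred from the names alone, not from the repo:

import tensorflow as tf

# Assumed behavior only: blend the policy with the stored uniform prior.
def scaled_probs(self, logits, epsilon):
    probs = tf.nn.softmax(logits, axis=-1)
    return (1 - epsilon) * probs + epsilon * self.prior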
Example 14
File: nn.py Project: xlnwel/d2rl
    def __init__(self, config, action_dim, is_action_discrete, name='actor'):
        super().__init__(name=name)
        config = config.copy()

        self.action_dim = action_dim
        self.is_action_discrete = is_action_discrete
        self.eval_act_temp = config.pop('eval_act_temp', 1)
        assert self.eval_act_temp >= 0, self.eval_act_temp

        self._init_std = config.pop('init_std', 1)
        if not self.is_action_discrete:
            self.logstd = tf.Variable(
                initial_value=np.log(self._init_std)*np.ones(action_dim),
                dtype='float32',
                trainable=True,
                name='actor/logstd')
        config.setdefault('out_gain', .01)
        self._layers = mlp(**config,
                           out_size=action_dim,
                           out_dtype='float32',
                           name=name)
Example 15
File: nn.py Project: xlnwel/d2rl
def __init__(self, config, name='q'):
    super().__init__(name=name)
    config = config.copy()

    self._layers = mlp(**config, out_size=1, name=name)
Example 16
def mlp(self, x, *args, name, **kwargs):
    """ Build an MLP lazily on first use and cache it as an attribute """
    if not hasattr(self, f'_{name}'):
        from nn.func import mlp
        setattr(self, f'_{name}', mlp(*args, name=name, **kwargs))
    return getattr(self, f'_{name}')(x)
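Hypothetical usage of the helper above: the first call builds and caches self._trunk, later calls reuse it. The argument values here are illustrative only:

# First call creates self._trunk via nn.func.mlp; subsequent calls reuse it.
y = self.mlp(x, [64, 64], out_size=32, activation='relu', name='trunk')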
Example 17
File: nn.py Project: xlnwel/d2rl
    def __init__(self, config, name='discriminator'):
        super().__init__(name)
        config = config.copy()

        self._layers = mlp(**config, out_size=1)