def __init__(self, config, name='discriminator'):
    super().__init__(name)
    config = config.copy()
    self._obs_action = config.pop('obs_action', True)
    self._g = mlp(
        **config,
        out_size=1,
        name='g'
    )
    # the AIRL paper uses a linear model here; we use the same model for g and h for simplicity
    self._h = mlp(**config, out_size=1, name='h')

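# For context: a minimal sketch of how the two heads above are commonly combined in AIRL.
# This is an assumption based on the AIRL formulation referenced in the comment, not this
# repository's actual forward pass; the function and argument names are illustrative.
def airl_logits(g, h, obs_action, obs, next_obs, logpi, discount=.99):
    # f(s, a, s') = g(s, a) + discount * h(s') - h(s): reward term plus shaping potential
    f = g(obs_action) + discount * h(next_obs) - h(obs)
    # the discriminator logit is f - log pi(a|s); its sigmoid recovers
    # D = exp(f) / (exp(f) + pi(a|s))
    return f - logpi
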
def _add_layer(self, config):
    """ Network definition """
    if self._duel:
        self._v_layers = mlp(
            **config,
            out_size=1,
            name=self.name+'/v',
            out_dtype='float32')
    self._layers = mlp(
        **config,
        out_size=self.action_dim,
        name=self.name,
        out_dtype='float32')

def _add_layer(self, config):
    """ Network definition """
    if getattr(self, '_duel', False):
        self._v_layers = mlp(
            **config,
            out_size=1,
            name=self.name+'/v',
            out_dtype='float32')
    # we do not define the phi net here to make it consistent with the CNN output size
    self._layers = mlp(
        **config,
        out_size=self.action_dim,
        name=self.name,
        out_dtype='float32')

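# A minimal sketch of the standard dueling combination that _v_layers and _layers would
# typically feed into; assumed from the dueling-DQN formulation, the helper name and call
# pattern below are illustrative only, not taken from this repository.
import tensorflow as tf

def dueling_q(v, a):
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean advantage
    # keeps the value and advantage streams identifiable
    return v + a - tf.reduce_mean(a, axis=-1, keepdims=True)
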
def __init__(self, name='rssm'):
    super().__init__(name)
    self._embed_layer = layers.Dense(
        self._hidden_size,
        activation=self._activation,
        name='embed')
    self._cell = layers.GRUCell(self._deter_size)
    self._img_layers = mlp(
        [self._hidden_size],
        out_size=2 * self._stoch_size,
        activation=self._activation,
        name='img')
    self._obs_layers = mlp(
        [self._hidden_size],
        out_size=2 * self._stoch_size,
        activation=self._activation,
        name='obs')

def test_mlp(self):
    units_list = [10, 5]
    activation = 'relu'
    kernel_initializer = 'he_uniform'
    out_dim = 3
    layer_seed = 10
    tf.random.set_seed(0)
    x = tf.random.normal([1, 2])
    tf.random.set_seed(layer_seed)
    plain_layers = [tf.keras.layers.Dense(
        u, activation=activation, kernel_initializer=kernel_initializer)
        for u in units_list]
    plain_layers.append(tf.keras.layers.Dense(out_dim))
    plain_y = x
    for l in plain_layers:
        plain_y = l(plain_y)
    tf.random.set_seed(layer_seed)
    mlp_layers = mlp(
        units_list, out_dim,
        activation=activation,
        kernel_initializer=kernel_initializer)
    mlp_y = mlp_layers(x)
    np.testing.assert_allclose(plain_y.numpy(), mlp_y.numpy())
    plain_vars = []
    for l in plain_layers:
        plain_vars += l.variables
    for pv, mv in zip(plain_vars, mlp_layers.variables):
        np.testing.assert_allclose(pv.numpy(), mv.numpy())

def __init__(self, config, n_agents, name='qmixer'):
    super().__init__(name=name)
    config = config.copy()
    self.n_agents = n_agents
    self.hidden_dim = config.pop('hidden_dim')
    self.w1 = mlp(
        **config,
        out_size=n_agents * self.hidden_dim,
        name=f'{self.name}/w1')
    self.w2 = mlp(
        **config,
        out_size=self.hidden_dim,
        name=f'{self.name}/w2')
    self.b = mlp([], self.hidden_dim, name=f'{self.name}/b')
    config['units_list'] = [self.hidden_dim]
    self.v = mlp(**config, out_size=1, name=f'{self.name}/v')

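# A minimal sketch of how the hypernetwork heads above (w1, w2, b, v) are typically used
# in a QMIX-style monotonic mixing step. This is assumed from the QMIX paper, not taken
# from this repository; the function name and shapes are illustrative.
import tensorflow as tf

def mix(w1, w2, b, v, qs, state, n_agents, hidden_dim):
    # qs: per-agent Q-values with shape [B, 1, n_agents]; state: [B, state_dim]
    # tf.abs on the generated weights enforces monotonicity of q_tot in each agent's Q
    w1_ = tf.abs(tf.reshape(w1(state), [-1, n_agents, hidden_dim]))
    b_ = tf.reshape(b(state), [-1, 1, hidden_dim])
    hidden = tf.nn.elu(tf.matmul(qs, w1_) + b_)
    w2_ = tf.abs(tf.reshape(w2(state), [-1, hidden_dim, 1]))
    v_ = tf.reshape(v(state), [-1, 1, 1])
    q_tot = tf.matmul(hidden, w2_) + v_      # [B, 1, 1]
    return tf.squeeze(q_tot, axis=[1, 2])    # [B]
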
def __init__(self, name='fqn'):
    super().__init__(name=name)
    kernel_initializer = tf.keras.initializers.VarianceScaling(
        1./np.sqrt(3.), distribution='uniform')
    self._layers = mlp(
        out_size=self.N,
        name=f'{self.name}/fpn',
        kernel_initializer=kernel_initializer)

def __init__(self, config, name='value'):
    super().__init__(name=name)
    config = config.copy()
    config.setdefault('out_gain', 1)
    self._layers = mlp(
        **config,
        out_size=1,
        out_dtype='float32',
        name=name)

def __init__(self, name='encoder'):
    super().__init__(name=name)
    if getattr(self, '_has_cnn', True):
        self._layers = ConvEncoder(time_distributed=True)
    else:
        self._layers = mlp(
            self._units_list,
            activation=self._activation,
            name=name)

def __init__(self, action_dim, is_action_discrete, name='actor'):
    """ Network definition """
    super().__init__(name=name)
    out_size = action_dim if is_action_discrete else 2 * action_dim
    self._layers = mlp(
        self._units_list,
        out_size=out_size,
        activation=self._activation,
        name=name)
    self._is_action_discrete = is_action_discrete

def __init__(self, out_size=1, dist='normal', name='decoder'):
    super().__init__(name=name)
    self._dist = dist
    if getattr(self, '_has_cnn', None):
        self._layers = ConvDecoder(time_distributed=True)
    else:
        self._layers = mlp(
            self._units_list,
            out_size=out_size,
            activation=self._activation,
            name=name)

def __init__(self, config, action_dim, name='actor'):
    super().__init__(name=name)
    config = config.copy()
    self._action_dim = action_dim
    self.LOG_STD_MIN = config.pop('LOG_STD_MIN', -20)
    self.LOG_STD_MAX = config.pop('LOG_STD_MAX', 2)
    self._tsallis_q = config.pop('tsallis_q', 1)
    out_size = 2 * action_dim
    self._layers = mlp(**config, out_size=out_size, name=name)

def __init__(self, config, action_dim, name='actor'):
    super().__init__(name=name)
    config = config.copy()
    self._action_dim = action_dim
    prior = np.ones(action_dim, dtype=np.float32)
    prior /= np.sum(prior)
    self.prior = tf.Variable(prior, trainable=False, name='prior')
    self._epsilon_scaled_logits = config.pop('epsilon_scaled_logits', False)
    self._layers = mlp(
        **config,
        out_size=action_dim,
        out_dtype='float32',
        name=name)

def __init__(self, config, action_dim, is_action_discrete, name='actor'):
    super().__init__(name=name)
    config = config.copy()
    self.action_dim = action_dim
    self.is_action_discrete = is_action_discrete
    self.eval_act_temp = config.pop('eval_act_temp', 1)
    assert self.eval_act_temp >= 0, self.eval_act_temp
    self._init_std = config.pop('init_std', 1)
    if not self.is_action_discrete:
        self.logstd = tf.Variable(
            initial_value=np.log(self._init_std) * np.ones(action_dim),
            dtype='float32',
            trainable=True,
            name='actor/logstd')
    config.setdefault('out_gain', .01)
    self._layers = mlp(
        **config,
        out_size=action_dim,
        out_dtype='float32',
        name=name)

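# A minimal sketch of how a head like this commonly turns its outputs into a policy
# distribution: a temperature-scaled Categorical for discrete actions, or a diagonal
# Gaussian using the state-independent logstd above. This is an assumption about typical
# usage, not this repository's call method; all names below are illustrative.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

def action_dist(out, logstd, is_action_discrete, temp=1.):
    if is_action_discrete:
        # out: logits over actions
        return tfd.Categorical(logits=out / temp)
    # out: mean of the Gaussian; the scale is state-independent
    return tfd.MultivariateNormalDiag(out, tf.exp(logstd))
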
def __init__(self, config, name='q'):
    super().__init__(name=name)
    config = config.copy()
    self._layers = mlp(**config, out_size=1, name=name)

def mlp(self, x, *args, name, **kwargs):
    # build the sub-network lazily on the first call and cache it as self._{name},
    # so that subsequent calls reuse the same weights
    if not hasattr(self, f'_{name}'):
        from nn.func import mlp
        setattr(self, f'_{name}', mlp(*args, name=name, **kwargs))
    return getattr(self, f'_{name}')(x)

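# Hypothetical usage of the lazy helper above, assuming the enclosing class is the Module
# that defines it; the class name, layer sizes, and sub-network name are made up for
# illustration only.
class ValueHead(Module):
    def call(self, x):
        # a [64, 64] MLP with a scalar output is created on the first call
        # and reused on every call afterwards
        return self.mlp(x, [64, 64], out_size=1, name='out')
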
def __init__(self, config, name='discriminator'):
    super().__init__(name)
    config = config.copy()
    self._layers = mlp(**config, out_size=1)