class Discriminator(Model):
    """Binary classifier over (state, action) pairs.

    The state is passed through ``VAE_Encoder.forward`` and a stack of
    "reduce" Dense layers, concatenated with the action, passed through a
    stack of hidden Dense layers and finally squashed to one sigmoid unit.
    Label convention used by this file: 0 = expert, 1 = agent.

    NOTE(review): a second class named ``Discriminator`` appears later in
    this view; if both live in the same module the later definition shadows
    this one -- confirm which is intended.

    Args:
        lr: Learning rate handed to the ``Adam`` optimizer kept in ``self.opt``.
        hidden_units: Iterable of layer widths for the post-concat ReLU layers.
        reduce_units: Iterable of layer widths for the state-only ReLU layers.
    """

    def __init__(self, lr, hidden_units, reduce_units):
        super(Discriminator, self).__init__()

        # The optimizer is stored on the model itself.
        self.opt = Adam(learning_rate=lr)

        self.encoder = VAE_Encoder(latent_num=64)
        self.concat = Concatenate()

        # State-only layers applied to the encoder output.
        reduce_stack = []
        for width in reduce_units:
            reduce_stack.append(
                Dense(width, activation='relu', kernel_initializer='he_normal')
            )
        self.reduces = reduce_stack

        # Hidden layers applied to the joined state/action features.
        hidden_stack = []
        for width in hidden_units:
            hidden_stack.append(
                Dense(width, activation='relu', kernel_initializer='he_normal')
            )
        self.hiddens = hidden_stack

        # Output head: small uniform init for the final kernel.
        small_uniform = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.out = Dense(
            1,
            activation='sigmoid',
            kernel_initializer=small_uniform,
        )  # 0: expert, 1: agent

    def call(self, s, a):
        """Return the sigmoid score for state ``s`` paired with action ``a``."""
        state_feat = self.encoder.forward(s)
        for reduce_layer in self.reduces:
            state_feat = reduce_layer(state_feat)

        joint = self.concat([state_feat, a])
        for hidden_layer in self.hiddens:
            joint = hidden_layer(joint)

        return self.out(joint)
class Discriminator(Model):
    """Discriminator with a stochastic latent layer before the output head.

    ``encode`` maps a (state, action) pair to the mean and scale of a latent
    vector of size ``LATENT_UNIT_NUM`` plus one noisy sample of it;
    ``decode`` maps a latent vector to one sigmoid unit.
    Label convention used by this file: 0 = expert, 1 = agent.

    Args:
        lr: Learning rate handed to the ``Adam`` optimizer kept in ``self.opt``.
        hidden_units: Iterable of layer widths for the post-concat ReLU layers.
        reduce_units: Iterable of layer widths for the state-only ReLU layers.
    """

    def __init__(self, lr, hidden_units, reduce_units):
        super(Discriminator, self).__init__()
        self.opt = Adam(learning_rate=lr)

        self.encoder = VAE_Encoder(latent_num=64)
        self.concat = Concatenate()

        # State-only layers applied to the encoder output.
        self.reduces = [
            Dense(unit, activation='relu', kernel_initializer='he_normal')
            for unit in reduce_units
        ]

        # Hidden layers applied to the joined state/action features.
        self.hiddens = [
            Dense(unit, activation='relu', kernel_initializer='he_normal')
            for unit in hidden_units
        ]

        # Latent heads: unbounded mean, scale squashed into (0, 1) by sigmoid.
        self.latent_mu = Dense(LATENT_UNIT_NUM, activation='linear')
        self.latent_std = Dense(LATENT_UNIT_NUM, activation='sigmoid')

        # Output head: small uniform init for the final kernel.
        self.out = Dense(
            1,
            activation='sigmoid',
            kernel_initializer=tf.random_uniform_initializer(
                minval=-3e-3, maxval=3e-3),
        )  # 0: expert, 1: agent

    def call(self, s, a):
        """Return the score computed from the latent mean (no sampling)."""
        mu, _, _ = self.encode(s, a)
        return self.decode(mu)

    def encode(self, s, a):
        """Encode a (state, action) pair into latent statistics.

        Returns:
            A ``(mu, std, sampled)`` tuple where ``sampled = mu + std * eps``
            and ``eps`` is standard-normal noise.
        """
        s = self.encoder.forward(s)
        for layer in self.reduces:
            s = layer(s)

        x = self.concat([s, a])
        for layer in self.hiddens:
            x = layer(x)

        mu = self.latent_mu(x)
        std = self.latent_std(x)

        # Draw the noise with a TensorFlow op rather than NumPy: a NumPy call
        # runs only once when the function is traced (freezing the noise into
        # a constant) and cannot handle an unknown batch dimension in the
        # static shape. tf.shape(std) is resolved at run time.
        eps = tf.random.normal(shape=tf.shape(std), dtype=std.dtype)
        sampled = mu + std * eps
        return mu, std, sampled

    def decode(self, latent):
        """Map a latent vector to the sigmoid output score."""
        return self.out(latent)
class Critic(Model):
    """State-value network.

    The state goes through ``VAE_Encoder.forward``, a stack of ReLU Dense
    layers and a single linear output unit.

    Args:
        lr: Learning rate handed to the ``Adam`` optimizer kept in ``self.opt``.
        hidden_units: Iterable of layer widths for the ReLU hidden layers.
    """

    def __init__(self, lr, hidden_units):
        super(Critic, self).__init__()

        # The optimizer is stored on the model itself.
        self.opt = Adam(learning_rate=lr)

        self.encoder = VAE_Encoder(latent_num=64)

        # Hidden stack.
        hidden_stack = []
        for width in hidden_units:
            hidden_stack.append(
                Dense(width, activation='relu', kernel_initializer='he_normal')
            )
        self.hiddens = hidden_stack

        # Output head: one unit, no activation, small uniform kernel init.
        small_uniform = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.value = Dense(1, kernel_initializer=small_uniform)

    def call(self, s):
        """Return the value estimate for state ``s``."""
        features = self.encoder.forward(s)
        for hidden_layer in self.hiddens:
            features = hidden_layer(features)

        return self.value(features)
class Actor(Model):
    """Policy network producing a softmax over ``ACTION_NUM`` outputs.

    The state goes through ``VAE_Encoder.forward``, a stack of ReLU Dense
    layers and a softmax output layer.

    Args:
        lr: Learning rate handed to the ``Adam`` optimizer kept in ``self.opt``.
        hidden_units: Iterable of layer widths for the ReLU hidden layers.
    """

    def __init__(self, lr, hidden_units):
        super(Actor, self).__init__()

        # The optimizer is stored on the model itself.
        self.opt = Adam(learning_rate=lr)

        self.encoder = VAE_Encoder(latent_num=64)

        # Hidden stack.
        hidden_stack = []
        for width in hidden_units:
            hidden_stack.append(
                Dense(width, activation='relu', kernel_initializer='he_normal')
            )
        self.hiddens = hidden_stack

        # Output head: softmax over actions, small uniform kernel init.
        small_uniform = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.out = Dense(
            ACTION_NUM,
            activation='softmax',
            kernel_initializer=small_uniform,
        )

    def call(self, s):
        """Return the softmax policy output for state ``s``."""
        features = self.encoder.forward(s)
        for hidden_layer in self.hiddens:
            features = hidden_layer(features)

        return self.out(features)