Example #1
0
class Discriminator(Model):
    """Scores (state, action) pairs in [0, 1]; 0 = expert, 1 = agent."""

    def __init__(self, lr, hidden_units, reduce_units):
        super(Discriminator, self).__init__()
        self.opt = Adam(learning_rate=lr)
        self.encoder = VAE_Encoder(latent_num=64)
        self.concat = Concatenate()

        # State-reduction stack applied to the encoded state before the
        # action is concatenated in.
        self.reduces = []
        for n_units in reduce_units:
            self.reduces.append(
                Dense(n_units, activation='relu',
                      kernel_initializer='he_normal'))

        # Hidden stack applied to the joined (state, action) features.
        self.hiddens = []
        for n_units in hidden_units:
            self.hiddens.append(
                Dense(n_units, activation='relu',
                      kernel_initializer='he_normal'))

        # Output head with small uniform init.
        out_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.out = Dense(1,
                         activation='sigmoid',
                         kernel_initializer=out_init)  # 0: expert, 1: agent

    def call(self, s, a):
        """Encode the state, reduce it, join with the action, and score it."""
        feat = self.encoder.forward(s)
        for dense in self.reduces:
            feat = dense(feat)
        joined = self.concat([feat, a])
        for dense in self.hiddens:
            joined = dense(joined)
        return self.out(joined)
Example #2
0
class Discriminator(Model):
    """Discriminator with a stochastic latent bottleneck.

    Scores (state, action) pairs in [0, 1]; 0 = expert, 1 = agent.
    `encode` produces a Gaussian latent (mu, std) plus a reparameterized
    sample; `call` scores deterministically from the mean.
    """

    def __init__(self, lr, hidden_units, reduce_units):
        super(Discriminator, self).__init__()
        self.opt = Adam(learning_rate=lr)
        self.encoder = VAE_Encoder(latent_num=64)
        self.concat = Concatenate()
        # State-reduction stack applied before the action is concatenated.
        self.reduces = [
            Dense(unit, activation='relu', kernel_initializer='he_normal')
            for unit in reduce_units
        ]
        # hidden
        self.hiddens = [
            Dense(unit, activation='relu', kernel_initializer='he_normal')
            for unit in hidden_units
        ]

        # Latent Gaussian parameters. NOTE(review): sigmoid bounds std to
        # (0, 1); softplus is the more common strictly-positive choice —
        # confirm this bound is intentional.
        self.latent_mu = Dense(LATENT_UNIT_NUM, activation='linear')
        self.latent_std = Dense(LATENT_UNIT_NUM, activation='sigmoid')
        # output
        self.out = Dense(1,
                         activation='sigmoid',
                         kernel_initializer=tf.random_uniform_initializer(
                             minval=-3e-3, maxval=3e-3))  # 0: expert, 1: agent

    def call(self, s, a):
        # Deterministic forward pass: score from the latent mean only.
        mu, _, _ = self.encode(s, a)
        out = self.decode(mu)
        return out

    def encode(self, s, a):
        """Map (s, a) to the latent Gaussian; returns (mu, std, sample)."""
        s = self.encoder.forward(s)
        for layer in self.reduces:
            s = layer(s)
        x = self.concat([s, a])
        for layer in self.hiddens:
            x = layer(x)
        mu = self.latent_mu(x)
        std = self.latent_std(x)
        # Reparameterization trick. Bug fix: the original used
        # np.random.normal(size=std.shape), which fails under tf.function /
        # graph mode where the batch dimension is symbolic (None), and pulls
        # sampling off-device. tf.random.normal(tf.shape(std)) works in both
        # eager and graph mode.
        noise = std * tf.random.normal(tf.shape(std))
        sampled = noise + mu
        return mu, std, sampled

    def decode(self, latent):
        """Score a latent vector with the sigmoid output head."""
        return self.out(latent)
Example #3
0
class Critic(Model):
    """State-value network: encodes a state and regresses a scalar value."""

    def __init__(self, lr, hidden_units):
        super(Critic, self).__init__()
        self.opt = Adam(learning_rate=lr)
        self.encoder = VAE_Encoder(latent_num=64)

        # Hidden stack over the encoded state.
        self.hiddens = []
        for n_units in hidden_units:
            self.hiddens.append(
                Dense(n_units, activation='relu',
                      kernel_initializer='he_normal'))

        # Scalar value head with small uniform init (linear activation).
        value_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.value = Dense(1, kernel_initializer=value_init)

    def call(self, s):
        """Return the estimated value of state `s`."""
        feat = self.encoder.forward(s)
        for dense in self.hiddens:
            feat = dense(feat)
        return self.value(feat)
Example #4
0
class Actor(Model):
    """Policy network: encodes a state and outputs a softmax over actions."""

    def __init__(self, lr, hidden_units):
        super(Actor, self).__init__()
        self.opt = Adam(learning_rate=lr)
        self.encoder = VAE_Encoder(latent_num=64)

        # Hidden stack over the encoded state.
        self.hiddens = []
        for n_units in hidden_units:
            self.hiddens.append(
                Dense(n_units, activation='relu',
                      kernel_initializer='he_normal'))

        # Action-distribution head with small uniform init.
        policy_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.out = Dense(ACTION_NUM,
                         activation='softmax',
                         kernel_initializer=policy_init)

    def call(self, s):
        """Return the action probabilities for state `s`."""
        feat = self.encoder.forward(s)
        for dense in self.hiddens:
            feat = dense(feat)
        return self.out(feat)