Example #1
import torch as th
import torch.nn as nn

# `BaseModel` and `MLP` are assumed to be provided by the surrounding project.
class ActorMuLogstd(BaseModel):
    """
    use for PPO/PG algorithms' actor network.
    input: vector of state
    output: [stochastic action(mu), log of std]
    """
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        self.share = MLP(self.rep_net.h_dim, network_settings['hidden_units'])
        if network_settings['hidden_units']:
            ins = network_settings['hidden_units'][-1]
        else:
            ins = self.rep_net.h_dim
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            # state-dependent log-std: predicted from the shared features
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            # state-independent log-std: a learnable parameter, initialized to -0.5
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        x = self.repre(x, **kwargs)
        x = self.share(x)
        mu = self.mu(x)
        if self.condition_sigma:
            log_std = self.log_std(x)  # [T, B, *] or [B, *]
        else:
            # broadcast the shared log-std to match mu's batch shape
            log_std = self.log_std.repeat(mu.shape[:-1] +
                                          (1, ))  # [T, B, *] or [B, *]
        log_std = log_std.clamp(self.log_std_min, self.log_std_max)
        return mu, log_std
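
A minimal usage sketch (not from the source): `obs_spec`, `rep_net_params`, `action_dim`, and `state_batch` are placeholders for the project's own types, and the `network_settings` keys mirror the ones read in `__init__` above. Note that clamping `log_std` bounds the std to [exp(min), exp(max)], which keeps the resulting Gaussian numerically stable.

settings = {'condition_sigma': True,
            'log_std_bound': [-20.0, 2.0],  # illustrative bounds
            'hidden_units': [64, 64]}
actor = ActorMuLogstd(obs_spec, rep_net_params,
                      output_shape=action_dim,
                      network_settings=settings)
mu, log_std = actor(state_batch)                   # mu is tanh-squashed into (-1, 1)
dist = th.distributions.Normal(mu, log_std.exp())  # Gaussian policy head
action = dist.sample()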
Example #2
class ActorCriticValueCts(BaseModel):
    """
    combine actor network and critic network, share some nn layers. use for continuous action space.
    input: vector of state
    output: mean(mu) of Gaussian Distribution of actions given a state, v(s)
    """
    def __init__(self, obs_spec, rep_net_params, output_shape,
                 network_settings):
        super().__init__(obs_spec, rep_net_params)
        self.condition_sigma = network_settings['condition_sigma']
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        self.share = MLP(self.rep_net.h_dim, network_settings['share'])
        if network_settings['share']:
            ins = network_settings['share'][-1]
        else:
            ins = self.rep_net.h_dim
        self.mu_logstd_share = MLP(ins, network_settings['mu'])
        self.v = MLP(ins, network_settings['v'], output_shape=1)
        if network_settings['mu']:
            # the mu and log-std heads read from the last layer of the mu trunk
            ins = network_settings['mu'][-1]
        self.mu = MLP(ins, [], output_shape=output_shape, out_act='tanh')
        if self.condition_sigma:
            self.log_std = MLP(ins, [], output_shape=output_shape)
        else:
            self.log_std = nn.Parameter(-0.5 * th.ones(output_shape))

    def forward(self, x, **kwargs):
        x = self.repre(x, **kwargs)
        x = self.share(x)
        v = self.v(x)
        x_mu_logstd = self.mu_logstd_share(x)
        mu = self.mu(x_mu_logstd)
        if self.condition_sigma:
            log_std = self.log_std(x_mu_logstd)  # [T, B, *] or [B, *]
        else:
            log_std = self.log_std.repeat(mu.shape[:-1] +
                                          (1, ))  # [T, B, *] or [B, *]
        log_std = log_std.clamp(self.log_std_min, self.log_std_max)
        return mu, log_std, v
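
A minimal usage sketch (not from the source), with the same placeholder names as above; the `share`, `mu`, and `v` keys give the hidden layer sizes of the shared trunk and the two heads. A single forward pass yields both the policy parameters and the state value.

settings = {'condition_sigma': True,
            'log_std_bound': [-20.0, 2.0],
            'share': [128],
            'mu': [64],
            'v': [64]}
ac = ActorCriticValueCts(obs_spec, rep_net_params,
                         output_shape=action_dim,
                         network_settings=settings)
mu, log_std, v = ac(state_batch)  # policy parameters and state value in one pass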