Code example #1
0
 def __init__(self,
              in_dim,
              size,
              logstd=0.0,
              init_scale=1.0,
              init_bias=0.0):
     """
     Head for a diagonal-Gaussian action distribution.

     A single linear layer maps a latent of width ``in_dim`` to ``size``
     mean outputs; the log standard deviation is a constant tensor whose
     leading axis of 1 broadcasts over the batch dimension.
     """
     super(DiagGaussianPdType, self).__init__()
     self.in_dim = in_dim
     self.size = size
     # Mean head: latent features -> per-dimension means.
     mean_head = nn.Linear(in_dim, size)
     init_weight(mean_head, init_scale, init_bias)
     self.fc = mean_head
     # Constant log-std of shape (1, size); first dim for batch.
     self.logstd = torch.Tensor([[logstd] * size])
Code example #2
0
File: models.py  Project: KongCDY/baselines_pytorch
    def __init__(self,
                 env,
                 network,
                 hiddens=None,
                 dueling=True,
                 layer_norm=False,
                 **network_kwargs):
        """
        Q-network with an optional dueling architecture.

        Parameters
        ----------
        env : environment with gym-style ``action_space`` / ``observation_space``.
        network : either a network-builder name (str) resolved via
            ``get_network_builder``, or an already-built module exposing
            ``out_dim``.
        hiddens : list of hidden-layer widths for the MLP heads
            (default: [256]); ``None`` means the default.
        dueling : if True, add a separate scalar state-value head.
        layer_norm : if True, insert nn.LayerNorm after each hidden linear.
        **network_kwargs : forwarded to the network builder.
        """
        super(QNet, self).__init__()
        # Avoid the mutable-default-argument pitfall: [256] is created per call.
        if hiddens is None:
            hiddens = [256]
        self.dueling = dueling
        self.num_actions = env.action_space.n
        if isinstance(network, str):
            self.base_net = get_network_builder(network)(
                env.observation_space.shape, **network_kwargs)
        else:
            self.base_net = network

        def _build_head(out_features):
            # MLP head: hiddens -> out_features, each module init_weight'ed
            # with sqrt(2) gain (same scheme for both streams).
            layers = []
            in_dim = self.base_net.out_dim
            for hidden in hiddens:
                layers.append(nn.Linear(in_dim, hidden))
                if layer_norm:
                    layers.append(nn.LayerNorm(hidden))
                layers.append(nn.ReLU())
                in_dim = hidden
            layers.append(nn.Linear(in_dim, out_features))
            head = nn.Sequential(*layers)
            for m in head.modules():
                init_weight(m, init_scale=np.sqrt(2.0))
            return head

        # Advantage/action stream: one output per discrete action.
        self.action_layers = _build_head(self.num_actions)

        if dueling:
            # State-value stream: a single scalar output.
            self.state_layers = _build_head(1)
Code example #3
0
    def __init__(self, input_size, convs, **conv_kwargs):
        """
        Convolutional feature extractor with a configurable conv stack.

        Args:
            input_size: input shape; the last entry is the channel count
                (presumably (H, W, C) as in nature_cnn -- TODO confirm
                against this class's forward()).
            convs: iterable of (num_outputs, kernel_size, stride) triples,
                one nn.Conv2d + nn.ReLU pair per triple.
            **conv_kwargs: forwarded to init_weight for every module.
        """
        super(cnn_convs_only, self).__init__()
        in_dim = input_size[-1]
        layers = []
        for num_outputs, kernel_size, stride in convs:
            layers.append(
                nn.Conv2d(in_dim,
                          num_outputs,
                          kernel_size=kernel_size,
                          stride=stride))
            layers.append(nn.ReLU())
            in_dim = num_outputs
        self.convs = nn.Sequential(*layers)

        # Probe with a dummy batch of one to discover the flattened output
        # width; no_grad avoids building an autograd graph for this
        # throwaway pass (result is unchanged).
        with torch.no_grad():
            x = torch.zeros((1, *input_size))
            out = self.forward(x).view(1, -1)
        self.out_dim = out.size(1)

        # init
        for m in self.modules():
            init_weight(m, **conv_kwargs)
Code example #4
0
    def __init__(self, input_size, **conv_kwargs):
        """
        CNN from the DQN Nature paper (Mnih et al., 2015).

        Args:
            input_size: (H, W, C) input image shape.
            **conv_kwargs: forwarded to init_weight for every module.
        """
        # NOTE: the docstring must be the first statement to be bound as
        # __doc__; previously it sat after super().__init__() and was a
        # discarded string expression.
        super(nature_cnn, self).__init__()
        in_dim = input_size[-1]
        self.convs = nn.Sequential(
            nn.Conv2d(in_dim, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Spatial size after the three convs (kernels 8/4/3, strides 4/2/1;
        # oSize's stride presumably defaults to 1 for the last call -- TODO
        # confirm against oSize's definition).
        out_h = oSize(oSize(oSize(input_size[-3], 8, 4), 4, 2), 3)
        out_w = oSize(oSize(oSize(input_size[-2], 8, 4), 4, 2), 3)
        self.fc = nn.Sequential(
            nn.Linear(out_h * out_w * 64, 512),
            nn.ReLU(),
        )
        self.out_dim = 512

        # init
        for m in self.modules():
            init_weight(m, **conv_kwargs)
        # NOTE(review): only the FIRST conv and FIRST linear are
        # re-initialized with sqrt(2) gain; the remaining layers keep the
        # conv_kwargs init from the loop above. Looks inconsistent with the
        # other builders, which apply sqrt(2) to every module -- confirm
        # whether this is intentional.
        init_weight(self.convs[0], init_scale=np.sqrt(2.0))
        init_weight(self.fc[0], init_scale=np.sqrt(2.0))
Code example #5
0
    def __init__(self,
                 input_size,
                 num_layers=2,
                 num_hidden=64,
                 activation=nn.Tanh,
                 layer_norm=False):
        """
        Stack of fully-connected layers to be used in a policy / q-function
        approximator.

        Parameters
        ----------
        input_size: (int, )             input size, use env.observation_space.shape
        num_layers: int                 number of fully-connected layers (default: 2);
                                        note that 0 still yields one layer
        num_hidden: int                 size of fully-connected layers (default: 64)
        activation:                     activation module class (default: nn.Tanh)
        layer_norm: bool                insert nn.LayerNorm after each linear layer

        Returns:
        -------
        fully connected network model
        """
        # Docstring moved before super().__init__() so it is actually bound
        # as __doc__ (it was previously a discarded string statement).
        super(nature_mlp, self).__init__()
        in_dim = input_size[0]
        self.out_dim = num_hidden
        layers = []

        # First layer maps the observation width to the hidden width.
        layers.append(nn.Linear(in_dim, num_hidden))
        if layer_norm:
            layers.append(nn.LayerNorm(num_hidden))
        layers.append(activation())

        # Remaining hidden-to-hidden layers.
        for _ in range(1, num_layers):
            layers.append(nn.Linear(num_hidden, num_hidden))
            if layer_norm:
                layers.append(nn.LayerNorm(num_hidden))
            layers.append(activation())
        self.layers = nn.Sequential(*layers)

        # init
        for m in self.modules():
            init_weight(m, init_scale=np.sqrt(2.0))
Code example #6
0
    def __init__(self,
                 env,
                 latent,
                 estimate_q=False,
                 vf_latent=None,
                 **tensors):
        """
        Policy head paired with a value (or Q-value) head.

        Parameters:
        ----------
        env             RL environment
        latent          latent network from which policy distribution parameters
                        are inferred; must expose ``out_dim``
        estimate_q      if True, output one Q-value per discrete action instead
                        of a scalar state value
        vf_latent       latent network for the value function (if None, then
                        latent is used)
        **tensors       torch tensors for additional attributes such as state
                        or mask, attached to the instance
        """
        # Docstring moved before super().__init__() so it is bound as
        # __doc__; also corrected "tensorflow tensors" -- this is PyTorch.
        super(PolicyWithValue, self).__init__()

        self.state = None
        self.initial_state = None
        # Attach any extra tensors (state, mask, ...) as instance attributes.
        self.__dict__.update(tensors)

        self.latent = latent
        self.vf_latent = vf_latent if vf_latent is not None else latent

        # Based on the action space, will select what probability distribution type
        self.pdtype = make_pdtype(self.latent.out_dim,
                                  env.action_space,
                                  init_scale=0.01)

        # NOTE(review): both heads are sized from latent.out_dim, not
        # vf_latent.out_dim -- fine when the two latents share a width, but
        # verify against forward() if vf_latent can differ.
        if estimate_q:
            # Q-head requires a discrete action space: one output per action.
            assert isinstance(env.action_space, gym.spaces.Discrete)
            self.vf = nn.Linear(self.latent.out_dim, env.action_space.n)
            self.q = self.vf
        else:
            self.vf = nn.Linear(self.latent.out_dim, 1)

        # init weight
        torch_utils.init_weight(self.vf)
Code example #7
0
 def __init__(self, in_dim, ncat, init_scale=1.0, init_bias=0.0):
     """
     Head for a categorical action distribution: a single linear layer
     mapping an ``in_dim`` latent to ``ncat`` unnormalized logits.
     """
     super(CategoricalPdType, self).__init__()
     self.in_dim = in_dim
     self.ncat = ncat
     logit_head = nn.Linear(in_dim, ncat)
     init_weight(logit_head, init_scale, init_bias)
     self.fc = logit_head