Ejemplo n.º 1
0
    def __init__(
        self,
        input_size: int,
        action_size: int,
        layers: int,
        units: int,
        dist: str = "tanh_normal",
        act: ActFunc = None,
        min_std: float = 1e-4,
        init_std: float = 5.0,
        mean_scale: float = 5.0,
    ):
        """Initializes Policy

        Args:
            input_size (int): Input size to network
            action_size (int): Action space size
            layers (int): Number of layers in network
            units (int): Size of the hidden layers
            dist (str): Output distribution, with tanh_normal implemented
            act (Any): Activation function
            min_std (float): Minimum std for output distribution
            init_std (float): Intitial std
            mean_scale (float): Augmenting mean output from FC network
        """
        super().__init__()
        self.layrs = layers
        self.units = units
        self.dist = dist
        self.act = act
        if not act:
            self.act = nn.ReLU
        self.min_std = min_std
        self.init_std = init_std
        self.mean_scale = mean_scale
        self.action_size = action_size

        self.layers = []
        self.softplus = nn.Softplus()

        # MLP Construction
        cur_size = input_size
        for _ in range(self.layrs):
            self.layers.extend([Linear(cur_size, self.units), self.act()])
            cur_size = self.units
        if self.dist == "tanh_normal":
            self.layers.append(Linear(cur_size, 2 * action_size))
        elif self.dist == "onehot":
            self.layers.append(Linear(cur_size, action_size))
        self.model = nn.Sequential(*self.layers)
Ejemplo n.º 2
0
    def __init__(
        self,
        input_size: int,
        depth: int = 32,
        act: ActFunc = None,
        shape: Tuple[int] = (3, 64, 64),
    ):
        """Initializes a ConvDecoder instance.

        Args:
            input_size (int): Input size, usually feature size output from
                RSSM.
            depth (int): Number of channels in the first conv layer
            act (Any): Activation for Encoder, default ReLU
            shape (List): Shape of observation input
        """
        super().__init__()
        self.act = act
        if not act:
            self.act = nn.ReLU
        self.depth = depth
        self.shape = shape

        self.layers = [
            Linear(input_size, 32 * self.depth),
            Reshape([-1, 32 * self.depth, 1, 1]),
            ConvTranspose2d(32 * self.depth, 4 * self.depth, 5, stride=2),
            self.act(),
            ConvTranspose2d(4 * self.depth, 2 * self.depth, 5, stride=2),
            self.act(),
            ConvTranspose2d(2 * self.depth, self.depth, 6, stride=2),
            self.act(),
            ConvTranspose2d(self.depth, self.shape[0], 6, stride=2),
        ]
        self.model = nn.Sequential(*self.layers)
Ejemplo n.º 3
0
    def __init__(
        self,
        action_size: int,
        embed_size: int,
        stoch: int = 30,
        deter: int = 200,
        hidden: int = 200,
        act: ActFunc = None,
    ):
        """Initializes RSSM

        Args:
            action_size (int): Action space size
            embed_size (int): Size of ConvEncoder embedding
            stoch (int): Size of the distributional hidden state
            deter (int): Size of the deterministic hidden state
            hidden (int): General size of hidden layers
            act (Any): Activation function
        """
        super().__init__()
        self.stoch_size = stoch
        self.deter_size = deter
        self.hidden_size = hidden
        self.act = act
        if act is None:
            self.act = nn.ELU

        self.obs1 = Linear(embed_size + deter, hidden)
        self.obs2 = Linear(hidden, 2 * stoch)

        self.cell = GRUCell(self.hidden_size, hidden_size=self.deter_size)
        self.img1 = Linear(stoch + action_size, hidden)
        self.img2 = Linear(deter, hidden)
        self.img3 = Linear(hidden, 2 * stoch)

        self.softplus = nn.Softplus

        self.device = (
            torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        )
Ejemplo n.º 4
0
    def __init__(
        self,
        input_size: int,
        output_size: int,
        layers: int,
        units: int,
        dist: str = "normal",
        act: ActFunc = None,
    ):
        """Initializes FC network

        Args:
            input_size (int): Input size to network
            output_size (int): Output size to network
            layers (int): Number of layers in network
            units (int): Size of the hidden layers
            dist (str): Output distribution, parameterized by FC output
                logits.
            act (Any): Activation function
        """
        super().__init__()
        self.layrs = layers
        self.units = units
        self.act = act
        if not act:
            self.act = nn.ELU
        self.dist = dist
        self.input_size = input_size
        self.output_size = output_size
        self.layers = []
        cur_size = input_size
        for _ in range(self.layrs):
            self.layers.extend([Linear(cur_size, self.units), self.act()])
            cur_size = units
        self.layers.append(Linear(cur_size, output_size))
        self.model = nn.Sequential(*self.layers)