Ejemplo n.º 1
0
    def __init__(self, input_shape, use_vae, reward_prediction_bits):
        """Build the reward head and the image head of the decoder.

        Args:
            input_shape: Indexable shape; element 0 is used as the channel
                count and elements 1 and 2 as the spatial size handed to
                ``get_linear_dims_after_conv``.
            use_vae: When truthy, the image head is built for twice the state
                channels (state channels plus the channels for z).
            reward_prediction_bits: Output width of the final linear layer of
                the reward head.
        """
        super(DecoderModule, self).__init__()
        self.use_vae = use_vae

        # Reward head: conv -> ReLU -> flatten -> linear.
        reward_head_conv = nn.Conv2d(in_channels=input_shape[0], out_channels=24, kernel_size=3, stride=1)
        # Size of the flattened conv output, as reported by the helper.
        reward_head_linear_dims = get_linear_dims_after_conv([reward_head_conv], (input_shape[1], input_shape[2]))
        self.reward_head = nn.Sequential(
            reward_head_conv,
            nn.ReLU(),
            Flatten(),
            nn.Linear(in_features=reward_head_linear_dims, out_features=reward_prediction_bits)
        )

        if self.use_vae:
            # state input channels + channels for z
            input_channels = input_shape[0] + input_shape[0]
        else:
            input_channels = input_shape[0]

        image_head_conv1_output_channels = 64
        image_head_d2s1_block_size = 2
        # Floor division keeps the channel count an int. True division (`/`)
        # yields 16.0 under Python 3, and a float is not a valid channel count
        # for a convolution layer.
        # NOTE(review): presumably DepthToSpace divides the channel count by
        # block_size ** 2 -- confirm against its implementation.
        image_head_conv2_input_channels = image_head_conv1_output_channels // (image_head_d2s1_block_size ** 2)
        self.image_head = nn.Sequential(
            ConvStack(input_channels=input_channels, kernel_sizes=(1, 5, 3), output_channels=(32, 32, image_head_conv1_output_channels)),
            DepthToSpace(block_size=image_head_d2s1_block_size),
            ConvStack(input_channels=image_head_conv2_input_channels, kernel_sizes=(3, 3, 1), output_channels=(64, 64, 48)),
            DepthToSpace(block_size=4)
        )
        self.sigmoid = torch.nn.Sigmoid()
Ejemplo n.º 2
0
    def __init__(self, obs_shape, action_space, use_cuda):
        """Assemble the four-convolution actor-critic network.

        Args:
            obs_shape: Observation shape; element 0 is the channel count and
                the remaining elements are forwarded as the input dims.
            action_space: Output width of the actor head.
            use_cuda: Not used here; accepted only for compatibility, because
                I2A needs the parameter.
        """
        super(AtariModel, self).__init__()
        from i2a.utils import get_linear_dims_after_conv

        def zero_fill(tensor):
            return nn.init.constant_(tensor, 0)

        def init_with_relu_gain(module):
            # Same arguments the project-level `init` helper received before:
            # orthogonal initializer, zero-fill initializer, ReLU gain.
            return init(module, nn.init.orthogonal_, zero_fill,
                        nn.init.calculate_gain('relu'))

        def init_default_gain(module):
            # As above, but without an explicit gain argument.
            return init(module, nn.init.orthogonal_, zero_fill)

        def conv3x3(channels_in, stride):
            layer = nn.Conv2d(channels_in, 16, kernel_size=3, stride=stride, padding=0)
            return init_with_relu_gain(layer)

        in_channels = obs_shape[0]
        spatial_dims = obs_shape[1:]

        # Layers are created in the same order as before so that parameter
        # registration and random initialisation order are unchanged.
        self.conv1 = conv3x3(in_channels, 1)
        self.conv2 = conv3x3(16, 2)
        self.conv3 = conv3x3(16, 2)
        self.conv4 = conv3x3(16, 2)

        conv_layers = [self.conv1, self.conv2, self.conv3, self.conv4]
        self.linear_input_size = get_linear_dims_after_conv(conv_layers, spatial_dims)

        self.linear1 = init_default_gain(nn.Linear(self.linear_input_size, 256))

        # Two heads on top of the shared 256-wide layer.
        self.critic_linear = init_default_gain(nn.Linear(256, 1))
        self.actor_linear = init_default_gain(nn.Linear(256, action_space))

        self.train()
    def __init__(self, obs_shape, num_outputs=512):
        """Create four 3x3 convolutions followed by one linear layer.

        Args:
            obs_shape: Observation shape; element 0 is the channel count and
                the remaining elements are forwarded as the input dims.
            num_outputs: Output width of the final linear layer; also stored
                as ``_output_size``.
        """
        super(LatentSpaceModelFreeNetwork, self).__init__()
        self._output_size = num_outputs

        in_channels = obs_shape[0]
        spatial_dims = obs_shape[1:]

        width = 16
        # conv2..conv4 share these settings; conv1 differs only in stride.
        strided = dict(kernel_size=3, stride=2, padding=0)

        self.conv1 = nn.Conv2d(in_channels, width, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(width, width, **strided)
        self.conv3 = nn.Conv2d(width, width, **strided)
        self.conv4 = nn.Conv2d(width, width, **strided)

        conv_layers = [self.conv1, self.conv2, self.conv3, self.conv4]
        # Input width for the linear layer, as reported by the helper.
        flat_features = get_linear_dims_after_conv(conv_layers, spatial_dims)
        self.fc = nn.Linear(flat_features, num_outputs)
    def __init__(self, obs_shape, action_space):
        """Set up three stride-1 convolutions with actor and critic heads.

        Args:
            obs_shape: Observation shape; element 0 is the channel count and
                the remaining elements are forwarded as the input dims.
            action_space: Output width of the actor head.
        """
        super(RolloutPolicy, self).__init__()

        in_channels = obs_shape[0]
        spatial_dims = obs_shape[1:]

        # All three convolutions use the same kernel size and stride.
        conv_kwargs = dict(kernel_size=3, stride=1)
        self.conv1 = nn.Conv2d(in_channels, 32, **conv_kwargs)
        self.conv2 = nn.Conv2d(32, 64, **conv_kwargs)
        self.conv3 = nn.Conv2d(64, 32, **conv_kwargs)

        conv_layers = [self.conv1, self.conv2, self.conv3]
        self.linear_input_size = get_linear_dims_after_conv(conv_layers, spatial_dims)

        hidden_width = 256
        self.linear1 = nn.Linear(self.linear_input_size, hidden_width)

        self.critic_linear = nn.Linear(hidden_width, 1)
        self.actor_linear = nn.Linear(hidden_width, action_space)

        # Run the project's initializer over every submodule, then switch to
        # training mode.
        self.apply(xavier_weights_init_relu)
        self.train()
Ejemplo n.º 5
0
    def __init__(self, obs_shape, action_space, use_cuda):
        """Set up two 3x3 convolutions with actor and critic heads.

        Args:
            obs_shape: Observation shape; element 0 is the channel count and
                the remaining elements are forwarded as the input dims.
            action_space: Output width of the actor head.
            use_cuda: Not used here; accepted only for compatibility, because
                I2A needs the parameter.
        """
        super(I2A_MiniModel, self).__init__()

        in_channels = obs_shape[0]
        spatial_dims = obs_shape[1:]

        # Output sizes below are the original author's notes; they match a
        # 19x19 input -- TODO confirm the expected observation size.
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1)  # 17x17
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2)  # 8x8

        conv_layers = [self.conv1, self.conv2]
        self.linear_input_size = get_linear_dims_after_conv(conv_layers, spatial_dims)

        hidden_width = 256
        self.linear1 = nn.Linear(self.linear_input_size, hidden_width)

        self.critic_linear = nn.Linear(hidden_width, 1)
        self.actor_linear = nn.Linear(hidden_width, action_space)

        # Run the project's initializer over every submodule, then switch to
        # training mode.
        self.apply(xavier_weights_init)
        self.train()