Example 1: CedricEncoder.__init__ (BaseDNNEncoder subclass)
    def __init__(self, config=None, **kwargs):
        BaseDNNEncoder.__init__(self, config=config, **kwargs)

        # needs a square image input
        assert self.config.input_size[0] == self.config.input_size[1], \
            "CedricEncoder needs a square image input size"

        # network architecture
        if self.config.hidden_channels is None:
            self.config.hidden_channels = 32
        hidden_channels = self.config.hidden_channels
        if self.config.hidden_dim is None:
            self.config.hidden_dim = 64
        hidden_dim = self.config.hidden_dim
        kernels_size = [5] * self.config.n_conv_layers
        strides = [2] * self.config.n_conv_layers
        pads = [0] * self.config.n_conv_layers
        dils = [1] * self.config.n_conv_layers
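        # NOTE: the feature-map sizes below are computed as if the convs
        # themselves used `strides`, while the modules pair stride-1 convs
        # with stride-2 max-pooling; the two coincide only when each layer's
        # input size is even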

        # feature map size
        feature_map_sizes = conv2d_output_sizes(self.config.input_size,
                                                self.config.n_conv_layers,
                                                kernels_size, strides, pads,
                                                dils)

        # local feature
        self.local_feature_shape = (
            hidden_channels, feature_map_sizes[self.config.feature_layer][0],
            feature_map_sizes[self.config.feature_layer][1])
        self.lf = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer + 1):
            # the first layer reads the image; later layers read the previous
            # feature maps (hidden_channels is doubled at the end of each pass)
            in_channels = (self.config.n_channels
                           if conv_layer_id == 0 else hidden_channels // 2)
            self.lf.add_module(
                "conv_{}".format(conv_layer_id),
                nn.Sequential(
                    nn.Conv2d(in_channels, hidden_channels,
                              kernels_size[conv_layer_id]), nn.PReLU(),
                    nn.MaxPool2d(2, stride=strides[conv_layer_id])))
            hidden_channels *= 2
        self.lf.out_connection_type = ("conv", hidden_channels)

        # global feature
        self.gf = nn.Sequential()
        ## convolutional layers
        for conv_layer_id in range(self.config.feature_layer + 1,
                                   self.config.n_conv_layers):
            self.gf.add_module(
                "conv_{}".format(conv_layer_id),
                nn.Sequential(
                    nn.Conv2d(hidden_channels // 2, hidden_channels,
                              kernels_size[conv_layer_id]), nn.PReLU(),
                    nn.MaxPool2d(2, stride=strides[conv_layer_id])))
            hidden_channels *= 2
        self.gf.add_module("flatten", Flatten())
        ## linear layers
        h_after_convs, w_after_convs = feature_map_sizes[-1]
        self.gf.add_module(
            "lin_0",
            nn.Sequential(
                nn.Linear(hidden_channels // 2 * h_after_convs * w_after_convs,
                          hidden_dim), nn.PReLU()))

        # encoding feature
        if self.config.encoder_conditional_type == "gaussian":
            self.add_module("ef",
                            nn.Linear(hidden_dim, 2 * self.config.n_latents))
        elif self.config.encoder_conditional_type == "deterministic":
            self.add_module("ef", nn.Linear(hidden_dim, self.config.n_latents))
        else:
            raise ValueError(
                "The conditional type must be either gaussian or deterministic"
            )

        # attention feature
        if self.config.use_attention:
            self.add_module("af",
                            nn.Linear(hidden_dim, 4 * self.config.n_latents))
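These examples all call a conv2d_output_sizes helper that is not shown. Below is a minimal sketch of what it plausibly computes, assuming it applies the standard PyTorch Conv2d size formula layer by layer; the actual helper may differ.

def conv2d_output_sizes(input_size, n_conv_layers, kernels_size, strides,
                        pads, dils):
    """Per-layer (H, W) after each Conv2d, using the standard formula:
    out = floor((in + 2*pad - dil*(kernel - 1) - 1) / stride + 1)."""
    sizes = []
    h, w = input_size
    for i in range(n_conv_layers):
        h = (h + 2 * pads[i] - dils[i] * (kernels_size[i] - 1) - 1) // strides[i] + 1
        w = (w + 2 * pads[i] - dils[i] * (kernels_size[i] - 1) - 1) // strides[i] + 1
        sizes.append((h, w))
    return sizes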
Example 2: a BaseDNNDecoder __init__ (inverts the encoder pipeline: efi, gfi, lfi)
    def __init__(self, config=None, **kwargs):
        BaseDNNDecoder.__init__(self, config=config, **kwargs)

        # network architecture
        # WARNING: increment order follows the encoder's top-down order
        if self.config.hidden_channels is None:
            self.config.hidden_channels = 32
        hidden_channels = self.config.hidden_channels
        if self.config.hidden_dim is None:
            self.config.hidden_dim = 256
        hidden_dim = self.config.hidden_dim
        kernels_size = [4] * self.config.n_conv_layers
        strides = [2] * self.config.n_conv_layers
        pads = [1] * self.config.n_conv_layers
        dils = [1] * self.config.n_conv_layers
        feature_map_sizes = conv2d_output_sizes(self.config.input_size,
                                                self.config.n_conv_layers,
                                                kernels_size, strides, pads,
                                                dils)
        h_after_convs, w_after_convs = feature_map_sizes[-1]
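        # output paddings: make each ConvTranspose2d exactly restore the
        # spatial size consumed by the corresponding encoder Conv2d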
        output_pads = [None] * self.config.n_conv_layers
        output_pads[0] = convtranspose2d_get_output_padding(
            feature_map_sizes[0], self.config.input_size, kernels_size[0],
            strides[0], pads[0])
        for conv_layer_id in range(1, self.config.n_conv_layers):
            output_pads[conv_layer_id] = convtranspose2d_get_output_padding(
                feature_map_sizes[conv_layer_id],
                feature_map_sizes[conv_layer_id - 1],
                kernels_size[conv_layer_id], strides[conv_layer_id],
                pads[conv_layer_id])

        # encoder feature inverse
        self.efi = nn.Sequential(nn.Linear(self.config.n_latents, hidden_dim),
                                 nn.ReLU())
        self.efi.out_connection_type = ("lin", hidden_dim)

        # global feature inverse
        ## linear layers
        self.gfi = nn.Sequential()
        self.gfi.add_module(
            "lin_1_i",
            nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
        self.gfi.add_module(
            "lin_0_i",
            nn.Sequential(
                nn.Linear(hidden_dim,
                          hidden_channels * h_after_convs * w_after_convs),
                nn.ReLU()))
        self.gfi.add_module(
            "channelize",
            Channelize(hidden_channels, h_after_convs, w_after_convs))
        ## convolutional layers
        for conv_layer_id in range(self.config.n_conv_layers - 1,
                                   self.config.feature_layer, -1):
            self.gfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    nn.ConvTranspose2d(
                        hidden_channels,
                        hidden_channels,
                        kernels_size[conv_layer_id],
                        strides[conv_layer_id],
                        pads[conv_layer_id],
                        output_padding=output_pads[conv_layer_id]), nn.ReLU()))
        self.gfi.out_connection_type = ("conv", hidden_channels)

        # local feature inverse
        self.lfi = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer, 0, -1):
            self.lfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    nn.ConvTranspose2d(
                        hidden_channels,
                        hidden_channels,
                        kernels_size[conv_layer_id],
                        strides[conv_layer_id],
                        pads[conv_layer_id],
                        output_padding=output_pads[conv_layer_id]), nn.ReLU()))
        self.lfi.add_module(
            "conv_0_i",
            nn.ConvTranspose2d(hidden_channels,
                               self.config.n_channels,
                               kernels_size[0],
                               strides[0],
                               pads[0],
                               output_padding=output_pads[0]))
        self.lfi.out_connection_type = ("conv", self.config.n_channels)
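Example 2 additionally uses convtranspose2d_get_output_padding, Flatten, and Channelize, none of which are shown. The sketches below are assumptions: Flatten and Channelize are taken to be plain reshape modules, and the output padding is derived from the ConvTranspose2d size formula with dilation 1 (the only dilation these examples use).

import torch.nn as nn

def convtranspose2d_get_output_padding(input_size, target_size, kernel_size,
                                       stride, pad):
    # ConvTranspose2d (dilation 1): out = (in - 1)*stride - 2*pad + kernel
    #                                     + output_padding
    # so per dimension: output_padding = target - ((in - 1)*stride - 2*pad + kernel)
    return tuple(t - ((i - 1) * stride - 2 * pad + kernel_size)
                 for i, t in zip(input_size, target_size))

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class Channelize(nn.Module):
    def __init__(self, n_channels, height, width):
        super().__init__()
        self.n_channels, self.height, self.width = n_channels, height, width

    def forward(self, x):
        return x.view(x.size(0), self.n_channels, self.height, self.width)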
Example 3: DumoulinEncoder.__init__ (BaseDNNEncoder subclass)
    def __init__(self, config=None, **kwargs):
        BaseDNNEncoder.__init__(self, config=config, **kwargs)

        # needs a square, power-of-2 image input size
        power = math.log(self.config.input_size[0], 2)
        assert (power % 1 == 0.0) and (power > 3), \
            "DumoulinEncoder needs a power-of-2 image input size (>= 16)"
        assert self.config.input_size[0] == self.config.input_size[1], \
            "DumoulinEncoder needs a square image input size"

        assert self.config.n_conv_layers == power - 2, \
            "DumoulinEncoder needs n_conv_layers == log2(input_size) - 2"

        # network architecture
        if self.config.hidden_channels is None:
            self.config.hidden_channels = int(
                512 // math.pow(2, self.config.n_conv_layers))
        hidden_channels = self.config.hidden_channels
        kernels_size = [4, 4] * self.config.n_conv_layers
        strides = [1, 2] * self.config.n_conv_layers
        pads = [0, 1] * self.config.n_conv_layers
        dils = [1, 1] * self.config.n_conv_layers
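        # each logical conv layer is a pair of 4x4 convs: stride 1 (pad 0)
        # then stride 2 (pad 1); n_conv_layers such pairs reduce a 2^p input
        # to a 1x1 feature map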

        # feature map size
        feature_map_sizes = conv2d_output_sizes(self.config.input_size,
                                                2 * self.config.n_conv_layers,
                                                kernels_size, strides, pads,
                                                dils)

        # local feature
        ## convolutional layers
        self.local_feature_shape = (
            int(hidden_channels * math.pow(2, self.config.feature_layer + 1)),
            feature_map_sizes[2 * self.config.feature_layer + 1][0],
            feature_map_sizes[2 * self.config.feature_layer + 1][1])
        self.lf = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer + 1):
            # the first layer reads the image; later layers read the previous
            # feature maps
            in_channels = (self.config.n_channels
                           if conv_layer_id == 0 else hidden_channels)
            self.lf.add_module(
                "conv_{}".format(conv_layer_id),
                nn.Sequential(
                    nn.Conv2d(in_channels, hidden_channels,
                              kernels_size[2 * conv_layer_id],
                              strides[2 * conv_layer_id],
                              pads[2 * conv_layer_id],
                              dils[2 * conv_layer_id]),
                    nn.BatchNorm2d(hidden_channels),
                    nn.LeakyReLU(inplace=True),
                    nn.Conv2d(hidden_channels, 2 * hidden_channels,
                              kernels_size[2 * conv_layer_id + 1],
                              strides[2 * conv_layer_id + 1],
                              pads[2 * conv_layer_id + 1],
                              dils[2 * conv_layer_id + 1]),
                    nn.BatchNorm2d(2 * hidden_channels),
                    nn.LeakyReLU(inplace=True)))
            hidden_channels *= 2
        self.lf.out_connection_type = ("conv", hidden_channels)

        # global feature
        self.gf = nn.Sequential()
        ## convolutional layers
        for conv_layer_id in range(self.config.feature_layer + 1,
                                   self.config.n_conv_layers):
            self.gf.add_module(
                "conv_{}".format(conv_layer_id),
                nn.Sequential(
                    nn.Conv2d(hidden_channels, hidden_channels,
                              kernels_size[2 * conv_layer_id],
                              strides[2 * conv_layer_id],
                              pads[2 * conv_layer_id],
                              dils[2 * conv_layer_id]),
                    nn.BatchNorm2d(hidden_channels),
                    nn.LeakyReLU(inplace=True),
                    nn.Conv2d(hidden_channels, 2 * hidden_channels,
                              kernels_size[2 * conv_layer_id + 1],
                              strides[2 * conv_layer_id + 1],
                              pads[2 * conv_layer_id + 1],
                              dils[2 * conv_layer_id + 1]),
                    nn.BatchNorm2d(2 * hidden_channels),
                    nn.LeakyReLU(inplace=True)))
            hidden_channels *= 2
        self.gf.out_connection_type = ("conv", hidden_channels)

        # encoding feature
        if self.config.encoder_conditional_type == "gaussian":
            self.add_module(
                "ef",
                nn.Sequential(
                    nn.Conv2d(hidden_channels,
                              hidden_channels,
                              kernel_size=1,
                              stride=1), nn.BatchNorm2d(hidden_channels),
                    nn.LeakyReLU(inplace=True),
                    nn.Conv2d(hidden_channels,
                              2 * self.config.n_latents,
                              kernel_size=1,
                              stride=1)))
        elif self.config.encoder_conditional_type == "deterministic":
            self.add_module(
                "ef",
                nn.Sequential(
                    nn.Conv2d(hidden_channels,
                              hidden_channels,
                              kernel_size=1,
                              stride=1), nn.BatchNorm2d(hidden_channels),
                    nn.LeakyReLU(inplace=True),
                    nn.Conv2d(hidden_channels,
                              self.config.n_latents,
                              kernel_size=1,
                              stride=1)))

        # attention feature
        if self.config.use_attention:
            self.add_module("af",
                            nn.Linear(hidden_dim, 4 * self.config.n_latents))
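A worked example of the channel bookkeeping above, for a hypothetical 64x64 input: power = log2(64) = 6, so n_conv_layers must be 4 and the default hidden_channels is 512 // 2**4 = 32; each conv pair ends by doubling the channels, so the final width is 512 regardless of depth.

import math

input_size = 64  # assumed square, power of 2
n_conv_layers = int(math.log2(input_size)) - 2            # 4
hidden_channels = int(512 // math.pow(2, n_conv_layers))  # 32
widths = [hidden_channels * 2 ** i for i in range(n_conv_layers + 1)]
print(widths)  # [32, 64, 128, 256, 512]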
Example 4: DumoulinDecoder.__init__ (BaseDNNDecoder subclass)
    def __init__(self, config=None, **kwargs):
        BaseDNNDecoder.__init__(self, config=config, **kwargs)

        # needs a square, power-of-2 image input size
        power = math.log(self.config.input_size[0], 2)
        assert (power % 1 == 0.0) and (power > 3), \
            "DumoulinDecoder needs a power-of-2 image input size (>= 16)"
        assert self.config.input_size[0] == self.config.input_size[1], \
            "DumoulinDecoder needs a square image input size"

        assert self.config.n_conv_layers == power - 2, \
            "DumoulinDecoder needs n_conv_layers == log2(input_size) - 2"

        # network architecture
        if self.config.hidden_channels is None:
            self.config.hidden_channels = int(
                512 // math.pow(2, self.config.n_conv_layers))
        hidden_channels = self.config.hidden_channels
        kernels_size = [4, 4] * self.config.n_conv_layers
        strides = [1, 2] * self.config.n_conv_layers
        pads = [0, 1] * self.config.n_conv_layers
        dils = [1, 1] * self.config.n_conv_layers

        feature_map_sizes = conv2d_output_sizes(self.config.input_size,
                                                2 * self.config.n_conv_layers,
                                                kernels_size, strides, pads,
                                                dils)
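        # output paddings for both convs of each pair, so every ConvTranspose2d
        # exactly restores its paired Conv2d's input size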
        output_pads = [None] * (2 * self.config.n_conv_layers)
        output_pads[0] = convtranspose2d_get_output_padding(
            feature_map_sizes[0], self.config.input_size, kernels_size[0],
            strides[0], pads[0])
        output_pads[1] = convtranspose2d_get_output_padding(
            feature_map_sizes[1], feature_map_sizes[0], kernels_size[1],
            strides[1], pads[1])
        for conv_layer_id in range(1, self.config.n_conv_layers):
            output_pads[2 * conv_layer_id] = convtranspose2d_get_output_padding(
                feature_map_sizes[2 * conv_layer_id],
                feature_map_sizes[2 * conv_layer_id - 1],
                kernels_size[2 * conv_layer_id],
                strides[2 * conv_layer_id],
                pads[2 * conv_layer_id])
            output_pads[2 * conv_layer_id + 1] = convtranspose2d_get_output_padding(
                feature_map_sizes[2 * conv_layer_id + 1],
                feature_map_sizes[2 * conv_layer_id],
                kernels_size[2 * conv_layer_id + 1],
                strides[2 * conv_layer_id + 1],
                pads[2 * conv_layer_id + 1])

        # encoder feature inverse
        hidden_channels = int(hidden_channels *
                              math.pow(2, self.config.n_conv_layers))
        self.efi = nn.Sequential(
            nn.ConvTranspose2d(self.config.n_latents,
                               hidden_channels,
                               kernel_size=1,
                               stride=1), nn.BatchNorm2d(hidden_channels),
            nn.LeakyReLU(inplace=True),
            nn.ConvTranspose2d(hidden_channels,
                               hidden_channels,
                               kernel_size=1,
                               stride=1), nn.BatchNorm2d(hidden_channels),
            nn.LeakyReLU(inplace=True))
        self.efi.out_connection_type = ("conv", hidden_channels)

        # global feature inverse
        self.gfi = nn.Sequential()
        ## convolutional layers
        for conv_layer_id in range(self.config.n_conv_layers - 1,
                                   self.config.feature_layer, -1):
            self.gfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    nn.ConvTranspose2d(
                        hidden_channels,
                        hidden_channels // 2,
                        kernels_size[2 * conv_layer_id + 1],
                        strides[2 * conv_layer_id + 1],
                        pads[2 * conv_layer_id + 1],
                        output_padding=output_pads[2 * conv_layer_id + 1]),
                    nn.BatchNorm2d(hidden_channels // 2),
                    nn.LeakyReLU(inplace=True),
                    nn.ConvTranspose2d(
                        hidden_channels // 2,
                        hidden_channels // 2,
                        kernels_size[2 * conv_layer_id],
                        strides[2 * conv_layer_id],
                        pads[2 * conv_layer_id],
                        output_padding=output_pads[2 * conv_layer_id]),
                    nn.BatchNorm2d(hidden_channels // 2),
                    nn.LeakyReLU(inplace=True),
                ))
            hidden_channels = hidden_channels // 2
        self.gfi.out_connection_type = ("conv", hidden_channels)

        # local feature inverse
        self.lfi = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer, 0, -1):
            self.lfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    nn.ConvTranspose2d(
                        hidden_channels,
                        hidden_channels // 2,
                        kernels_size[2 * conv_layer_id + 1],
                        strides[2 * conv_layer_id + 1],
                        pads[2 * conv_layer_id + 1],
                        output_padding=output_pads[2 * conv_layer_id + 1]),
                    nn.BatchNorm2d(hidden_channels // 2),
                    nn.LeakyReLU(inplace=True),
                    nn.ConvTranspose2d(
                        hidden_channels // 2,
                        hidden_channels // 2,
                        kernels_size[2 * conv_layer_id],
                        strides[2 * conv_layer_id],
                        pads[2 * conv_layer_id],
                        output_padding=output_pads[2 * conv_layer_id]),
                    nn.BatchNorm2d(hidden_channels // 2),
                    nn.LeakyReLU(inplace=True),
                ))
            hidden_channels = hidden_channels // 2
        self.lfi.add_module(
            "conv_0_i",
            nn.Sequential(
                nn.ConvTranspose2d(hidden_channels,
                                   hidden_channels // 2,
                                   kernels_size[1],
                                   strides[1],
                                   pads[1],
                                   output_padding=output_pads[1]),
                nn.BatchNorm2d(hidden_channels // 2),
                nn.LeakyReLU(inplace=True),
                nn.ConvTranspose2d(hidden_channels // 2,
                                   self.config.n_channels,
                                   kernels_size[0],
                                   strides[0],
                                   pads[0],
                                   output_padding=output_pads[0]),
                nn.Sigmoid()))
        self.lfi.out_connection_type = ("conv", self.config.n_channels)
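A minimal shape sanity check for one inverse conv pair above, with hypothetical sizes: the encoder pair (4x4 conv stride 1 pad 0, then 4x4 conv stride 2 pad 1) maps 11x11 -> 8x8 -> 4x4, so the transposed pair must map 4x4 back to 11x11, with zero output padding in this particular case.

import torch
import torch.nn as nn

x = torch.randn(1, 64, 4, 4)
block = nn.Sequential(
    nn.ConvTranspose2d(64, 32, 4, 2, 1, output_padding=0),  # 4x4 -> 8x8
    nn.ConvTranspose2d(32, 32, 4, 1, 0, output_padding=0),  # 8x8 -> 11x11
)
print(block(x).shape)  # torch.Size([1, 32, 11, 11])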