Esempio n. 1
0
    def __init__(self, config=None, **kwargs):
        """Build a Dumoulin-style sparse encoder out of MinkowskiEngine layers.

        The network is split into:
          * ``lf`` - local feature extractor (first ``feature_layer + 1``
            double-convolution stages),
          * ``gf`` - global feature extractor (remaining stages),
          * ``ef`` - encoding head (gaussian or deterministic),
          * ``af`` - optional attention head (when ``config.use_attention``).

        Args:
            config: optional configuration mapping forwarded to
                ``MEEncoder.__init__``; ``None`` means "use defaults".
            **kwargs: extra keyword options forwarded to ``MEEncoder.__init__``.
        """
        # Fix: ``config={}`` was a shared mutable default argument; since the
        # config is mutated below (e.g. ``hidden_channels``), state could leak
        # between instances. Use the None sentinel instead.
        if config is None:
            config = {}
        MEEncoder.__init__(self, config=config, **kwargs)

        # need square and power of 2 image size input (2**4 = 16 minimum)
        power = math.log(self.config.input_size[0], 2)
        assert (power % 1 == 0.0) and (
            power > 3
        ), "Dumoulin Encoder needs a power of 2 as image input size (>=16)"
        # need square image input (every spatial dim equal to the first one)
        assert torch.all(
            torch.tensor([
                self.config.input_size[i] == self.config.input_size[0]
                for i in range(1, len(self.config.input_size))
            ])), "Dumoulin Encoder needs a square image input size"

        assert self.config.n_conv_layers == power, "The number of convolutional layers in DumoulinEncoder must be log2(input_size) "

        # network architecture: each "layer" is a pair of convolutions
        # (stride-1 valid conv followed by stride-2 downsampling conv)
        if self.config.hidden_channels is None:
            self.config.hidden_channels = 8

        hidden_channels = self.config.hidden_channels
        kernels_size = [4, 4] * self.config.n_conv_layers
        strides = [1, 2] * self.config.n_conv_layers
        pads = [0, 1] * self.config.n_conv_layers
        dils = [1, 1] * self.config.n_conv_layers

        # spatial size of the feature map after every single convolution
        feature_map_sizes = conv_output_sizes(self.config.input_size,
                                              2 * self.config.n_conv_layers,
                                              kernels_size, strides, pads,
                                              dils)

        def _double_conv(in_channels, mid_channels, layer_id):
            # One encoder stage: conv -> BN -> ELU -> conv -> BN -> ELU,
            # ending with 2 * mid_channels output channels.
            return nn.Sequential(
                ME.MinkowskiConvolution(
                    in_channels,
                    mid_channels,
                    kernel_size=kernels_size[2 * layer_id],
                    stride=strides[2 * layer_id],
                    dilation=dils[2 * layer_id],
                    dimension=self.spatial_dims,
                    bias=True),
                ME.MinkowskiBatchNorm(mid_channels),
                ME.MinkowskiELU(inplace=True),
                ME.MinkowskiConvolution(
                    mid_channels,
                    2 * mid_channels,
                    kernel_size=kernels_size[2 * layer_id + 1],
                    stride=strides[2 * layer_id + 1],
                    dilation=dils[2 * layer_id + 1],
                    dimension=self.spatial_dims,
                    bias=True),
                ME.MinkowskiBatchNorm(2 * mid_channels),
                ME.MinkowskiELU(inplace=True),
            )

        # local feature
        ## (channels, height, width) of the feature map at the feature layer
        self.local_feature_shape = (
            int(hidden_channels * math.pow(2, self.config.feature_layer + 1)),
            feature_map_sizes[2 * self.config.feature_layer + 1][0],
            feature_map_sizes[2 * self.config.feature_layer + 1][1])
        self.lf = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer + 1):
            # Only the very first stage reads the raw image channels.
            in_channels = (self.config.n_channels
                           if conv_layer_id == 0 else hidden_channels)
            self.lf.add_module(
                "conv_{}".format(conv_layer_id),
                _double_conv(in_channels, hidden_channels, conv_layer_id))
            hidden_channels *= 2
        self.lf.out_connection_type = ("conv", hidden_channels)

        # global feature: remaining convolutional stages
        self.gf = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer + 1,
                                   self.config.n_conv_layers):
            self.gf.add_module(
                "conv_{}".format(conv_layer_id),
                _double_conv(hidden_channels, hidden_channels, conv_layer_id))
            hidden_channels *= 2
        self.gf.out_connection_type = ("conv", hidden_channels)

        def _encoding_head(out_channels):
            # Two 1x1 convolutions projecting to the latent dimensionality.
            return nn.Sequential(
                ME.MinkowskiConvolution(hidden_channels,
                                        hidden_channels,
                                        kernel_size=1,
                                        stride=1,
                                        dilation=1,
                                        dimension=self.spatial_dims,
                                        bias=True),
                ME.MinkowskiBatchNorm(hidden_channels),
                ME.MinkowskiELU(inplace=True),
                ME.MinkowskiConvolution(hidden_channels,
                                        out_channels,
                                        kernel_size=1,
                                        stride=1,
                                        dilation=1,
                                        dimension=self.spatial_dims,
                                        bias=True),
            )

        # encoding feature: the gaussian head outputs 2 * n_latents channels
        # (presumably mean and log-variance pairs — TODO confirm in sampler).
        # NOTE(review): any other conditional type is silently ignored here
        # (no ``ef`` module is registered) — confirm this is intended.
        if self.config.encoder_conditional_type == "gaussian":
            self.add_module("ef", _encoding_head(2 * self.config.n_latents))
        elif self.config.encoder_conditional_type == "deterministic":
            self.add_module("ef", _encoding_head(self.config.n_latents))

        # global pool
        self.global_pool = ME.MinkowskiGlobalPooling()

        # attention feature
        if self.config.use_attention:
            self.add_module(
                "af",
                ME.MinkowskiConvolution(hidden_channels,
                                        4 * self.config.n_latents,
                                        kernel_size=1,
                                        stride=1,
                                        dilation=1,
                                        dimension=self.spatial_dims,
                                        bias=True))
Esempio n. 2
0
    def __init__(self, config=None, **kwargs):
        """Build the decoder: linear inverse heads followed by transposed convs.

        Mirrors the encoder top-down: ``efi`` inverts the encoding head,
        ``gfi`` inverts the global feature stack (linear layers + transposed
        convolutions), and ``lfi`` inverts the local feature stack back to
        ``config.n_channels`` image channels.

        Args:
            config: optional configuration mapping forwarded to
                ``Decoder.__init__``; ``None`` means "use defaults".
            **kwargs: extra keyword options forwarded to ``Decoder.__init__``.
        """
        # Fix: ``config={}`` was a shared mutable default argument; the config
        # is mutated below, so state could leak between instances.
        if config is None:
            config = {}
        Decoder.__init__(self, config=config, **kwargs)

        # network architecture
        # WARNING: incrementation order follows the encoder top-down order
        if self.config.hidden_channels is None:
            self.config.hidden_channels = 32
        hidden_channels = self.config.hidden_channels
        if self.config.hidden_dim is None:
            self.config.hidden_dim = 256
        hidden_dim = self.config.hidden_dim
        kernels_size = [4] * self.config.n_conv_layers
        strides = [2] * self.config.n_conv_layers
        pads = [1] * self.config.n_conv_layers
        dils = [1] * self.config.n_conv_layers
        # encoder-side feature map sizes, used to derive the output paddings
        feature_map_sizes = conv_output_sizes(self.config.input_size,
                                              self.config.n_conv_layers,
                                              kernels_size, strides, pads,
                                              dils)
        n_linear_in = hidden_channels * torch.prod(
            torch.tensor(feature_map_sizes[-1])).item()
        # output paddings so each transposed conv exactly inverts its conv
        output_pads = [None] * self.config.n_conv_layers
        output_pads[0] = convtranspose_get_output_padding(
            feature_map_sizes[0], self.config.input_size, kernels_size[0],
            strides[0], pads[0])
        for conv_layer_id in range(1, self.config.n_conv_layers):
            output_pads[conv_layer_id] = convtranspose_get_output_padding(
                feature_map_sizes[conv_layer_id],
                feature_map_sizes[conv_layer_id - 1],
                kernels_size[conv_layer_id], strides[conv_layer_id],
                pads[conv_layer_id])

        # encoder feature inverse
        self.efi = nn.Sequential(nn.Linear(self.config.n_latents, hidden_dim),
                                 nn.ReLU())
        self.efi.out_connection_type = ("lin", hidden_dim)

        # global feature inverse
        ## linear layers
        self.gfi = nn.Sequential()
        self.gfi.add_module(
            "lin_1_i",
            nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
        self.gfi.add_module(
            "lin_0_i",
            nn.Sequential(nn.Linear(hidden_dim, n_linear_in), nn.ReLU()))
        # reshape the flat vector back into a (hidden_channels, H, W) map
        self.gfi.add_module("channelize",
                            Channelize(hidden_channels, feature_map_sizes[-1]))
        ## convolutional layers, walked in reverse encoder order down to
        ## (but excluding) the feature layer
        for conv_layer_id in range(self.config.n_conv_layers - 1,
                                   self.config.feature_layer, -1):
            self.gfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    self.convtranspose_module(
                        hidden_channels,
                        hidden_channels,
                        kernels_size[conv_layer_id],
                        strides[conv_layer_id],
                        pads[conv_layer_id],
                        output_padding=output_pads[conv_layer_id]), nn.ReLU()))
        self.gfi.out_connection_type = ("conv", hidden_channels)

        # local feature inverse
        self.lfi = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer, 0, -1):
            self.lfi.add_module(
                "conv_{}_i".format(conv_layer_id),
                nn.Sequential(
                    self.convtranspose_module(
                        hidden_channels,
                        hidden_channels,
                        kernels_size[conv_layer_id],
                        strides[conv_layer_id],
                        pads[conv_layer_id],
                        output_padding=output_pads[conv_layer_id]), nn.ReLU()))
        # final stage maps back to the image channels, with no activation
        self.lfi.add_module(
            "conv_0_i",
            self.convtranspose_module(hidden_channels,
                                      self.config.n_channels,
                                      kernels_size[0],
                                      strides[0],
                                      pads[0],
                                      output_padding=output_pads[0]))
        self.lfi.out_connection_type = ("conv", self.config.n_channels)
Esempio n. 3
0
    def __init__(self, config=None, **kwargs):
        """Build the Cedric encoder: conv + max-pool stages, then linear heads.

        ``lf`` extracts local features up to ``config.feature_layer``, ``gf``
        continues with the remaining conv stages plus a linear projection to
        ``hidden_dim``, and ``ef`` (plus the optional ``af`` attention head)
        maps that projection to the latent space.

        Args:
            config: optional configuration mapping forwarded to
                ``Encoder.__init__``; ``None`` means "use defaults".
            **kwargs: extra keyword options forwarded to ``Encoder.__init__``.

        Raises:
            ValueError: if ``config.encoder_conditional_type`` is neither
                "gaussian" nor "deterministic".
        """
        # Fix: ``config={}`` was a shared mutable default argument; the config
        # is mutated below, so state could leak between instances.
        if config is None:
            config = {}
        Encoder.__init__(self, config=config, **kwargs)

        # need square image input (every spatial dim equal to the first one)
        assert torch.all(
            torch.tensor([
                self.config.input_size[i] == self.config.input_size[0]
                for i in range(1, len(self.config.input_size))
            ])), "CedricEncoder needs a square image input size"

        # network architecture
        if self.config.hidden_channels is None:
            self.config.hidden_channels = 32
        hidden_channels = self.config.hidden_channels
        if self.config.hidden_dim is None:
            self.config.hidden_dim = 64
        hidden_dim = self.config.hidden_dim
        kernels_size = [5] * self.config.n_conv_layers
        strides = [2] * self.config.n_conv_layers
        pads = [0] * self.config.n_conv_layers
        dils = [1] * self.config.n_conv_layers

        # spatial size of the feature map after each convolutional layer
        feature_map_sizes = conv_output_sizes(self.config.input_size,
                                              self.config.n_conv_layers,
                                              kernels_size, strides, pads,
                                              dils)

        # local feature: (channels, height, width) at the feature layer
        self.local_feature_shape = (
            hidden_channels, feature_map_sizes[self.config.feature_layer][0],
            feature_map_sizes[self.config.feature_layer][1])
        self.lf = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer + 1):
            if conv_layer_id == 0:
                # first stage reads the raw image channels
                self.lf.add_module(
                    "conv_{}".format(0),
                    nn.Sequential(
                        self.conv_module(self.config.n_channels,
                                         hidden_channels, kernels_size[0]),
                        nn.PReLU(), self.maxpool_module(2, stride=strides[0])))
            else:
                self.lf.add_module(
                    "conv_{}".format(conv_layer_id),
                    nn.Sequential(
                        self.conv_module(hidden_channels // 2, hidden_channels,
                                         kernels_size[conv_layer_id]),
                        nn.PReLU(),
                        self.maxpool_module(2, stride=strides[conv_layer_id])))
            hidden_channels *= 2
        self.lf.out_connection_type = ("conv", hidden_channels)

        # global feature
        ## convolutional layers
        for conv_layer_id in range(self.config.feature_layer + 1,
                                   self.config.n_conv_layers):
            self.gf = getattr(self, "gf", None) or self.gf
            self.gf.add_module(
                "conv_{}".format(conv_layer_id),
                nn.Sequential(
                    self.conv_module(hidden_channels // 2, hidden_channels,
                                     kernels_size[conv_layer_id]), nn.PReLU(),
                    self.maxpool_module(2, stride=strides[conv_layer_id])))
            hidden_channels *= 2
        self.gf.add_module("flatten", Flatten())
        ## linear layers
        # hidden_channels was doubled once past the last conv, hence // 2
        n_linear_in = hidden_channels // 2 * torch.prod(
            torch.tensor(feature_map_sizes[-1])).item()
        self.gf.add_module(
            "lin_0",
            nn.Sequential(nn.Linear(n_linear_in, hidden_dim), nn.PReLU()))

        # encoding feature: gaussian head outputs 2 * n_latents values
        # (presumably mean and log-variance pairs — TODO confirm in sampler)
        if self.config.encoder_conditional_type == "gaussian":
            self.add_module("ef",
                            nn.Linear(hidden_dim, 2 * self.config.n_latents))
        elif self.config.encoder_conditional_type == "deterministic":
            self.add_module("ef", nn.Linear(hidden_dim, self.config.n_latents))
        else:
            raise ValueError(
                "The conditional type must be either gaussian or deterministic"
            )

        # attention feature
        if self.config.use_attention:
            self.add_module("af",
                            nn.Linear(hidden_dim, 4 * self.config.n_latents))
Esempio n. 4
0
    def __init__(self, config=None, **kwargs):
        """Build the Dumoulin-style decoder from transposed-conv stages.

        Mirrors the paired encoder top-down: ``efi`` inverts the encoding head
        with 1x1 transposed convolutions, then ``gfi`` and ``lfi`` walk the
        encoder's double-conv stages in reverse, halving the channel count at
        each stage until ``config.n_channels`` image channels are restored.

        Args:
            config: optional configuration mapping forwarded to
                ``Decoder.__init__``; ``None`` means "use defaults".
            **kwargs: extra keyword options forwarded to ``Decoder.__init__``.
        """
        # Fix: ``config={}`` was a shared mutable default argument; state
        # could leak between instances. Use the None sentinel instead.
        if config is None:
            config = {}
        Decoder.__init__(self, config=config, **kwargs)

        # need square and power of 2 image size input (2**4 = 16 minimum)
        power = math.log(self.config.input_size[0], 2)
        # Fix: the assertion messages said "Encoder" — copy-pasted from the
        # encoder class; this is the decoder.
        assert (power % 1 == 0.0) and (
            power > 3
        ), "Dumoulin Decoder needs a power of 2 as image input size (>=16)"
        assert self.config.input_size[0] == self.config.input_size[
            1], "Dumoulin Decoder needs a square image input size"

        assert self.config.n_conv_layers == power - 2, "The number of convolutional layers in DumoulinDecoder must be log(input_size, 2) - 2"

        # network architecture
        # start from the deepest (widest) channel count of the encoder
        hidden_channels = int(self.config.hidden_channels *
                              math.pow(2, self.config.n_conv_layers))
        kernels_size = [4, 4] * self.config.n_conv_layers
        strides = [1, 2] * self.config.n_conv_layers
        pads = [0, 1] * self.config.n_conv_layers
        dils = [1, 1] * self.config.n_conv_layers

        # encoder-side feature map sizes, used to derive the output paddings
        feature_map_sizes = conv_output_sizes(self.config.input_size,
                                              2 * self.config.n_conv_layers,
                                              kernels_size, strides, pads,
                                              dils)
        # output paddings so each transposed conv exactly inverts its conv
        output_pads = [None] * 2 * self.config.n_conv_layers
        output_pads[0] = convtranspose_get_output_padding(
            feature_map_sizes[0], self.config.input_size, kernels_size[0],
            strides[0], pads[0])
        output_pads[1] = convtranspose_get_output_padding(
            feature_map_sizes[1], feature_map_sizes[0], kernels_size[1],
            strides[1], pads[1])
        for conv_layer_id in range(1, self.config.n_conv_layers):
            output_pads[2 * conv_layer_id] = convtranspose_get_output_padding(
                feature_map_sizes[2 * conv_layer_id],
                feature_map_sizes[2 * conv_layer_id - 1],
                kernels_size[2 * conv_layer_id], strides[2 * conv_layer_id],
                pads[2 * conv_layer_id])
            output_pads[2 * conv_layer_id +
                        1] = convtranspose_get_output_padding(
                            feature_map_sizes[2 * conv_layer_id + 1],
                            feature_map_sizes[2 * conv_layer_id + 1 - 1],
                            kernels_size[2 * conv_layer_id + 1],
                            strides[2 * conv_layer_id + 1],
                            pads[2 * conv_layer_id + 1])

        def _deconv_pair(in_channels, layer_id):
            # One decoder stage inverting an encoder double-conv:
            # (convT -> BN -> LeakyReLU) x 2, halving channels on the first.
            out_channels = in_channels // 2
            return nn.Sequential(
                self.convtranspose_module(
                    in_channels,
                    out_channels,
                    kernels_size[2 * layer_id + 1],
                    strides[2 * layer_id + 1],
                    pads[2 * layer_id + 1],
                    output_padding=output_pads[2 * layer_id + 1]),
                self.batchnorm_module(out_channels),
                nn.LeakyReLU(inplace=True),
                self.convtranspose_module(
                    out_channels,
                    out_channels,
                    kernels_size[2 * layer_id],
                    strides[2 * layer_id],
                    pads[2 * layer_id],
                    output_padding=output_pads[2 * layer_id]),
                self.batchnorm_module(out_channels),
                nn.LeakyReLU(inplace=True),
            )

        # encoder feature inverse: two 1x1 transposed convolutions lifting
        # the latent vector to the deepest channel count
        self.efi = nn.Sequential(
            self.convtranspose_module(self.config.n_latents,
                                      hidden_channels,
                                      kernel_size=1,
                                      stride=1),
            self.batchnorm_module(hidden_channels), nn.LeakyReLU(inplace=True),
            self.convtranspose_module(hidden_channels,
                                      hidden_channels,
                                      kernel_size=1,
                                      stride=1),
            self.batchnorm_module(hidden_channels), nn.LeakyReLU(inplace=True))
        self.efi.out_connection_type = ("conv", hidden_channels)

        # global feature inverse: reverse encoder order down to (but
        # excluding) the feature layer
        self.gfi = nn.Sequential()
        for conv_layer_id in range(self.config.n_conv_layers - 1,
                                   self.config.feature_layer, -1):
            self.gfi.add_module("conv_{}_i".format(conv_layer_id),
                                _deconv_pair(hidden_channels, conv_layer_id))
            hidden_channels = hidden_channels // 2
        self.gfi.out_connection_type = ("conv", hidden_channels)

        # local feature inverse
        self.lfi = nn.Sequential()
        for conv_layer_id in range(self.config.feature_layer, 0, -1):
            self.lfi.add_module("conv_{}_i".format(conv_layer_id),
                                _deconv_pair(hidden_channels, conv_layer_id))
            hidden_channels = hidden_channels // 2
        # final stage maps back to the image channels; no BN/activation after
        # the last transposed convolution
        self.lfi.add_module(
            "conv_0_i",
            nn.Sequential(
                self.convtranspose_module(hidden_channels,
                                          hidden_channels // 2,
                                          kernels_size[1],
                                          strides[1],
                                          pads[1],
                                          output_padding=output_pads[1]),
                self.batchnorm_module(hidden_channels // 2),
                nn.LeakyReLU(inplace=True),
                self.convtranspose_module(hidden_channels // 2,
                                          self.config.n_channels,
                                          kernels_size[0],
                                          strides[0],
                                          pads[0],
                                          output_padding=output_pads[0]),
            ))
        self.lfi.out_connection_type = ("conv", self.config.n_channels)