Example 1
0
    def __init__(self, depth=7, feature_size=512, dilation=1, use_spectral_norm=True):
        """
        constructor for the class
        :param depth: total depth of the discriminator
                      (Must be equal to the Generator depth)
        :param feature_size: size of the deepest features extracted
                             (Must be equal to Generator latent_size)
        :param dilation: dilation factor forwarded to the general conv blocks
        :param use_spectral_norm: whether to turn spectral normalization on
        """
        from torch.nn import ModuleList
        from MSG_GAN.CustomLayers import DisGeneralConvBlock, DisFinalBlock
        from torch.nn import Conv2d

        super().__init__()

        # feature_size must be a power of 2 so the repeated halving stays integral
        assert feature_size != 0 and ((feature_size & (feature_size - 1)) == 0), \
            "latent size not a power of 2"
        if depth >= 4:
            assert feature_size >= np.power(2, depth - 4), \
                "feature size cannot be produced"

        # create state of the object
        self.depth = depth
        self.feature_size = feature_size
        self.spectral_norm_mode = None
        self.dilation = dilation

        # create the fromRGB layers for various inputs:
        # 1x1 conv mapping a 3-channel image to the requested feature width
        def from_rgb(out_channels):
            return Conv2d(3, out_channels, (1, 1), bias=True)

        self.rgb_to_features = ModuleList([from_rgb(self.feature_size // 2)])

        # create a module list of the other required general convolution blocks
        self.layers = ModuleList([DisFinalBlock(self.feature_size)])

        # create the remaining layers
        for i in range(self.depth - 1):
            if i > 2:
                # high-resolution stages: channel width halves as i grows
                layer = DisGeneralConvBlock(
                    int(self.feature_size // np.power(2, i - 2)),
                    int(self.feature_size // np.power(2, i - 2)),
                    dilation=dilation
                )
                rgb = from_rgb(int(self.feature_size // np.power(2, i - 1)))
            else:
                # low-resolution stages all run at full feature width
                layer = DisGeneralConvBlock(self.feature_size, self.feature_size // 2,
                                            dilation=dilation)
                rgb = from_rgb(self.feature_size // 2)

            self.layers.append(layer)
            self.rgb_to_features.append(rgb)

        # just replace the last converter.  The exponent equals the loop's final
        # value of i minus 2, i.e. depth - 4; computing it from self.depth avoids
        # relying on the leaked loop variable, and int() keeps Conv2d from
        # receiving a numpy integer (matching the casts inside the loop).
        self.rgb_to_features[self.depth - 1] = \
            from_rgb(int(self.feature_size // np.power(2, self.depth - 4)))

        # if spectral normalization is on:
        if use_spectral_norm:
            self.turn_on_spectral_norm()
Example 2
0
    def __init__(self, depth=7, feature_size=512,
                 use_eql=True, gpu_parallelize=False, in_channels=1):
        """
        constructor for the class
        :param depth: total depth of the discriminator
                       (Must be equal to the Generator depth)
        :param feature_size: size of the deepest features extracted
                             (Must be equal to Generator latent_size)
        :param use_eql: whether to use the equalized learning rate or not
        :param gpu_parallelize: whether to use DataParallel on the discriminator
                                Note that the Last block contains StdDev layer
                                So, it is not parallelized.
        :param in_channels: number of channels in the input images
                            (1 for grayscale, 3 for rgb).  Default 1 preserves
                            the previously hard-coded behaviour.
        """
        from torch.nn import ModuleList
        from MSG_GAN.CustomLayers import DisGeneralConvBlock, \
            DisFinalBlock, _equalized_conv2d
        from torch.nn import Conv2d

        super().__init__()

        # feature_size must be a power of 2 so the repeated halving stays integral
        assert feature_size != 0 and ((feature_size & (feature_size - 1)) == 0), \
            "latent size not a power of 2"
        if depth >= 4:
            assert feature_size >= np.power(2, depth - 4), \
                "feature size cannot be produced"

        # create state of the object
        self.gpu_parallelize = gpu_parallelize
        self.use_eql = use_eql
        self.depth = depth
        self.feature_size = feature_size

        # create the fromRGB layers for various inputs
        # (generalized from the old hard-coded single-channel version):
        if self.use_eql:
            def from_rgb(out_channels):
                return _equalized_conv2d(in_channels, out_channels, (1, 1), bias=True)
        else:
            def from_rgb(out_channels):
                return Conv2d(in_channels, out_channels, (1, 1), bias=True)

        self.rgb_to_features = ModuleList()
        self.final_converter = from_rgb(self.feature_size // 2)

        # create a module list of the other required general convolution blocks
        self.layers = ModuleList()
        self.final_block = DisFinalBlock(self.feature_size, use_eql=self.use_eql)

        # create the remaining layers
        for i in range(self.depth - 1):
            if i > 2:
                # high-resolution stages: channel width halves as i grows
                layer = DisGeneralConvBlock(
                    int(self.feature_size // np.power(2, i - 2)),
                    int(self.feature_size // np.power(2, i - 2)),
                    use_eql=self.use_eql
                )
                rgb = from_rgb(int(self.feature_size // np.power(2, i - 1)))
            else:
                # low-resolution stages all run at full feature width
                layer = DisGeneralConvBlock(self.feature_size, self.feature_size // 2,
                                            use_eql=self.use_eql)
                rgb = from_rgb(self.feature_size // 2)

            self.layers.append(layer)
            self.rgb_to_features.append(rgb)

        # just replace the last converter.  The exponent equals the loop's final
        # value of i minus 2, i.e. depth - 4; computing it from self.depth avoids
        # relying on the leaked loop variable, and int() keeps the conv from
        # receiving a numpy integer (matching the casts inside the loop).
        self.rgb_to_features[self.depth - 2] = \
            from_rgb(int(self.feature_size // np.power(2, self.depth - 4)))

        # parallelize the modules from the module-lists if asked to:
        if self.gpu_parallelize:
            for i in range(len(self.layers)):
                self.layers[i] = th.nn.DataParallel(self.layers[i])
                self.rgb_to_features[i] = th.nn.DataParallel(
                    self.rgb_to_features[i])
Example 3
0
    def __init__(self,
                 depth=7,
                 feature_size=512,
                 use_eql=True,
                 gpu_parallelize=False):
        """
        Build the multi-scale discriminator stack.

        :param depth: total depth of the discriminator
                      (must match the generator depth)
        :param feature_size: channel width of the deepest features
                             (must match the generator latent_size)
        :param use_eql: use equalized-learning-rate convolutions if True
        :param gpu_parallelize: wrap the per-scale modules in DataParallel.
                                The final block holds the StdDev layer and is
                                therefore left unparallelized.
        """
        from torch.nn import ModuleList
        from MSG_GAN.CustomLayers import DisGeneralConvBlock, \
            DisFinalBlock, _equalized_conv2d
        from torch.nn import Conv2d

        super().__init__()

        # feature_size must be a power of two so repeated halving stays exact
        assert feature_size != 0 and ((feature_size & (feature_size - 1)) == 0), \
            "latent size not a power of 2"
        if depth >= 4:
            assert feature_size >= np.power(2, depth - 4), \
                "feature size cannot be produced"

        # stash configuration on the instance
        self.gpu_parallelize = gpu_parallelize
        self.use_eql = use_eql
        self.depth = depth
        self.feature_size = feature_size

        # from_rgb: 1x1 convolution turning a 3-channel image (generator output
        # or a downsampled real image) into a feature map of the given width
        conv_factory = _equalized_conv2d if self.use_eql else Conv2d

        def from_rgb(out_channels):
            return conv_factory(3, out_channels, (1, 1), bias=True)

        self.rgb_to_features = ModuleList()
        self.final_converter = from_rgb(self.feature_size // 2)

        # per-scale convolution blocks, plus the scoring block for the
        # deepest (lowest-resolution) scale
        self.layers = ModuleList()
        self.final_block = DisFinalBlock(self.feature_size,
                                         use_eql=self.use_eql)

        # each scale does two things: convert its rgb input to features and
        # merge them with the running features, then apply its conv block
        for stage in range(self.depth - 1):
            if stage <= 2:
                # deepest stages: block feature_size -> feature_size/2,
                # converter 3 -> feature_size/2
                block = DisGeneralConvBlock(self.feature_size,
                                            self.feature_size // 2,
                                            use_eql=self.use_eql)
                converter = from_rgb(self.feature_size // 2)
            else:
                # shallower (higher-resolution) stages: width halves per stage
                width = int(self.feature_size // np.power(2, stage - 2))
                block = DisGeneralConvBlock(width, width, use_eql=self.use_eql)
                converter = from_rgb(int(self.feature_size // np.power(2, stage - 1)))

            self.layers.append(block)
            self.rgb_to_features.append(converter)

        # the topmost converter must match the first block's input width,
        # so overwrite the one appended last (stage holds its final loop value)
        self.rgb_to_features[self.depth - 2] = \
            from_rgb(self.feature_size // np.power(2, stage - 2))

        # optionally wrap the per-scale modules in DataParallel
        if self.gpu_parallelize:
            for idx in range(len(self.layers)):
                self.layers[idx] = th.nn.DataParallel(self.layers[idx])
                self.rgb_to_features[idx] = th.nn.DataParallel(
                    self.rgb_to_features[idx])
Example 4
0
class Discriminator(th.nn.Module):
    """ Discriminator of the GAN """
    def __init__(self,
                 depth=7,
                 feature_size=512,
                 use_eql=True,
                 gpu_parallelize=False):
        """
        constructor for the class
        :param depth: total depth of the discriminator
                       (Must be equal to the Generator depth)
        :param feature_size: size of the deepest features extracted
                             (Must be equal to Generator latent_size)
        :param use_eql: whether to use the equalized learning rate or not
        :param gpu_parallelize: whether to use DataParallel on the discriminator
                                Note that the Last block contains StdDev layer
                                So, it is not parallelized.
        """
        from torch.nn import ModuleList
        from MSG_GAN.CustomLayers import DisGeneralConvBlock, \
            DisFinalBlock, _equalized_conv2d
        from torch.nn import Conv2d

        super().__init__()

        # feature_size must be a power of 2 so the repeated halving stays integral
        assert feature_size != 0 and ((feature_size & (feature_size - 1)) == 0), \
            "latent size not a power of 2"
        if depth >= 4:
            assert feature_size >= np.power(2, depth - 4), \
                "feature size cannot be produced"

        # create state of the object
        self.gpu_parallelize = gpu_parallelize
        self.use_eql = use_eql
        self.depth = depth
        self.feature_size = feature_size

        # create the fromRGB layers for various inputs:
        if self.use_eql:

            def from_rgb(out_channels):
                return _equalized_conv2d(3, out_channels, (1, 1), bias=True)
        else:

            def from_rgb(out_channels):
                return Conv2d(3, out_channels, (1, 1), bias=True)

        # from_rgb maps a 3-channel rgb image (generator output, or a
        # downsampled real image) to a feature map of the requested channel
        # width via a 1x1 convolution

        self.rgb_to_features = ModuleList()
        self.final_converter = from_rgb(self.feature_size // 2)
        #   3 -> 256

        # create a module list of the other required general convolution blocks
        self.layers = ModuleList()
        self.final_block = DisFinalBlock(self.feature_size,
                                         use_eql=self.use_eql)
        # 512 -> 1 (flattened raw discriminator scores; the spatial size here
        # is presumably 4x4 — see the table below)
        '''
            判别器里需要做两个操作,第一是把rgb转换为隐层特征,和已经有的特征并起来
            第二是根据当前的隐层特征,给出一个判别结果
        '''
        # (translation of the note above: at each scale the discriminator does
        #  two things — convert the rgb input to hidden features and concatenate
        #  them with the features it already has, then score the result)

        # create the remaining layers
        for i in range(self.depth - 1):
            if i > 2:
                '''
                    在高分辨率分支,通道是依次增加的
                    如 i = 5 对应最高分辨率
                        from_rgb 3 -> 32
                        conv     64 -> 64
                '''
                # high-resolution branch: channel width shrinks as i grows,
                # e.g. i = 5 (highest resolution): from_rgb 3 -> 32, conv 64 -> 64
                layer = DisGeneralConvBlock(
                    int(self.feature_size // np.power(2, i - 2)),
                    int(self.feature_size // np.power(2, i - 2)),
                    use_eql=self.use_eql)
                rgb = from_rgb(int(self.feature_size // np.power(2, i - 1)))
            else:
                '''
                    i = 0,1,2 的时候(对应于判别器的末端,最低分辨率部分),走的是这个分支,
                    layer = 512 -> 256
                    rgb = 3 -> 256
                '''
                # i = 0, 1, 2 (the lowest-resolution end of the discriminator):
                # layer = 512 -> 256, rgb = 3 -> 256
                layer = DisGeneralConvBlock(self.feature_size,
                                            self.feature_size // 2,
                                            use_eql=self.use_eql)
                rgb = from_rgb(self.feature_size // 2)

            self.layers.append(layer)
            self.rgb_to_features.append(rgb)
        '''
            i   layer_cin   layer_cout  rgb_to_features_cin rgb_to_features_cout   shape_in  shape_out(经过layers)
            
            F       512           1             3                   256              4
            
            0       512         256             3                   256              8
            1       512         256             3                   256              16
            2       512         256             3                   256              32
            
            3       256         256             3                   128              64
            4       128         128             3                    64              128        64
            5        64          64             3                    32
                                                                    (64)             256        128 
            大概的逻辑是最高分辨率图像(256)通过最后一个converter和layer,变成了(b,64,128,128)的东西,随后和经过convert的input并联,经过layer
            (i=5)全分辨率(3,256,256),经过converter = (64,256,256),经过layer = (64,128,128)
            (i=4)/2分辨率(3,128,128),经过converter = (64,128,128),并联 = (128,128,128),经过layer = (128,64,64)
            (i=3)/4分辨率(3,64,64),经过converter = (128,64,64),并联 = (256,64,64),经过layer = (256,32,32)
            (i=2)/8分辨率(3,32,32),经过converter = (256,32,32),并联 = (512,32,32),经过layer = (256,16,16)
            (i=1)/16分辨率(3,16,16),经过converter = (256,16,16),并联 = (512,16,16),经过layer = (256,8,8)
            (i=0)/32分辨率(3,8,8),经过converter = (256,8,8),并联 = (512,8,8),经过layer = (256,4,4)
            
            final/64分辨率(3,4,4),经过converter = (256,4,4),并联 = (512,4,4),经过layer = (b,)
            
            inputs_idx  0   1   2   3   4   5   6
            shape       4   8  16  32  64 128 256 
        '''
        # (the block above tabulates, per stage, the channel counts and spatial
        #  sizes for the default depth=7 / feature_size=512 configuration:
        #  the full-resolution 256px input becomes (b, 64, 128, 128), and at
        #  each subsequent stage the converted lower-resolution input is
        #  channel-concatenated with the running features before the block)

        # just replace the last converter so it matches the first block's
        # input width (i holds its final loop value, depth - 2, here)
        self.rgb_to_features[self.depth - 2] = \
            from_rgb(self.feature_size // np.power(2, i - 2))

        # parallelize the modules from the module-lists if asked to:
        if self.gpu_parallelize:
            for i in range(len(self.layers)):
                self.layers[i] = th.nn.DataParallel(self.layers[i])
                self.rgb_to_features[i] = th.nn.DataParallel(
                    self.rgb_to_features[i])

        # Note that since the FinalBlock contains the StdDev layer,
        # it cannot be parallelized so easily. It will have to be parallelized
        # from the Lower level (from CustomLayers). This much parallelism
        # seems enough for me.

    def forward(self, inputs):
        """
        forward pass of the discriminator
        :param inputs: (multi-scale input images) to the network list[Tensors],
                       ordered lowest resolution first (see the table above)
        :return: out => raw prediction values
        """

        assert len(inputs) == self.depth, \
            "Mismatch between input and Network scales"
        #
        # start at the highest-resolution input: convert it to features and
        # run it through the topmost block
        y = self.rgb_to_features[self.depth - 2](
            inputs[self.depth - 1])  # e.g. 256px image -> 64-channel features
        y = self.layers[self.depth - 2](y)
        # walk down the remaining scales: convert each input, concatenate it
        # with the running features on the channel axis, apply the block
        for x, block, converter in \
                zip(reversed(inputs[1:-1]),
                    reversed(self.layers[:-1]),
                    reversed(self.rgb_to_features[:-1])):
            input_part = converter(x)  # convert the input:
            y = th.cat((input_part, y), dim=1)  # concatenate the inputs:
            y = block(y)  # apply the block

        # calculate the final block:
        input_part = self.final_converter(inputs[0])
        y = th.cat((input_part, y), dim=1)
        y = self.final_block(y)

        # return calculated y
        return y

    def extract(self, inputs):
        """
        extract features from the multi-scale inputs without gradients.
        Mirrors forward() but, instead of calling the final block whole,
        stops after its conv_2 activation (i.e. before the final scoring
        layer, presumably) and returns the flattened activations.
        NOTE(review): switches the module to eval mode and does not restore
        the previous training state.
        :param inputs: (multi-scale input images) to the network list[Tensors]
        :return: flattened feature tensor
        """
        assert len(inputs) == self.depth, \
            "Mismatch between input and Network scales"
        #
        self.eval()

        with th.no_grad():
            # same multi-scale walk as forward():
            y = self.rgb_to_features[self.depth - 2](
                inputs[self.depth - 1])  # e.g. 256px image -> 64-channel features
            y = self.layers[self.depth - 2](y)
            for x, block, converter in \
                    zip(reversed(inputs[1:-1]),
                        reversed(self.layers[:-1]),
                        reversed(self.rgb_to_features[:-1])):
                input_part = converter(x)  # convert the input:
                y = th.cat((input_part, y), dim=1)  # concatenate the inputs:
                y = block(y)  # apply the block

            # calculate the final block, layer by layer, stopping after conv_2:
            input_part = self.final_converter(inputs[0])
            y = th.cat((input_part, y), dim=1)
            y = self.final_block.batch_discriminator(y)
            y = self.final_block.lrelu(self.final_block.conv_1(y))
            y = self.final_block.lrelu(self.final_block.conv_2(y))
            y = y.view(-1)

        return y