def __init__(self, dataset='youtubers'):
		"""Create the spectral-normalised conv layers and the auxiliary heads.

		Args:
			dataset: Dataset identifier, stored unchanged as ``dataset_name``.
		"""
		super(discriminator, self).__init__()

		# Fixed configuration values.
		self.image_size = 64
		self.num_channels = 3
		self.embed_dim = 62
		self.projected_embed_dim = 128
		self.ndf = 64
		self.B_dim = 128
		self.C_dim = 16
		self.dataset_name = dataset

		ndf = self.ndf

		# Four bias-free 4x4 / stride-2 / padding-1 convolutions, each wrapped
		# in SpectralNorm; the channel count doubles at every step.
		self.conv1 = SpectralNorm(
			nn.Conv2d(self.num_channels, ndf, kernel_size=4, stride=2, padding=1, bias=False))
		self.conv2 = SpectralNorm(
			nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False))
		self.conv3 = SpectralNorm(
			nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1, bias=False))
		self.conv4 = SpectralNorm(
			nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1, bias=False))

		# Fully connected heads and activations.
		self.disc_linear = nn.Linear(ndf * 1, ndf)
		self.disc_linear2 = nn.Linear(31, 31)
		self.aux_linear = nn.Linear(4 * 4 * 512, self.embed_dim + 1)
		self.softmax = nn.Softmax()
		self.sigmoid = nn.Sigmoid()
		self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)

		# Final 4x4 convolution reducing ndf*8 channels to a single output
		# channel (no sigmoid is applied inside this module).
		final_conv = nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False)
		self.netD_2 = nn.Sequential(final_conv)
Exemple #2
0
    def __init__(self, in_channels, out_channels, stride=1, downsample=None, norm=None, sn=False):
        """Create the layers of a two-convolution residual block.

        Args:
            in_channels: Channels fed to the first 3x3 convolution.
            out_channels: Channels produced by both convolutions.
            stride: Stride of the first convolution (the second uses 1).
            downsample: Stored as-is on ``self.downsample``.
            norm: ``'BN'`` or ``'IN'`` creates two matching normalisation
                layers (``bn1``/``bn2``); any other value creates none.
            sn: If truthy, both convolutions are wrapped in ``SpectralNorm``.
        """
        super(ResidualBlock, self).__init__()

        # The conv bias is dropped only when batch norm is requested.
        use_bias = norm != 'BN'

        def build_conv(n_in, conv_stride):
            # 3x3, padding-1 convolution, optionally spectral-normalised.
            layer = nn.Conv2d(n_in, out_channels, kernel_size=3,
                              stride=conv_stride, padding=1, bias=use_bias)
            return SpectralNorm(layer) if sn else layer

        self.conv1 = build_conv(in_channels, stride)

        self.norm = norm
        if norm == 'BN':
            self.bn1 = nn.BatchNorm2d(out_channels)
            self.bn2 = nn.BatchNorm2d(out_channels)
        elif norm == 'IN':
            self.bn1 = nn.InstanceNorm2d(out_channels)
            self.bn2 = nn.InstanceNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.conv2 = build_conv(out_channels, 1)
        self.downsample = downsample
Exemple #3
0
    def __init__(self, improved=False, dataset='youtubers'):
        """Create the convolutional trunk, the embedding projector and the head.

        Args:
            improved: If truthy, the trunk uses plain convolutions only;
                otherwise every convolution after the first is wrapped in
                ``SpectralNorm``.
            dataset: Dataset identifier, stored unchanged as ``dataset_name``.
        """
        super(discriminator, self).__init__()
        self.image_size = 64
        self.num_channels = 3
        self.embed_dim = 1024
        self.projected_embed_dim = 128
        self.ndf = 64
        self.dataset_name = dataset

        ndf = self.ndf
        # Channel progression of the four stride-2 convolutions.
        widths = [self.num_channels, ndf, ndf * 2, ndf * 4, ndf * 8]

        trunk = []
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:])):
            conv = nn.Conv2d(n_in, n_out, 4, 2, 1, bias=False)
            # The first convolution is never spectral-normalised.
            if not improved and idx > 0:
                conv = SpectralNorm(conv)
            trunk.append(conv)
            trunk.append(nn.LeakyReLU(0.2, inplace=True))
        self.netD_1 = nn.Sequential(*trunk)

        self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)

        # TODO (carried over from the original author): the head is hard-wired
        # to ndf*8 + 64 input channels; the variant using
        # ndf*8 + projected_embed_dim is not enabled and switching between
        # the two still has to be handled.
        self.netD_2 = nn.Sequential(
            nn.Conv2d(ndf * 8 + 64, 1, 4, 1, 0, bias=False)
        )
    def __init__(self, image_size):
        """Create the spectral-normalised trunk and the final convolution.

        Args:
            image_size: 64 or 128 selects the matching architecture. For any
                other value only the shared trunk is created and ``netD_2``
                is left undefined.
        """
        super(discriminator, self).__init__()
        self.image_size = image_size
        self.num_channels = 3
        self.latent_space = 128
        self.ndf = 64

        ndf = self.ndf

        def sn_down(n_in, n_out):
            # Bias-free 4x4 / stride-2 / padding-1 conv wrapped in SpectralNorm.
            return SpectralNorm(
                nn.Conv2d(n_in, n_out, kernel_size=4, stride=2, padding=1, bias=False))

        def head(n_features):
            # Plain 4x4 conv over trunk features plus `latent_space` extra
            # channels, producing a single output channel.
            return nn.Conv2d(n_features + self.latent_space, 1,
                             kernel_size=4, stride=1, padding=0, bias=False)

        # Trunk shared by the 64x64 and 128x128 variants.
        trunk = nn.Sequential(
            sn_down(self.num_channels, ndf),
            nn.LeakyReLU(0.2, inplace=True),
            sn_down(ndf, ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            sn_down(ndf * 2, ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            sn_down(ndf * 4, ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.netD_1 = trunk

        if image_size == 64:
            self.netD_2 = head(ndf * 8)
        elif image_size == 128:
            # One extra downsampling stage; the shared trunk stays nested as
            # element 0 of the new Sequential.
            self.netD_1 = nn.Sequential(
                trunk,
                sn_down(ndf * 8, ndf * 16),
                nn.LeakyReLU(0.2, inplace=True),
            )
            self.netD_2 = head(ndf * 16)
Exemple #5
0
    def __init__(self, dataset='youtubers'):
        """Create the conv layers, the BatchNorm trunk, the projector and the head.

        Args:
            dataset: Dataset identifier, stored unchanged as ``dataset_name``.
        """
        super(discriminator, self).__init__()
        self.image_size = 64
        self.num_channels = 3
        self.embed_dim = 62
        self.projected_embed_dim = 128
        self.ndf = 64
        self.B_dim = 128
        self.C_dim = 16
        self.dataset_name = dataset

        ndf = self.ndf

        def down_conv(n_in, n_out):
            # Bias-free 4x4 / stride-2 / padding-1 convolution.
            return nn.Conv2d(n_in, n_out, kernel_size=4, stride=2, padding=1, bias=False)

        # Stand-alone spectral-normalised convolutions.
        self.conv1 = SpectralNorm(down_conv(self.num_channels, ndf))
        self.conv2 = SpectralNorm(down_conv(ndf, ndf * 2))
        self.conv3 = SpectralNorm(down_conv(ndf * 2, ndf * 4))
        self.conv4 = SpectralNorm(down_conv(ndf * 4, ndf * 8))

        # Sequential trunk: the first stage has no normalisation, the three
        # following stages use conv -> BatchNorm -> LeakyReLU.
        trunk = [down_conv(self.num_channels, ndf), nn.LeakyReLU(0.2, inplace=True)]
        for mult in (1, 2, 4):
            n_out = ndf * mult * 2
            trunk.append(down_conv(ndf * mult, n_out))
            trunk.append(nn.BatchNorm2d(n_out))
            trunk.append(nn.LeakyReLU(0.2, inplace=True))
        self.netD_1 = nn.Sequential(*trunk)

        self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)

        # Final 4x4 conv over ndf*8 + projected_embed_dim channels; no
        # sigmoid is applied inside this module.
        self.netD_2 = nn.Sequential(
            nn.Conv2d(ndf * 8 + self.projected_embed_dim, 1,
                      kernel_size=4, stride=1, padding=0, bias=False),
        )
Exemple #6
0
    def __init__(self, num_classes, ndf=64):
        """Create five spectral-normalised stride-2 convolutions and a LeakyReLU.

        Args:
            num_classes: Number of input channels of the first convolution.
            ndf: Base channel width; doubled by each of conv2..conv4.
        """
        super(FCDiscriminator, self).__init__()

        def sn_conv(n_in, n_out):
            # 4x4 / stride-2 / padding-1 conv (default bias) in SpectralNorm.
            return SpectralNorm(
                nn.Conv2d(n_in, n_out, kernel_size=4, stride=2, padding=1))

        self.conv1 = sn_conv(num_classes, ndf)
        self.conv2 = sn_conv(ndf, ndf * 2)
        self.conv3 = sn_conv(ndf * 2, ndf * 4)
        self.conv4 = sn_conv(ndf * 4, ndf * 8)
        # Single-channel output layer.
        self.classifier = sn_conv(ndf * 8, 1)

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
    def __init__(self, image_size):
        """Create the spectral-normalised trunk and the final convolution.

        Args:
            image_size: 64 or 128 selects the matching architecture. For any
                other value only the shared trunk is created and ``netD_2``
                is left undefined.
        """
        super(discriminator, self).__init__()
        self.image_size = image_size
        self.num_channels = 3
        self.latent_space = 128
        self.ndf = 64

        ndf = self.ndf
        # Channel progression of the four shared downsampling stages.
        widths = (self.num_channels, ndf, ndf * 2, ndf * 4, ndf * 8)

        stages = []
        for n_in, n_out in zip(widths, widths[1:]):
            stages.append(
                SpectralNorm(nn.Conv2d(n_in, n_out, 4, 2, 1, bias=False)))
            stages.append(nn.LeakyReLU(0.2, inplace=True))
        self.netD_1 = nn.Sequential(*stages)

        if image_size == 64:
            head_in = ndf * 8 + self.latent_space
            self.netD_2 = nn.Conv2d(head_in, 1, 4, 1, 0, bias=False)
        elif image_size == 128:
            # Add one more downsampling stage; the shared trunk stays nested
            # as element 0 of the new Sequential.
            self.netD_1 = nn.Sequential(
                self.netD_1,
                SpectralNorm(nn.Conv2d(ndf * 8, ndf * 16, 4, 2, 1, bias=False)),
                nn.LeakyReLU(0.2, inplace=True),
            )
            head_in = ndf * 16 + self.latent_space
            self.netD_2 = nn.Conv2d(head_in, 1, 4, 1, 0, bias=False)
    def __init__(self, image_size, audio_samples):
        """Create the audio encoder, the auxiliary classifier and the image decoder.

        Args:
            image_size: 64 or 128 prepends the matching input stage(s) to the
                shared decoder; any other value leaves only the shared part.
            audio_samples: Stored and forwarded to the audio ``Discriminator``.
        """
        super(generator, self).__init__()

        self.audio_samples = audio_samples
        self.num_channels = 3
        self.latent_dim = 128
        self.ngf = 64
        self.image_size = image_size

        self.d_fmaps = [16, 32, 128, 256, 512, 1024]
        self.audio_embedding = Discriminator(1, self.d_fmaps, 15, nn.LeakyReLU(0.3), self.audio_samples)
        self.aux_classifier = auxclassifier()

        ngf = self.ngf

        def sn_up(n_in, n_out, stride=2, padding=1):
            # Bias-free 4x4 transposed conv wrapped in SpectralNorm.
            return SpectralNorm(
                nn.ConvTranspose2d(n_in, n_out, 4, stride, padding, bias=False))

        # Shared tail: three upsample/dropout/ReLU stages, then the output
        # convolution followed by Tanh (no dropout before the output).
        tail = []
        for n_in, n_out in ((ngf * 8, ngf * 4), (ngf * 4, ngf * 2), (ngf * 2, ngf)):
            tail.extend([sn_up(n_in, n_out), nn.Dropout(), nn.ReLU(True)])
        tail.extend([sn_up(ngf, self.num_channels), nn.Tanh()])
        self.netG = nn.Sequential(*tail)

        if image_size == 64:
            # One input stage; the shared tail is nested as the last element.
            self.netG = nn.Sequential(
                sn_up(self.latent_dim, ngf * 8, stride=1, padding=0),
                nn.Dropout(),
                nn.ReLU(True),
                self.netG,
            )

        if image_size == 128:
            # Two input stages in front of the shared tail.
            self.netG = nn.Sequential(
                sn_up(self.latent_dim, ngf * 16, stride=1, padding=0),
                nn.Dropout(),
                nn.ReLU(True),
                sn_up(ngf * 16, ngf * 8),
                nn.Dropout(),
                nn.ReLU(True),
                self.netG,
            )
Exemple #9
0
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
                 activation='LeakyReLU', norm=None, init_method=None, std=1., sn=False):
        """Create a convolution with optional spectral norm, activation and norm layer.

        Args:
            in_channels: Input channels of the convolution.
            out_channels: Output channels of the convolution.
            kernel_size: Convolution kernel size.
            stride: Convolution stride.
            padding: Convolution padding.
            activation: ``None`` for no activation, ``'LeakyReLU'`` for an
                ``nn.LeakyReLU`` instance, otherwise a name looked up on the
                ``torch`` module (the name itself is stored if not found).
            norm: ``'BN'`` or ``'IN'`` creates ``self.norm_layer``; other
                values create none.
            init_method: Not used in the visible part of this constructor.
            std: Not used in the visible part of this constructor.
            sn: If truthy, the convolution is wrapped in ``SpectralNorm``.
        """
        super(ConvLayer, self).__init__()

        # A batch-norm layer follows the conv in the 'BN' case, so a conv
        # bias would be redundant there.
        use_bias = norm != 'BN'
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding,
                         bias=use_bias)
        # Optional spectral normalisation.
        self.conv2d = SpectralNorm(conv) if sn else conv

        if activation is None:
            self.activation = None
        elif activation == 'LeakyReLU':
            # Look the class up on torch.nn and instantiate it.
            activation_cls = getattr(torch.nn, activation, 'LeakyReLU')
            self.activation = activation_cls()
        else:
            # Functional lookup on torch; falls back to the given value.
            self.activation = getattr(torch, activation, activation)

        self.norm = norm
        if norm == 'BN':
            self.norm_layer = nn.BatchNorm2d(out_channels, momentum=0.01)
        elif norm == 'IN':
            # Instance norm with running statistics tracking enabled.
            self.norm_layer = nn.InstanceNorm2d(out_channels, track_running_stats=True)
Exemple #10
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 activation='LeakyReLU',
                 norm=None,
                 init_method=None,
                 std=1.,
                 sn=False):
        """Create a convolution with optional spectral norm, activation and norm layer.

        Args:
            in_channels: Input channels of the convolution.
            out_channels: Output channels of the convolution.
            kernel_size: Convolution kernel size.
            stride: Convolution stride.
            padding: Convolution padding.
            activation: ``None`` for no activation, ``'LeakyReLU'`` for an
                ``nn.LeakyReLU`` instance, otherwise a name looked up on the
                ``torch`` module (the name itself is stored if not found).
            norm: ``'BN'`` or ``'IN'`` creates ``self.norm_layer``; other
                values create none.
            init_method: Not used in the visible part of this constructor.
            std: Not used in the visible part of this constructor.
            sn: If truthy, the convolution is wrapped in ``SpectralNorm``.
        """
        super(ConvLayer, self).__init__()

        # The conv bias is dropped only when batch norm is requested.
        with_bias = norm != 'BN'
        base_conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride, padding, bias=with_bias)
        if sn:
            base_conv = SpectralNorm(base_conv)
        self.conv2d = base_conv

        if activation is None:
            chosen = None
        elif activation == 'LeakyReLU':
            # Resolve the class on torch.nn, then instantiate it.
            chosen = getattr(torch.nn, activation, 'LeakyReLU')()
        else:
            # Functional lookup on torch; falls back to the given value.
            chosen = getattr(torch, activation, activation)
        self.activation = chosen

        self.norm = norm
        if norm == 'BN':
            self.norm_layer = nn.BatchNorm2d(out_channels, momentum=0.01)
        elif norm == 'IN':
            self.norm_layer = nn.InstanceNorm2d(out_channels,
                                                track_running_stats=True)
Exemple #11
0
 def __init__(self,
              ninputs,
              fmaps,
              kwidth,
              activation,
              padding=None,
              lnorm=False,
              dropout=0.,
              pooling=2,
              enc=True,
              bias=False,
              aal_h=None,
              linterp=False,
              snorm=False,
              convblock=False):
     """Build the layers of one encoder or decoder block.

     Args:
         ninputs: Number of input channels.
         fmaps: Number of output channels of the main convolution.
         kwidth: Kernel width of the convolutions.
         activation: Stored on ``self.act`` when not ``None``. The string
             ``'glu'`` additionally requests a gate convolution, which is
             only implemented for the ``linterp`` decoder.
         padding: Padding of the strided encoder convolution; ``None``
             means 0.
         lnorm: If truthy, create ``self.ln = LayerNorm()``.
         dropout: Dropout probability; a layer is created only when > 0.
         pooling: Stride of the strided / transposed convolution.
         enc: Truthy builds an encoder block, falsy a decoder block.
         bias: Whether the convolutions carry a bias term.
         aal_h: Optional 1-D impulse response (must expose ``shape`` and be
             accepted by ``torch.FloatTensor``); encoder only. Its values
             become the kernel of an extra stride-1 convolution.
         linterp: Decoder only: build a stride-1 ``pre_conv`` + ``conv``
             pair (linear-interpolation variant) instead of a transposed
             convolution.
         snorm: If truthy, wrap convolutions in ``SpectralNorm``.
         convblock: Use ``Conv1DResBlock`` instead of a plain convolution.

     Raises:
         NotImplementedError: If ``activation == 'glu'`` for an encoder
             block or for a transposed-convolution decoder block.
     """
     super(GBlock, self).__init__()
     self.pooling = pooling
     self.linterp = linterp
     self.enc = enc
     self.kwidth = kwidth
     self.convblock = convblock
     if padding is None:
         padding = 0
     if enc:
         if aal_h is not None:
             self.aal_conv = nn.Conv1d(ninputs,
                                       ninputs,
                                       aal_h.shape[0],
                                       stride=1,
                                       padding=aal_h.shape[0] // 2 - 1,
                                       bias=False)
             if snorm:
                 self.aal_conv = SpectralNorm(self.aal_conv)
             # Copy the impulse response into every (out, in) channel pair
             # of the pre-filter kernel.
             # NOTE(review): with snorm enabled this assigns `.weight` on
             # the SpectralNorm wrapper rather than on the wrapped conv --
             # confirm the wrapper exposes `weight`.
             aal_t = torch.FloatTensor(aal_h).view(1, 1, -1)
             aal_t = aal_t.repeat(ninputs, ninputs, 1)
             self.aal_conv.weight.data = aal_t
         if convblock:
             self.conv = Conv1DResBlock(ninputs,
                                        fmaps,
                                        kwidth,
                                        stride=pooling,
                                        bias=bias)
         else:
             self.conv = nn.Conv1d(ninputs,
                                   fmaps,
                                   kwidth,
                                   stride=pooling,
                                   padding=padding,
                                   bias=bias)
         if snorm:
             self.conv = SpectralNorm(self.conv)
         if activation == 'glu':
             # GLU gating is not supported for encoder blocks.
             raise NotImplementedError
     else:
         if linterp:
             # Stride-1 pre-convolution that reduces the channels by 8x,
             # applied prior to upsampling.
             # NOTE(review): pre_conv is never wrapped in SpectralNorm,
             # even when snorm is set -- confirm this is intended.
             self.pre_conv = nn.Conv1d(ninputs,
                                       ninputs // 8,
                                       kwidth,
                                       stride=1,
                                       padding=kwidth // 2,
                                       bias=bias)
             self.conv = nn.Conv1d(ninputs // 8,
                                   fmaps,
                                   kwidth,
                                   stride=1,
                                   padding=kwidth // 2,
                                   bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 self.glu_conv = nn.Conv1d(ninputs,
                                           fmaps,
                                           kwidth,
                                           stride=1,
                                           padding=kwidth // 2,
                                           bias=bias)
                 if snorm:
                     self.glu_conv = SpectralNorm(self.glu_conv)
         else:
             if convblock:
                 self.conv = Conv1DResBlock(ninputs,
                                            fmaps,
                                            kwidth,
                                            stride=pooling,
                                            bias=bias,
                                            transpose=True)
             else:
                 # Decoder-style transposed convolution; the padding is
                 # derived from the stride and the kernel width.
                 pad = (2 * pooling - pooling - kwidth) // -2
                 self.conv = nn.ConvTranspose1d(ninputs,
                                                fmaps,
                                                kwidth,
                                                stride=pooling,
                                                padding=pad,
                                                output_padding=0,
                                                bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 # GLU gating is not supported for transposed-conv decoders.
                 raise NotImplementedError
     if activation is not None:
         self.act = activation
     if lnorm:
         self.ln = LayerNorm()
     if dropout > 0:
         self.dout = nn.Dropout(dropout)
    def __init__(self, dataset='youtubers'):
        """Create the embedding projection, the audio encoder and the image decoder.

        Args:
            dataset: Dataset identifier, stored unchanged as ``dataset_name``.
        """
        super(generator, self).__init__()
        self.image_size = 64
        self.num_channels = 3
        self.noise_dim = 100
        self.embed_dim = 62
        self.projected_embed_dim = 128
        self.raw_wav_dim = 64000
        # The decoder input width equals the projected embedding width.
        self.latent_dim = self.projected_embed_dim
        self.dataset_name = dataset

        # Linear -> BatchNorm1d -> LeakyReLU projection of the embedding.
        to_projected = nn.Linear(in_features=self.embed_dim,
                                 out_features=self.projected_embed_dim)
        self.projection = nn.Sequential(
            to_projected,
            nn.BatchNorm1d(num_features=self.projected_embed_dim),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
        )

        self.d_fmaps = [16, 32, 128, 256, 512, 1024]
        # Kept as a plain Python list, so these PReLU modules are not
        # registered as sub-modules of this generator.
        prelu_widths = [64, 128, 256, 512, 1024, 1024]
        self.act = [nn.PReLU(fmaps) for fmaps in prelu_widths]
        self.audio_embedding = Discriminator(1, self.d_fmaps, 15,
                                             nn.LeakyReLU(0.3))
        self.ngf = 64

        ngf = self.ngf

        def sn_up(n_in, n_out, stride, padding):
            # Bias-free 4x4 transposed conv wrapped in SpectralNorm.
            return SpectralNorm(
                nn.ConvTranspose2d(n_in, n_out, 4, stride, padding, bias=False))

        # Decoder based on the PyTorch DCGAN example
        # (https://github.com/pytorch/examples/blob/master/dcgan/main.py),
        # with dropout in place of batch norm: four
        # upsample -> Dropout -> ReLU stages, then the output conv and Tanh.
        stage_specs = (
            (self.latent_dim, ngf * 8, 1, 0),
            (ngf * 8, ngf * 4, 2, 1),
            (ngf * 4, ngf * 2, 2, 1),
            (ngf * 2, ngf, 2, 1),
        )
        decoder = []
        for n_in, n_out, stride, padding in stage_specs:
            decoder.append(sn_up(n_in, n_out, stride, padding))
            decoder.append(nn.Dropout())
            decoder.append(nn.ReLU(True))
        decoder.append(sn_up(ngf, self.num_channels, 2, 1))
        decoder.append(nn.Tanh())
        self.netG = nn.Sequential(*decoder)
 def __init__(self, ninputs, fmaps, kwidth,
              activation, padding=None,
              lnorm=False, dropout=0.,
              pooling=2, enc=True, bias=False,
              aal_h=None, linterp=False, snorm=False,
              convblock=False):
     """Build the layers of one encoder or decoder block.

     Args:
         ninputs: Number of input channels.
         fmaps: Number of output channels of the main convolution.
         kwidth: Kernel width of the convolutions.
         activation: Stored on ``self.act`` when not ``None``. The string
             ``'glu'`` additionally requests a gate convolution, which is
             only implemented for the ``linterp`` decoder.
         padding: Padding of the strided encoder convolution; ``None``
             means 0.
         lnorm: If truthy, create ``self.ln = LayerNorm()``.
         dropout: Dropout probability; a layer is created only when > 0.
         pooling: Stride of the strided / transposed convolution.
         enc: Truthy builds an encoder block, falsy a decoder block.
         bias: Whether the convolutions carry a bias term.
         aal_h: Optional 1-D array (must expose ``shape`` and be accepted
             by ``torch.FloatTensor``); encoder only. Its values become the
             kernel of an extra stride-1 convolution.
         linterp: Decoder only: build a stride-1 ``pre_conv`` + ``conv``
             pair instead of a transposed convolution.
         snorm: If truthy, wrap convolutions in ``SpectralNorm``.
         convblock: Use ``Conv1DResBlock`` instead of a plain convolution.

     Raises:
         NotImplementedError: If ``activation == 'glu'`` for an encoder
             block or for a transposed-convolution decoder block.
     """
     super(GBlock, self).__init__()
     self.pooling = pooling
     self.linterp = linterp
     self.enc = enc
     self.kwidth = kwidth
     self.convblock = convblock
     if padding is None:
         padding = 0
     if enc:
         if aal_h is not None:
             self.aal_conv = nn.Conv1d(ninputs, ninputs,
                                       aal_h.shape[0],
                                       stride=1,
                                       padding=aal_h.shape[0] // 2 - 1,
                                       bias=False)
             if snorm:
                 self.aal_conv = SpectralNorm(self.aal_conv)
             # Copy the aal_h values into every (out, in) channel pair of
             # the extra convolution's kernel.
             # NOTE(review): with snorm enabled this assigns `.weight` on
             # the SpectralNorm wrapper rather than on the wrapped conv --
             # confirm the wrapper exposes `weight`.
             aal_t = torch.FloatTensor(aal_h).view(1, 1, -1)
             aal_t = aal_t.repeat(ninputs, ninputs, 1)
             self.aal_conv.weight.data = aal_t
         if convblock:
             self.conv = Conv1DResBlock(ninputs, fmaps, kwidth,
                                        stride=pooling, bias=bias)
         else:
             self.conv = nn.Conv1d(ninputs, fmaps, kwidth,
                                   stride=pooling,
                                   padding=padding,
                                   bias=bias)
         if snorm:
             self.conv = SpectralNorm(self.conv)
         if activation == 'glu':
             # GLU gating is not supported for encoder blocks.
             raise NotImplementedError
     else:
         if linterp:
             # Stride-1 pre-convolution reducing the channels by 8x,
             # followed by the main stride-1 convolution.
             # NOTE(review): pre_conv is never wrapped in SpectralNorm,
             # even when snorm is set -- confirm this is intended.
             self.pre_conv = nn.Conv1d(ninputs, ninputs // 8,
                                       kwidth, stride=1, padding=kwidth//2,
                                       bias=bias)
             self.conv = nn.Conv1d(ninputs // 8, fmaps, kwidth,
                                   stride=1, padding=kwidth//2,
                                   bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 self.glu_conv = nn.Conv1d(ninputs, fmaps, kwidth,
                                           stride=1, padding=kwidth//2,
                                           bias=bias)
                 if snorm:
                     self.glu_conv = SpectralNorm(self.glu_conv)
         else:
             if convblock:
                 self.conv = Conv1DResBlock(ninputs, fmaps, kwidth,
                                            stride=pooling, bias=bias,
                                            transpose=True)
             else:
                 # Transposed convolution; the padding is derived from the
                 # stride and the kernel width.
                 pad = (2 * pooling - pooling - kwidth)//-2
                 self.conv = nn.ConvTranspose1d(ninputs, fmaps, kwidth,
                                                stride=pooling,
                                                padding=pad,
                                                output_padding=0,
                                                bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 # GLU gating is not supported for transposed-conv decoders.
                 raise NotImplementedError
     if activation is not None:
         self.act = activation
     if lnorm:
         self.ln = LayerNorm()
     if dropout > 0:
         self.dout = nn.Dropout(dropout)
    def __init__(self, image_size, audio_samples):
        """Create the audio encoder, the auxiliary classifier and the image decoder.

        Args:
            image_size: 64 or 128 prepends the matching input stage(s) to the
                shared decoder; any other value leaves only the shared part.
            audio_samples: Stored and forwarded to the audio ``Discriminator``.
        """
        super(generator, self).__init__()

        # Basic configuration.
        self.audio_samples = audio_samples
        self.num_channels = 3
        self.latent_dim = 128
        self.ngf = 64
        self.image_size = image_size

        # Audio encoder (SEGAN-style discriminator, per the original note).
        self.d_fmaps = [16, 32, 128, 256, 512, 1024]
        self.audio_embedding = Discriminator(1, self.d_fmaps, 15,
                                             nn.LeakyReLU(0.3),
                                             self.audio_samples)
        # Auxiliary classifier.
        self.aux_classifier = auxclassifier()

        ngf = self.ngf

        def sn_deconv(n_in, n_out, stride, padding):
            # Bias-free 4x4 transposed conv wrapped in SpectralNorm.
            return SpectralNorm(
                nn.ConvTranspose2d(n_in, n_out, kernel_size=4, stride=stride,
                                   padding=padding, bias=False))

        # Decoder tail shared by the 64x64 and 128x128 variants.
        shared_tail = nn.Sequential(
            sn_deconv(ngf * 8, ngf * 4, 2, 1),
            nn.Dropout(),
            nn.ReLU(True),
            sn_deconv(ngf * 4, ngf * 2, 2, 1),
            nn.Dropout(),
            nn.ReLU(True),
            sn_deconv(ngf * 2, ngf, 2, 1),
            nn.Dropout(),
            nn.ReLU(True),
            # Original author's note: adding Dropout after the output conv
            # made the network generate only noise instead of faces.
            sn_deconv(ngf, self.num_channels, 2, 1),
            nn.Tanh(),
        )
        self.netG = shared_tail

        # 64x64 output: a single input stage in front of the shared tail.
        if image_size == 64:
            self.netG = nn.Sequential(
                sn_deconv(self.latent_dim, ngf * 8, 1, 0),
                nn.Dropout(),
                nn.ReLU(True),
                shared_tail,
            )

        # 128x128 output: two input stages in front of the shared tail.
        if image_size == 128:
            self.netG = nn.Sequential(
                sn_deconv(self.latent_dim, ngf * 16, 1, 0),
                nn.Dropout(),
                nn.ReLU(True),
                sn_deconv(ngf * 16, ngf * 8, 2, 1),
                nn.Dropout(),
                nn.ReLU(True),
                self.netG,
            )
Exemple #15
0
    def __init__(self, dataset='youtubers'):
        """Create the embedding projection and the image decoder.

        Args:
            dataset: Dataset identifier, stored unchanged as ``dataset_name``.
        """
        super(generator, self).__init__()
        self.image_size = 64
        self.num_channels = 3
        self.noise_dim = 100
        self.embed_dim = 62
        self.projected_embed_dim = 128
        # The decoder input width is the noise width plus the projected
        # embedding width.
        self.latent_dim = self.noise_dim + self.projected_embed_dim
        self.ngf = 64
        self.dataset_name = dataset

        # Linear -> BatchNorm1d -> LeakyReLU projection of the embedding.
        to_projected = nn.Linear(in_features=self.embed_dim,
                                 out_features=self.projected_embed_dim)
        self.projection = nn.Sequential(
            to_projected,
            nn.BatchNorm1d(num_features=self.projected_embed_dim),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
        )

        ngf = self.ngf

        def sn_up(n_in, n_out, stride=2, padding=1):
            # Bias-free 4x4 transposed conv wrapped in SpectralNorm.
            return SpectralNorm(
                nn.ConvTranspose2d(n_in, n_out, 4, stride, padding, bias=False))

        # Decoder based on the PyTorch DCGAN example
        # (https://github.com/pytorch/examples/blob/master/dcgan/main.py).
        # Channel dropout follows stages 1-3 and the output conv; stage 4
        # has no dropout.
        self.netG = nn.Sequential(
            sn_up(self.latent_dim, ngf * 8, stride=1, padding=0),
            nn.Dropout2d(),
            nn.ReLU(True),
            sn_up(ngf * 8, ngf * 4),
            nn.Dropout2d(),
            nn.ReLU(True),
            sn_up(ngf * 4, ngf * 2),
            nn.Dropout2d(),
            nn.ReLU(True),
            sn_up(ngf * 2, ngf),
            nn.ReLU(True),
            sn_up(ngf, self.num_channels),
            nn.Dropout2d(),
            nn.Tanh(),
        )