def __init__(self, opt):
    """Create the input buffers, the four networks and the L1 criterion.

    Args:
        opt: Options object. This constructor reads ``cuda_on``,
            ``mul_gpu``, ``batchSize``, ``image_channel_size``,
            ``image_size`` and ``test_audio_video_length`` from it, and
            passes the whole object on to every sub-network constructor.
    """
    self.opt = opt

    # Use CUDA only when it is both requested and actually available, so
    # that the buffer type below always agrees with where the networks are
    # placed (a CPU-only host with cuda_on=True falls back to CPU).
    use_cuda = opt.cuda_on and torch.cuda.is_available()
    self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.Tensor

    # Pre-allocated input buffers.
    image_dims = (opt.image_channel_size, opt.image_size, opt.image_size)
    self.input_A = self.Tensor(opt.batchSize, *image_dims)
    self.input_B = self.Tensor(
        opt.batchSize, opt.test_audio_video_length, *image_dims)

    # Networks.
    self.ID_encoder = IdentityEncoder.IdentityEncoder(opt)
    self.Decoder = Decoder_network.Decoder(opt)
    self.mfcc_encoder = mfcc_networks.mfcc_encoder_two(opt)
    self.lip_feature_encoder = FAN_feature_extractor.FanFusion(opt)

    self.criterionL1 = torch.nn.L1Loss()

    if use_cuda:
        network_names = (
            'ID_encoder',
            'Decoder',
            'mfcc_encoder',
            'lip_feature_encoder',
        )
        for name in network_names:
            net = getattr(self, name)
            if opt.mul_gpu:
                # Replicate across the visible GPUs before moving to device.
                net = torch.nn.DataParallel(net)
            net.cuda()
            setattr(self, name, net)
        self.criterionL1.cuda()

    print('---------- Networks initialized -------------')
def __init__(self, opt):
    """Create buffers, networks, loss functions and optimizers.

    The optimizers are built last, after the networks have (optionally)
    been wrapped in ``DataParallel`` and moved to the GPU, as recommended
    by the ``torch.optim`` documentation.

    Args:
        opt: Options object. This constructor reads ``cuda_on``,
            ``mul_gpu``, ``batchSize``, ``pred_length``, ``disfc_length``,
            ``sequence_length``, ``image_channel_size``, ``image_size``,
            ``mfcc_length``, ``mfcc_width``, ``no_lsgan``, ``lr``,
            ``beta1``, ``L2margin`` and ``ranking_margin`` from it, and
            passes the whole object on to several sub-network constructors.
    """
    self.opt = opt

    # Use CUDA only when it is both requested and actually available, so
    # that the buffer type below always agrees with where the networks are
    # placed (a CPU-only host with cuda_on=True falls back to CPU).
    use_cuda = opt.cuda_on and torch.cuda.is_available()
    self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.Tensor

    # ------------------------------------------------------------------
    # Pre-allocated input buffers. The example shapes in the comments are
    # the original author's, for their own option values.
    # ------------------------------------------------------------------
    image_dims = (opt.image_channel_size, opt.image_size, opt.image_size)

    # e.g. (16, 3, 256, 256): one image
    self.input_A = self.Tensor(opt.batchSize, *image_dims)
    # e.g. (16, 12, 3, 256, 256): 12 images
    # pred_length: number of images used for classification
    self.input_B = self.Tensor(
        opt.batchSize, opt.pred_length, *image_dims)
    # e.g. (16, 12, 1, 20, 12): 12 audio segments
    self.B_audio = self.Tensor(
        opt.batchSize, opt.pred_length, 1,
        opt.mfcc_length, opt.mfcc_width)
    # e.g. (16, 20, 3, 256, 256): 20 images
    self.input_video_dis = self.Tensor(
        opt.batchSize, opt.disfc_length, *image_dims)
    # e.g. (16, 12, 3, 256, 256): 12 images
    self.video_pred_data = self.Tensor(
        opt.batchSize, opt.pred_length, *image_dims)
    # NOTE(review): the original author's shape comment for this buffer is
    # (16, 12, 1, 20, 12), i.e. MFCC-sized like B_audio, but the code
    # allocates image_size x image_size -- confirm which is intended.
    self.audio_pred_data = self.Tensor(
        opt.batchSize, opt.pred_length, 1,
        opt.image_size, opt.image_size)

    # ------------------------------------------------------------------
    # Networks
    # ------------------------------------------------------------------
    self.ID_encoder = IdentityEncoder.IdentityEncoder()
    self.Decoder = Decoder_network.Decoder(opt)
    # audio wid feature encoder
    self.mfcc_encoder = mfcc_networks.mfcc_encoder_two(opt)
    # visual wid feature encoder
    self.lip_feature_encoder = FAN_feature_extractor.FanFusion(opt)
    # discriminator to disentangle wid from pid
    self.ID_lip_discriminator = Discriminator_networks.ID_dis32(
        feature_length=64, config=opt)
    # classifier from wid to class label
    self.model_fusion = networks.ModelFusion(opt)
    # discriminator for adv in embedding wid
    self.discriminator_audio = networks.discriminator_audio(opt)

    use_sigmoid = opt.no_lsgan
    self.netD = Discriminator_networks.Discriminator(
        input_nc=3, use_sigmoid=use_sigmoid)
    self.netD_mul = Discriminator_networks.Discriminator(
        input_nc=3 * opt.sequence_length, use_sigmoid=use_sigmoid)

    # Custom weight initialisation for the three discriminators.
    # NOTE: weights_init is not applied to the Decoder (that call was
    # commented out by the original author).
    self.netD_mul.apply(networks.weights_init)
    self.netD.apply(networks.weights_init)
    self.ID_lip_discriminator.apply(networks.weights_init)

    self.old_lr = opt.lr

    # ------------------------------------------------------------------
    # Loss functions
    # ------------------------------------------------------------------
    self.criterionGAN = loss_functions.GANLoss(
        use_lsgan=not opt.no_lsgan, tensor=self.Tensor, softlabel=False)
    self.criterionGAN_soft = loss_functions.GANLoss(
        use_lsgan=not opt.no_lsgan, tensor=self.Tensor, softlabel=True)
    self.criterionL1 = torch.nn.L1Loss()
    self.criterionSmoothL1 = torch.nn.SmoothL1Loss()
    self.criterionL2 = torch.nn.MSELoss()
    self.L2Contrastive = loss_functions.L2ContrastiveLoss(
        margin=opt.L2margin)
    self.criterionCE = torch.nn.CrossEntropyLoss()
    self.inv_dis_loss = loss_functions.L2SoftmaxLoss()
    self.Contrastive = loss_functions.ContrastiveLoss(
        margin=opt.ranking_margin)

    # ------------------------------------------------------------------
    # Device placement (must happen before the optimizers are created)
    # ------------------------------------------------------------------
    if use_cuda:
        network_names = (
            'ID_encoder',
            'Decoder',
            'mfcc_encoder',
            'netD_mul',
            'netD',
            'lip_feature_encoder',
            'ID_lip_discriminator',
            'model_fusion',
            'discriminator_audio',
        )
        for name in network_names:
            net = getattr(self, name)
            if opt.mul_gpu:
                # Replicate across the visible GPUs before moving to device.
                net = torch.nn.DataParallel(net)
            net.cuda()
            setattr(self, name, net)

        # Same set of criteria the original moved to the GPU.
        criterion_names = (
            'criterionL1',
            'criterionGAN',
            'criterionGAN_soft',
            'criterionL2',
            'criterionCE',
            'inv_dis_loss',
            'Contrastive',
            'L2Contrastive',
        )
        for name in criterion_names:
            getattr(self, name).cuda()

    # ------------------------------------------------------------------
    # Optimizers: one over the generator-side networks, one over the
    # discriminators. Built after the networks reached their final device.
    # ------------------------------------------------------------------
    generator_params = (
        list(self.Decoder.parameters())
        + list(self.ID_encoder.parameters())
        + list(self.model_fusion.parameters())
        + list(self.mfcc_encoder.parameters())
        + list(self.lip_feature_encoder.parameters())
    )
    self.optimizer_G = torch.optim.Adam(
        generator_params, lr=opt.lr, betas=(opt.beta1, 0.999))

    discriminator_params = (
        list(self.netD.parameters())
        + list(self.netD_mul.parameters())
        + list(self.discriminator_audio.parameters())
        + list(self.ID_lip_discriminator.parameters())
    )
    self.optimizer_D = torch.optim.Adam(
        discriminator_params, lr=opt.lr, betas=(opt.beta1, 0.999))

    print('---------- Networks initialized -------------')