def __init__(self, opt):
        """Build the sub-networks and preallocated input buffers from ``opt``.

        Args:
            opt: Options object. This constructor reads ``cuda_on``,
                ``mul_gpu``, ``batchSize``, ``image_channel_size``,
                ``image_size`` and ``test_audio_video_length``. ``opt`` is
                also forwarded unchanged to every sub-network constructor.

        CUDA is used only when ``opt.cuda_on`` is set *and* a CUDA device is
        actually available; otherwise everything stays on the CPU.
        """
        self.opt = opt

        # Resolve the device decision once so the buffer type and the module
        # placement can never disagree. Previously the tensor type depended on
        # opt.cuda_on alone, so a CPU-only machine failed on the first buffer
        # allocation before the availability check was ever reached.
        use_cuda = bool(opt.cuda_on and torch.cuda.is_available())
        if opt.cuda_on and not use_cuda:
            print('CUDA requested via opt.cuda_on but no CUDA device is '
                  'available; falling back to CPU')
        self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.Tensor

        # Preallocated input buffers.
        frame_shape = (opt.image_channel_size, opt.image_size, opt.image_size)
        self.input_A = self.Tensor(opt.batchSize, *frame_shape)
        self.input_B = self.Tensor(opt.batchSize, opt.test_audio_video_length,
                                   *frame_shape)

        # Sub-networks.
        self.ID_encoder = IdentityEncoder.IdentityEncoder(opt)
        self.Decoder = Decoder_network.Decoder(opt)
        self.mfcc_encoder = mfcc_networks.mfcc_encoder_two(opt)
        self.lip_feature_encoder = FAN_feature_extractor.FanFusion(opt)

        self.criterionL1 = torch.nn.L1Loss()

        if use_cuda:
            network_names = ('ID_encoder', 'Decoder', 'mfcc_encoder',
                             'lip_feature_encoder')
            if opt.mul_gpu:
                # Wrap first, then move: the wrapper replaces the attribute.
                for name in network_names:
                    setattr(self, name,
                            torch.nn.DataParallel(getattr(self, name)))
            for name in network_names:
                getattr(self, name).cuda()
            self.criterionL1.cuda()

        print('---------- Networks initialized -------------')
    def __init__(self, opt):
        """Build networks, losses, optimizers and input buffers from ``opt``.

        Args:
            opt: Options object. This constructor reads ``cuda_on``,
                ``mul_gpu``, ``batchSize``, ``image_channel_size``,
                ``image_size``, ``pred_length``, ``mfcc_length``,
                ``mfcc_width``, ``disfc_length``, ``sequence_length``,
                ``no_lsgan``, ``lr``, ``beta1``, ``L2margin`` and
                ``ranking_margin``. ``opt`` is also forwarded to most
                sub-network constructors.

        CUDA is used only when ``opt.cuda_on`` is set *and* a CUDA device is
        actually available; otherwise everything stays on the CPU.
        """
        self.opt = opt

        # Resolve the device decision once so the buffer type, the tensor type
        # handed to the GAN losses and the module placement can never
        # disagree. Previously the tensor type depended on opt.cuda_on alone,
        # so a CPU-only machine failed on the first buffer allocation before
        # the availability check was ever reached.
        use_cuda = bool(opt.cuda_on and torch.cuda.is_available())
        if opt.cuda_on and not use_cuda:
            print('CUDA requested via opt.cuda_on but no CUDA device is '
                  'available; falling back to CPU')
        self.Tensor = torch.cuda.FloatTensor if use_cuda else torch.Tensor

        # ---- preallocated input buffers --------------------------------
        # The example shapes below come from the original annotations and
        # presumably correspond to one particular configuration.
        frame_shape = (opt.image_channel_size, opt.image_size, opt.image_size)
        mfcc_shape = (1, opt.mfcc_length, opt.mfcc_width)

        # e.g. (16, 3, 256, 256): a single image
        self.input_A = self.Tensor(opt.batchSize, *frame_shape)

        # e.g. (16, 12, 3, 256, 256): 12 images
        # pred_length: num of images used for classification
        self.input_B = self.Tensor(opt.batchSize, opt.pred_length,
                                   *frame_shape)
        # e.g. (16, 12, 1, 20, 12): 12 audio segments
        self.B_audio = self.Tensor(opt.batchSize, opt.pred_length,
                                   *mfcc_shape)

        # e.g. (16, 20, 3, 256, 256): 20 images
        self.input_video_dis = self.Tensor(opt.batchSize, opt.disfc_length,
                                           *frame_shape)

        # e.g. (16, 12, 3, 256, 256): 12 images
        self.video_pred_data = self.Tensor(opt.batchSize, opt.pred_length,
                                           *frame_shape)
        # NOTE(review): the original annotation gave the MFCC shape
        # (16, 12, 1, 20, 12) for this buffer, but it is allocated with
        # image_size x image_size. Left unchanged -- confirm which is meant.
        self.audio_pred_data = self.Tensor(opt.batchSize, opt.pred_length, 1,
                                           opt.image_size, opt.image_size)

        # ---- networks ---------------------------------------------------
        self.ID_encoder = IdentityEncoder.IdentityEncoder()

        self.Decoder = Decoder_network.Decoder(opt)

        # audio wid feature encoder
        self.mfcc_encoder = mfcc_networks.mfcc_encoder_two(opt)

        # visual wid feature encoder
        self.lip_feature_encoder = FAN_feature_extractor.FanFusion(opt)

        # discriminator to disentangle wid from pid
        self.ID_lip_discriminator = Discriminator_networks.ID_dis32(
            feature_length=64, config=opt)

        # Classifier from wid to class label
        self.model_fusion = networks.ModelFusion(opt)

        # discriminator for adv in embedding wid
        self.discriminator_audio = networks.discriminator_audio(opt)

        use_sigmoid = opt.no_lsgan
        self.netD = Discriminator_networks.Discriminator(
            input_nc=3, use_sigmoid=use_sigmoid)
        self.netD_mul = Discriminator_networks.Discriminator(
            input_nc=3 * opt.sequence_length, use_sigmoid=use_sigmoid)

        # Custom weight initialisation for the discriminators only;
        # weights_init is not applied to the Decoder.
        self.netD_mul.apply(networks.weights_init)
        self.netD.apply(networks.weights_init)
        self.ID_lip_discriminator.apply(networks.weights_init)

        self.old_lr = opt.lr

        # ---- loss functions ---------------------------------------------
        use_lsgan = not opt.no_lsgan
        self.criterionGAN = loss_functions.GANLoss(
            use_lsgan=use_lsgan, tensor=self.Tensor, softlabel=False)
        self.criterionGAN_soft = loss_functions.GANLoss(
            use_lsgan=use_lsgan, tensor=self.Tensor, softlabel=True)
        self.criterionL1 = torch.nn.L1Loss()
        self.criterionSmoothL1 = torch.nn.SmoothL1Loss()
        self.criterionL2 = torch.nn.MSELoss()
        self.L2Contrastive = loss_functions.L2ContrastiveLoss(
            margin=opt.L2margin)
        self.criterionCE = torch.nn.CrossEntropyLoss()
        self.inv_dis_loss = loss_functions.L2SoftmaxLoss()
        self.Contrastive = loss_functions.ContrastiveLoss(
            margin=opt.ranking_margin)

        # ---- optimizers -------------------------------------------------
        # Built before any DataParallel wrapping / .cuda() call, exactly as
        # before, so they hold the parameters of the bare modules.
        adam_betas = (opt.beta1, 0.999)
        generator_params = (
            list(self.Decoder.parameters()) +
            list(self.ID_encoder.parameters()) +
            list(self.model_fusion.parameters()) +
            list(self.mfcc_encoder.parameters()) +
            list(self.lip_feature_encoder.parameters()))
        self.optimizer_G = torch.optim.Adam(
            generator_params, lr=opt.lr, betas=adam_betas)

        discriminator_params = (
            list(self.netD.parameters()) +
            list(self.netD_mul.parameters()) +
            list(self.discriminator_audio.parameters()) +
            list(self.ID_lip_discriminator.parameters()))
        self.optimizer_D = torch.optim.Adam(
            discriminator_params, lr=opt.lr, betas=adam_betas)

        # ---- device placement -------------------------------------------
        if use_cuda:
            network_names = (
                'ID_encoder', 'Decoder', 'mfcc_encoder', 'netD_mul', 'netD',
                'lip_feature_encoder', 'ID_lip_discriminator',
                'model_fusion', 'discriminator_audio')
            criterion_names = (
                'criterionL1', 'criterionGAN', 'criterionGAN_soft',
                'criterionL2', 'criterionCE', 'inv_dis_loss', 'Contrastive',
                'L2Contrastive')
            if opt.mul_gpu:
                # Wrap first, then move: the wrapper replaces the attribute.
                for name in network_names:
                    setattr(self, name,
                            torch.nn.DataParallel(getattr(self, name)))
            for name in network_names + criterion_names:
                getattr(self, name).cuda()

        print('---------- Networks initialized -------------')