def __init__(self):
    logger.info('Set Data Loader')
    self.dataset = FoodDataset(transform=transforms.Compose([ToTensor()]))
    self.data_loader = torch.utils.data.DataLoader(self.dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers,
                                                   drop_last=True)
    checkpoint, checkpoint_name = self.load_checkpoint(model_dump_path)
    if checkpoint is None:
        logger.info('No pre-trained model found. Skipping model loading.')
        logger.info('Set Generator and Discriminator')
        self.G = Generator(tag=tag_size).to(device)
        self.D = Discriminator(tag=tag_size).to(device)
        logger.info('Initialize Weights')
        self.G.apply(initital_network_weights).to(device)
        self.D.apply(initital_network_weights).to(device)
        logger.info('Set Optimizers')
        self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                            lr=learning_rate,
                                            betas=(beta_1, 0.999))
        self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                            lr=learning_rate,
                                            betas=(beta_1, 0.999))
        self.epoch = 0
    else:
        logger.info('Load Generator and Discriminator')
        self.G = Generator(tag=tag_size).to(device)
        self.D = Discriminator(tag=tag_size).to(device)
        logger.info('Load Pre-Trained Weights From Checkpoint {}'.format(checkpoint_name))
        self.G.load_state_dict(checkpoint['G'])
        self.D.load_state_dict(checkpoint['D'])
        logger.info('Load Optimizers')
        self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                            lr=learning_rate,
                                            betas=(beta_1, 0.999))
        self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                            lr=learning_rate,
                                            betas=(beta_1, 0.999))
        self.optimizer_G.load_state_dict(checkpoint['optimizer_G'])
        self.optimizer_D.load_state_dict(checkpoint['optimizer_D'])
        self.epoch = checkpoint['epoch']
    logger.info('Set Criterion')
    self.a_D = alexnet.alexnet(num_classes=tag_size).to(device)
    self.optimizer_a_D = torch.optim.Adam(self.a_D.parameters(),
                                          lr=learning_rate,
                                          betas=(beta_1, 0.999))

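# Hedged sketch of the load_checkpoint helper referenced above (and used again in the
# generate script near the end of this file). Assumptions, not confirmed by the source:
# checkpoints are torch.save'd dicts with keys 'G', 'D', 'optimizer_G', 'optimizer_D',
# 'epoch', stored as *.pth files under model_dump_path, the newest file is the one to
# resume from, and glob/os/torch are already imported in this module.
def load_checkpoint(model_dump_path):
    candidates = sorted(glob.glob(os.path.join(model_dump_path, '*.pth')))
    if not candidates:
        # nothing to resume from
        return None, None
    newest = candidates[-1]
    checkpoint = torch.load(newest, map_location=device)
    return checkpoint, os.path.basename(newest)
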
def __init__(self, vp_value_count, output_shape, name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_channels = 256
    self.rep_frames = 4
    self.rep_size = 14

    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count,
                        out_frames=self.rep_frames, out_size=self.rep_size)
    self.trans = Transformer(in_channels=self.rep_channels + self.vp_value_count,
                             out_channels=self.rep_channels)
    self.gen = Generator(in_channels=[self.rep_channels, self.rep_channels],
                         out_frames=self.out_frames)

def _build_model(self):
    device = torch.device('cuda')

    data_dimension = self.config.data['dimension']
    generator_hidden_layers = self.config.model['generator_hidden_layers']
    use_dropout = self.config.model['use_dropout']
    drop_prob = self.config.model['drop_prob']
    use_ac_func = self.config.model['use_ac_func']
    activation = self.config.model['activation']
    disc_hidden_layers = self.config.model['disc_hidden_layers']

    logger.log("Loading {} network ...".format(colored('generator', 'red')))
    gen_fc_layers = [self.latent_dim, *generator_hidden_layers, data_dimension]
    generator = Generator(gen_fc_layers, use_dropout, drop_prob,
                          use_ac_func, activation).to(device)

    logger.log("Loading {} network ...".format(colored('discriminator', 'red')))
    disc_fc_layers = [data_dimension, *disc_hidden_layers, 1]
    discriminator = Discriminator(disc_fc_layers, use_dropout, drop_prob,
                                  use_ac_func, activation).to(device)

    wandb.watch([generator, discriminator])

    g_optimizer, d_optimizer = self._setup_optimizers(generator, discriminator)

    return generator, discriminator, g_optimizer, d_optimizer

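# `_setup_optimizers` is referenced above but not shown in this snippet. A minimal,
# hypothetical sketch, assuming plain Adam with the same hyperparameters used by the
# standalone correlation-GAN script at the end of this file (lr=1e-4, betas=(0.5, 0.9));
# the real implementation may read these values from self.config instead.
def _setup_optimizers(self, generator, discriminator):
    g_optimizer = torch.optim.Adam(generator.parameters(), lr=1e-4, betas=(0.5, 0.9))
    d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=1e-4, betas=(0.5, 0.9))
    return g_optimizer, d_optimizer
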
def __init__(self, opt):
    self.device = torch.device('cuda')
    self.opt = opt

    self.G = Generator(self.opt['network_G']).to(self.device)
    util.init_weights(self.G, init_type='kaiming', scale=0.1)
    if self.opt['path']['pretrain_G']:
        self.G.load_state_dict(torch.load(self.opt['path']['pretrain_G']), strict=True)

    self.D = Discriminator(self.opt['network_D']).to(self.device)
    util.init_weights(self.D, init_type='kaiming', scale=1)

    self.FE = VGGFeatureExtractor().to(self.device)

    self.G.train()
    self.D.train()
    self.FE.eval()

    self.log_dict = OrderedDict()

    self.optim_params = [v for k, v in self.G.named_parameters() if v.requires_grad]
    self.opt_G = torch.optim.Adam(self.optim_params,
                                  lr=self.opt['train']['lr_G'],
                                  betas=(self.opt['train']['b1_G'], self.opt['train']['b2_G']))
    self.opt_D = torch.optim.Adam(self.D.parameters(),
                                  lr=self.opt['train']['lr_D'],
                                  betas=(self.opt['train']['b1_D'], self.opt['train']['b2_D']))
    self.optimizers = [self.opt_G, self.opt_D]
    self.schedulers = [
        lr_scheduler.MultiStepLR(optimizer,
                                 self.opt['train']['lr_steps'],
                                 self.opt['train']['lr_gamma'])
        for optimizer in self.optimizers
    ]

def __init__(self, device, num_steps, z_dimension=8):

    # in and out channels for the generator:
    a, b = 2, 3

    G = Generator(a, b) if not USE_UNET else UNet(a, b)
    E = ResNetEncoder(b, z_dimension)

    # conditional discriminators
    D1 = MultiScaleDiscriminator(a + b - 1)
    D2 = MultiScaleDiscriminator(a + b - 1)

    def weights_init(m):
        if isinstance(m, (nn.Conv2d, nn.Linear, nn.ConvTranspose2d)):
            init.xavier_normal_(m.weight, gain=0.02)
            if m.bias is not None:
                init.zeros_(m.bias)
        elif isinstance(m, nn.InstanceNorm2d) and m.affine:
            init.ones_(m.weight)
            init.zeros_(m.bias)

    self.G = G.apply(weights_init).to(device)
    self.E = E.apply(weights_init).to(device)
    self.D1 = D1.apply(weights_init).to(device)
    self.D2 = D2.apply(weights_init).to(device)

    params = {
        'lr': 4e-4,
        'betas': (0.5, 0.999),
        'weight_decay': 1e-8
    }
    # the mapping sub-network of the generator gets a 10x smaller learning rate
    generator_groups = [
        {'params': [p for n, p in self.G.named_parameters() if 'mapping' not in n]},
        {'params': self.G.mapping.parameters(), 'lr': 4e-5}
    ]
    self.optimizer = {
        'G': optim.Adam(generator_groups, **params),
        'E': optim.Adam(self.E.parameters(), **params),
        'D1': optim.Adam(self.D1.parameters(), **params),
        'D2': optim.Adam(self.D2.parameters(), **params)
    }

    def lambda_rule(i):
        # constant learning rate for the first half of training,
        # then linear decay to zero
        decay = num_steps // 2
        m = 1.0 if i < decay else 1.0 - (i - decay) / decay
        return max(m, 0.0)

    self.schedulers = []
    for o in self.optimizer.values():
        self.schedulers.append(LambdaLR(o, lr_lambda=lambda_rule))

    self.gan_loss = LSGAN()
    self.z_dimension = z_dimension
    self.device = device

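# Hedged sketch (not in the original snippet): a hypothetical helper showing how the
# LambdaLR schedulers built above would typically be advanced, once per training
# iteration, so the constant-then-linear-decay schedule in lambda_rule takes effect.
def step_schedulers(self):
    for scheduler in self.schedulers:
        scheduler.step()
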
def __init__(self, opt):
    self.device = torch.device('cuda')
    self.opt = opt

    self.G = Generator(self.opt['network_G']).to(self.device)
    util.init_weights(self.G, init_type='kaiming', scale=0.1)
    self.G.train()

    self.log_dict = OrderedDict()

    self.optim_params = [v for k, v in self.G.named_parameters() if v.requires_grad]
    self.opt_G = torch.optim.Adam(self.optim_params,
                                  lr=self.opt['train']['lr_G'],
                                  betas=(self.opt['train']['b1_G'], self.opt['train']['b2_G']))
    self.optimizers = [self.opt_G]
    self.schedulers = [
        lr_scheduler.MultiStepLR(optimizer,
                                 self.opt['train']['lr_steps'],
                                 self.opt['train']['lr_gamma'])
        for optimizer in self.optimizers
    ]

GAMMA2 = 5.0
GAMMA3 = 10.0
WLAMBDA = 5.0
SLAMBDA = 5.0

# Datasets
DATASET = Dataset(rootdir=r'D:\GAN\buildingsDataset', max_images=99999)
DATASET.load_captions_and_class_ids()

#%%
DATALOADER = DATASET.make_dataloaders(BATCH_SIZE)

#%%
# Networks/Modules
DEVICE = torch.device('cuda')
GENERATOR = Generator(gf_dim=GF_DIM, emb_dim=EMB_DIM, z_dim=Z_DIM, cond_dim=COND_DIM)
GENERATOR.cuda()
DISCRIMINATORS = [Disc64(DF_DIM), Disc128(DF_DIM), Disc256(DF_DIM)]
for d in DISCRIMINATORS:
    d.cuda()
RNN = RNNEncoder(vocabsize=DATASET.vocab.n_words, nhidden=EMB_DIM)
RNN.cuda()
CNN = CNNEncoder(out_dim=EMB_DIM)
CNN.cuda()

# Losses
WORDSLOSS = WordsLoss(DEVICE, GAMMA1, GAMMA2, GAMMA3, WLAMBDA)
SENTLOSS = SentenceLoss(DEVICE, GAMMA3, SLAMBDA)
GENLOSS = NonSaturatingGenLoss()
DISCLOSS = NonSaturatingDiscLoss()

from tqdm import trange

latentdim = 16**2
steps = 4

train_loader, val_loader = get_data()


def prepare_batch(batch, latentdim=latentdim, sigma=0.1):
    # latent code for the generator
    z = torch.rand(batch.size(0), latentdim, device=batch.device)
    # corrupt the input with Gaussian noise and zero out a fixed set of rows
    inp = batch.clone() + sigma * torch.randn(batch.shape, device=batch.device)
    inp[..., [1, 4, 32, 33, 35, 36], :] = 0
    out = batch.clone()
    return inp, z, out


G = Generator(latentdim=latentdim, steps=steps, filters=64, zsteps=3).cuda()
D = PatchDiscriminator(steps=3).cuda()

trainable_G = [p for p in G.parameters() if p.requires_grad]
trainable_D = [p for p in D.parameters() if p.requires_grad]
total_G = sum(p.numel() for p in trainable_G)
total_D = sum(p.numel() for p in trainable_D)
print("Number of generator parameters: %d" % total_G)
print("Number of discriminator parameters: %d" % total_D)

epochs = 3000
plotting = 50
D_steps = 10
eps = 1e-6
lr = 2e-4

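# Hedged loop skeleton (not in the original snippet): shows how the imported trange and
# prepare_batch would typically be combined, assuming train_loader yields CPU float
# tensors; the D_steps discriminator updates and the generator update are omitted here
# because their loss functions are not shown in this snippet.
for epoch in trange(epochs):
    for batch in train_loader:
        inp, z, out = prepare_batch(batch.cuda())
        # ... D update (repeated D_steps times) and G update would go here ...
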
"cuda:0" if (torch.cuda.is_available() and args.gpu) else "cpu" ) now = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S") compare_dir = f"compare/{now}" print(f'Saving SR images in: ./{compare_dir}/') os.makedirs(compare_dir, exist_ok=True) test_dataset = VisDataset( root=args.data_path, scale_factor=args.scale_factor, hr_size=args.hr_size ) test_dataloader = torch.utils.data.DataLoader( test_dataset, batch_size=args.vis_batch_size ) gen_net = Generator( num_res_blocks=args.gen_res_blocks, upscale_factor=args.scale_factor ).to(device) gen_path = args.from_pretrained_gen if gen_path: gen_net.load_state_dict(torch.load(gen_path)) test_iter = iter(test_dataloader) for i, (hr_test, lr_test, simple_sr_test) in zip(range(args.iterations), test_iter): gan_sr_test = gen_net(lr_test.to(device)).cpu() stacked = torch.cat([hr_test, simple_sr_test, gan_sr_test]) vutils.save_image( stacked, f"{compare_dir}/{i}.png", normalize=True, nrow=args.vis_batch_size )
import glob
import os
import sys

import cv2
import numpy as np
import torch

from networks.generator import Generator
import utils.utils as util

cfg = util.load_yaml('../Configs/Train/config_sr.yml')
model_path = sys.argv[1]
device = torch.device('cuda')
# device = torch.device('cpu')

input_dir = '/content/drive/MyDrive/MajorProject/results_r2b/*'
output_dir = '/content/drive/MyDrive/MajorProject/results_sr/'
util.mkdir(output_dir)

model = Generator(cfg['network_G'])
model.load_state_dict(torch.load(model_path), strict=False)
model.eval()
for k, v in model.named_parameters():
    v.requires_grad = False
model = model.to(device)
print('Model path {:s}. \nTesting...'.format(model_path))

idx = 0
for path in glob.glob(input_dir):
    idx += 1
    base = os.path.splitext(os.path.basename(path))[0]
    print(idx, base)
    # read image
    img = cv2.imread(path, cv2.IMREAD_COLOR)
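    # Hedged continuation of the loop (the original snippet is truncated here). This
    # follows the common ESRGAN-style test-script pattern and is an assumption, not the
    # author's confirmed code: BGR uint8 -> normalized CHW tensor, forward pass without
    # gradients, then back to a BGR uint8 image written to output_dir.
    img = img * 1.0 / 255
    img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    img_lr = img.unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_lr).data.squeeze().float().cpu().clamp_(0, 1).numpy()
    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
    output = (output * 255.0).round()
    cv2.imwrite(os.path.join(output_dir, '{}.png'.format(base)), output)
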
def __init__(self, vp_value_count, output_shape, name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_channels = 256
    self.rep_frames = 4
    self.rep_size = 14

    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)

    # convs to make all appearance encodings have the same number of channels, so they can be used in the same convLSTM
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256a = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_conv256b = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_convs = [
        self.app_conv128,
        self.app_conv256a,
        self.app_conv256b
    ]

    # convs for the initial hidden and current states of the convLSTM
    self.hconv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.cconv = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))

    # convs to make all motion features have the same number of channels, so they can be used in the same Trans Net
    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=256, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=256, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=256, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = {
        64: self.rep_conv64,
        192: self.rep_conv192,
        256: self.rep_conv256
    }

    self.trans = Transformer(in_channels=256 + self.vp_value_count, out_channels=128)
    self.conv_lstm = ConvLSTM(input_dim=128, hidden_dim=[128], kernel_size=(3, 3),
                              num_layers=1, batch_first=True, bias=False,
                              return_all_layers=False)
    self.gen = Generator(in_channels=[128], out_frames=self.out_frames)

def __init__(self, vp_value_count, output_shape, name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    # params
    self.net_name = name
    self.vp_value_count = vp_value_count
    self.output_shape = output_shape
    self.out_frames = output_shape[2]
    self.rep_feat = 128
    self.app_feat = 256

    # networks
    self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=True, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)
    self.trans = Transformer(in_channels=self.rep_feat + self.vp_value_count,
                             out_channels=self.rep_feat)
    self.gen = Generator(in_channels=[self.app_feat, self.rep_feat],
                         out_frames=self.out_frames)
    self.conv_lstms = {
        56: ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                     num_layers=1, in_shape=(56, 56), batch_first=True, bias=False,
                     return_all_layers=False),
        28: ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                     num_layers=1, in_shape=(28, 28), batch_first=True, bias=False,
                     return_all_layers=False),
        14: ConvLSTM(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(3, 3),
                     num_layers=1, in_shape=(14, 14), batch_first=True, bias=False,
                     return_all_layers=False)
    }

    # convs
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256 = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv512 = nn.Conv2d(in_channels=512, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_convs = {
        128: self.app_conv128,
        256: self.app_conv256,
        512: self.app_conv512
    }

    self.hconv = nn.Conv2d(in_channels=self.app_feat, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.cconv = nn.Conv2d(in_channels=self.app_feat, out_channels=128, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))

    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = {
        64: self.rep_conv64,
        192: self.rep_conv192,
        256: self.rep_conv256
    }

optimizer_generator = tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.5, beta_2=0.999)
optimizer_keypoint_detector = tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.5, beta_2=0.999)
optimizer_discriminator = tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.5, beta_2=0.999)

batch_size = 20
epochs = 150
train_steps = 99  # change

keypoint_detector = KeypointDetector()
generator = Generator()
discriminator = Discriminator()
generator_full = FullGenerator(keypoint_detector, generator, discriminator)
discriminator_full = FullDiscriminator(discriminator)


@tf.function
def train_step(source_images, driving_images):
    with tf.GradientTape(persistent=True) as tape:
        losses_generator, generated = generator_full(source_images, driving_images, tape)
        generator_loss = tf.math.reduce_sum(list(losses_generator.values()))
    generator_gradients = tape.gradient(generator_loss, generator_full.trainable_variables)
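    # Hedged continuation (the original snippet is truncated here): in a standard TF2
    # training step the gradients computed above are applied with the matching optimizer.
    # The original code presumably also routes keypoint-detector variables through
    # optimizer_keypoint_detector and performs a discriminator update from the same
    # persistent tape; only the generic apply_gradients call is sketched here.
    optimizer_generator.apply_gradients(
        zip(generator_gradients, generator_full.trainable_variables))
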
    return checkpoint, new_model_path


def generate(G, file_name, tags):
    '''
    Generate fake image.
    :param G:
    :param file_name:
    :param tags:
    :return: img's tensor and file path.
    '''
    # g_noise = Variable(torch.FloatTensor(1, 128)).to(device).data.normal_(.0, 1)
    # g_tag = Variable(torch.FloatTensor([utils.get_one_hot(tags)])).to(device)
    g_noise, g_tag = utils.fake_generator(1, 128, device)
    img = G(torch.cat([g_noise, g_tag], dim=1))
    vutils.save_image(img.data.view(1, 3, 128, 128),
                      os.path.join(tmp_path, '{}.png'.format(file_name)))
    print('Saved file in {}'.format(os.path.join(tmp_path, '{}.png'.format(file_name))))
    return img.data.view(1, 3, 128, 128), os.path.join(tmp_path, '{}.png'.format(file_name))


if __name__ == '__main__':
    G = Generator().to(device)
    checkpoint, _ = load_checkpoint(model_dump_path)
    G.load_state_dict(checkpoint['G'])
    generate(G, 'test', ['white hair'])

freeze_layer = 21
for i in range(freeze_layer):
    model.layers[i].trainable = False

if True:
    # BATCH_SIZE should not be too small, otherwise training quality suffers badly
    BATCH_SIZE = 8
    # learning rate for the coarse (frozen-backbone) training stage
    Lr = 5e-4
    # starting epoch
    Init_Epoch = 0
    # last epoch of the frozen-backbone stage
    Freeze_Epoch = 50

    # data generator
    gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                    (input_shape[0], input_shape[1]), NUM_CLASSES)

    # compile the model
    model.compile(optimizer=Adam(lr=Lr),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)

    # epochs 0-50: train with the first 21 layers frozen
    model.fit(gen.generate(True),
              steps_per_epoch=num_train // BATCH_SIZE,
              validation_data=gen.generate(False),
              validation_steps=num_val // BATCH_SIZE,
              epochs=Freeze_Epoch,
              initial_epoch=Init_Epoch,
              callbacks=[logging, checkpoint, reduce_lr, early_stopping])

# unfreeze the first 21 layers
for i in range(freeze_layer):
    model.layers[i].trainable = True

os.makedirs(SAMPLE_DIR, exist_ok=True)

# Setup CUDA
cudnn.benchmark = True
if args.USE_CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
random.seed(args.SEED)
torch.manual_seed(args.SEED)
if args.USE_CUDA:
    torch.cuda.manual_seed_all(args.SEED)

# Initialize Generator
generator = Generator(args.GAN_TYPE, args.ZDIM, args.NUM_CLASSES)
generator.apply(weights_init)
generator.to(device)
print(generator)

# Initialize Discriminator
discriminator = Discriminator(args.GAN_TYPE, args.NUM_CLASSES)
discriminator.apply(weights_init)
discriminator.to(device)
print(discriminator)

# Initialize loss function and optimizer
criterionLabel = nn.BCELoss()
criterionClass = nn.CrossEntropyLoss()
optimizerD = Adam(discriminator.parameters(), lr=args.LR, betas=(0.5, 0.999))
optimizerG = Adam(generator.parameters(), lr=args.LR, betas=(0.5, 0.999))

def __init__(self, vp_value_count, stdev, output_shape, pretrained=False,
             vgg_weights_path='', i3d_weights_path='', name='Full Network'):
    """
    Initializes the Full Network.
    :param vp_value_count: (int) The number of values that identify the viewpoint.
    :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                         Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
    :param name: (str, optional) The name of the network (default 'Full Network').
    Raises:
        ValueError: if 'vp_value_count' is not a legal value count
        ValueError: if 'output_shape' does not contain a legal number of frames.
    """
    if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
        raise ValueError('Invalid number of vp values: %d' % vp_value_count)
    if output_shape[2] not in self.VALID_FRAME_COUNTS:
        raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

    super(FullNetwork, self).__init__()

    self.net_name = name
    self.vp_value_count = vp_value_count
    self.stdev = stdev
    self.output_shape = output_shape
    self.out_frames = output_shape[2]

    # specs of various features
    self.app_feat = 128
    self.rep_feat = 128
    self.rep_frames = 4
    self.rep_size = 14
    self.nkp = 32

    self.vgg = vgg16(pretrained=pretrained, weights_path=vgg_weights_path)
    self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                            pretrained=pretrained, weights_path=i3d_weights_path)
    self.exp = Expander(vp_value_count=self.vp_value_count)

    # convs to make all appearance encodings have same number of channels, so they can be used in the same convGRU
    self.app_conv128 = nn.Conv2d(in_channels=128, out_channels=self.app_feat, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1))
    self.app_conv256a = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_conv256b = nn.Conv2d(in_channels=256, out_channels=self.app_feat, kernel_size=(3, 3),
                                  stride=(1, 1), padding=(1, 1))
    self.app_convs = [
        nn.Sequential(self.app_conv128, nn.ReLU(inplace=True)),
        nn.Sequential(self.app_conv256a, nn.ReLU(inplace=True)),
        nn.Sequential(self.app_conv256b, nn.ReLU(inplace=True))
    ]

    # convs to make all motion features have the same number of channels, so they can be used in the same trans net
    self.rep_conv64 = nn.Conv3d(in_channels=64, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv192 = nn.Conv3d(in_channels=192, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_conv256 = nn.Conv3d(in_channels=256, out_channels=self.rep_feat, kernel_size=(3, 3, 3),
                                 stride=(1, 1, 1), padding=(1, 1, 1))
    self.rep_convs = [
        nn.Sequential(self.rep_conv64, nn.ReLU(inplace=True)),
        nn.Sequential(self.rep_conv192, nn.ReLU(inplace=True)),
        nn.Sequential(self.rep_conv256, nn.ReLU(inplace=True))
    ]

    self.trans = Transformer(in_channels=self.rep_feat + self.vp_value_count,
                             out_channels=self.rep_feat)
    self.kpp = KPPredictor(in_channels=self.rep_feat, nkp=self.nkp, stdev=self.stdev)
    self.vpp = VPPredictor(in_channels=256)
    self.gru = ConvGRU(input_dim=self.rep_feat, hidden_dim=[self.app_feat], kernel_size=(7, 7),
                       num_layers=1, batch_first=True, bias=False, return_all_layers=False)
    self.gen = Generator(in_channels=[self.app_feat, self.nkp], out_frames=self.out_frames)

run_name = 'correlation-GAN_{}'.format(config.version)
wandb.init(name=run_name, dir=config.checkpoint_dir, notes=config.description)
wandb.config.update(config.__dict__)

device = torch.device('cuda')

use_dropout = [True, True, False]
drop_prob = [0.5, 0.5, 0.5]
use_ac_func = [True, True, False]
activation = 'relu'
latent_dim = 10

gen_fc_layers = [latent_dim, 16, 32, 2]
generator = Generator(gen_fc_layers, use_dropout, drop_prob, use_ac_func, activation).to(device)

disc_fc_layers = [2, 32, 16, 1]
discriminator = Discriminator(disc_fc_layers, use_dropout, drop_prob, use_ac_func, activation).to(device)

wandb.watch([generator, discriminator])

g_optimizer = Adam(generator.parameters(), lr=1e-4, betas=(0.5, 0.9))
d_optimizer = Adam(discriminator.parameters(), lr=1e-4, betas=(0.5, 0.9))

wgan_gp = WGAN_GP(config, generator, discriminator, g_optimizer, d_optimizer, latent_shape)
wgan_gp.train(dataloader, 200)