Example 1
    def __init__(self):
        logger.info('Set Data Loader')
        self.dataset = FoodDataset(transform=transforms.Compose([ToTensor()]))
        self.data_loader = torch.utils.data.DataLoader(self.dataset,
                                                       batch_size=batch_size,
                                                       shuffle=True,
                                                       num_workers=num_workers,
                                                       drop_last=True)
        checkpoint, checkpoint_name = self.load_checkpoint(model_dump_path)
        if checkpoint is None:
            logger.info('No pre-trained model found. Skipping model loading.')
            logger.info('Set Generator and Discriminator')
            self.G = Generator(tag=tag_size).to(device)
            self.D = Discriminator(tag=tag_size).to(device)
            logger.info('Initialize Weights')
            self.G.apply(initital_network_weights)
            self.D.apply(initital_network_weights)
            logger.info('Set Optimizers')
            self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                                lr=learning_rate,
                                                betas=(beta_1, 0.999))
            self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                                lr=learning_rate,
                                                betas=(beta_1, 0.999))
            self.epoch = 0
        else:
            logger.info('Load Generator and Discriminator')
            self.G = Generator(tag=tag_size).to(device)
            self.D = Discriminator(tag=tag_size).to(device)
            logger.info('Load Pre-Trained Weights From Checkpoint {}'.format(
                checkpoint_name))
            self.G.load_state_dict(checkpoint['G'])
            self.D.load_state_dict(checkpoint['D'])
            logger.info('Load Optimizers')
            self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                                lr=learning_rate,
                                                betas=(beta_1, 0.999))
            self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                                lr=learning_rate,
                                                betas=(beta_1, 0.999))
            self.optimizer_G.load_state_dict(checkpoint['optimizer_G'])
            self.optimizer_D.load_state_dict(checkpoint['optimizer_D'])

            self.epoch = checkpoint['epoch']
        logger.info('Set Criterion')
        self.a_D = alexnet.alexnet(num_classes=tag_size).to(device)
        self.optimizer_a_D = torch.optim.Adam(self.a_D.parameters(),
                                              lr=learning_rate,
                                              betas=(beta_1, .999))
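
Example 1 calls a load_checkpoint helper that the excerpt does not show (Example 14 returns from the same helper). A minimal module-level sketch, assuming dumps are .pth files under model_dump_path and the newest one wins; the layout is hypothetical:

import glob
import os

import torch


def load_checkpoint(model_dump_path):
    # Hypothetical helper: load the most recent .pth dump, or return
    # (None, None) when no checkpoint has been written yet.
    files = glob.glob(os.path.join(model_dump_path, '*.pth'))
    if not files:
        return None, None
    latest = max(files, key=os.path.getmtime)
    return torch.load(latest, map_location='cpu'), os.path.basename(latest)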
Example 2
    def __init__(self, vp_value_count, output_shape, name='Full Network'):
        """
        Initializes the Full Network.
        :param vp_value_count: (int) The number of values that identify the viewpoint.
        :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                              Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
        :param name: (str, optional) The name of the network (default 'Full Network').
        Raises:
            ValueError: if 'vp_value_count' is not a legal value count
            ValueError: if 'output_shape' does not contain a legal number of frames.
        """
        if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
            raise ValueError('Invalid number of vp values: %d' % vp_value_count)
        if output_shape[2] not in self.VALID_FRAME_COUNTS:
            raise ValueError('Invalid number of frames in desired output: %d' % output_shape[2])

        super(FullNetwork, self).__init__()

        self.net_name = name
        self.vp_value_count = vp_value_count
        self.output_shape = output_shape
        self.out_frames = output_shape[2]
        self.rep_channels = 256
        self.rep_frames = 4
        self.rep_size = 14

        self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
        self.i3d = InceptionI3d(final_endpoint='Mixed_5c', in_frames=self.out_frames,
                                pretrained=True, weights_path=i3d_weights_path)

        self.exp = Expander(vp_value_count=self.vp_value_count, out_frames=self.rep_frames, out_size=self.rep_size)
        self.trans = Transformer(in_channels=self.rep_channels + self.vp_value_count, out_channels=self.rep_channels)

        self.gen = Generator(in_channels=[self.rep_channels, self.rep_channels], out_frames=self.out_frames)
Example 3
    def _build_model(self):
        device = torch.device('cuda')

        data_dimension = self.config.data['dimension']
        generator_hidden_layers = self.config.model['generator_hidden_layers']
        use_dropout = self.config.model['use_dropout']
        drop_prob = self.config.model['drop_prob']
        use_ac_func = self.config.model['use_ac_func']
        activation = self.config.model['activation']
        disc_hidden_layers = self.config.model['disc_hidden_layers']

        logger.log("Loading {} network ...".format(colored('generator',
                                                           'red')))
        gen_fc_layers = [
            self.latent_dim, *generator_hidden_layers, data_dimension
        ]
        generator = Generator(gen_fc_layers, use_dropout, drop_prob,
                              use_ac_func, activation).to(device)

        logger.log("Loading {} network ...".format(
            colored('discriminator', 'red')))
        disc_fc_layers = [data_dimension, *disc_hidden_layers, 1]
        discriminator = Discriminator(disc_fc_layers, use_dropout, drop_prob,
                                      use_ac_func, activation).to(device)

        wandb.watch([generator, discriminator])

        g_optimizer, d_optimizer = self._setup_optimizers(
            generator, discriminator)

        return generator, discriminator, g_optimizer, d_optimizer
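
The _setup_optimizers helper is not shown in this excerpt. A plausible sketch, assuming the same Adam settings that Example 18 pairs with these networks (lr=1e-4, betas=(0.5, 0.9)):

    def _setup_optimizers(self, generator, discriminator):
        # Hyperparameters are assumptions borrowed from Example 18.
        g_optimizer = torch.optim.Adam(generator.parameters(),
                                       lr=1e-4, betas=(0.5, 0.9))
        d_optimizer = torch.optim.Adam(discriminator.parameters(),
                                       lr=1e-4, betas=(0.5, 0.9))
        return g_optimizer, d_optimizer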
Example 4
    def __init__(self, opt):
        self.device = torch.device('cuda')
        self.opt = opt
        self.G = Generator(self.opt['network_G']).to(self.device)
        util.init_weights(self.G, init_type='kaiming', scale=0.1)
        if self.opt['path']['pretrain_G']:
            self.G.load_state_dict(torch.load(self.opt['path']['pretrain_G']),
                                   strict=True)
        self.D = Discriminator(self.opt['network_D']).to(self.device)
        util.init_weights(self.D, init_type='kaiming', scale=1)
        self.FE = VGGFeatureExtractor().to(self.device)
        self.G.train()
        self.D.train()
        self.FE.eval()

        self.log_dict = OrderedDict()

        self.optim_params = [
            v for k, v in self.G.named_parameters() if v.requires_grad
        ]
        self.opt_G = torch.optim.Adam(self.optim_params,
                                      lr=self.opt['train']['lr_G'],
                                      betas=(self.opt['train']['b1_G'],
                                             self.opt['train']['b2_G']))
        self.opt_D = torch.optim.Adam(self.D.parameters(),
                                      lr=self.opt['train']['lr_D'],
                                      betas=(self.opt['train']['b1_D'],
                                             self.opt['train']['b2_D']))

        self.optimizers = [self.opt_G, self.opt_D]
        self.schedulers = [
            lr_scheduler.MultiStepLR(optimizer, self.opt['train']['lr_steps'],
                                     self.opt['train']['lr_gamma'])
            for optimizer in self.optimizers
        ]
Example 5
    def __init__(self, device, num_steps, z_dimension=8):

        # in and out channels for the generator:
        a, b = 2, 3

        G = Generator(a, b) if not USE_UNET else UNet(a, b)
        E = ResNetEncoder(b, z_dimension)

        # conditional discriminators
        D1 = MultiScaleDiscriminator(a + b - 1)
        D2 = MultiScaleDiscriminator(a + b - 1)

        def weights_init(m):
            if isinstance(m, (nn.Conv2d, nn.Linear, nn.ConvTranspose2d)):
                init.xavier_normal_(m.weight, gain=0.02)
                if m.bias is not None:
                    init.zeros_(m.bias)
            elif isinstance(m, nn.InstanceNorm2d) and m.affine:
                init.ones_(m.weight)
                init.zeros_(m.bias)

        self.G = G.apply(weights_init).to(device)
        self.E = E.apply(weights_init).to(device)
        self.D1 = D1.apply(weights_init).to(device)
        self.D2 = D2.apply(weights_init).to(device)

        params = {
            'lr': 4e-4,
            'betas': (0.5, 0.999),
            'weight_decay': 1e-8
        }
        generator_groups = [
            {'params': [p for n, p in self.G.named_parameters() if 'mapping' not in n]},
            {'params': self.G.mapping.parameters(), 'lr': 4e-5}
        ]
        self.optimizer = {
            'G': optim.Adam(generator_groups, **params),
            'E': optim.Adam(self.E.parameters(), **params),
            'D1': optim.Adam(self.D1.parameters(), **params),
            'D2': optim.Adam(self.D2.parameters(), **params)
        }

        def lambda_rule(i):
            decay = num_steps // 2
            m = 1.0 if i < decay else 1.0 - (i - decay) / decay
            return max(m, 0.0)

        self.schedulers = []
        for o in self.optimizer.values():
            self.schedulers.append(LambdaLR(o, lr_lambda=lambda_rule))

        self.gan_loss = LSGAN()
        self.z_dimension = z_dimension
        self.device = device
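
The lambda_rule above keeps the learning-rate multiplier at 1.0 for the first half of training, then decays it linearly to zero. A standalone numeric check (num_steps=100 is an arbitrary choice for illustration):

def lambda_rule(i, num_steps=100):
    # Standalone copy of the schedule above, for illustration only.
    decay = num_steps // 2
    m = 1.0 if i < decay else 1.0 - (i - decay) / decay
    return max(m, 0.0)

assert lambda_rule(25) == 1.0    # first half: constant
assert lambda_rule(75) == 0.5    # halfway through the decay
assert lambda_rule(100) == 0.0   # fully decayed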
Example 6
    def __init__(self, opt):
        self.device = torch.device('cuda')
        self.opt = opt
        self.G = Generator(self.opt['network_G']).to(self.device)
        util.init_weights(self.G, init_type='kaiming', scale=0.1)
        self.G.train()

        self.log_dict = OrderedDict()

        self.optim_params = [
            v for k, v in self.G.named_parameters() if v.requires_grad
        ]
        self.opt_G = torch.optim.Adam(self.optim_params,
                                      lr=self.opt['train']['lr_G'],
                                      betas=(self.opt['train']['b1_G'],
                                             self.opt['train']['b2_G']))

        self.optimizers = [self.opt_G]
        self.schedulers = [
            lr_scheduler.MultiStepLR(optimizer, self.opt['train']['lr_steps'],
                                     self.opt['train']['lr_gamma'])
            for optimizer in self.optimizers
        ]
Example 7
GAMMA2 = 5.0
GAMMA3 = 10.0
WLAMBDA = 5.0
SLAMBDA = 5.0

# Datasets
DATASET = Dataset(rootdir=r'D:\GAN\buildingsDataset', max_images=99999)
DATASET.load_captions_and_class_ids()
#%%

DATALOADER = DATASET.make_dataloaders(BATCH_SIZE)

#%%
# Networks/Modules
DEVICE = torch.device('cuda')
GENERATOR = Generator(gf_dim=GF_DIM, emb_dim=EMB_DIM, z_dim=Z_DIM, cond_dim=COND_DIM)
GENERATOR.cuda()
DISCRIMINATORS = [Disc64(DF_DIM), Disc128(DF_DIM), Disc256(DF_DIM)]
for d in DISCRIMINATORS:
    d.cuda()
RNN = RNNEncoder(vocabsize=DATASET.vocab.n_words, nhidden=EMB_DIM)
RNN.cuda()
CNN = CNNEncoder(out_dim=EMB_DIM)
CNN.cuda()

# Losses
WORDSLOSS = WordsLoss(DEVICE, GAMMA1, GAMMA2, GAMMA3, WLAMBDA)
SENTLOSS = SentenceLoss(DEVICE, GAMMA3, SLAMBDA)
GENLOSS = NonSaturatingGenLoss()
DISCLOSS = NonSaturatingDiscLoss()
Example 8
from tqdm import trange

latentdim = 16**2
steps = 4
train_loader, val_loader = get_data()


def prepare_batch(batch, latentdim=latentdim, sigma=0.1):
    z = torch.rand(batch.size(0), latentdim, device=batch.device)
    inp = batch.clone() + sigma * torch.randn(batch.shape, device=batch.device)
    inp[..., [1, 4, 32, 33, 35, 36], :] = 0
    out = batch.clone()
    return inp, z, out


G = Generator(latentdim=latentdim, steps=steps, filters=64, zsteps=3).cuda()
D = PatchDiscriminator(steps=3).cuda()

trainable_G = [p for p in G.parameters() if p.requires_grad]
trainable_D = [p for p in D.parameters() if p.requires_grad]

total_G = sum(p.numel() for p in trainable_G)
total_D = sum(p.numel() for p in trainable_D)
print("Number of parameters: %d" % total_G)
print("Number of discriminator parameters: %d" % total_D)

epochs = 3000
plotting = 50
D_steps = 10
eps = 1e-6
lr = 2e-4
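
The excerpt stops at the hyperparameters. A minimal sketch of the training loop they imply; the call signatures of G and D, the critic-style losses, and the Adam betas are assumptions, not the original code:

opt_G = torch.optim.Adam(trainable_G, lr=lr, betas=(0.5, 0.999))
opt_D = torch.optim.Adam(trainable_D, lr=lr, betas=(0.5, 0.999))

for epoch in trange(epochs):
    for batch in train_loader:
        inp, z, out = prepare_batch(batch.cuda())

        # D_steps discriminator updates per generator update (assumed)
        for _ in range(D_steps):
            opt_D.zero_grad()
            with torch.no_grad():
                fake = G(inp, z)  # hypothetical signature
            d_loss = D(fake).mean() - D(out).mean()  # hypothetical loss
            d_loss.backward()
            opt_D.step()

        opt_G.zero_grad()
        g_loss = -D(G(inp, z)).mean()  # hypothetical loss
        g_loss.backward()
        opt_G.step()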
Example 9
        "cuda:0" if (torch.cuda.is_available() and args.gpu) else "cpu"
    )
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    compare_dir = f"compare/{now}"
    print(f'Saving SR images in: ./{compare_dir}/')
    os.makedirs(compare_dir, exist_ok=True)

    test_dataset = VisDataset(
        root=args.data_path, scale_factor=args.scale_factor, hr_size=args.hr_size
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.vis_batch_size
    )

    gen_net = Generator(
        num_res_blocks=args.gen_res_blocks, upscale_factor=args.scale_factor
    ).to(device)

    gen_path = args.from_pretrained_gen
    if gen_path:
        gen_net.load_state_dict(torch.load(gen_path))

    test_iter = iter(test_dataloader)

    for i, (hr_test, lr_test, simple_sr_test) in zip(range(args.iterations), test_iter):
        gan_sr_test = gen_net(lr_test.to(device)).cpu()
        stacked = torch.cat([hr_test, simple_sr_test, gan_sr_test])
        vutils.save_image(
            stacked, f"{compare_dir}/{i}.png", normalize=True, nrow=args.vis_batch_size
        )
Example 10
import glob
import os
import sys

import cv2
import numpy as np
import torch
from networks.generator import Generator
import utils.utils as util

cfg = util.load_yaml('../Configs/Train/config_sr.yml')
model_path = sys.argv[1]
device = torch.device('cuda')
# device = torch.device('cpu')

input_dir = '/content/drive/MyDrive/MajorProject/results_r2b/*'
output_dir = '/content/drive/MyDrive/MajorProject/results_sr/'
util.mkdir(output_dir)

model = Generator(cfg['network_G'])
model.load_state_dict(torch.load(model_path), strict=False)
model.eval()
for k, v in model.named_parameters():
    v.requires_grad = False
model = model.to(device)

print('Model path {:s}.\nTesting...'.format(model_path))

idx = 0
for path in glob.glob(input_dir):
    idx += 1
    base = os.path.splitext(os.path.basename(path))[0]
    print(idx, base)
    # read image
    img = cv2.imread(path, cv2.IMREAD_COLOR)
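
The excerpt ends just after reading the image. A typical continuation for this kind of super-resolution test script; this is an assumed reconstruction, not the original code:

    # HWC BGR uint8 -> CHW RGB float in [0, 1]
    img = img * 1.0 / 255
    img = torch.from_numpy(
        np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    img_LR = img.unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_LR).squeeze().float().cpu().clamp_(0, 1).numpy()
    # CHW RGB -> HWC BGR uint8 for cv2.imwrite
    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
    output = (output * 255.0).round().astype(np.uint8)
    cv2.imwrite(os.path.join(output_dir, '{:s}_sr.png'.format(base)), output)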
Example 11
    def __init__(self, vp_value_count, output_shape, name='Full Network'):
        """
        Initializes the Full Network.
        :param vp_value_count: (int) The number of values that identify the viewpoint.
        :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                              Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
        :param name: (str, optional) The name of the network (default 'Full Network').
        Raises:
            ValueError: if 'vp_value_count' is not a legal value count
            ValueError: if 'output_shape' does not contain a legal number of frames.
        """
        if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
            raise ValueError('Invalid number of vp values: %d' %
                             vp_value_count)
        if output_shape[2] not in self.VALID_FRAME_COUNTS:
            raise ValueError('Invalid number of frames in desired output: %d' %
                             output_shape[2])

        super(FullNetwork, self).__init__()

        self.net_name = name
        self.vp_value_count = vp_value_count
        self.output_shape = output_shape
        self.out_frames = output_shape[2]
        self.rep_channels = 256
        self.rep_frames = 4
        self.rep_size = 14

        self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
        self.i3d = InceptionI3d(final_endpoint='Mixed_5c',
                                in_frames=self.out_frames,
                                pretrained=True,
                                weights_path=i3d_weights_path)

        self.exp = Expander(vp_value_count=self.vp_value_count)

        # convs to make all appearance encoding have same number of channels, so they can be used in the same convLSTM
        self.app_conv128 = nn.Conv2d(in_channels=128,
                                     out_channels=256,
                                     kernel_size=(3, 3),
                                     stride=(1, 1),
                                     padding=(1, 1))
        self.app_conv256a = nn.Conv2d(in_channels=256,
                                      out_channels=256,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=(1, 1))
        self.app_conv256b = nn.Conv2d(in_channels=512,
                                      out_channels=256,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=(1, 1))
        self.app_convs = [
            self.app_conv128, self.app_conv256a, self.app_conv256b
        ]

        # convs for the initial hidden and current states of the convLSTM
        self.hconv = nn.Conv2d(in_channels=256,
                               out_channels=128,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=(1, 1))
        self.cconv = nn.Conv2d(in_channels=256,
                               out_channels=128,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=(1, 1))

        # convs to make all motion features have the same number of channels, so they can be used in the same Trans Net
        self.rep_conv64 = nn.Conv3d(in_channels=64,
                                    out_channels=256,
                                    kernel_size=(3, 3, 3),
                                    stride=(1, 1, 1),
                                    padding=(1, 1, 1))
        self.rep_conv192 = nn.Conv3d(in_channels=192,
                                     out_channels=256,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_conv256 = nn.Conv3d(in_channels=256,
                                     out_channels=256,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_convs = {
            64: self.rep_conv64,
            192: self.rep_conv192,
            256: self.rep_conv256
        }

        self.trans = Transformer(in_channels=256 + self.vp_value_count,
                                 out_channels=128)

        self.conv_lstm = ConvLSTM(input_dim=128,
                                  hidden_dim=[128],
                                  kernel_size=(3, 3),
                                  num_layers=1,
                                  batch_first=True,
                                  bias=False,
                                  return_all_layers=False)

        self.gen = Generator(in_channels=[128], out_frames=self.out_frames)
Example 12
    def __init__(self, vp_value_count, output_shape, name='Full Network'):
        """
        Initializes the Full Network.
        :param vp_value_count: (int) The number of values that identify the viewpoint.
        :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                              Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
        :param name: (str, optional) The name of the network (default 'Full Network').
        Raises:
            ValueError: if 'vp_value_count' is not a legal value count
            ValueError: if 'output_shape' does not contain a legal number of frames.
        """
        if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
            raise ValueError('Invalid number of vp values: %d' %
                             vp_value_count)
        if output_shape[2] not in self.VALID_FRAME_COUNTS:
            raise ValueError('Invalid number of frames in desired output: %d' %
                             output_shape[2])

        super(FullNetwork, self).__init__()

        # params
        self.net_name = name
        self.vp_value_count = vp_value_count
        self.output_shape = output_shape
        self.out_frames = output_shape[2]
        self.rep_feat = 128
        self.app_feat = 256

        # networks
        self.vgg = vgg16(pretrained=True, weights_path=vgg_weights_path)
        self.i3d = InceptionI3d(final_endpoint='Mixed_5c',
                                in_frames=self.out_frames,
                                pretrained=True,
                                weights_path=i3d_weights_path)
        self.exp = Expander(vp_value_count=self.vp_value_count)
        self.trans = Transformer(in_channels=self.rep_feat +
                                 self.vp_value_count,
                                 out_channels=self.rep_feat)
        self.gen = Generator(in_channels=[self.app_feat, self.rep_feat],
                             out_frames=self.out_frames)

        # Register the ConvLSTMs as attributes so their parameters are
        # tracked by the module (a plain dict would hide them from
        # .parameters() and .to(device)), then index them by spatial size.
        self.conv_lstm56 = ConvLSTM(input_dim=self.rep_feat,
                                    hidden_dim=[self.app_feat],
                                    kernel_size=(3, 3),
                                    num_layers=1,
                                    in_shape=(56, 56),
                                    batch_first=True,
                                    bias=False,
                                    return_all_layers=False)
        self.conv_lstm28 = ConvLSTM(input_dim=self.rep_feat,
                                    hidden_dim=[self.app_feat],
                                    kernel_size=(3, 3),
                                    num_layers=1,
                                    in_shape=(28, 28),
                                    batch_first=True,
                                    bias=False,
                                    return_all_layers=False)
        self.conv_lstm14 = ConvLSTM(input_dim=self.rep_feat,
                                    hidden_dim=[self.app_feat],
                                    kernel_size=(3, 3),
                                    num_layers=1,
                                    in_shape=(14, 14),
                                    batch_first=True,
                                    bias=False,
                                    return_all_layers=False)
        self.conv_lstms = {
            56: self.conv_lstm56,
            28: self.conv_lstm28,
            14: self.conv_lstm14
        }

        # convs
        self.app_conv128 = nn.Conv2d(in_channels=128,
                                     out_channels=self.app_feat,
                                     kernel_size=(3, 3),
                                     stride=(1, 1),
                                     padding=(1, 1))
        self.app_conv256 = nn.Conv2d(in_channels=256,
                                     out_channels=self.app_feat,
                                     kernel_size=(3, 3),
                                     stride=(1, 1),
                                     padding=(1, 1))
        self.app_conv512 = nn.Conv2d(in_channels=512,
                                     out_channels=self.app_feat,
                                     kernel_size=(3, 3),
                                     stride=(1, 1),
                                     padding=(1, 1))
        self.app_convs = {
            128: self.app_conv128,
            256: self.app_conv256,
            512: self.app_conv512
        }

        self.hconv = nn.Conv2d(in_channels=self.app_feat,
                               out_channels=128,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=(1, 1))
        self.cconv = nn.Conv2d(in_channels=self.app_feat,
                               out_channels=128,
                               kernel_size=(3, 3),
                               stride=(1, 1),
                               padding=(1, 1))

        self.rep_conv64 = nn.Conv3d(in_channels=64,
                                    out_channels=self.rep_feat,
                                    kernel_size=(3, 3, 3),
                                    stride=(1, 1, 1),
                                    padding=(1, 1, 1))
        self.rep_conv192 = nn.Conv3d(in_channels=192,
                                     out_channels=self.rep_feat,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_conv256 = nn.Conv3d(in_channels=256,
                                     out_channels=self.rep_feat,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_convs = {
            64: self.rep_conv64,
            192: self.rep_conv192,
            256: self.rep_conv256
        }
Example 13
optimizer_generator = tf.keras.optimizers.Adam(learning_rate=lr,
                                               beta_1=0.5,
                                               beta_2=0.999)
optimizer_keypoint_detector = tf.keras.optimizers.Adam(learning_rate=lr,
                                                       beta_1=0.5,
                                                       beta_2=0.999)
optimizer_discriminator = tf.keras.optimizers.Adam(learning_rate=lr,
                                                   beta_1=0.5,
                                                   beta_2=0.999)

batch_size = 20
epochs = 150
train_steps = 99  # change

keypoint_detector = KeypointDetector()
generator = Generator()
discriminator = Discriminator()

generator_full = FullGenerator(keypoint_detector, generator, discriminator)
discriminator_full = FullDiscriminator(discriminator)


@tf.function
def train_step(source_images, driving_images):
    with tf.GradientTape(persistent=True) as tape:
        losses_generator, generated = generator_full(source_images,
                                                     driving_images, tape)
        generator_loss = tf.math.reduce_sum(list(losses_generator.values()))

    generator_gradients = tape.gradient(generator_loss,
                                        generator_full.trainable_variables)
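
The excerpt stops after computing the gradients. The update step that would normally follow is sketched below; routing all gradients through optimizer_generator is an assumption (the original may split variables across the three optimizers):

    optimizer_generator.apply_gradients(
        zip(generator_gradients, generator_full.trainable_variables))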
Example 14
    return checkpoint, new_model_path


def generate(G, file_name, tags):
    '''
    Generate a fake image.
    :param G: generator network.
    :param file_name: base name for the saved PNG.
    :param tags: list of tag strings (unused here; the commented-out
                 lines below show the one-hot path that consumed it).
    :return: the image tensor and the saved file path.
    '''
    # g_noise = Variable(torch.FloatTensor(1, 128)).to(device).data.normal_(.0, 1)
    # g_tag = Variable(torch.FloatTensor([utils.get_one_hot(tags)])).to(device)
    g_noise, g_tag = utils.fake_generator(1, 128, device)

    img = G(torch.cat([g_noise, g_tag], dim=1))
    img_path = os.path.join(tmp_path, '{}.png'.format(file_name))
    vutils.save_image(img.data.view(1, 3, 128, 128), img_path)
    print('Saved file in {}'.format(img_path))
    return img.data.view(1, 3, 128, 128), img_path


if __name__ == '__main__':
    G = Generator().to(device)
    checkpoint, _ = load_checkpoint(model_dump_path)
    G.load_state_dict(checkpoint['G'])
    generate(G, 'test', ['white hair'])
Example 15
    freeze_layer = 21
    for i in range(freeze_layer):
        model.layers[i].trainable = False

    if True:
        # Don't make BATCH_SIZE too small, or training quality suffers badly
        BATCH_SIZE = 8
        # Learning rate for the coarse (frozen) training phase
        Lr = 5e-4
        # Starting epoch
        Init_Epoch = 0
        # Number of epochs to train with frozen layers
        Freeze_Epoch = 50

        # Data generator
        gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                    (input_shape[0], input_shape[1]), NUM_CLASSES)

        # Compile the model
        model.compile(optimizer=Adam(lr=Lr), loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)

        # For epochs 0-50, train with the first 21 layers of the network frozen
        model.fit(gen.generate(True),
                steps_per_epoch=num_train//BATCH_SIZE,
                validation_data=gen.generate(False),
                validation_steps=num_val//BATCH_SIZE,
                epochs=Freeze_Epoch,
                initial_epoch=Init_Epoch,
                callbacks=[logging, checkpoint, reduce_lr, early_stopping])

    # Unfreeze the first 21 layers
    for i in range(freeze_layer):
        model.layers[i].trainable = True
Example 16
os.makedirs(SAMPLE_DIR, exist_ok=True)

# Setup CUDA
cudnn.benchmark = True
if args.USE_CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

random.seed(args.SEED)
torch.manual_seed(args.SEED)
if args.USE_CUDA:
    torch.cuda.manual_seed_all(args.SEED)

# Initialize Generator
generator = Generator(args.GAN_TYPE, args.ZDIM, args.NUM_CLASSES)
generator.apply(weights_init)
generator.to(device)
print(generator)

# Initialize Discriminator
discriminator = Discriminator(args.GAN_TYPE, args.NUM_CLASSES)
discriminator.apply(weights_init)
discriminator.to(device)
print(discriminator)

# Initialize loss function and optimizer
criterionLabel = nn.BCELoss()
criterionClass = nn.CrossEntropyLoss()
optimizerD = Adam(discriminator.parameters(), lr=args.LR, betas=(0.5, 0.999))
optimizerG = Adam(generator.parameters(), lr=args.LR, betas=(0.5, 0.999))
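
The weights_init function applied above is referenced but not shown. The standard DCGAN-style initializer is a reasonable guess (an assumption; the original may differ):

import torch.nn as nn


def weights_init(m):
    # Classic DCGAN initialization: N(0, 0.02) for conv layers,
    # N(1, 0.02) weights plus zero bias for batch norms.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)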
Example 17
    def __init__(self,
                 vp_value_count,
                 stdev,
                 output_shape,
                 pretrained=False,
                 vgg_weights_path='',
                 i3d_weights_path='',
                 name='Full Network'):
        """
        Initializes the Full Network.
        :param vp_value_count: (int) The number of values that identify the viewpoint.
        :param output_shape: (5-tuple) The desired output shape for generated videos. Must match video input shape.
                              Legal values: (bsz, 3, 8, 112, 112) and (bsz, 3, 16, 112, 112)
        :param name: (str, optional) The name of the network (default 'Full Network').
        Raises:
            ValueError: if 'vp_value_count' is not a legal value count
            ValueError: if 'output_shape' does not contain a legal number of frames.
        """
        if vp_value_count not in self.VALID_VP_VALUE_COUNTS:
            raise ValueError('Invalid number of vp values: %d' %
                             vp_value_count)
        if output_shape[2] not in self.VALID_FRAME_COUNTS:
            raise ValueError('Invalid number of frames in desired output: %d' %
                             output_shape[2])

        super(FullNetwork, self).__init__()

        self.net_name = name
        self.vp_value_count = vp_value_count
        self.stdev = stdev
        self.output_shape = output_shape
        self.out_frames = output_shape[2]

        # specs of various features
        self.app_feat = 128
        self.rep_feat = 128
        self.rep_frames = 4
        self.rep_size = 14
        self.nkp = 32

        self.vgg = vgg16(pretrained=pretrained, weights_path=vgg_weights_path)
        self.i3d = InceptionI3d(final_endpoint='Mixed_5c',
                                in_frames=self.out_frames,
                                pretrained=pretrained,
                                weights_path=i3d_weights_path)

        self.exp = Expander(vp_value_count=self.vp_value_count)

        # convs to make all appearance encodings have same number of channels, so they can be used in the same convGRU
        self.app_conv128 = nn.Conv2d(in_channels=128,
                                     out_channels=self.app_feat,
                                     kernel_size=(3, 3),
                                     stride=(1, 1),
                                     padding=(1, 1))
        self.app_conv256a = nn.Conv2d(in_channels=256,
                                      out_channels=self.app_feat,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=(1, 1))
        self.app_conv256b = nn.Conv2d(in_channels=256,
                                      out_channels=self.app_feat,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=(1, 1))
        self.app_convs = [
            nn.Sequential(self.app_conv128, nn.ReLU(inplace=True)),
            nn.Sequential(self.app_conv256a, nn.ReLU(inplace=True)),
            nn.Sequential(self.app_conv256b, nn.ReLU(inplace=True))
        ]

        # convs to make all motion features have the same number of channels, so they can be used in the same trans net
        self.rep_conv64 = nn.Conv3d(in_channels=64,
                                    out_channels=self.rep_feat,
                                    kernel_size=(3, 3, 3),
                                    stride=(1, 1, 1),
                                    padding=(1, 1, 1))
        self.rep_conv192 = nn.Conv3d(in_channels=192,
                                     out_channels=self.rep_feat,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_conv256 = nn.Conv3d(in_channels=256,
                                     out_channels=self.rep_feat,
                                     kernel_size=(3, 3, 3),
                                     stride=(1, 1, 1),
                                     padding=(1, 1, 1))
        self.rep_convs = [
            nn.Sequential(self.rep_conv64, nn.ReLU(inplace=True)),
            nn.Sequential(self.rep_conv192, nn.ReLU(inplace=True)),
            nn.Sequential(self.rep_conv256, nn.ReLU(inplace=True))
        ]

        self.trans = Transformer(in_channels=self.rep_feat +
                                 self.vp_value_count,
                                 out_channels=self.rep_feat)

        self.kpp = KPPredictor(in_channels=self.rep_feat,
                               nkp=self.nkp,
                               stdev=self.stdev)

        self.vpp = VPPredictor(in_channels=256)

        self.gru = ConvGRU(input_dim=self.rep_feat,
                           hidden_dim=[self.app_feat],
                           kernel_size=(7, 7),
                           num_layers=1,
                           batch_first=True,
                           bias=False,
                           return_all_layers=False)

        self.gen = Generator(in_channels=[self.app_feat, self.nkp],
                             out_frames=self.out_frames)
Example 18
    run_name = 'correlation-GAN_{}'.format(config.version)
    wandb.init(name=run_name,
               dir=config.checkpoint_dir,
               notes=config.description)
    wandb.config.update(config.__dict__)

    device = torch.device('cuda')

    use_dropout = [True, True, False]
    drop_prob = [0.5, 0.5, 0.5]
    use_ac_func = [True, True, False]
    activation = 'relu'
    latent_dim = 10

    gen_fc_layers = [latent_dim, 16, 32, 2]
    generator = Generator(gen_fc_layers, use_dropout, drop_prob, use_ac_func,
                          activation).to(device)

    disc_fc_layers = [2, 32, 16, 1]
    discriminator = Discriminator(disc_fc_layers, use_dropout, drop_prob,
                                  use_ac_func, activation).to(device)

    wandb.watch([generator, discriminator])

    g_optimizer = Adam(generator.parameters(), lr=1e-4, betas=(0.5, 0.9))
    d_optimizer = Adam(discriminator.parameters(), lr=1e-4, betas=(0.5, 0.9))

    wgan_gp = WGAN_GP(config, generator, discriminator, g_optimizer,
                      d_optimizer, latent_shape)
    wgan_gp.train(dataloader, 200)