def __init__(self, batch_size=16, D_lr=1e-3, G_lr=1e-3, r_dim=3072, z_dim=100,
             h_size=512, use_gpu=False):
    """Set up GAN training state: networks, optimizers, loss, and data loader.

    Args:
        batch_size: samples per training batch.
        D_lr / G_lr: Adam learning rates for the discriminator / generator.
        r_dim: flattened real-sample dimensionality fed to the discriminator.
        z_dim: latent noise dimensionality fed to the generator.
        h_size: hidden-layer width for both networks.
        use_gpu: place models and label tensors on CUDA when True.
    """
    self.device = torch.device("cuda" if use_gpu else "cpu")
    self.batch_size = batch_size
    self.z_dim = z_dim
    self.D = Discriminator(r_dim, h_size).to(device=self.device)
    self.G = Generator(z_dim, r_dim, h_size).to(device=self.device)
    # `size_average` was removed from torch.nn losses; the default
    # reduction='mean' is the exact modern equivalent of size_average=True.
    self.criterion = nn.BCELoss(reduction='mean')
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=D_lr)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=G_lr)
    # NOTE(review): hard-coded absolute dataset paths — consider promoting
    # these to constructor parameters.
    root = '/home/lhq/PycharmProjects/gan.pytorch/datasets/data/test/'
    text = '/home/lhq/PycharmProjects/gan.pytorch/datasets/data/labels.txt'
    dataset = ListDatasets(root=root, fname_list=text)
    # drop_last=True: the fixed-size label tensors below assume every batch
    # holds exactly `batch_size` samples; a smaller final batch would
    # otherwise produce a shape mismatch in the BCE loss.
    self.dataloader = DataLoader(dataset, batch_size=batch_size,
                                 shuffle=True, drop_last=True)
    self.label_real = torch.ones(batch_size, 1, device=self.device)
    self.label_fake = torch.zeros(batch_size, 1, device=self.device)
def init_model(config, data_generator):
    """Build and return an embedding model wrapped for the requested mode.

    Args:
        config: run configuration; must carry `model` ('transe'/'transd'/
            'distmult'), `mode` ('disc'/'gen'), `load_embeddings`, and
            `embedding_file` / `embedding_size` as needed.
        data_generator: data source handed to the Discriminator/Generator wrapper.

    Returns:
        The built model wrapper (after `model.build()`).

    Raises:
        ValueError: on an unrecognized `config.model` or `config.mode`.
    """
    print('Initializing %s embedding model in %s mode...' % (config.model, config.mode))
    npz = np.load(config.embedding_file) if config.load_embeddings else None
    if config.model == 'transe':
        em = EmbeddingModel.TransE(config, embeddings_dict=npz)
    elif config.model == 'transd':
        # TransD splits each embedding into two halves. Use integer division
        # so embedding_size stays an int under Python 3 — plain `/` would
        # silently turn it into a float and break downstream shape math.
        config.embedding_size = config.embedding_size // 2
        em = EmbeddingModel.TransD(config, embeddings_dict=npz)
    elif config.model == 'distmult':
        em = EmbeddingModel.DistMult(config, embeddings_dict=npz)
    else:
        raise ValueError('Unrecognized model type: %s' % config.model)
    if config.mode == 'disc':
        model = Discriminator.BaseModel(config, em, data_generator)
    elif config.mode == 'gen':
        model = Generator.Generator(config, em, data_generator)
    else:
        raise ValueError('Unrecognized mode: %s' % config.mode)
    # Compare against None explicitly: NpzFile does not define a reliable
    # truth value, so `if npz:` is fragile.
    if npz is not None:
        # noinspection PyUnresolvedReferences
        npz.close()
    model.build()
    print('Built model.')
    print('use semnet: %s' % model.use_semantic_network)
    return model
def __init__(self):
    """
    Constructor for Forest.

    Loads the tree-name list, creates the single initial Tree (the Great
    Deku Tree) with a pre-trained generator, and initializes the
    connection/GAN bookkeeping structures.
    """
    print('\n', 'In the vast, deep forest of Hyrule...', '\n',
          'Long have I served as the guardian spirit...', '\n',
          'I am known as the Deku Tree...', '\n\n',
          'Creating Deku Tree...', '\n', sep='')

    # If names like 'Treebeard' and 'The Giving Tree' are desired, set fun_names
    # to True. Change to False for trees just to have numbers for names.
    fun_names = True
    if fun_names:
        # Context manager ensures the file handle is closed; the original
        # bare open() leaked it.
        with open('names.txt', 'rb') as names_file:
            self.names = load(names_file)
    else:
        self.names = [str(i) for i in range(500)]

    # Create one tree, the Great Deku Tree. He has a saved generator located
    # in the saveddekus folder. Here we are loading a generator that has
    # trained for 100 epochs.
    self.trees = [
        Tree(location=(0, 0),
             forest=self,
             generator=Generator(g=load_model('./saveddekus/DEKU100.h5')),
             name=self.names.pop(0))
    ]
    self.connections = {self.trees[0]: []}
    self.GANs = {}
    print("Forest generated!")
def main():
    """Train a GAN on the anime-faces dataset (TensorFlow 2).

    Alternates one discriminator step and one generator step per iteration,
    printing losses and dumping a 10x10 sample grid every 100 iterations.
    """
    # Fix seeds for reproducibility; silence TF info logging.
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')

    # hyper parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 512
    learning_rate = 0.002
    is_training = True

    # NOTE(review): hard-coded Windows dataset path — adjust per machine.
    img_path = glob.glob(
        r'C:\Users\Jackie Loong\Downloads\DCGAN-LSGAN-WGAN-GP-DRAGAN-Tensorflow-2-master\data\faces\*.jpg'
    )
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))  # peek one batch to sanity-check the value range
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    dataset = dataset.repeat()  # endless stream; "epochs" below are really steps
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))
    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        # Latent noise in [-1, 1] plus one batch of real images.
        batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
        batch_x = next(db_iter)

        # train D
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x,
                               is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(
            zip(grads, discriminator.trainable_variables))

        # train G (reuses the same noise batch)
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss))
            # Sample a fresh 10x10 grid of fake images for inspection.
            z = tf.random.uniform([100, z_dim])
            fake_image = generator(z, training=False)
            img_path = os.path.join('images', 'gan-%d.png' % epoch)
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')
def main():
    """Train a WGAN on the anime-faces dataset (TensorFlow 2).

    Runs five critic (discriminator) updates per generator update and dumps
    a 10x10 sample grid every 100 iterations.
    """
    tf.random.set_seed(233)
    np.random.seed(233)
    assert tf.__version__.startswith('2.')

    # hyper parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 512
    learning_rate = 0.0005
    is_training = True

    # NOTE(review): hard-coded Windows dataset path — adjust per machine.
    img_path = glob.glob(r'C:\Users\Jackie\Downloads\faces\*.jpg')
    assert len(img_path) > 0
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))  # peek one batch to check the value range
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    dataset = dataset.repeat()  # endless stream; "epochs" below are really steps
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))
    # z_sample is drawn but never used below; sampling draws a fresh z.
    z_sample = tf.random.normal([100, z_dim])
    g_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                           beta_1=0.5)
    d_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                           beta_1=0.5)

    for epoch in range(epochs):
        # WGAN: several critic updates per single generator update.
        for _ in range(5):
            batch_z = tf.random.normal([batch_size, z_dim])
            batch_x = next(db_iter)

            # train D; d_loss_fn also returns the gradient-penalty term.
            with tf.GradientTape() as tape:
                d_loss, gp = d_loss_fn(generator, discriminator, batch_z,
                                       batch_x, is_training)
            grads = tape.gradient(d_loss, discriminator.trainable_variables)
            d_optimizer.apply_gradients(
                zip(grads, discriminator.trainable_variables))

        # Generator update with a fresh noise batch.
        batch_z = tf.random.normal([batch_size, z_dim])
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss),
                  'gp:', float(gp))
            z = tf.random.normal([100, z_dim])
            fake_image = generator(z, training=False)
            img_path = os.path.join('images', 'wgan-%d.png' % epoch)
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')
def __init__(self, z_dim, h_dim, learning_rate, scale, generator_output_layer):
    """Build the TF1 graph for a text-conditional WGAN with gradient penalty.

    Args:
        z_dim: latent noise dimensionality.
        h_dim: text-embedding (condition) dimensionality.
        learning_rate: Adam learning rate shared by both optimizers.
        scale: weight of the gradient-penalty term.
        generator_output_layer: output-layer config forwarded to Generator.
    """
    self.z_dim = z_dim
    self.h_dim = h_dim

    self.g_net = Generator(z_dim, h_dim, generator_output_layer)
    self.d_net = Discriminator(h_dim)

    # Graph placeholders.
    self.training = tf.placeholder(tf.bool, [])
    self.with_text = tf.placeholder(tf.float32, [None])
    self.x = tf.placeholder(tf.float32, [None, 64, 64, 3])     # real images
    self.x_w_ = tf.placeholder(tf.float32, [None, 64, 64, 3])  # mismatched images
    self.z = tf.placeholder(tf.float32, [None, self.z_dim])

    # true h
    self.h = tf.placeholder(tf.float32, [None, h_dim])
    # false h
    self.h_ = tf.placeholder(tf.float32, [None, h_dim])

    # false image
    self.x_ = self.g_net(self.z, self.h, self.training)

    # true image, true h (first call builds the variables; later calls reuse)
    self.d = self.d_net(self.x, self.h, self.training, reuse=False)
    # fake image, true h
    self.d_ = self.d_net(self.x_, self.h, self.training)
    # wrong image, true h
    self.d_w_ = self.d_net(self.x_w_, self.h, self.training)
    # true image, false h
    self.d_h_ = self.d_net(self.x, self.h_, self.training)

    # Generator maximizes the critic score on fakes.
    self.g_loss = - tf.reduce_mean(self.d_) #+ tf.reduce_mean(tf.square(self.x - self.x_))
    # Critic: real score minus the average of the three "negative" scores
    # (fake image / wrong caption / wrong image).
    self.d_loss = tf.reduce_mean(self.d) \
            - ( 1 * tf.reduce_mean(self.d_) + 1 * tf.reduce_mean(self.d_h_) + 1 * tf.reduce_mean(self.d_w_)) / (1 + 1 + 1)

    # penalty distribution for "improved wgan"
    # NOTE(review): epsilon is a scalar, so one interpolation coefficient is
    # shared by the whole batch; WGAN-GP usually samples one per example —
    # confirm this is intended.
    epsilon = tf.random_uniform([], 0.0, 1.0)
    x_hat = epsilon * self.x + (1 - epsilon) * self.x_
    d_hat = self.d_net(x_hat, self.h, self.training)

    dx = tf.gradients(d_hat, x_hat)[0]
    dx_norm = tf.sqrt(tf.reduce_sum(tf.square(dx), axis=[1,2,3]))
    ddx = scale * tf.reduce_mean(tf.square(dx_norm - 1.0))

    # Flip the sign so the optimizer minimizes; the penalty is added in.
    self.d_loss = -(self.d_loss - ddx)

    self.d_opt, self.g_opt = None, None
    # Run pending update ops (e.g. batch-norm statistics) before each step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        self.d_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
            .minimize(self.d_loss, var_list=self.d_net.vars)
        self.g_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
            .minimize(self.g_loss, var_list=self.g_net.vars)
def train():
    """Train a class-conditional GAN on MNIST-like data (TensorFlow 2).

    Relies on module-level hyperparameters (`z_dim`, `epochs`, `epochs_d`,
    `batch_size`, `learning_rate`, `is_training`) and module-level
    `dataset` / `saver` helpers.
    """
    tf.random.set_seed(22)
    np.random.seed(22)

    data_iter = dataset.load_dataset()

    # Multi-input models: build with a list of input shapes
    # (latent noise + 10-way one-hot class code).
    generator = Generator()
    generator.build(input_shape=[(None, z_dim), (None, 10)])
    discriminator = Discriminator()
    discriminator.build(input_shape=[(None, 28, 28, 1), (None, 10)])
    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        # 60000 samples per pass; each outer step feeds epochs_d D-batches.
        for i in range(int(60000 / batch_size / epochs_d)):
            # Noise in [0, 1) plus a random one-hot class code per sample.
            batch_z = tf.random.uniform([batch_size, z_dim], minval=0.,
                                        maxval=1.)
            batch_c = []
            for k in range(batch_size):
                batch_c.append(np.random.randint(0, 10))
            batch_c = tf.one_hot(tf.convert_to_tensor(batch_c), 10)

            # train D: several discriminator updates per generator update.
            for epoch_d in range(epochs_d):
                batch_data = next(data_iter)
                batch_x = batch_data[0]
                batch_y = batch_data[1]
                with tf.GradientTape() as tape:
                    d_loss = d_loss_fn(generator, discriminator, batch_z,
                                       batch_c, batch_x, batch_y, is_training)
                grads = tape.gradient(d_loss,
                                      discriminator.trainable_variables)
                d_optimizer.apply_gradients(
                    zip(grads, discriminator.trainable_variables))

            # train G
            with tf.GradientTape() as tape:
                g_loss = g_loss_fn(generator, discriminator, batch_z, batch_c,
                                   is_training)
            grads = tape.gradient(g_loss, generator.trainable_variables)
            g_optimizer.apply_gradients(
                zip(grads, generator.trainable_variables))

        print('epoch : {epoch} d-loss : {d_loss} g-loss : {g_loss}'.format(
            epoch=epoch, d_loss=d_loss, g_loss=g_loss))

        # Visualize a 10x10 grid: each row sweeps one of the 10 class codes.
        z = tf.random.uniform([100, z_dim], minval=0., maxval=1.)
        c = []
        for i in range(10):
            for j in range(10):
                c.append(i)
        c = tf.one_hot(tf.convert_to_tensor(c), 10)
        fake_image = generator([z, c], training=False)
        img_path = os.path.join('images', 'infogan-%d-final.png' % epoch)
        saver.save_image(fake_image.numpy(), img_path, 10)
def main():
    """Train a WGAN with gradient penalty on the anime-faces dataset (TF2)."""
    # Fix random seeds for reproducibility.
    tf.random.set_seed(22)
    np.random.seed(22)

    # Hyperparameters.
    z_dim = 100
    epochs = 3000000
    batch_size = 512  # size this according to your GPU capacity
    learning_rate = 0.002
    is_training = True

    # Load data (adjust the path for your machine) and build the networks.
    img_path = glob.glob(
        r'C:\Users\Jackie Loong\Downloads\DCGAN-LSGAN-WGAN-GP-DRAGAN-Tensorflow-2-master\data\faces\*.jpg'
    )
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    # print(dataset, img_shape)
    # sample = next(iter(dataset))
    dataset = dataset.repeat()  # endless stream; "epochs" are really steps
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))

    # Build the optimizers.
    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        # Random latent batch plus one batch of real images.
        batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
        batch_x = next(db_iter)

        # Train the discriminator (critic); gp is the gradient penalty.
        with tf.GradientTape() as tape:
            d_loss, gp = d_loss_fn(generator, discriminator, batch_z, batch_x,
                                   is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(
            zip(grads, discriminator.trainable_variables))

        # Train the generator.
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss),
                  'gp:', float(gp))
            z = tf.random.uniform([100, z_dim])
            fake_image = generator(z, training=False)
            # Save generated samples under images/ as wgan-<epoch>.png.
            img_path = os.path.join('images', 'wgan-%d.png' % epoch)
            # 10x10 grid of color images.
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')
def sample():
    """Draw one latent sample from a trained generator and save the image."""
    config, device = get_config()
    net = Generator()
    net.to(device)
    # Restore trained weights and switch to inference mode.
    net.load_state_dict(torch.load(config.model_path))
    net.eval()
    latent = rand_sampler(1, device)
    output = net(latent)
    save_image(output[0], '../samples/sample.png')
def __init__(self, z_dim, h_dim, learning_rate, scale, generator_output_layer):
    """Build the TF1 graph for a text-conditional GAN with sigmoid CE losses.

    Args:
        z_dim: latent noise dimensionality.
        h_dim: text-embedding (condition) dimensionality.
        learning_rate: Adam learning rate shared by both optimizers.
        scale: unused in this variant (kept for interface parity with the
            WGAN version).
        generator_output_layer: output-layer config forwarded to Generator.
    """
    self.z_dim = z_dim
    self.h_dim = h_dim

    self.g_net = Generator(z_dim, h_dim, generator_output_layer)
    self.d_net = Discriminator(h_dim)

    # Graph placeholders.
    self.training = tf.placeholder(tf.bool, [])
    self.with_text = tf.placeholder(tf.float32, [None])
    self.x = tf.placeholder(tf.float32, [None, 64, 64, 3])     # real images
    self.x_w_ = tf.placeholder(tf.float32, [None, 64, 64, 3])  # mismatched images
    self.z = tf.placeholder(tf.float32, [None, self.z_dim])

    # true h
    self.h = tf.placeholder(tf.float32, [None, h_dim])
    # false h
    self.h_ = tf.placeholder(tf.float32, [None, h_dim])

    # false image
    self.x_ = self.g_net(self.z, self.h, self.training)

    # true image, true h (first call builds the variables; later calls reuse)
    self.d = self.d_net(self.x, self.h, self.training, reuse=False)
    # fake image, true h
    self.d_ = self.d_net(self.x_, self.h, self.training)
    # wrong image, true h
    self.d_w_ = self.d_net(self.x_w_, self.h, self.training)
    # true image, false h
    self.d_h_ = self.d_net(self.x, self.h_, self.training)

    # Earlier WGAN-style losses, kept for reference:
    # self.g_loss = - tf.reduce_mean(self.d_) #+ tf.reduce_mean(tf.square(self.x - self.x_))
    # self.d_loss = tf.reduce_mean(self.d) \
    #         - ( 1 * tf.reduce_mean(self.d_) + 1 * tf.reduce_mean(self.d_h_) + 1 * tf.reduce_mean(self.d_w_)) / (1 + 1 + 1)

    # Generator: push fakes toward the "real" label.
    self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_, labels=tf.ones_like(self.d_)))
    # Discriminator: real pairs -> 1; fake image, wrong image and wrong
    # caption each -> 0, averaged over the three negative terms.
    self.d_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d, labels=tf.ones_like(self.d))) \
        + (tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_, labels=tf.zeros_like(self.d_))) + \
           tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_w_, labels=tf.zeros_like(self.d_w_))) +\
           tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_h_, labels=tf.zeros_like(self.d_h_))) ) / 3

    self.d_opt, self.g_opt = None, None
    # Run pending update ops (e.g. batch-norm statistics) before each step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        self.d_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
            .minimize(self.d_loss, var_list=self.d_net.vars)
        self.g_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
            .minimize(self.g_loss, var_list=self.g_net.vars)
def __init__(self, location, forest, parent=None, generator=None,
             name='unnamed'):
    """
    Constructor for a Tree.

    Args:
        location: (x, y) coordinates of this tree in the forest.
        forest: the owning Forest instance.
        parent: optional parent Tree; if given, it becomes the first neighbor.
        generator: optional Generator; a fresh one is created when omitted.
        name: display name for this tree.
    """
    # Build the default per-instance: the original `generator=Generator()`
    # default was evaluated once at definition time, so every Tree created
    # without an explicit generator silently shared the same object.
    self.generator = generator if generator is not None else Generator()
    self.discriminator = Discriminator()
    self.nn = load_model('new_nn.h5')
    self.age = 1
    self.location = location
    self.parent = parent
    self.forest = forest
    self.neighbors = ([parent] if parent else [])
    self.name = name
def load_model(filename):
    """Restore generator and discriminator weights from a checkpoint file.

    Rebinds the module-level `generator` and `discriminator` globals and
    puts both networks in eval mode. Relies on module-level `device`,
    `data` (with 'mapping' and 'max_length' entries) and `latent_size`.

    Args:
        filename: path to a torch checkpoint holding
            'generator_model_state_dict' and 'discriminator_model_state_dict'.
    """
    # map_location lets a GPU-trained checkpoint load on the current device.
    checkpoint = torch.load(filename, map_location=device)
    global generator
    global discriminator
    generator = Generator(len(data['mapping']), data['max_length'],
                          latent_size)
    generator.load_state_dict(checkpoint['generator_model_state_dict'])
    generator.eval()
    discriminator = Discriminator(len(data['mapping']))
    discriminator.load_state_dict(checkpoint['discriminator_model_state_dict'])
    discriminator.eval()
    print('Loaded model')
def main():
    '''
    This is a sample implementation of one generator training with three
    discriminators
    '''
    # Setting up the gan network system: one shared generator, three
    # independent discriminators.
    primary = GAN(generator=Generator(),
                  discriminator=Discriminator(),
                  nn=load_model("new_nn.h5"))
    gans = [primary]
    for _ in range(2):
        gans.append(GAN(generator=primary.G,
                        discriminator=Discriminator(),
                        nn=load_model("new_nn.h5")))

    # Set the number of training iterations for the network
    training_period = 5

    # Train the system: each iteration steps every GAN once, in order.
    for step in range(training_period):
        for gan in gans:
            gan.train(testid=step)
def generate_image(threshold=1.0):
    """Sample one voxel model from the trained generator and render it as PNG.

    Args:
        threshold: occupancy cutoff applied to the generator's raw output.

    Returns:
        A flask Response carrying the rendered PNG bytes.
    """
    model = Generator()
    model.load_state_dict(
        torch.load(SAVED_GENERATOR + '_state_dict', map_location='cpu'))
    noise = torch.randn(1, Z_SIZE)
    with torch.no_grad():
        generated_model = model(noise).squeeze().numpy()
    fig = plt.figure(figsize=(10, 10))
    # Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and removed
    # in 3.6; add_subplot is the supported way to obtain 3D axes.
    ax = fig.add_subplot(projection='3d')
    ax.voxels(generated_model >= threshold, facecolor='blue', edgecolor='k')
    output = io.BytesIO()
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype='image/png')
def main(args):
    """Run a trained vocoder over every wav in args.test_dir.

    Writes the resynthesized audio to args.save_dir and plots the original
    vs. generated spectrograms for each file.
    """
    generator = Generator(80)
    generator = generator.cuda()
    checkpoint = torch.load(args.load_dir)
    generator.load_state_dict(checkpoint['G'])

    wav_paths = glob.glob(os.path.join(args.test_dir, '*.wav'))
    for idx, wav_path in enumerate(tqdm(wav_paths)):
        mel, spectrogram = process_audio(wav_path)
        generated = generator(mel.cuda())
        generated = generated.squeeze().cpu()
        # Scale [-1, 1] float audio up to 16-bit integer range.
        waveform = (generated.detach().numpy() * 32768)
        generated_spec = librosa.stft(y=waveform, n_fft=1024, hop_length=256,
                                      win_length=1024)
        out_path = Path(args.save_dir) / ('generated-%d.wav' % idx)
        scipy.io.wavfile.write(out_path, 22050, waveform.astype('int16'))
        plot_stft(spectrogram, generated_spec, idx)
def build_model(self):
    """Create a generator and a discriminator."""
    self.G = Generator(self.g_conv_dim, self.c_dim, self.g_repeat_num)
    self.D = Discriminator(self.image_size, self.d_conv_dim, self.c_dim,
                           self.d_repeat_num)
    self.g_optimizer = torch.optim.Adam(self.G.parameters(), self.g_lr,
                                        [self.beta1, self.beta2])
    self.d_optimizer = torch.optim.Adam(self.D.parameters(), self.d_lr,
                                        [self.beta1, self.beta2])
    self.print_network(self.G, 'G')
    self.print_network(self.D, 'D')

    self.G.to(self.device)
    self.D.to(self.device)

    """Build the feature extractor"""
    # NOTE(review): the feature extractor is pinned to CUDA while G/D follow
    # self.device — this would fail on a CPU-only host; confirm intended.
    self.feature_model = f_model(model_path=DUMPED_MODEL,
                                 freeze_param=True).cuda()  #.cuda()
    self.feature_model.eval()
import argparse
import os
import torch
from torch import nn
from torchvision import utils
from gan import Generator
from helper import get_truncated_noise

# Just a simple script to create a video of shifting images

if __name__ == "__main__":
    # Wrap the generator for multi-GPU inference and restore the checkpoint.
    gen = nn.DataParallel(Generator().to('cuda'))
    save = torch.load('./chk-116000.pth')
    gen.load_state_dict(save["gen"])
    steps = save["step"]
    alpha = save["alpha"]

    # 60 truncated latent vectors of width 512 (truncation value 0.7).
    z = get_truncated_noise(60, 512, 0.7)

    # Per-resolution noise maps for 8 progressive levels: 4x4 up to 512x512.
    noise = []
    for i in range(8):
        size = 4 * 2 ** i
        noise.append(torch.randn(1, 1, size, size, device='cuda'))

    e = 0
    # NOTE(review): SOURCE appears truncated here — the body of this loop is
    # incomplete in this view.
    for i in range(59):
        start = z[i].unsqueeze(0)
def main():
    """Train a GAN on the anime-faces dataset (TensorFlow 2), periodically
    saving sample grids and model checkpoints."""
    tf.random.set_seed(3333)
    np.random.seed(3333)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')

    z_dim = 100  # length of the latent vector z
    epochs = 3000000  # number of training steps
    batch_size = 64
    learning_rate = 0.0002
    is_training = True

    # Collect the dataset image paths.
    img_path = glob.glob(r'C:\Users\jay_n\.keras\datasets\faces\*.jpg') + \
        glob.glob(r'C:\Users\jay_n\.keras\datasets\faces\*.png')
    print('images num:', len(img_path))

    # Build the dataset object.
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size, resize=64)
    print(dataset, img_shape)
    sample = next(iter(dataset))  # draw one sample batch
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    dataset = dataset.repeat(100)
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(4, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(4, 64, 64, 3))

    # Separate optimizers for the generator and the discriminator.
    g_optimizer = keras.optimizers.Adam(learning_rate=learning_rate,
                                        beta_1=0.5)
    d_optimizer = keras.optimizers.Adam(learning_rate=learning_rate,
                                        beta_1=0.5)
    # generator.load_weights('generator.ckpt')
    # discriminator.load_weights('discriminator.ckpt')
    # print('Loaded ckpt!!')

    d_losses, g_losses = [], []
    for epoch in range(epochs):
        # 1. Train the discriminator.
        for _ in range(1):
            # Sample latent vectors and one batch of real images.
            batch_z = tf.random.normal([batch_size, z_dim])
            batch_x = next(db_iter)
            # Discriminator forward pass.
            with tf.GradientTape() as tape:
                d_loss, _ = d_loss_fn(generator, discriminator, batch_z,
                                      batch_x, is_training)
            grads = tape.gradient(d_loss, discriminator.trainable_variables)
            d_optimizer.apply_gradients(
                zip(grads, discriminator.trainable_variables))

        # 2. Train the generator.
        # Sample fresh latent vectors.
        batch_z = tf.random.normal([batch_size, z_dim])
        # Generator forward pass.
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss))
            # Visualization: save a 10x10 grid of generated images.
            z = tf.random.normal([100, z_dim])
            fake_image = generator(z, training=False)
            img_path = os.path.join('gan_images', 'gan-%d.png' % epoch)
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')

            d_losses.append(float(d_loss))
            g_losses.append(float(g_loss))

        if epoch % 10000 == 1:
            generator.save_weights('generator.ckpt')
            discriminator.save_weights('discriminator.ckpt')
# Model / sampling dimensions.
style_size = 564
sample_batch = 4
z_size = 400


#########################################
def gen_rand_noise(batch_size, z_size, mean=0, std=0.001):
    """Draw a [batch_size, z_size] float32 Gaussian noise tensor."""
    z_sample = np.random.normal(mean, std,
                                size=[batch_size, z_size]).astype(np.float32)
    z = torch.from_numpy(z_sample)
    return z


#########################################
# Networks and losses; Vgg16 provides perceptual features.
G = Generator().cuda()
D = Discriminator().cuda()
Vgg = Vgg16().cuda()
bce = BCE_Loss()
mse = torch.nn.MSELoss()

# SGD (reduced rate) for D, Adam for G.
optimizer_d = torch.optim.SGD(D.parameters(), lr=lr * 0.4)
optimizer_g = torch.optim.Adam(G.parameters(), lr=lr, betas=(beta1, 0.9))

#########################################
#########################################
# NOTE(review): the training-loop body appears truncated in this view.
for epoch in range(max_epoch):
    for idx, (real_img, real_label, mask) in tqdm.tqdm(enumerate(trainloader)):
        # trainD: freeze G while updating D.
        make_trainable(D, True)
        make_trainable(G, False)
        D.zero_grad()
    # Tail of a noise-sampling helper whose `def` line lies outside this view:
    # returns [Nbatch, m, dim] float32 standard-normal noise.
    return np.random.normal(size=[Nbatch, m, dim]).astype(dtype=np.float32)


# ------- TRAIN MODELS --------
# Training hyperparameters pulled from the config dict.
num_epochs = config_d['epochs']
print_every = config_d['print_every']
z_size = config_d['noise_dim']
n_critic = config_d['critic_iter']
lr = config_d['lr']
beta1 = config_d['beta1']
beta2 = config_d['beta2']

D = Discriminator()
G = Generator(z_size=z_size)
print(D)
print(G)

# Move models to GPU when available.
if cuda:
    G.cuda()
    D.cuda()
    print('GPU available for training. Models moved to GPU')
else:
    print('Training on CPU.')

# Same Adam settings for both networks.
d_optimizer = optim.Adam(D.parameters(), lr=lr, betas=[beta1, beta2])
g_optimizer = optim.Adam(G.parameters(), lr=lr, betas=[beta1, beta2])

losses_train = []
losses_val = []
#Create and load the metric feature extractor network feature_extractor_network = models.resnet18(pretrained=False) modules = list( feature_extractor_network.children())[:-1] #Remove fully connected layer modules.append(nn.Flatten()) feature_extractor_network = nn.Sequential(*modules) feature_extractor_network = feature_extractor_network.to(device) feature_extractor_network.load_state_dict( torch.load(args.fe_model, map_location=device)) feature_extractor_network = feature_extractor_network.eval() #Setup networks generator_network = Generator(in_noise=args.noise_dem, in_features=args.feature_dem, ch=args.ch_multi, norm_type=args.norm_type).to(device) discriminator_network = Discriminator(channels=novel_images.shape[1], in_features=args.feature_dem, ch=args.ch_multi, norm_type=args.norm_type).to(device) #Setup optimizers d_optimizer = optim.Adam(discriminator_network.parameters(), lr=args.dlr, betas=(0.0, 0.999)) g_optimizer = optim.Adam(generator_network.parameters(), lr=args.glr, betas=(0.0, 0.999)) #Create the save directory if it does note exist
def train(args):
    """Train a MelGAN-style vocoder: Generator vs. MultiScale discriminator.

    Expects *.mel tensors under args.train_dir / args.valid_dir; writes
    validation wavs and checkpoints to args.save_dir at args.save_interval
    steps, and resumes from args.load_dir when provided.
    """
    mel_list = glob.glob(os.path.join(args.train_dir, '*.mel'))
    trainset = MelDataset(args.seq_len, mel_list, args.hop_length)
    train_loader = DataLoader(trainset, batch_size=args.batch_size,
                              num_workers=0, shuffle=True, drop_last=True)

    # Fixed validation mels, truncated to args.test_len frames each.
    test_mel = glob.glob(os.path.join(args.valid_dir, '*.mel'))
    testset = []
    for i in range(args.test_num):
        mel = torch.load(test_mel[i])
        mel = mel[:, :args.test_len]
        mel = mel.unsqueeze(0)
        testset.append(mel)

    G = Generator(80)  # 80 mel channels
    D = MultiScale()
    G = G.cuda()
    D = D.cuda()
    g_optimizer = optim.Adam(G.parameters(), lr=1e-4, betas=(0.5, 0.9))
    d_optimizer = optim.Adam(D.parameters(), lr=1e-4, betas=(0.5, 0.9))

    step, epochs = 0, 0
    if args.load_dir is not None:
        print("Loading checkpoint")
        ckpt = torch.load(args.load_dir)
        G.load_state_dict(ckpt['G'])
        g_optimizer.load_state_dict(ckpt['g_optimizer'])
        D.load_state_dict(ckpt['D'])
        d_optimizer.load_state_dict(ckpt['d_optimizer'])
        step = ckpt['step']
        epochs = ckpt['epoch']
        print('Load Status: Epochs %d, Step %d' % (epochs, step))

    torch.backends.cudnn.benchmark = True
    start = time.time()
    try:
        # itertools.count resumes epoch numbering from a loaded checkpoint.
        for epoch in itertools.count(epochs):
            for (mel, audio) in train_loader:
                mel = mel.cuda()
                audio = audio.cuda()

                # Discriminator: hinge loss on each scale's final output.
                d_real = D(audio)
                d_loss_real = 0
                for scale in d_real:
                    d_loss_real += F.relu(1 - scale[-1]).mean()

                fake_audio = G(mel)
                # detach: no generator gradients through the D update.
                d_fake = D(fake_audio.cuda().detach())
                d_loss_fake = 0
                for scale in d_fake:
                    d_loss_fake += F.relu(1 + scale[-1]).mean()

                d_loss = d_loss_real + d_loss_fake
                D.zero_grad()
                d_loss.backward()
                d_optimizer.step()

                # Generator: hinge adversarial term over all scales.
                d_fake = D(fake_audio.cuda())
                g_loss = 0
                for scale in d_fake:
                    g_loss += -scale[-1].mean()

                # Feature Matching: L1 between intermediate D activations
                # for real vs. fake audio.
                feature_loss = 0
                # feat_weights = 4.0 / 5.0  # discriminator block size + 1
                # D_weights = 1.0 / 3.0  # multi scale size
                # wt = D_weights * feat_weights  # not in paper
                for i in range(3):
                    for j in range(len(d_fake[i]) - 1):
                        feature_loss += F.l1_loss(d_fake[i][j],
                                                  d_real[i][j].detach())
                g_loss += args.lambda_feat * feature_loss

                G.zero_grad()
                g_loss.backward()
                g_optimizer.step()
                step += 1

                if step % args.log_interval == 0:
                    print(
                        'Epoch: %-5d, Step: %-7d, D_loss: %.05f, G_loss: %.05f, ms/batch: %5.2f'
                        % (epoch, step, d_loss, g_loss,
                           1000 * (time.time() - start) / args.log_interval))
                    start = time.time()

                if step % args.save_interval == 0:
                    root = Path(args.save_dir)
                    # Render the fixed validation mels to wav for listening.
                    with torch.no_grad():
                        for i, mel_test in enumerate(testset):
                            g_audio = G(mel_test.cuda())
                            g_audio = g_audio.squeeze().cpu()
                            audio = (g_audio.numpy() * 32768)
                            scipy.io.wavfile.write(
                                root / ('generated-%d-%dk-%d.wav'
                                        % (epoch, step // 1000, i)),
                                22050, audio.astype('int16'))
                    print("Saving checkpoint")
                    torch.save(
                        {
                            'G': G.state_dict(),
                            'g_optimizer': g_optimizer.state_dict(),
                            'D': D.state_dict(),
                            'd_optimizer': d_optimizer.state_dict(),
                            'step': step,
                            'epoch': epoch,
                        }, root / ('ckpt-%dk.pt' % (step // 1000)))
    except Exception as e:
        # Surface the traceback instead of crashing the calling process.
        traceback.print_exc()
def main():
    """Train a GAN on MNIST (TensorFlow 2), saving a validation grid of
    generated digits every 100 steps."""
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')

    # hyper parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 128
    learning_rate = 0.0002
    is_training = True

    # for validation purpose
    assets_dir = './images'
    if not os.path.isdir(assets_dir):
        os.makedirs(assets_dir)
    val_block_size = 10
    val_size = val_block_size * val_block_size

    # load mnist data
    (x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
    x_train = x_train.astype(np.float32) / 255.
    db = tf.data.Dataset.from_tensor_slices(x_train).shuffle(
        batch_size * 4).batch(batch_size).repeat()
    db_iter = iter(db)
    inputs_shape = [-1, 28, 28, 1]

    # create generator & discriminator
    generator = Generator()
    generator.build(input_shape=(batch_size, z_dim))
    generator.summary()
    discriminator = Discriminator()
    discriminator.build(input_shape=(batch_size, 28, 28, 1))
    discriminator.summary()

    # prepare optimizer
    d_optimizer = keras.optimizers.Adam(learning_rate=learning_rate,
                                        beta_1=0.5)
    g_optimizer = keras.optimizers.Adam(learning_rate=learning_rate,
                                        beta_1=0.5)

    for epoch in range(epochs):
        # no need labels
        batch_x = next(db_iter)

        # rescale images to -1 ~ 1
        batch_x = tf.reshape(batch_x, shape=inputs_shape)
        # -1 - 1
        batch_x = batch_x * 2.0 - 1.0

        # Sample random noise for G
        batch_z = tf.random.uniform(shape=[batch_size, z_dim], minval=-1.,
                                    maxval=1.)

        # Discriminator update.
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x,
                               is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(
            zip(grads, discriminator.trainable_variables))

        # Generator update (same noise batch).
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd loss:', float(d_loss), 'g loss:', float(g_loss))

            # validation results at every epoch
            val_z = np.random.uniform(-1, 1, size=(val_size, z_dim))
            fake_image = generator(val_z, training=False)
            image_fn = os.path.join('images',
                                    'gan-val-{:03d}.png'.format(epoch + 1))
            save_result(fake_image.numpy(), val_block_size, image_fn,
                        color_mode='L')
def main():
    """Train a DCGAN on MNIST with Chainer, driven by command-line options."""
    parser = argparse.ArgumentParser(description='DCGAN for mnist')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='# of each mini-batch size')
    parser.add_argument('--epoch', '-e', type=int, default=500,
                        help='# of epoch')
    parser.add_argument(
        '--gpu', '-g', type=int, default=-1,
        help='GPU ID (if you want to use gpu, set positive value)')
    parser.add_argument('--dataset', '-d', type=str, default='',
                        help='path of training dataset path.')
    parser.add_argument('--out_dir', '-o', type=str, default='result',
                        help='path of output the result.')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='# of hidden unit(z)')
    args = parser.parse_args()

    # Log the effective configuration.
    logger = set_logger()
    logger.debug('=' * 10)
    logger.debug('GPU: {}'.format(args.gpu))
    logger.debug('#batchsize: {}'.format(args.batchsize))
    logger.debug('#epoch: {}'.format(args.epoch))
    logger.debug('n_hidden: {}'.format(args.n_hidden))
    logger.debug('dataset: {}'.format(args.dataset))
    logger.debug('out_dir: {}'.format(args.out_dir))
    logger.debug('=' * 10)
    print()

    logger.debug('setup models')
    # Setup networks
    generator = Generator(z_dim=args.n_hidden)
    discriminator = Discriminator(z_dim=args.n_hidden)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        generator.to_gpu()
        discriminator.to_gpu()

    # Setup optimizers
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        # Adam with weight decay, per the DCGAN recipe.
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001),
                           'hook_dec')
        return optimizer

    opt_generator = make_optimizer(generator)
    opt_discriminator = make_optimizer(discriminator)

    if args.dataset == '':
        # Default: built-in MNIST, unlabeled, 3-dim samples scaled to [0, 255].
        train, _ = chainer.datasets.get_mnist(withlabel=False, ndim=3,
                                              scale=255.)
    else:
        # NOTE(review): a custom --dataset path is accepted but ignored.
        pass

    # Setup an iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Setup an Updater
    updater = GANUpdater(models=(generator, discriminator),
                         iterator=train_iter,
                         optimizer={
                             'gen': opt_generator,
                             'dis': opt_discriminator
                         },
                         device=args.gpu)

    # Setup a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.out_dir)
    snapshot_interval = (1000, 'iteration')
    display_interval = (100, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        generator, 'generator_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        discriminator, 'discriminator_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=snapshot_interval))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'gen/loss',
        'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Start train
    logger.debug('Training Start.')
    print()
    print()
    trainer.run()
def main():
    """Train a WGAN-GP generator/critic pair on 64x64 anime-face images.

    Runs `ratios` critic updates per generator update, prints the averaged
    losses and gradient penalty every 100 epochs together with a sample
    image grid from a fixed noise batch, and checkpoints both networks
    every 2000 epochs.

    Bug fixed: the checkpoint condition was `epoch + 1 % 2000 == 0`,
    which parses as `epoch + (1 % 2000) == 0` (i.e. `epoch + 1 == 0`)
    and is never true, so weights were never saved.
    """
    tf.random.set_seed(233)
    np.random.seed(233)

    # Hyper-parameters.
    z_dim = 100
    epochs = 3000000
    batch_size = 512
    learning_rate = 2e-4
    # ratios = D steps:G steps
    ratios = 2

    img_path = glob.glob(os.path.join('faces', '*.jpg'))
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    dataset = dataset.repeat()
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    # generator.load_weights(os.path.join('checkpoints', 'generator-5000'))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))
    # discriminator.load_weights(os.path.join('checkpoints', 'discriminator-5000'))

    g_optimizer = tf.optimizers.Adam(learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate, beta_1=0.5)

    # a fixed noise for sampling
    z_sample = tf.random.normal([100, z_dim])

    g_loss_meter = keras.metrics.Mean()
    d_loss_meter = keras.metrics.Mean()
    gp_meter = keras.metrics.Mean()

    for epoch in range(epochs):
        # train D: `ratios` critic steps per generator step.
        for step in range(ratios):
            batch_z = tf.random.normal([batch_size, z_dim])
            batch_x = next(db_iter)
            with tf.GradientTape() as tape:
                d_loss, gp = d_loss_fn(generator, discriminator, batch_z,
                                       batch_x)
                d_loss_meter.update_state(d_loss)
                gp_meter.update_state(gp)
            gradients = tape.gradient(d_loss,
                                      discriminator.trainable_variables)
            d_optimizer.apply_gradients(
                zip(gradients, discriminator.trainable_variables))

        # train G
        batch_z = tf.random.normal([batch_size, z_dim])
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z)
            g_loss_meter.update_state(g_loss)
        gradients = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(
            zip(gradients, generator.trainable_variables))

        if epoch % 100 == 0:
            fake_image = generator(z_sample, training=False)
            print(epoch, 'd-loss:', d_loss_meter.result().numpy(),
                  'g-loss', g_loss_meter.result().numpy(),
                  'gp', gp_meter.result().numpy())
            d_loss_meter.reset_states()
            g_loss_meter.reset_states()
            gp_meter.reset_states()
            # save generated image samples
            img_path = os.path.join('images_wgan_gp',
                                    'wgan_gp-%d.png' % epoch)
            save_result(fake_image.numpy(), 10, img_path, color_mode='P')

        # FIX: parenthesized — the original `epoch + 1 % 2000 == 0`
        # evaluated as `epoch + 1 == 0` and never checkpointed.
        if (epoch + 1) % 2000 == 0:
            generator.save_weights(
                os.path.join('checkpoints_gp', 'generator-%d' % epoch))
            discriminator.save_weights(
                os.path.join('checkpoints_gp', 'discriminator-%d' % epoch))
def main():
    """CLI entry point: train a Transformer-based WGAN-GP trajectory model.

    Parses hyper-parameters, builds train/val/test dataloaders from the
    project's `baselineUtils`, computes displacement normalization
    statistics, then alternates critic and generator updates, logging to
    TensorBoard and periodically writing weights and sample predictions.
    """
    parser = argparse.ArgumentParser(
        description='Train the individual Transformer model')
    parser.add_argument('--dataset_folder', type=str, default='datasets')
    parser.add_argument('--dataset_name', type=str, default='zara1')
    parser.add_argument('--obs', type=int,
                        default=12)  # size of history steps in frames
    parser.add_argument('--preds', type=int,
                        default=8)  # size of predicted trajectory in frames
    parser.add_argument('--point_dim', type=int,
                        default=3)  # number of dimensions (x,y,z) is 3
    parser.add_argument('--emb_size', type=int, default=512)
    parser.add_argument('--heads', type=int, default=8)
    parser.add_argument('--layers', type=int, default=6)
    parser.add_argument('--dropout', type=float, default=0.1)
    parser.add_argument('--cpu', action='store_true')
    parser.add_argument('--val_size', type=int, default=0)
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--max_epoch', type=int, default=1500)
    parser.add_argument('--batch_size', type=int, default=70)
    parser.add_argument('--validation_epoch_start', type=int, default=30)
    parser.add_argument('--resume_train', action='store_true')
    parser.add_argument('--delim', type=str, default='\t')
    parser.add_argument('--name', type=str, default="zara1")
    parser.add_argument('--factor', type=float, default=1.)
    parser.add_argument('--save_step', type=int, default=1)
    parser.add_argument('--warmup', type=int, default=2)
    parser.add_argument('--evaluate', type=bool, default=True)
    parser.add_argument('--gen_pth', type=str)
    parser.add_argument('--crit_pth', type=str)
    parser.add_argument('--visual_step', type=int, default=10)
    parser.add_argument('--grad_penality', type=float, default=10)
    parser.add_argument('--crit_repeats', type=int, default=5)
    parser.add_argument('--lambda_recon', type=float, default=0.1)
    parser.add_argument('--z_dim', type=int, default=3)
    parser.add_argument('--stop_recon', type=int, default=2)
    args = parser.parse_args()
    model_name = args.name

    def mkdir(path):
        # Best-effort directory creation; already-existing dirs are ignored.
        try:
            os.mkdir(path)
        except:
            pass

    paths = [
        'models', 'models/gen', 'models/crit', 'models/gan',
        f'models/gen/{args.name}', f'models/crit/{args.name}',
        f'models/gan/{args.name}', 'output', 'output/gan',
        f'output/gan/{args.name}'
    ]
    for path in paths:
        mkdir(path)

    log = SummaryWriter('logs/gan_%s' % model_name)

    device = torch.device("cuda")
    if args.cpu or not torch.cuda.is_available():
        device = torch.device("cpu")
    args.verbose = True

    ## creation of the dataloaders for train and validation
    if args.val_size == 0:
        train_dataset, _ = baselineUtils.create_dataset(args.dataset_folder,
                                                        args.dataset_name,
                                                        0,
                                                        args.obs,
                                                        args.preds,
                                                        delim=args.delim,
                                                        train=True,
                                                        verbose=args.verbose)
        val_dataset, _ = baselineUtils.create_dataset(args.dataset_folder,
                                                      args.dataset_name,
                                                      0,
                                                      args.obs,
                                                      args.preds,
                                                      delim=args.delim,
                                                      train=False,
                                                      verbose=args.verbose)
    else:
        train_dataset, val_dataset = baselineUtils.create_dataset(
            args.dataset_folder,
            args.dataset_name,
            args.val_size,
            args.obs,
            args.preds,
            delim=args.delim,
            train=True,
            verbose=args.verbose)

    test_dataset, _ = baselineUtils.create_dataset(args.dataset_folder,
                                                   args.dataset_name,
                                                   0,
                                                   args.obs,
                                                   args.preds,
                                                   delim=args.delim,
                                                   train=False,
                                                   eval=True,
                                                   verbose=args.verbose)

    # import individual_TF
    # model=individual_TF.IndividualTF(3, 4, 4, N=args.layers,
    #     d_model=args.emb_size, d_ff=2048, h=args.heads,
    #     dropout=args.dropout,mean=[0,0],std=[0,0]).to(device)

    tr_dl = torch.utils.data.DataLoader(train_dataset,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        num_workers=0)
    val_dl = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=0)
    test_dl = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=args.batch_size,
                                          shuffle=False,
                                          num_workers=0)

    #optim = SGD(list(a.parameters())+list(model.parameters())+list(generator.parameters()),lr=0.01)
    #sched=torch.optim.lr_scheduler.StepLR(optim,0.0005)
    # optim = NoamOpt(args.emb_size, args.factor, len(tr_dl)*args.warmup,
    #     torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    #optim=Adagrad(list(a.parameters())+list(model.parameters())+list(generator.parameters()),lr=0.01,lr_decay=0.001)
    epoch = 0

    # Global mean/std of the last 3 channels (displacements) over src+trg.
    #mean=train_dataset[:]['src'][:,1:,2:4].mean((0,1))
    mean = torch.cat((train_dataset[:]['src'][:, 1:, -3:],
                      train_dataset[:]['trg'][:, :, -3:]), 1).mean((0, 1))
    #std=train_dataset[:]['src'][:,1:,2:4].std((0,1))
    std = torch.cat((train_dataset[:]['src'][:, 1:, -3:],
                     train_dataset[:]['trg'][:, :, -3:]), 1).std((0, 1))
    means = []
    stds = []
    # Per-source-dataset statistics, then averaged; note this overwrites
    # the global mean/std computed just above.
    for i in np.unique(train_dataset[:]['dataset']):
        ind = train_dataset[:]['dataset'] == i
        means.append(
            torch.cat((train_dataset[:]['src'][ind, 1:, -3:],
                       train_dataset[:]['trg'][ind, :, -3:]),
                      1).mean((0, 1)))
        stds.append(
            torch.cat((train_dataset[:]['src'][ind, 1:, -3:],
                       train_dataset[:]['trg'][ind, :, -3:]), 1).std((0, 1)))
    mean = torch.stack(means).mean(0)
    std = torch.stack(stds).mean(0)
    # Persist the normalization so inference can undo it.
    scipy.io.savemat(f'models/gan/{args.name}/norm.mat', {
        'mean': mean.cpu().numpy(),
        'std': std.cpu().numpy()
    })

    from gan import Generator, Critic, get_gradient, gradient_penalty, get_crit_loss, get_gen_loss
    from tqdm import tqdm
    c_lambda = args.grad_penality
    crit_repeats = args.crit_repeats

    gen = Generator(args.obs - 1,
                    args.preds,
                    args.point_dim,
                    args.point_dim,
                    args.point_dim,
                    z_dim=args.z_dim,
                    N=args.layers,
                    d_model=args.emb_size,
                    d_ff=2048,
                    h=args.heads,
                    dropout=args.dropout,
                    device=device).to(device)
    gen_opt = torch.optim.Adam(gen.parameters())
    # gen_opt = NoamOpt(args.emb_size, args.factor, len(tr_dl)*args.warmup,
    #     torch.optim.Adam(gen.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    crit = Critic(args.point_dim,
                  args.obs - 1 + args.preds,
                  N=args.layers,
                  d_model=args.emb_size,
                  d_ff=2048,
                  h=args.heads,
                  dropout=args.dropout,
                  device=device).to(device)
    crit_opt = torch.optim.Adam(crit.parameters())
    # crit_opt = NoamOpt(args.emb_size, args.factor, len(tr_dl)*args.warmup,
    #     torch.optim.Adam(crit.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    if args.resume_train:
        gen.load_state_dict(
            torch.load(f'models/gen/{args.name}/{args.gen_pth}'))
        crit.load_state_dict(
            torch.load(f'models/crit/{args.name}/{args.crit_pth}'))

    cur_step = -1
    for epoch in range(args.max_epoch):
        gen.train()
        crit.train()
        for id_b, batch in enumerate(tqdm(tr_dl, desc=f"Epoch {epoch}")):
            cur_step += 1
            # Normalize observed (src) and target (trg) displacement channels.
            src = (batch['src'][:, 1:, -3:].to(device) -
                   mean.to(device)) / std.to(device)
            tgt = (batch['trg'][:, :, -3:].to(device) -
                   mean.to(device)) / std.to(device)
            batch_size = src.shape[0]
            mean_iteration_critic_loss = 0
            for _ in range(crit_repeats):
                ### Update critic ###
                crit_opt.zero_grad()
                fake_noise = gen.sample_noise(batch_size)
                fake = gen(src, fake_noise)
                fake_seq = torch.cat((src, fake.detach()), dim=1)
                real_seq = torch.cat((src, tgt), dim=1)
                crit_fake_pred = crit(fake_seq)
                crit_real_pred = crit(real_seq)
                # Reconstruction term active only before `stop_recon` epochs.
                crit_loss = get_crit_loss(
                    crit, src, tgt, fake.detach(), crit_fake_pred,
                    crit_real_pred, c_lambda,
                    args.lambda_recon if epoch < args.stop_recon else 0.)
                mean_iteration_critic_loss += crit_loss.item() / crit_repeats
                crit_loss.backward(retain_graph=True)
                crit_opt.step()
            log.add_scalar('Loss/train/crit', mean_iteration_critic_loss,
                           cur_step)

            ### Update generator ###
            gen_opt.zero_grad()
            fake_noise_2 = gen.sample_noise(batch_size)
            fake_2 = gen(src, fake_noise_2)
            fake_2_seq = torch.cat((src, fake_2), dim=1)
            crit_fake_pred = crit(fake_2_seq)
            gen_loss = get_gen_loss(
                crit_fake_pred, fake_2, tgt,
                args.lambda_recon if epoch < args.stop_recon else 0.)
            gen_loss.backward()
            gen_opt.step()
            log.add_scalar('Loss/train/gen', gen_loss.item(), cur_step)

            if cur_step % args.visual_step == 0:
                # Dump input / ground-truth / prediction; predictions are
                # de-normalized and cumulatively summed back to absolute
                # coordinates from the last observed position.
                scipy.io.savemat(
                    f"output/gan/{args.name}/step_{cur_step:05}.mat", {
                        'input':
                        batch['src'][:, 1:, :3].detach().cpu().numpy(),
                        'gt':
                        batch['trg'][:, :, :3].detach().cpu().numpy(),
                        'pr':
                        (fake_2 * std.to(device) +
                         mean.to(device)).detach().cpu().numpy().cumsum(1) +
                        batch['src'][:, -1:, :3].cpu().numpy()
                    })
        if epoch % args.save_step == 0:
            torch.save(gen.state_dict(),
                       f'models/gen/{args.name}/{cur_step:05}.pth')
            torch.save(crit.state_dict(),
                       f'models/crit/{args.name}/{cur_step:05}.pth')
def train(args):
    """Train a MelGAN-style vocoder whose generator predicts concatenated
    wavelet (db1, level-3) coefficients of the waveform.

    Real audio is decomposed with ``pywt.wavedec`` and the coefficient
    bands concatenated into one tensor to match the generator output.
    Every ``args.log_interval`` steps the losses are printed; every 10
    steps the test mels are vocoded (inverse wavelet transform) to .wav
    files and a checkpoint is written.

    Bug fixed: the log condition was ``step % args.log_interval != 0``,
    which printed on almost every step and made the "ms/batch" figure
    (elapsed time divided by ``log_interval``) meaningless; it now logs
    once per interval as the message format implies.
    """
    mel_list = glob.glob(os.path.join(args.train_dir, '*.mel'))
    trainset = MelDataset(args.seq_len, mel_list, args.hop_length)
    train_loader = DataLoader(trainset,
                              batch_size=args.batch_size,
                              num_workers=0,
                              shuffle=False,
                              drop_last=True)
    test_mel = glob.glob(os.path.join(args.valid_dir, '*.mel'))
    print(args.valid_dir)
    print(type(test_mel))
    print(test_mel[0])
    # Fixed evaluation set: first `test_num` mels, truncated to `test_len`.
    testset = []
    for i in range(args.test_num):
        mel = torch.load(test_mel[i])
        mel = mel[:, :args.test_len]
        mel = mel.unsqueeze(0)
        testset.append(mel)
    #print(testset[0].shape)

    G = Generator(80)
    D = MultiScale()
    G = G.cuda()
    D = D.cuda()
    g_optimizer = optim.Adam(G.parameters(), lr=1e-4, betas=(0.5, 0.9))
    d_optimizer = optim.Adam(D.parameters(), lr=1e-4, betas=(0.5, 0.9))
    step, epochs = 0, 0

    if args.load_dir is not None:
        print("Loading checkpoint")
        ckpt = torch.load(args.load_dir)
        G.load_state_dict(ckpt['G'])
        g_optimizer.load_state_dict(ckpt['g_optimizer'])
        D.load_state_dict(ckpt['D'])
        d_optimizer.load_state_dict(ckpt['d_optimizer'])
        step = ckpt['step']
        epochs = ckpt['epoch']
        print('Load Status: Epochs %d, Step %d' % (epochs, step))

    torch.backends.cudnn.benchmark = True
    start = time.time()
    try:
        for epoch in itertools.count(1):
            for idx, (mel, audio) in enumerate(train_loader):
                mel = mel.cuda()
                # Replace the raw waveform by its concatenated db1 wavelet
                # coefficient bands so it matches the generator's output.
                coeffs = pywt.wavedec(audio, 'db1', level=3, mode='periodic')
                c1, c2, c3, c4 = coeffs
                temp = numpy.concatenate((c1[0][0], c2[0][0]), axis=0)
                temp = numpy.concatenate((temp, c3[0][0]), axis=0)
                temp = numpy.concatenate((temp, c4[0][0]), axis=0)
                arr2 = numpy.array([[temp]])
                arr3 = torch.from_numpy(arr2).float()
                audio = arr3.cuda()

                # Discriminator (hinge loss over all scales).
                d_real = D(audio)
                #print(type(d_real))
                d_loss_real = 0
                for scale in d_real:
                    d_loss_real += F.relu(1 - scale[-1]).mean()

                fake_audio = G(mel)
                #############################
                #print(len(d_fake[0])) -----> output: 7
                #print(len(d_fake)) -----> output: 3
                d_fake = D(fake_audio.cuda().detach())
                d_loss_fake = 0
                for scale in d_fake:
                    d_loss_fake += F.relu(1 + scale[-1]).mean()

                d_loss = d_loss_real + d_loss_fake
                D.zero_grad()
                d_loss.backward()
                d_optimizer.step()

                # Generator (adversarial + feature-matching losses).
                d_fake = D(fake_audio.cuda())
                g_loss = 0
                for scale in d_fake:
                    g_loss += -scale[-1].mean()
                #print(g_loss)

                # Feature Matching
                feature_loss = 0
                # feat_weights = 4.0 / 5.0  # discriminator block size + 1
                # D_weights = 1.0 / 3.0  # multi scale size
                # wt = D_weights * feat_weights  # not in paper
                # Only the first discriminator scale is matched here.
                for i in range(1):
                    for j in range(len(d_fake[i]) - 1):
                        feature_loss += F.l1_loss(d_fake[i][j],
                                                  d_real[i][j].detach())
                g_loss += args.lambda_feat * feature_loss

                G.zero_grad()
                g_loss.backward()
                g_optimizer.step()
                step += 1

                # FIX: was `!= 0`, which logged on nearly every step.
                if step % args.log_interval == 0:
                    print(
                        'Epoch: %-5d, Step: %-7d, D_loss: %.05f, G_loss: %.05f, ms/batch: %5.2f'
                        % (epoch, step, d_loss, g_loss,
                           1000 * (time.time() - start) / args.log_interval))
                    start = time.time()
                '''
                if step % args.save_interval == 0:
                    root = Path(args.save_dir)
                    with torch.no_grad():
                        for i, mel_test in enumerate(testset):
                            g_audio = G(mel_test.cuda())
                            g_audio = g_audio.squeeze().cpu()
                            audio = (g_audio.numpy() * 32768)
                            scipy.io.wavfile.write(root / ('generated-%d-%dk-%d.wav' % (epoch, step // 1000, i)),
                                22050,
                                audio.astype('int16'))
                '''
                if step % 10 == 0:
                    root = Path(args.save_dir)
                    with torch.no_grad():
                        for i, mel_test in enumerate(testset):
                            g_audio = G(mel_test.cuda())
                            g_audio = g_audio.squeeze().cpu().clone().numpy()
                            # Split the generated vector back into the four
                            # wavelet bands (1/8, 1/8, 1/4, 1/2 of length)
                            # and invert the transform to a waveform.
                            std_ = int(g_audio.shape[0] / 8)
                            coeffs_ = [
                                g_audio[0:std_], g_audio[std_:std_ * 2],
                                g_audio[std_ * 2:std_ * 4],
                                g_audio[std_ * 4:std_ * 8]
                            ]
                            y = pywt.waverec(coeffs_, 'db1', mode='periodic')
                            # y = numpy.asarray(y,dtype=numpy.int16)
                            y = numpy.int16(y / numpy.max(numpy.abs(y)) *
                                            32767)
                            wavfile.write(
                                root / ('g211enerated-%d-%dk-%d.wav' %
                                        (epoch, step // 10, i)), 22050, y)
                    print("Saving checkpoint")
                    torch.save(
                        {
                            'G': G.state_dict(),
                            'g_optimizer': g_optimizer.state_dict(),
                            'D': D.state_dict(),
                            'd_optimizer': d_optimizer.state_dict(),
                            'step': step,
                            'epoch': epoch,
                        }, root / ('ckpt-%dk.pt' % (step // 1000)))
    except Exception as e:
        traceback.print_exc()
def train(config, checkpoint=None):
    """Progressively train a StyleGAN-style generator/critic pair.

    Walks through image-size progression steps (each with its own batch
    size and epoch count), fading each new resolution in over the first
    `fade_percentage` of the step, alternating `critic_repeats` critic
    updates with one generator update, and periodically displaying
    samples and writing checkpoints.

    Args:
        config: dict-like run configuration (see the `config.get` keys below).
        checkpoint: optional path to a saved state to resume from.

    Bug fixed: the critic loss was computed but `backward()` was never
    called before `critic_opt.step()`, so no gradients ever reached the
    critic and it never trained. `c_loss.backward()` is now invoked
    before the optimizer step.
    """
    # Load constants.
    c_lambda = int(config.get("gradient_lambda", 10))
    noise_size = int(config.get("noise_length", 512))
    device = config.get("device", "cuda")
    beta_1 = float(config.get("beta_1", 0.00))
    beta_2 = float(config.get("beta_2", 0.99))
    learning_rate = float(config.get("lr", 0.001))
    critic_repeats = int(config.get("critic_repeats", 1))
    use_r1_loss = str(config.get("use_r1", "True")) == "True"
    num_workers = int(config.get("dataloader_threads", 2))
    display_step = int(config.get("display_step", 250))
    checkpoint_step = int(config.get("checkpoint_step", 2000))
    refresh_stat_step = int(config.get("refresh_stat_step", 5))

    # The batch size in each image size progression.
    batch_progression = config.get("batch_progression").split(",")
    batch_progression = list(map(int, batch_progression))
    # The number of epochs in each image size progression.
    epoch_progresson = config.get("epoch_progression").split(",")
    epoch_progresson = list(map(int, epoch_progresson))
    # Percentage of each step that will be a fade in.
    fade_in_percentage = float(config.get("fade_percentage", 0.5))

    transformation = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
        transforms.ConvertImageDtype(float),
    ])

    # Path to dataset.
    data_path = config.get("data", None)
    if not os.path.exists(os.path.join(data_path, "prepared")):
        raise OSError("Did not detect prepared dataset!")

    # Initialize Generator; the mapping network (to_w_noise) trains with a
    # 100x smaller learning rate, as in StyleGAN.
    gen = Generator().to(device)
    gen_opt = torch.optim.Adam(
        [
            {
                "params": gen.to_w_noise.parameters(),
                "lr": (learning_rate * 0.01),
            },
            {
                "params": gen.gen_blocks.parameters()
            },
            {
                "params": gen.to_rgbs.parameters()
            },
        ],
        lr=learning_rate,
        betas=(beta_1, beta_2),
    )
    gen = nn.DataParallel(gen)
    gen.train()

    # Initialize Critic
    critic = Critic().to(device)
    critic_opt = torch.optim.Adam(critic.parameters(),
                                  lr=learning_rate,
                                  betas=(beta_1, beta_2))
    critic = nn.DataParallel(critic)
    critic.train()

    # Create a constant set of noise vectors to show image progression.
    show_noise = get_truncated_noise(25, 512, 0.75).to(device)

    # Some other variables to track.
    iters = 0
    c_loss_history = []
    g_loss_history = []

    if checkpoint is not None:
        save = torch.load(checkpoint)
        gen.load_state_dict(save["gen"])
        critic.load_state_dict(save["critic"])
        iters = save["iter"]
        im_count = save["im_count"]
        last_step = save["step"]
        last_epoch = save["epoch"]
    else:
        last_step = None
        last_epoch = None

    for index, step_epochs in enumerate(epoch_progresson):
        # Skip progression steps already completed by a loaded checkpoint.
        if last_step is not None and index + 1 < last_step:
            continue
        steps = int(index + 1)
        im_count = 0
        images = datasets.ImageFolder(
            os.path.join(data_path, "prepared", f"set_{steps}"),
            transformation)
        dataset = torch.utils.data.DataLoader(
            images,
            batch_size=batch_progression[index],
            shuffle=True,
            num_workers=num_workers,
        )
        # NOTE(review): fade_in is measured in batches (len(dataset)) while
        # im_count accumulates images — confirm the units are intended.
        fade_in = fade_in_percentage * step_epochs * len(dataset)
        print(f"STARTING STEP #{steps}")
        for epoch in range(step_epochs):
            if last_epoch is not None and epoch < last_epoch:
                continue
            else:
                last_epoch = None
            pbar = tqdm(dataset)
            for real_im, _ in pbar:
                cur_batch_size = len(real_im)

                # --- Critic update(s): freeze G, train C ---
                set_requires_grad(critic, True)
                set_requires_grad(gen, False)
                for i in range(critic_repeats):
                    z_noise = get_truncated_noise(cur_batch_size, noise_size,
                                                  0.75).to(device)
                    # alpha None signals the fade-in has finished.
                    alpha = im_count / fade_in
                    if alpha > 1.0:
                        alpha = None
                    fake_im = gen(z_noise, steps=steps, alpha=alpha)
                    # Match real images to the current generator resolution;
                    # requires_grad_ is needed for the R1 penalty.
                    real_im = (torch.nn.functional.interpolate(
                        real_im,
                        size=(fake_im.shape[2], fake_im.shape[3]),
                        mode="bilinear",
                    ).to(device, dtype=torch.float).requires_grad_())
                    critic_fake_pred = critic(fake_im.detach(), steps, alpha)
                    critic_real_pred = critic(real_im, steps, alpha)
                    critic.zero_grad()
                    if use_r1_loss:
                        c_loss = critic.module.get_r1_loss(
                            critic_fake_pred,
                            critic_real_pred,
                            real_im,
                            fake_im,
                            steps,
                            alpha,
                            c_lambda,
                        )
                    else:
                        c_loss = critic.module.get_wgan_loss(
                            critic_fake_pred,
                            critic_real_pred,
                            real_im,
                            steps,
                            alpha,
                            c_lambda,
                        )
                    # FIX: backpropagate the critic loss — this call was
                    # missing, so critic_opt.step() had no gradients.
                    c_loss.backward()
                    critic_opt.step()
                    im_count += cur_batch_size
                    c_loss_history.append(c_loss.item())

                # --- Generator update: freeze C, train G ---
                set_requires_grad(critic, False)
                set_requires_grad(gen, True)
                noise = get_truncated_noise(cur_batch_size, noise_size,
                                            0.75).to(device)
                alpha = im_count / fade_in
                if alpha > 1.0:
                    alpha = None
                fake_images = gen(noise, steps=steps, alpha=alpha)
                critic_fake_pred = critic(fake_images, steps, alpha)
                if use_r1_loss:
                    g_loss = gen.module.get_r1_loss(critic_fake_pred)
                else:
                    g_loss = gen.module.get_wgan_loss(critic_fake_pred)
                gen.zero_grad()
                g_loss.backward()
                gen_opt.step()
                g_loss_history.append(g_loss.item())

                iters += 1
                if iters > 0 and iters % refresh_stat_step == 0:
                    avg_c_loss = (sum(c_loss_history[-refresh_stat_step:]) /
                                  refresh_stat_step)
                    avg_g_loss = (sum(g_loss_history[-refresh_stat_step:]) /
                                  refresh_stat_step)
                    pbar.set_description(
                        f"g_loss: {avg_g_loss:.3} c_loss: {avg_c_loss:.3} epoch: {epoch + 1}",
                        refresh=True,
                    )

                with torch.no_grad():
                    examples = gen(show_noise, alpha=alpha, steps=steps)
                    if iters > 0 and iters % display_step == 0:
                        display_image(
                            torch.clamp(examples, 0, 1),
                            save_to_disk=True,
                            filename="s-{}".format(iters),
                            title="Iteration {}".format(iters),
                            num_display=25,
                        )
                if iters > 0 and iters % checkpoint_step == 0:
                    torch.save(
                        {
                            "gen": gen.state_dict(),
                            "critic": critic.state_dict(),
                            "iter": iters,
                            "im_count": im_count,
                            "step": steps,
                            "epoch": epoch,
                            "alpha": alpha,
                        },
                        f"./checkpoints/chk-{iters}.pth",
                    )

    # TRAINING FINISHED - save final set of samples and save model.
    examples = gen(show_noise, alpha=alpha, steps=steps)
    torch.save(
        {
            "gen": gen.state_dict(),
            "critic": critic.state_dict(),
            "iter": iters,
            "im_count": im_count,
            "step": steps,
            "epoch": epoch,
            "alpha": None,
        },
        "./checkpoints/FINAL.pth",
    )
    print("TRAINING IS FINISHED - MODEL SAVED!")
parser.add_argument('--data', type=str) parser.add_argument('--data-args', type=int, nargs='+') parser.add_argument('--dbs', type=int, help='Discriminator Batch Size') parser.add_argument('--dlr', type=float, help='Discriminator Learning Rate') parser.add_argument('--glr', type=float, help='Generator Learning Rate') parser.add_argument('--gbs', type=int, help='Generator Batch Size') parser.add_argument('--gpu', type=int, help='GPU') parser.add_argument('--nf', type=int, nargs='+', help='Number of Features') parser.add_argument('--ni', type=int, help='Number of Iterations') args = parser.parse_args() device = th.device('cpu') if args.gpu < 0 else None # TODO data_loader = getattr(data, args.data + 'Loader')(*args.data_args, device=device) discriminator = Discriminator().to(device) g_configs = [{'in_feats' : in_feats, 'out_feats' : out_feats, 'out_nodes' : out_nodes, 'aggregator' : 'mean'} for in_feats, out_feats, out_nodes in zip([data_loader.n_feats] + args.nf[:-1], args.nf[1:])] generator = Generator().to(device) d_optim = optim.Adam(discirminator.parameters(), args.dlr) g_optim = optim.Adam(generator.parameters(), args.glr) for i in range(args.ni): for j in range(args.gbs): generator() x, adj = next(data_loader) p, cost = discriminator(x, adj) authentic = synthetic =
# 2. better normalization with Batch Normalization algorithm # 3. different learning rates (is there a better one?) # 4. change architecture to a CNN # Hyperparameters etc. device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') lr = 3e-4 z_dim = 64 # 128, 256 image_dim = 28 * 28 * 1 # 784 batch_size = 32 num_epochs = 200 disc_model_file = 'parameters/gan_mnist/discriminator.pth' gen_model_file = 'parameters/gan_mnist/generator.pth' disc = Discriminator(image_dim).to(device) gen = Generator(z_dim, image_dim).to(device) load_model(disc, disc_model_file, device) load_model(gen, gen_model_file, device) fixed_noise = torch.randn((batch_size, z_dim)).to(device) transforms = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]) dataset = datasets.MNIST(root='../datasets/', transform=transforms, download=True) loader = DataLoader(dataset, batch_size=batch_size, shuffle=True) opt_disc = optim.Adam(disc.parameters(), lr=lr) opt_gen = optim.Adam(gen.parameters(), lr=lr) criterion = nn.BCELoss() writer_fake = SummaryWriter(f"runs/GAN_MNIST/fake")