import matplotlib.pyplot as plt
import numpy as np
from torch.nn import functional as F

# NOTE(review): torch, Variable, encoder, decoder, discriminator,
# loss_functions and helpers are used below but are not imported in the
# visible part of this script -- presumably imported elsewhere; confirm.

# Use the first GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Simple mixture model with two means that are optimized together using
# KL divergences.

#%% setting up parameters
batch_size = 200
dim = 2

# All three networks are built with the same dimensionality, number of
# components (k=2) and batch size.
Enc = encoder(batch_size=batch_size, dim=dim, k=2)
Dec = decoder(batch_size=batch_size, dim=dim, k=2)
Disc = discriminator(batch_size=batch_size, dim=dim, k=2)
losses_ = loss_functions()

dataHandler = helpers.data_and_plotting(
    batch_size,
    encoder=Enc,
    decoder=Dec,
    discriminator=Disc,
    mixture=True,
    semi_circle=False,
)

#%% mixture parameters
mu_s = []
logvar_s = []

# The means are trainable; the first log-variance is a fixed zero.
mu1 = Variable(torch.randn(1), requires_grad=True)
logvar1 = Variable(torch.FloatTensor([0]), requires_grad=False)
mu2 = Variable(torch.randn(1), requires_grad=True)
def __init__(self, opts, sess):
    """Build the TensorFlow graph for the auto-encoder and the main network.

    Args:
        opts: Option dictionary; this method reads 'datashape',
            'main_info', 'auxi_info' and 'e_pretrain'.
        sess: TensorFlow session stored on the instance as ``self.sess``.

    NOTE(review): the extent of the two ``tf.variable_scope("main")``
    blocks was reconstructed from a whitespace-collapsed source -- confirm
    against the original file.
    """
    logging.error('Building the Tensorflow Graph')

    self.sess = sess
    self.opts = opts
    self.data_shape = opts['datashape']

    # Placeholders
    self.add_inputs_placeholders()
    self.add_training_placeholders()

    # Transformation ops
    # Encode the content of the sample_points placeholder; only the first
    # element of the encoder result is kept.
    enc_out = encoder(opts, inputs=self.sample_points,
                      is_training=self.is_training)
    self.enc_mean = None
    self.enc_sigmas = None
    self.encoded, _ = enc_out

    # Decode the points encoded above (i.e. reconstruct)
    self.reconstructed, self.reconstructed_logits = decoder(
        opts, noise=self.encoded, is_training=self.is_training)

    # Decode the content of sample_noise, sharing the decoder weights
    self.decoded, self.decoded_logits = decoder(
        opts, reuse=True, noise=self.sample_noise,
        is_training=self.is_training)

    # Main network
    main_info = opts['main_info']
    auxi_info = opts['auxi_info']
    with tf.variable_scope("main"):
        self.auxi_weights_list = self.t_generator(self.encoded,
                                                  self.t_keep_prob)
        hidden = self.sample_points

        # Hidden layers: linear (optionally with auxiliary weights),
        # then dropout, then ReLU.
        for layer in range(1, len(main_info) - 1):
            layer_scope = 'main_%d' % layer
            if auxi_info[layer - 1]:
                hidden = ops.auxilinear(opts, hidden, main_info[layer],
                                        self.auxi_weights_list[layer - 1],
                                        scope=layer_scope)
            else:
                hidden = ops.linear(opts, hidden, main_info[layer],
                                    scope=layer_scope)
            hidden = tf.nn.dropout(hidden, self.main_keep_prob)
            hidden = tf.nn.relu(hidden)

        # Output layer: no dropout and no non-linearity.
        out_scope = 'main_%d' % len(auxi_info)
        if auxi_info[-1]:
            hidden = ops.auxilinear(opts, hidden, main_info[-1],
                                    self.auxi_weights_list[-1],
                                    scope=out_scope)
        else:
            hidden = ops.linear(opts, hidden, main_info[-1],
                                scope=out_scope)
        self.prediction = hidden

    # Objectives, losses, penalties
    self.former_placeholder()
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=self.sample_labels, logits=self.prediction)
    self.cross_entropy = tf.reduce_mean(per_example_xent)
    self.correct_prediction = tf.equal(tf.argmax(self.prediction, 1),
                                       tf.argmax(self.sample_labels, 1))
    self.accuracy = tf.reduce_mean(
        tf.cast(self.correct_prediction, tf.float32))

    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)
    with tf.variable_scope("main"):
        self.trans_loss = self.transfer_loss()
    self.reg_loss = self.regularization_loss()
    self.f_loss = self.encoder_loss()

    # Weighted sums that make up the two training objectives.
    self.wae_loss = (self.rec_lambda * self.loss_reconstruct
                     + self.wae_lambda * self.penalty
                     + self.reg_lambda * self.reg_loss
                     + self.f_lambda * self.f_loss)
    self.main_loss = (self.main_lambda * self.cross_entropy
                      + self.trans_lambda * self.trans_loss)

    # The pretraining loss is only built when requested.
    self.loss_pretrain = None
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()

    # Optimizers, savers, initializer
    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) colorMapFile = './colormap.mat' colormap = loadmat(colorMapFile)['cmap'] colormap = torch.from_numpy(colormap).cuda() #################################### # Initialize Network encoder = models.encoder(isAddCostVolume=opt.isAddCostVolume) for param in encoder.parameters(): param.requires_grad = False encoder.load_state_dict( torch.load('{0}/encoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1))) decoder = models.decoder(isAddVisualHull=opt.isAddVisualHull) for param in decoder.parameters(): param.requires_grad = False decoder.load_state_dict( torch.load('{0}/decoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1))) normalFeature = models.normalFeature() for param in normalFeature.parameters(): param.requires_grad = False normalFeature.load_state_dict( torch.load('{0}/normalFeature_{1}.pth'.format(opt.experiment, opt.nepoch - 1))) normalPool = Variable( torch.ones([1, angleNum * angleNum, 1, 1, 1], dtype=torch.float32)) normalPool.requires_grad = False
for index for output: word=index2word[index] if word=='EOS': break elif word!='PAD': sentence_str+=word return sentence_str if __name__=='__main__': dataset=datasets.Cornell encoder=models.encoder(dataset.num_word,512,2,0.1) decoder=models.decoder(dataset.num_word,512,2,'dot',0.1) utils.load_model(encoder,os.path.join('./Model',str(config.MODEL)),'encoder.pth') utils.load_model(decoder,os.path.join('./Model',str(config.MODEL)),'decoder.pth') bot=GreedySearchBot(encoder,decoder) index2word=dataset.index2word word2index=dataset.word2index max_len=10 while(True): input_sentence=input('>>> ') if input_sentence=='q': break else: result=process(bot,input_sentence,word2index,index2word,max_len)
feature_s = model_f(data_s) output = model_c(feature_s) pred = output.max(1, keepdim=True)[1] for i in range(len(pred)): pred_y.append(pred[i].item()) correct += pred.eq(target_s.view_as(pred)).sum().item() count += len(target_s) return correct*1.0/count criterion_cel = nn.CrossEntropyLoss() ### change to your own model for your data outdim = 50 model_f = models.Net_f(outdim=outdim).cuda() model_c = models.Net_c_cway(outdim=outdim).cuda() model_de = models.decoder(outdim=outdim).cuda() optimizer_f = torch.optim.Adam(model_f.parameters(), 0.001) optimizer_c = torch.optim.Adam(model_c.parameters(), 0.001) optimizer_de = torch.optim.Adam(model_de.parameters(), 0.001) ### Please prepare the image folders in the correct format ### Please refer to https://pytorch.org/tutorials/beginner/data_loading_tutorial.html#afterword-torchvision image_batches = [] image_dirs = [] #change 'test' to any dir which contains your batches data_dir = os.getcwd()+'/test' for s in sorted(os.listdir(data_dir)): if s.startswith('batch'): image_dirs.append(data_dir+'/'+s)
def generate(args):
    """Translate every image of domain A to B and of domain B to A.

    Loads the trained parameters, builds the content/style encoders and the
    decoders for both domains, then writes one image strip per source image
    to the monitors: the real image followed by ``args.num_repeats``
    translations, concatenated along axis 3.

    Args:
        args: Parsed options. This function reads ``model_load_path``,
            ``type_config``, ``image_size``, ``maps``, ``example_guided``,
            ``monitor_path``, ``img_path_a``, ``img_path_b``,
            ``batch_size`` and ``num_repeats``.
    """
    # Load model
    nn.load_parameters(args.model_load_path)

    # Context
    ctx = get_extension_context("cudnn", type_config=args.type_config)
    nn.set_default_context(ctx)

    # Input
    # NOTE(review): the input variables have batch size 1 while the data
    # iterators below use args.batch_size -- presumably batch_size must be
    # 1 here; confirm.
    b, c, h, w = 1, 3, args.image_size, args.image_size
    x_real_a = nn.Variable([b, c, h, w])
    x_real_b = nn.Variable([b, c, h, w])

    # Model
    maps = args.maps
    # content/style (domain A)
    x_content_a = content_encoder(x_real_a, maps, name="content-encoder-a")
    x_style_a = style_encoder(x_real_a, maps, name="style-encoder-a")
    # content/style (domain B)
    x_content_b = content_encoder(x_real_b, maps, name="content-encoder-b")
    x_style_b = style_encoder(x_real_b, maps, name="style-encoder-b")

    # Style code for domain A: the encoded style of the real image when
    # example-guided, otherwise a random normal sample of the same shape.
    z_style_a = F.randn(
        shape=x_style_a.shape) if not args.example_guided else x_style_a
    z_style_a = z_style_a.apply(persistent=True)
    x_fake_a = decoder(x_content_b, z_style_a, name="decoder-a")
    # Style code for domain B, chosen the same way.
    z_style_b = F.randn(
        shape=x_style_b.shape) if not args.example_guided else x_style_b
    z_style_b = z_style_b.apply(persistent=True)
    x_fake_b = decoder(x_content_a, z_style_b, name="decoder-b")

    # Monitor
    suffix = "Stochastic" if not args.example_guided else "Example-guided"
    monitor = Monitor(args.monitor_path)
    monitor_image_a = MonitorImage("Fake Image B to A {} Valid".format(suffix),
                                   monitor, interval=1)
    monitor_image_b = MonitorImage("Fake Image A to B {} Valid".format(suffix),
                                   monitor, interval=1)

    # DataIterator
    di_a = munit_data_iterator(args.img_path_a, args.batch_size)
    di_b = munit_data_iterator(args.img_path_b, args.batch_size)

    def _translate(di_src, x_src, di_ref, x_ref, x_fake, monitor_image):
        """Translate all source images and log [real, fake, fake, ...]."""
        if args.example_guided:
            # A single image of the target domain provides the style.
            x_ref.d = di_ref.next()[0]
        for i in range(di_src.size):
            x_src.d = di_src.next()[0]
            images = [x_src.d.copy()]
            for _ in range(args.num_repeats):
                x_fake.forward(clear_buffer=True)
                images.append(x_fake.d.copy())
            monitor_image.add(i, np.concatenate(images, axis=3))

    # Generate all
    # generate (A -> B)
    _translate(di_a, x_real_a, di_b, x_real_b, x_fake_b, monitor_image_b)
    # generate (B -> A)
    _translate(di_b, x_real_b, di_a, x_real_a, x_fake_a, monitor_image_a)
def _strip_pair_suffix(path, suffix="_AB.jpg"):
    """Return the last '/'-separated component of *path* without *suffix*.

    Unlike ``str.rstrip``, which removes any trailing characters found in
    its argument, this removes *suffix* only when the name really ends
    with it, so "dog_AB.jpg" yields "dog" (not "do").
    """
    name = path.split("/")[-1]
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


def interpolate(args):
    """Render style interpolations for both translation directions.

    For each direction two sets of random style codes are drawn (one pair
    per source image) and linearly blended with ratio
    ``r = i / args.num_repeats`` for ``i`` in ``range(args.num_repeats)``.
    Every step logs one tile of [real | fake] images to the monitor.

    Args:
        args: Parsed options. This function reads ``model_load_path``,
            ``type_config``, ``image_size``, ``maps``, ``example_guided``,
            ``monitor_path``, ``img_files_a``, ``img_files_b``, ``seed``
            and ``num_repeats``.
    """
    # Load model
    nn.load_parameters(args.model_load_path)

    # Context
    ctx = get_extension_context("cudnn", type_config=args.type_config)
    nn.set_default_context(ctx)

    # Input
    b, c, h, w = 1, 3, args.image_size, args.image_size
    x_real_a = nn.Variable([b, c, h, w])
    x_real_b = nn.Variable([b, c, h, w])

    # Model
    maps = args.maps
    # content/style (domain A)
    x_content_a = content_encoder(x_real_a, maps, name="content-encoder-a")
    x_style_a = style_encoder(x_real_a, maps, name="style-encoder-a")
    # content/style (domain B)
    x_content_b = content_encoder(x_real_b, maps, name="content-encoder-b")
    x_style_b = style_encoder(x_real_b, maps, name="style-encoder-b")

    # Style inputs: free variables filled with interpolated codes below,
    # or the encoder outputs when example-guided.
    # NOTE(review): in example-guided mode the codes assigned below are
    # written into encoder outputs and presumably recomputed by forward()
    # -- confirm that this mode is meaningful for interpolation.
    z_style_a = nn.Variable(
        x_style_a.shape) if not args.example_guided else x_style_a
    z_style_a = z_style_a.apply(persistent=True)
    x_fake_a = decoder(x_content_b, z_style_a, name="decoder-a")
    z_style_b = nn.Variable(
        x_style_b.shape) if not args.example_guided else x_style_b
    z_style_b = z_style_b.apply(persistent=True)
    x_fake_b = decoder(x_content_a, z_style_b, name="decoder-b")

    # Monitor
    suffix = "Stochastic" if not args.example_guided else "Example-guided"
    monitor = Monitor(args.monitor_path)
    monitor_image_tile_a = MonitorImageTile(
        "Fake Image Tile {} B to A {} Interpolation".format(
            "-".join([_strip_pair_suffix(path)
                      for path in args.img_files_b]), suffix),
        monitor, interval=1, num_images=len(args.img_files_b))
    monitor_image_tile_b = MonitorImageTile(
        "Fake Image Tile {} A to B {} Interpolation".format(
            "-".join([_strip_pair_suffix(path)
                      for path in args.img_files_a]), suffix),
        monitor, interval=1, num_images=len(args.img_files_a))

    # DataIterator (unshuffled so image j is the same at every step)
    di_a = munit_data_iterator(args.img_files_a, b, shuffle=False)
    di_b = munit_data_iterator(args.img_files_b, b, shuffle=False)
    rng = np.random.RandomState(args.seed)

    def _interpolate(di_src, x_src, z_style, x_fake, monitor_tile):
        """Blend two random style codes per source image and log tiles."""
        # Codes are drawn with the shape of the variable they are assigned
        # to, so the assignment below can never mismatch.
        z_data_0 = [rng.randn(*z_style.shape) for _ in range(di_src.size)]
        z_data_1 = [rng.randn(*z_style.shape) for _ in range(di_src.size)]
        for i in range(args.num_repeats):
            r = 1.0 * i / args.num_repeats
            images = []
            for j in range(di_src.size):
                x_data = di_src.next()[0]
                x_src.d = x_data
                z_style.d = z_data_0[j] * (1.0 - r) + z_data_1[j] * r
                x_fake.forward(clear_buffer=True)
                images.append(
                    np.concatenate([x_data, x_fake.d.copy()], axis=3))
            monitor_tile.add(i, np.concatenate(images))

    # Interpolate (A -> B)
    _interpolate(di_a, x_real_a, z_style_b, x_fake_b, monitor_image_tile_b)
    # Interpolate (B -> A)
    _interpolate(di_b, x_real_b, z_style_a, x_fake_a, monitor_image_tile_a)
def __init__(self, opts):
    """Build the TensorFlow graph of the auto-encoder.

    Creates a session, the placeholders, the encoder/decoder ops, the
    losses and finally the optimizers, savers and variable initializer.

    Args:
        opts: Option dictionary; this method reads 'dataset', 'e_noise',
            'verbose', 'zdim' and 'e_pretrain'.
    """
    logging.error('Building the Tensorflow Graph')

    self.sess = tf.Session()
    self.opts = opts

    # -- Some of the parameters for future use
    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    # -- Placeholders
    self.add_model_placeholders()
    self.add_training_placeholders()
    sample_size = tf.shape(self.sample_points)[0]

    # -- Transformation ops
    # Encode the content of sample_points placeholder
    res = encoder(opts, inputs=self.sample_points,
                  is_training=self.is_training)
    noise_mode = opts['e_noise']
    if noise_mode == 'gaussian':
        # Encoder outputs means and variances of Gaussian
        mean, raw_sigmas = res[0]
        self.enc_mean = mean
        self.enc_sigmas = tf.clip_by_value(raw_sigmas, -50, 50)
        if opts['verbose']:
            self.add_sigmas_debug()
        # Sample a code: mean + eps * sqrt(exp(sigmas)); the 1e-8 keeps
        # the square root away from zero.
        eps = tf.random_normal((sample_size, opts['zdim']),
                               mean=0., stddev=1., dtype=tf.float32)
        scale = tf.sqrt(1e-8 + tf.exp(self.enc_sigmas))
        self.encoded = self.enc_mean + tf.multiply(eps, scale)
    elif noise_mode in ('deterministic', 'implicit', 'add_noise'):
        self.enc_mean = None
        self.enc_sigmas = None
        if noise_mode == 'implicit':
            # The implicit encoder's second output is kept as encoder_A.
            self.encoded, self.encoder_A = res
        else:
            self.encoded, _ = res

    # Decode the points encoded above (i.e. reconstruct)
    self.reconstructed, self.reconstructed_logits = decoder(
        opts, noise=self.encoded, is_training=self.is_training)

    # Decode the content of sample_noise, sharing the decoder weights
    self.decoded, self.decoded_logits = decoder(
        opts, reuse=True, noise=self.sample_noise,
        is_training=self.is_training)

    # -- Objectives, losses, penalties
    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss()
    self.wae_objective = (self.loss_reconstruct
                          + self.wae_lambda * self.penalty)
    self.blurriness = self.compute_blurriness()

    # The pretraining loss is only built when requested.
    self.loss_pretrain = None
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()

    self.add_least_gaussian2d_ops()

    # -- Optimizers, savers, etc
    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
# Fix the seed so that runs are repeatable.
opt.seed = 0
print("Random Seed: ", opt.seed)
random.seed(opt.seed)
torch.manual_seed(opt.seed)

# Remind the user when a GPU is present but was not requested.
if torch.cuda.is_available():
    if not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

####################################
# Initialize Network
# Each sub-network is wrapped in DataParallel over the configured devices.
encoder = nn.DataParallel(
    models.encoder(isAddCostVolume=opt.isAddCostVolume),
    device_ids=opt.deviceIds,
)
decoder = nn.DataParallel(models.decoder(), device_ids=opt.deviceIds)
normalFeature = nn.DataParallel(models.normalFeature(),
                                device_ids=opt.deviceIds)

# All-ones tensor with one channel per (angle, angle) pair.
normalPool = Variable(
    torch.ones([1, angleNum * angleNum, 1, 1, 1], dtype=torch.float32))

####################################
# Send things into GPU
if opt.cuda:
    encoder, decoder, normalFeature = (
        encoder.cuda(), decoder.cuda(), normalFeature.cuda())
    normalPool = normalPool.cuda()

####################################
# Other modules
args = parser.parse_args()

# MNIST images are converted to tensors and normalized with mean 0.5 and
# standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, ), (0.5, )),
])
dataset = torchvision.datasets.MNIST(
    root=args.directory,
    train=True,
    transform=transform,
    download=args.download,
)
data_loader = loader.DataLoader(
    dataset, batch_size=args.batch_size, shuffle=True)

# Networks
enc = models.encoder().to(device)
dec = models.decoder().to(device)
D_ = models.discriminator().to(device)

# Optimizers. The encoder has two of them: op_enc with the generator
# learning rate and op_gen with the discriminator learning rate.
op_enc = optim.Adam(enc.parameters(), lr=args.gen_lr)
op_dec = optim.Adam(dec.parameters(), lr=args.gen_lr)
op_gen = optim.Adam(enc.parameters(), lr=args.dis_lr)
op_disc = optim.Adam(D_.parameters(), lr=args.dis_lr)

# Silence every warning from this point on.
warnings.filterwarnings("ignore")

num_epochs = args.epochs

# Loss histories (reconstruction, discriminator, generator).
recloss, dloss, gloss = [], [], []

TINY = 1e-8
def __init__(self, opts, tag):
    """Build the full graph: auto-encoder, discriminator heads and optimizers.

    Resets the default graph, opens a session with GPU memory growth
    enabled, creates the placeholders, wires encoder / decoder /
    discriminator, defines the losses and creates one minimize op per
    objective.

    Args:
        opts: Option dictionary; this method reads 'dataset', 'zdim',
            'lambda', 'e_pretrain', 'lr' and 'work_dir'.
        tag: Identifier stored on the instance and passed to the
            ResultLogger.
    """
    tf.reset_default_graph()
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=session_config)
    self.opts = opts
    self.tag = tag

    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    data_shape = datashapes[opts['dataset']]

    # Placeholders
    self.sample_points = tf.placeholder(
        tf.float32, [None] + data_shape, name='real_points_ph')
    self.labels = tf.placeholder(tf.int32, shape=[None], name='label_ph')
    self.sample_noise = tf.placeholder(
        tf.float32, [None, opts['zdim']], name='noise_ph')
    self.fixed_sample_labels = tf.placeholder(
        tf.int32, shape=[None], name='fixed_sample_label_ph')
    self.lr_decay = tf.placeholder(tf.float32, name='rate_decay_ph')
    self.is_training = tf.placeholder(tf.bool, name='is_training_ph')

    # Ops
    self.encoded = encoder(opts, inputs=self.sample_points,
                           is_training=self.is_training)
    self.reconstructed, self.probs1 = decoder(
        opts, noise=self.encoded, is_training=self.is_training)
    self.prob1_softmaxed = tf.nn.softmax(self.probs1, axis=-1)

    # Number of samples whose arg-max class equals the given label.
    predicted_labels = tf.argmax(self.prob1_softmaxed, axis=1,
                                 output_type=tf.int32)
    hits = tf.equal(predicted_labels, self.labels)
    self.correct_sum = tf.reduce_sum(tf.cast(hits, tf.float32))

    self.decoded, self.probs2 = decoder(
        opts, reuse=True, noise=self.sample_noise,
        is_training=self.is_training)

    # Discriminator outputs for reconstructions, real points and samples.
    self.De_pro_tilde_logits, self.De_pro_tilde_wdistance = \
        self.discriminate(self.reconstructed)
    self.D_pro_logits, self.D_pro_logits_wdistance = \
        self.discriminate(self.sample_points)
    self.G_pro_logits, self.G_pro_logits_wdistance = \
        self.discriminate(self.decoded)
    self.predict_as_real_mask = tf.equal(
        tf.argmax(self.G_pro_logits, axis=1, output_type=tf.int32),
        self.fixed_sample_labels)

    # Objectives, losses, penalties
    self.loss_cls = self.cls_loss(self.labels, self.probs1)
    self.penalty = self.mmd_penalty(self.encoded)
    self.loss_reconstruct = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)

    self.wgan_d_loss = (
        tf.reduce_mean(self.De_pro_tilde_wdistance)
        + tf.reduce_mean(self.G_pro_logits_wdistance)
        - 2 * tf.reduce_mean(self.D_pro_logits_wdistance))
    self.wgan_g_loss = -(
        tf.reduce_mean(self.De_pro_tilde_wdistance)
        + tf.reduce_mean(self.G_pro_logits_wdistance))
    self.wgan_d_penalty1 = self.gradient_penalty(self.sample_points,
                                                 self.reconstructed)
    self.wgan_d_penalty2 = self.gradient_penalty(self.sample_points,
                                                 self.decoded)
    self.wgan_d_penalty = 0.5 * (self.wgan_d_penalty1
                                 + self.wgan_d_penalty2)

    def mean_xent(labels, logits):
        """Mean sparse softmax cross-entropy of *logits* against *labels*."""
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits))

    # G_additional loss
    self.G_fake_loss = mean_xent(self.fixed_sample_labels,
                                 self.G_pro_logits)
    self.G_tilde_loss = mean_xent(self.labels, self.De_pro_tilde_logits)

    # D loss
    self.D_fake_loss = mean_xent(self.fixed_sample_labels,
                                 self.G_pro_logits)
    self.D_real_loss = mean_xent(self.labels, self.D_pro_logits)
    self.D_tilde_loss = mean_xent(self.labels, self.De_pro_tilde_logits)

    self.encoder_objective = (self.loss_reconstruct
                              + opts['lambda'] * self.penalty
                              + self.loss_cls)
    self.decoder_objective = (self.loss_reconstruct + self.G_fake_loss
                              + self.G_tilde_loss + self.wgan_g_loss)
    self.disc_objective = (self.D_real_loss + self.D_fake_loss
                           + self.D_tilde_loss + self.wgan_d_loss
                           + self.wgan_d_penalty)
    self.total_loss = (self.loss_reconstruct
                       + opts['lambda'] * self.penalty
                       + self.loss_cls)

    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    # Optimizers, savers, etc
    lr = opts['lr']
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    encoder_vars = tf.get_collection(trainable, scope='encoder')
    decoder_vars = tf.get_collection(trainable, scope='generator')
    discriminator_vars = tf.get_collection(trainable,
                                           scope='discriminator')

    # One shared optimizer drives the four main objectives.
    optim = self.optimizer(lr, self.lr_decay)
    self.encoder_opt = optim.minimize(loss=self.encoder_objective,
                                      var_list=encoder_vars)
    self.decoder_opt = optim.minimize(loss=self.decoder_objective,
                                      var_list=decoder_vars)
    self.disc_opt = optim.minimize(loss=self.disc_objective,
                                   var_list=discriminator_vars)
    self.ae_opt = optim.minimize(loss=self.total_loss,
                                 var_list=encoder_vars + decoder_vars)

    # Pretraining gets its own optimizer, built without the decay input.
    if opts['e_pretrain']:
        self.pretrain_opt = self.optimizer(lr).minimize(
            loss=self.loss_pretrain, var_list=encoder_vars)
    else:
        self.pretrain_opt = None

    self.saver = tf.train.Saver(max_to_keep=10)

    # Register the key tensors in named collections.
    for key, tensor in (('real_points_ph', self.sample_points),
                        ('noise_ph', self.sample_noise),
                        ('is_training_ph', self.is_training),
                        ('encoder', self.encoded),
                        ('decoder', self.decoded)):
        tf.add_to_collection(key, tensor)

    self.init = tf.global_variables_initializer()
    self.result_logger = ResultLogger(tag, opts['work_dir'], verbose=True)
def _style_transfer_losses(enc, ain, dec, content, style, style_weight):
    """Run one forward pass and return (content, style, total) losses.

    The content loss compares the last encoder feature of the generated
    image with the AdaIN target; the style loss sums ``style_loss`` over
    the four encoder feature levels of the generated and style images.
    """
    contentf = enc(content, multiple=False)
    stylef = enc(style, multiple=False)
    targetf = ain(contentf, stylef)
    g = dec(targetf)
    outf1, outf2, outf3, outf4 = enc(g, multiple=True)
    stylef1, stylef2, stylef3, stylef4 = enc(style, multiple=True)

    loss_c = content_loss(outf4, targetf)
    loss_s = (style_loss(outf1, stylef1) + style_loss(outf2, stylef2)
              + style_loss(outf3, stylef3) + style_loss(outf4, stylef4))
    return loss_c, loss_s, loss_c + style_weight * loss_s


def train(config):
    """Train the decoder and save its weights to ``config.modelDir``.

    Only the decoder's parameters are optimized. Every ``config.rep_intv``
    iterations the test set is evaluated and the mean content, style and
    total losses are written to TensorBoard.

    Args:
        config: Configuration object; this function reads
            ``learning_rate``, ``log_dir``, ``modelDir``, ``num_epoch``,
            ``styleWeight`` and ``rep_intv``.
    """
    # create model instances:
    enc = encoder(config)
    ain = adain(enc.noutchannels)
    dec = decoder(config, enc)

    # Move the models to gpu if cuda is available
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        enc = enc.cuda()
        ain = ain.cuda()
        dec = dec.cuda()

    # The encoder stays in eval mode; only the decoder is in train mode.
    enc.eval()
    dec.train()

    lr = config.learning_rate

    # Create optimizer (decoder parameters only)
    optimizer = optim.Adam(dec.parameters(), lr=lr)

    # Create log directory if it does not exist
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)

    # Create the checkpoint directory up front so that the final save
    # cannot fail on a missing folder after the whole training run.
    if config.modelDir and not os.path.exists(config.modelDir):
        os.makedirs(config.modelDir)

    # Create summary writer
    tr_writer = SummaryWriter(log_dir=os.path.join(config.log_dir, "train"))

    # Initialize training
    iter_idx = -1  # make counter start at zero

    # Training loop
    for epoch in range(config.num_epoch):
        prefix = "Training Epoch {:3d}: ".format(epoch)
        for (content, style) in tqdm(trainloader(config), desc=prefix):
            # Counter
            iter_idx += 1

            # Send data to GPU if we have one
            if use_cuda:
                content = content.cuda()
                style = style.cuda()

            # Forward pass and losses
            _, _, loss = _style_transfer_losses(
                enc, ain, dec, content, style, config.styleWeight)

            # Compute gradients
            loss.backward()
            # Update parameters
            optimizer.step()
            # Zero the parameter gradients in the optimizer
            optimizer.zero_grad()

            # Monitor results every report interval
            if iter_idx % config.rep_intv == 0:
                # Per-batch losses over the whole test set
                loss_c_test = []
                loss_s_test = []
                loss_test = []

                # Set model for evaluation
                dec = dec.eval()
                for (content_t, style_t) in tqdm(testloader(config)):
                    # Send data to GPU if we have one
                    if use_cuda:
                        content_t = content_t.cuda()
                        style_t = style_t.cuda()

                    # No gradients are needed for evaluation
                    with torch.no_grad():
                        loss_c_temp, loss_s_temp, loss_temp = \
                            _style_transfer_losses(
                                enc, ain, dec, content_t, style_t,
                                config.styleWeight)
                        loss_c_test += [loss_c_temp.cpu().numpy()]
                        loss_s_test += [loss_s_temp.cpu().numpy()]
                        loss_test += [loss_temp.cpu().numpy()]

                # Set model back for training
                dec = dec.train()

                # Take average
                loss_c_test = np.mean(loss_c_test)
                loss_s_test = np.mean(loss_s_test)
                loss_test = np.mean(loss_test)

                # Write the averaged losses to tensorboard
                tr_writer.add_scalar("loss_content_test", loss_c_test,
                                     global_step=iter_idx)
                tr_writer.add_scalar("loss_style_test", loss_s_test,
                                     global_step=iter_idx)
                tr_writer.add_scalar("loss_test", loss_test,
                                     global_step=iter_idx)

    torch.save({"model": dec.state_dict()},
               os.path.join(config.modelDir, "dec_model.pth"))
def _load_and_show_image(path, to_tensor):
    """Open an RGB image, display it and return it as a batch of one.

    Args:
        path: Path of the image file.
        to_tensor: Callable turning the opened image into a tensor that
            is permuted with (1, 2, 0) for display.

    Returns:
        The tensor produced by *to_tensor* with a leading axis of size 1.
    """
    image = Image.open(path)
    assert (np.asarray(image).shape[2] == 3)
    image = to_tensor(image)
    plt.imshow(image.permute(1, 2, 0))
    plt.show()
    return image.reshape(1, *image.shape)


def test(config):
    """Stylize ``config.contentImage`` with a trained decoder and show it.

    Without ``config.interpolate`` a single style image is applied and
    blended with the content features by ``config.alpha``. With it,
    ``config.styleImage`` is a comma-separated list of images whose AdaIN
    targets are summed with the weights parsed from
    ``config.styleInterpWeights``.

    Args:
        config: Configuration object; this function reads ``modelDir``,
            ``interpolate``, ``contentImage``, ``styleImage``, ``alpha``
            and ``styleInterpWeights``.
    """
    # create model instances:
    enc = encoder(config)
    ain = adain(enc.noutchannels)
    dec = decoder(config, enc)

    # Weights are loaded on the CPU and moved with the model afterwards.
    load_res = torch.load(os.path.join(config.modelDir, 'dec_model.pth'),
                          map_location="cpu")
    dec.load_state_dict(load_res["model"])

    # Move the models to gpu if cuda is available
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        enc = enc.cuda()
        ain = ain.cuda()
        dec = dec.cuda()

    # set to eval
    enc.eval()
    dec.eval()

    tt = transforms.ToTensor()
    plt.figure()

    content = _load_and_show_image(config.contentImage, tt)
    if use_cuda:
        content = content.cuda()

    if not config.interpolate:
        style = _load_and_show_image(config.styleImage, tt)
        if use_cuda:
            style = style.cuda()

        with torch.no_grad():
            contentf = enc(content, multiple=False)
            stylef = enc(style, multiple=False)
            targetf = ain(contentf, stylef)
            # alpha blends between the content features and the target
            g = dec((1 - config.alpha) * contentf
                    + config.alpha * targetf).squeeze()
    else:
        weights = list(
            map(float, config.styleInterpWeights.strip('[]').split(',')))
        im_names = config.styleImage.split(',')

        # The content features do not depend on the style image, so they
        # are computed once instead of once per style.
        with torch.no_grad():
            contentf = enc(content, multiple=False)

        targetf = 0
        for i, im_name in enumerate(im_names):
            style = _load_and_show_image(im_name, tt)
            if use_cuda:
                style = style.cuda()

            with torch.no_grad():
                stylef = enc(style, multiple=False)
                targetf += weights[i] * ain(contentf, stylef)

        with torch.no_grad():
            g = dec(targetf).squeeze()

    # matplotlib cannot convert CUDA tensors, so bring the result back to
    # the CPU before plotting (a no-op when it is already there).
    plt.imshow(g.cpu().permute(1, 2, 0))
    plt.show()
# This is the vanilla VAE
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
from models import encoder, decoder, discriminator, loss_functions

#%% get the required components
batchSize = 100
# The same batch size is used for both networks and for the KL term below.
encode = encoder(batch_size=batchSize)
decode = decoder(batch_size=batchSize)
losses = loss_functions()

#%%
# Each optimizer owns the parameters of its own network instance (not of
# the imported classes).
optimizerEnc = optim.Adam(encode.parameters(), lr=1e-4)
optimizerDec = optim.Adam(decode.parameters(), lr=1e-4)

# NOTE(review): `x` (the training batch) is not defined in the visible
# script -- it has to be provided before this loop; confirm its source.
for i in range(50000):
    optimizerEnc.zero_grad()
    optimizerDec.zero_grad()

    mu, logvar, z, x_hat = encode(x)

    # KL term against a prior given by two zero vectors.
    KL_loss = losses.KL_Gaussian(mu, logvar, torch.zeros(batchSize),
                                 torch.zeros(batchSize),
                                 batch_size=batchSize)
    mse_error = losses.reconstruction_error(x, x_hat)
    print(KL_loss.item(), mse_error.item())

    # NOTE(review): only the reconstruction error is back-propagated; the
    # KL term is printed but not optimized -- confirm whether the intended
    # objective is mse_error + KL_loss.
    mse_error.backward()
    optimizerEnc.step()
    # Parameters that received no gradient are left untouched by step().
    optimizerDec.step()
print(np.mean(prequential_acc)) ### our drift detection model if args.model == 'ours': task = 'm2u' batch_size = 64 outdim = 50 initial_batches = 50 label_lag = 3 drift_num = 0 #initialize models model_f = models.Net_f(task=task, outdim=outdim).cuda() #encoder model_c = models.Net_c_cway(task=task, outdim=outdim).cuda() #classifier model_de = models.decoder(task=task, outdim=outdim).cuda() #decoder optimizer_f = torch.optim.Adam(model_f.parameters(), 0.001) optimizer_c = torch.optim.Adam(model_c.parameters(), 0.001) optimizer_de = torch.optim.Adam(model_de.parameters(), 0.001) #load data source_dataset, target_dataset = get_dataset(task, drift_num) source_loader = torch.utils.data.DataLoader(source_dataset, batch_size=batch_size, shuffle=False, num_workers=0) target_loader = torch.utils.data.DataLoader(target_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
def train(train_model=True, load=0, comment=None, model_name=None, modelstep=0):
    """Build the adversarial autoencoder graph and, optionally, train it.

    The graph has an encoder, a decoder, a discriminator on the latent code
    and a second discriminator (``discriminate_decoder``) on decoded images.
    Each of the four parts has its own Adam optimizer; all four are stepped
    once per batch.

    Args:
        train_model: when False, the graph is built and the sorted listing of
            ``results_path`` is returned without training.
        load, model_name, modelstep: accepted for compatibility; unused here.
        comment: free text written at the top of the run's ``log.txt``.

    Returns:
        The sorted directory listing when ``train_model`` is False, otherwise
        ``None``.
    """
    save_dir = "checkpoint"
    log_dir = "logs"
    tl.files.exists_or_mkdir(save_dir)
    tl.files.exists_or_mkdir(log_dir)

    with tf.device("/gpu:0"):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    with tf.device("/gpu:0"):
        real_distribution = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, FLAGS.z_dim],
            name='Real_distribution')
        real_images = tf.placeholder(tf.float32, [
            FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, FLAGS.c_dim
        ], name='real_images')

        # ---- networks (train and test variants share variables) ----
        encoder_output = encoder(real_images, reuse=False, is_train=True)
        encoder_output_test = encoder(real_images, reuse=True, is_train=False)

        d_fake, d_fake_logits = discriminator(encoder_output, reuse=False)
        d_real, d_real_logits = discriminator(real_distribution, reuse=True)
        d_fake_test, d_fake_logits_test = discriminator(
            encoder_output_test, reuse=True)
        d_real_test, d_real_logits_test = discriminator(real_distribution,
                                                        reuse=True)

        decoder_output, std = decoder(encoder_output,
                                      reuse=False,
                                      is_train=True)
        decoder_output_test, std_ = decoder(encoder_output,
                                            reuse=True,
                                            is_train=False)
        decoder_z_output, _ = decoder(real_distribution,
                                      reuse=True,
                                      is_train=False)

        d_fake_decoder, d_fake_decoder_logits = discriminate_decoder(
            decoder_output, reuse=False, istrain=True)
        d_real_decoder, d_real_decoder_logits = discriminate_decoder(
            real_images, reuse=True, istrain=False)
        d_fake_decoder_test, d_fake_decoder_logits_test = discriminate_decoder(
            decoder_output_test, reuse=True, istrain=False)
        d_real_decoder_test, d_real_decoder_logits_test = discriminate_decoder(
            real_images, reuse=True, istrain=False)
        d_sample_decoder, d_sample_decoder_logits = discriminate_decoder(
            decoder_z_output, reuse=True, istrain=False)

        # ---- reconstruction loss: mean per-image L2 distance ----
        summed = tf.reduce_sum(tf.square(decoder_output - real_images),
                               [1, 2, 3])
        # The epsilon keeps the sqrt gradient finite at zero error.
        sqrt_summed = tf.sqrt(summed + 1e-8)
        autoencoder_loss = tf.reduce_mean(sqrt_summed)

        summed_test = tf.reduce_sum(
            tf.square(decoder_output_test - real_images), [1, 2, 3])
        sqrt_summed_test = tf.sqrt(summed_test + 1e-8)
        autoencoder_loss_test = tf.reduce_mean(sqrt_summed_test)

        # ---- discriminator loss (latent code), with noisy labels ----
        tf_randn_real = tf.random_uniform(tf.shape(d_real), 0.8, 1.1)
        tf_randn_fake = tf.random_uniform(tf.shape(d_real), 0.0, 0.1)
        dc_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf_randn_real,
                                                    logits=d_real_logits))
        dc_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf_randn_fake,
                                                    logits=d_fake_logits))
        dc_loss = dc_loss_fake + dc_loss_real

        dc_loss_real_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_randn_real, logits=d_real_logits_test))
        dc_loss_fake_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake), logits=d_fake_logits_test))
        dc_loss_test = dc_loss_fake_test + dc_loss_real_test

        # ---- generator loss (encoder fooling the latent discriminator) ----
        generator_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_fake), logits=d_fake_logits))
        generator_loss_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_fake_test), logits=d_fake_logits_test))

        # ---- decoder's discriminator loss ----
        tf_randn_real = tf.random_uniform(tf.shape(d_real_decoder), 0.8, 1.1)
        tf_randn_fake = tf.random_uniform(tf.shape(d_fake_decoder), 0.0, 0.1)
        dc_decoder_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_randn_real, logits=d_real_decoder_logits))
        dc_decoder_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake), logits=d_fake_decoder_logits))
        dc_decoder_loss_sample = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake), logits=d_sample_decoder_logits))
        dc_decoder_loss = (dc_decoder_loss_fake + dc_decoder_loss_real +
                           dc_decoder_loss_sample)

        dc_decoder_loss_real_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_randn_real, logits=d_real_decoder_logits_test))
        dc_decoder_loss_fake_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake),
                logits=d_fake_decoder_logits_test))
        dc_decoder_loss_sample_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_fake), logits=d_sample_decoder_logits))
        # Sum the three *_test terms (the sample term previously came from
        # the training group).
        dc_decoder_loss_test = (dc_decoder_loss_fake_test +
                                dc_decoder_loss_real_test +
                                dc_decoder_loss_sample_test)

        # ---- decoder's generator loss ----
        generator_decoder_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_fake_decoder),
                logits=d_fake_decoder_logits))
        generator_decoder_sample_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_sample_decoder),
                logits=d_sample_decoder_logits))
        generator_decoder_loss_test = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_fake_decoder_test),
                logits=d_fake_decoder_logits_test))

        # ---- variable groups, one per optimizer ----
        E_vars = tl.layers.get_variables_with_name('encoder', True, True)
        G_vars = tl.layers.get_variables_with_name('decoder', True, True)
        EG_vars = tl.layers.get_variables_with_name('ae', True, True)
        D_vars = tl.layers.get_variables_with_name('discriminator', True,
                                                   True)
        C_vars = tl.layers.get_variables_with_name('discriminate_decoder',
                                                   True, True)

        lr_v = FLAGS.learning_rate

        # for E vars
        encoder_loss = lambda_ * autoencoder_loss + generator_loss
        # The test objective uses the test-mode reconstruction loss.
        encoder_loss_test = lambda_ * autoencoder_loss_test + generator_loss_test

        # for G vars, generator loss in the paper
        decoder_loss = lambda_ * autoencoder_loss + generator_decoder_loss + \
            generator_decoder_sample_loss
        decoder_loss_test = lambda_ * autoencoder_loss_test + generator_decoder_loss_test + \
            generator_decoder_sample_loss

    with tf.device("/gpu:0"):
        train_op_e = tf.train.AdamOptimizer(
            lr_v, beta1=FLAGS.beta1).minimize(encoder_loss, var_list=E_vars)
        train_op_g = tf.train.AdamOptimizer(
            lr_v, beta1=FLAGS.beta1).minimize(decoder_loss, var_list=G_vars)
        train_op_d = tf.train.AdamOptimizer(
            lr_v, beta1=FLAGS.beta1).minimize(dc_loss, var_list=D_vars)
        train_op_dc = tf.train.AdamOptimizer(
            lr_v, beta1=FLAGS.beta1).minimize(dc_decoder_loss,
                                              var_list=C_vars)
        tl.layers.initialize_global_variables(sess)

    tensorboard_path, saved_model_path, log_path = form_results()

    # ---- TensorBoard summaries ----
    input_images = tf.reshape(real_images,
                              [-1, FLAGS.input_dim, FLAGS.input_dim, 1])
    generated_images = tf.reshape(
        decoder_output, [-1, FLAGS.input_dim, FLAGS.input_dim, 1])
    writer = tf.summary.FileWriter(logdir=tensorboard_path,
                                   graph=tf.get_default_graph())
    tf.summary.scalar("autoencoder_loss", autoencoder_loss)
    tf.summary.scalar("discriminator_loss", dc_loss)
    tf.summary.scalar("generator_loss", generator_loss)
    tf.summary.scalar("discriminate_decoder_loss", dc_decoder_loss)
    tf.summary.scalar("discriminate_generator_loss", generator_decoder_loss)
    tf.summary.image(name='Input Images', tensor=input_images, max_outputs=10)
    tf.summary.image(name='Generated Images',
                     tensor=generated_images,
                     max_outputs=10)
    tf.summary.histogram(name='Encoder Distribution', values=encoder_output)
    tf.summary.histogram(name='Real Distribution', values=real_distribution)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    if not train_model:
        # Get the latest results folder
        # NOTE(review): `results_path` is not defined in this function; it is
        # presumably a module-level name - confirm.
        all_results = os.listdir(results_path)
        all_results.sort()
        return all_results

    step = 0
    with open(log_path + '/log.txt', 'a') as log:
        log.write("Comment: {}\n".format(comment))
        log.write("\n")
        log.write("input_dim: {}\n".format(FLAGS.input_dim))
        log.write("z_dim: {}\n".format(FLAGS.z_dim))
        log.write("batch_size: {}\n".format(FLAGS.batch_size))
        log.write("learning_rate: {}\n".format(FLAGS.learning_rate))
        log.write("\n")

    n_batches = mnist.train.num_examples // FLAGS.batch_size
    train_ops = (train_op_e, train_op_g, train_op_d, train_op_dc)

    for i in range(FLAGS.epoch):
        for b in range(1, n_batches + 1):
            batch_x, _ = mnist.train.next_batch(FLAGS.batch_size)
            # 28x28 digits are rescaled by 32/28 and given a channel axis.
            batch_x = batch_x.reshape(FLAGS.batch_size, 28, 28)
            batch_x = resize(batch_x, 32.0 / 28.0)
            batch_x = batch_x[:, :, :, np.newaxis]
            images = batch_x
            z_real_dist = np.random.normal(
                0, 1, (FLAGS.batch_size, FLAGS.z_dim)) * 1.
            feed = {real_images: images, real_distribution: z_real_dist}

            # One update each: encoder, decoder, latent discriminator,
            # decoder discriminator (same order as before).
            for train_op in train_ops:
                sess.run(train_op, feed)

            if b % 20 == 0:
                # Fetched values get their own names so the graph tensor
                # `dc_loss` is not overwritten by a NumPy scalar.
                (e_loss, d_loss, dc_loss_val, g_loss, dcd_loss, gd_loss,
                 ae_loss, summary) = sess.run(
                     [
                         encoder_loss_test, decoder_loss_test, dc_loss_test,
                         generator_loss_test, dc_decoder_loss_test,
                         generator_decoder_loss_test, autoencoder_loss,
                         summary_op
                     ],
                     feed_dict=feed)
                df_decoder, df_decoder_test = sess.run(
                    [d_fake_decoder, d_fake_decoder_test], feed_dict=feed)
                writer.add_summary(summary, global_step=step)
                print("Epoch: {}, iteration: {}".format(i, b))
                print("Encoder Loss: {}".format(e_loss))
                print("Decoder Loss: {}".format(d_loss))
                print("Discriminator Loss: {}".format(dc_loss_val))
                print("Generator Loss: {}".format(g_loss))
                print("Discriminate decoder Loss: {}".format(dcd_loss))
                print("Discriminate generator Loss: {}".format(gd_loss))
                print("Autoencoder LOss:{}".format(ae_loss))
                # Second placeholder added so both values are printed.
                print("df_decoder {}, df_decoder_test {}".format(
                    df_decoder, df_decoder_test))
                with open('./logs/log.txt', 'a') as log:
                    log.write("Epoch: {}, iteration: {}\n".format(i, b))
                    log.write("Encoder Loss: {}\n".format(e_loss))
                    log.write("Decoder Loss: {}\n".format(d_loss))
                    log.write("Generator Loss:{}\n".format(g_loss))
                    log.write("Discriminator Loss: {}\n".format(dc_loss_val))
                    log.write(
                        "Discriminate decoder Loss:{}\n".format(dcd_loss))
                    log.write(
                        "Discriminate Generator Loss: {}\n".format(gd_loss))
                # NOTE(review): checkpoint cadence (every logging interval)
                # is assumed here - confirm against the intended schedule.
                saver.save(sess,
                           save_path=saved_model_path,
                           global_step=step)
            step += 1
def improved_sampling(opts):
    """Optimize latent codes so that decoding them gives autoencoder fixed points.

    A fresh variable of ``NUM_ROWS * NUM_COLS`` latent codes is decoded,
    re-encoded and decoded again. Gradient descent on the codes alone
    minimizes the reconstruction loss between the two decodings. Encoder and
    generator weights are restored from ``opts['checkpoint']`` and stay
    fixed.

    Snapshots of the generated pictures and of the codes are saved with
    ``np.save`` under ``opts['work_dir']`` as ``picNNN`` / ``codeNNN``: every
    step for the first 100 steps, then every 100th step.
    """
    NUM_ROWS = 10
    NUM_COLS = 10
    NUM_GD_STEPS = 100000
    num_z = NUM_ROWS * NUM_COLS
    checkpoint = opts['checkpoint']

    with tf.Session() as sess:
        with sess.graph.as_default():
            z = tf.get_variable("latent_codes", [num_z, opts['zdim']],
                                tf.float32,
                                tf.random_normal_initializer(stddev=1.))
            is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
            gen, _ = decoder(opts, z, is_training=is_training_ph)
            data_shape = datashapes[opts['dataset']]
            gen.set_shape([num_z] + data_shape)
            e_gen, _ = encoder(opts, gen, is_training=is_training_ph)
            if opts['e_noise'] == 'gaussian':
                e_gen = e_gen[0]
            # decoder() returns a pair (unpacked the same way for `gen`
            # above); only the first element goes into the loss.
            ae_gen, _ = decoder(opts,
                                e_gen,
                                reuse=True,
                                is_training=is_training_ph)
            loss = wae.WAE.reconstruction_loss(opts, gen, ae_gen)
            # Only the latent codes are optimized.
            train_op = tf.train.AdamOptimizer(0.01, 0.9).minimize(
                loss, var_list=[z])

            # Now restoring weights from the checkpoint
            # We need to restore all variables except for newly created ones
            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            enc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='encoder')
            dec_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='generator')
            new_vars = [
                v for v in all_vars
                if v not in enc_vars and v not in dec_vars
            ]
            vars_to_restore = enc_vars + dec_vars
            saver = tf.train.Saver(vars_to_restore)
            saver.restore(sess, checkpoint)
            logging.error('Restored.')
            init = tf.variables_initializer(new_vars)

            pic_id = 0
            loss_prev = 1e10
            init.run()
            feed = {is_training_ph: False}
            # `range` works on both Python 2 and 3 (`xrange` is Python 2 only).
            for step in range(NUM_GD_STEPS):
                if step < 100 or step % 100 == 0:
                    # will save all 100 first steps and then every 100 steps
                    pics = gen.eval(feed_dict=feed)
                    codes = z.eval()
                    pic_path = os.path.join(opts['work_dir'],
                                            'pic%03d' % pic_id)
                    code_path = os.path.join(opts['work_dir'],
                                             'code%03d' % pic_id)
                    np.save(pic_path, pics)
                    np.save(code_path, codes)
                    pic_id += 1
                # Make a gradient step
                sess.run(train_op, feed_dict=feed)
                if step % 10 == 0:
                    loss_cur = loss.eval(feed_dict=feed)
                    rel_imp = abs(loss_cur - loss_prev) / abs(loss_prev)
                    logging.error('step %d, loss=%f, rel_imp=%f' %
                                  (step, loss_cur, rel_imp))
                    loss_prev = loss_cur
# NOTE(review): the next line is the tail of a statement whose beginning lies
# outside this chunk (presumably the definition of `transform` used below).
tvt.Normalize(mean=[-38.39992], std=[13.462255])])
# Constants chosen so that, if Normalize computes (x - mean) / std, this
# undoes the normalization above: x * 13.462255 - 38.39992.
reverse_normalize = tvt.Normalize(
    mean=[38.39992/13.462255], std=[1./13.462255])
num_layers = args.x
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)
# Build both networks from the paths in `args` and place them on `device`.
encoder = models.encoder(x=num_layers, pretrained_path=args.encoder).to(device)
decoder = models.decoder(x=num_layers, pretrained_path=args.decoder).to(device)
# Inference only: switch both networks out of training mode.
encoder.train(False)
decoder.train(False)
# load_audio returns a pair; the second element is kept only for the content
# clip (as `content_ang`).
content_audio, content_ang = load_audio(args.content, transform)
content_audio = content_audio.to(device)
style_audio, _ = load_audio(args.style, transform)
style_audio = style_audio.to(device)
z_content, maxpool_content = encoder(content_audio)  # (1, C, H, W)
z_style, _ = encoder(style_audio)  # (1, C, H, W)
n_channels = z_content.size()[1]  # C
n_1 = z_content.size()[2]  # H
n_2 = z_content.size()[3]  # W
def __init__(self, opts, tag):
    """Build the training and evaluation graphs and open a session.

    Args:
        opts: option dict. Keys read here: 'dataset', 'zdim', 'lambda_cls',
            'lambda_mixture', 'lambda', 'work_dir', 'n_classes',
            'sampling_size', 'e_pretrain'.
        tag: identifier passed to ResultLogger and stored on the instance.
    """
    tf.reset_default_graph()
    logging.error('Building the Tensorflow Graph')
    # Let the session allocate GPU memory on demand.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.Session(config=config)
    self.opts = opts
    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]
    self.add_inputs_placeholders()
    self.add_training_placeholders()
    sample_size = tf.shape(self.sample_points)[0]

    # Label-conditioned encoder; its second output is clipped to [-50, 50]
    # before being exponentiated below.
    enc_mean, enc_sigmas = encoder(opts,
                                   inputs=self.sample_points,
                                   is_training=self.is_training,
                                   y=self.labels)
    enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
    self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas
    # Reparameterized sample: mean + eps * sqrt(exp(sigmas) + 1e-8).
    eps = tf.random_normal((sample_size, opts['zdim']),
                           0.,
                           1.,
                           dtype=tf.float32)
    self.encoded = self.enc_mean + tf.multiply(
        eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
    # self.encoded = self.enc_mean + tf.multiply(
    # eps, tf.exp(self.enc_sigmas / 2.))

    # The decoder returns ((reconstruction, logits), class output).
    (self.reconstructed, self.reconstructed_logits), self.probs1 = \
        decoder(opts, noise=self.encoded, is_training=self.is_training)
    # Number of samples whose arg-max prediction equals the label.
    self.correct_sum = tf.reduce_sum(
        tf.cast(tf.equal(tf.argmax(self.probs1, axis=1), self.labels),
                tf.float32))
    # Decode the content of sample_noise
    (self.decoded, self.decoded_logits), _ = decoder(opts,
                                                     reuse=True,
                                                     noise=self.sample_noise,
                                                     is_training=self.is_training)

    # -- Objectives, losses, penalties
    self.loss_cls = self.cls_loss(self.labels, self.probs1)
    self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
    self.loss_recon = self.reconstruction_loss(self.opts, self.sample_points,
                                               self.reconstructed)
    self.mixup_loss = self.MIXUP_loss(opts, self.encoded, self.labels)
    self.gmmpara_init()
    self.loss_mixture = self.mixture_loss(self.encoded)
    # Main objective: reconstruction + weighted classification + weighted
    # mixture term.
    self.objective = self.loss_recon + opts[
        'lambda_cls'] * self.loss_cls + opts['lambda_mixture'] * tf.cast(
            self.loss_mixture, dtype=tf.float32)
    # Alternative objective using the MMD penalty instead of the mixture term.
    self.objective_pre = self.loss_recon + opts[
        'lambda'] * self.loss_mmd + self.loss_cls
    self.result_logger = ResultLogger(tag, opts['work_dir'], verbose=True)
    self.tag = tag

    # -- Evaluation graph: one bound per candidate class, estimated from S
    # latent samples per input.
    logpxy = []
    dimY = opts['n_classes']
    N = sample_size
    S = opts['sampling_size']
    x_rep = tf.tile(self.sample_points, [S, 1, 1, 1])
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i in range(dimY):
            # Condition the encoder on class i for every input.
            y = tf.fill((N, ), i)
            mu, log_sig = encoder(opts,
                                  inputs=self.sample_points,
                                  reuse=True,
                                  is_training=False,
                                  y=y)
            mu = tf.tile(mu, [S, 1])
            log_sig = tf.tile(log_sig, [S, 1])
            y = tf.tile(y, [S])
            eps2 = tf.random_normal((N * S, opts['zdim']),
                                    0.,
                                    1.,
                                    dtype=tf.float32)
            z = mu + tf.multiply(eps2, tf.sqrt(1e-8 + tf.exp(log_sig)))
            (mu_x, _), logit_y = decoder(opts,
                                         reuse=True,
                                         noise=z,
                                         is_training=False)
            # Negative squared reconstruction error per sample.
            logp = -tf.reduce_sum((x_rep - mu_x)**2, axis=[1, 2, 3])
            log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y, logits=logit_y)
            # Mixture term built from theta_p, u_p and lambda_p.
            posterior = tf.log(
                self.theta_p) - 0.5 * tf.log(2 * math.pi * self.lambda_p)
            self.u_p_1 = tf.expand_dims(self.u_p, 2)
            z_m = tf.expand_dims(tf.transpose(z), 1)
            aa = tf.square(z_m - self.u_p_1)
            self.lambda_p_1 = tf.expand_dims(self.lambda_p, 2)
            # NOTE(review): this evaluates as (aa / 2) * lambda_p, while the
            # log term above uses lambda_p like a variance; a Gaussian
            # log-density would need aa / (2 * lambda_p) - confirm intent.
            bb = aa / 2 * self.lambda_p_1
            posterior = tf.expand_dims(posterior, 2) - bb
            posterior_sum = tf.reduce_sum(tf.reduce_sum(posterior, axis=0),
                                          axis=0)
            bound = 0.5 * logp + opts['lambda_cls'] * log_pyz + opts[
                'lambda_mixture'] * posterior_sum
            # Average over the S samples in log space.
            bound = tf.reshape(bound, [S, N])
            bound = self.logsumexp(bound) - tf.log(float(S))
            logpxy.append(tf.expand_dims(bound, 1))
    # Softmax over the per-class bounds gives the evaluation probabilities.
    logpxy = tf.concat(logpxy, 1)
    y_pred = tf.nn.softmax(logpxy)
    self.eval_probs = y_pred
    # Debug handles; they hold the tensors from the last loop iteration.
    self.test_a = 0.5 * logp
    self.test_b = log_pyz
    self.test_c = posterior_sum
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None
    self.add_optimizers()
    self.add_savers()
torch.manual_seed(opt.seed)

cuda_available_but_unused = torch.cuda.is_available() and not opt.cuda
if cuda_available_but_unused:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )


def _freeze_and_restore(net, path_template):
    """Disable gradients on *net*, then load its weights.

    *path_template* is formatted with ``opt.experiment`` and the index of the
    last epoch (``opt.nepoch - 1``).
    """
    for weight in net.parameters():
        weight.requires_grad = False
    weights_file = path_template.format(opt.experiment, opt.nepoch - 1)
    net.load_state_dict(torch.load(weights_file))
    return net


####################################
# Initialize Network
encoder = _freeze_and_restore(
    models.encoder(isAddCostVolume=opt.isAddCostVolume),
    '{0}/encoder_{1}.pth')
decoder = _freeze_and_restore(models.decoder(), '{0}/decoder_{1}.pth')
normalFeature = _freeze_and_restore(models.normalFeature(),
                                    '{0}/normalFeature_{1}.pth')

# Constant tensor of ones, excluded from gradient computation.
normal_pool_shape = [1, angleNum * angleNum, 1, 1, 1]
normalPool = Variable(torch.ones(normal_pool_shape, dtype=torch.float32))
normalPool.requires_grad = False
def __init__(self, opts, train_size=0):
    """Build the autoencoder graph, its losses and optimizers.

    Args:
        opts: option dict. Keys read here: 'dataset', 'e_noise', 'verbose',
            'zdim', 'w_aef' (optional), 'e_pretrain'.
        train_size: stored on the instance; not otherwise used here.
    """
    logging.error('Building the Tensorflow Graph')
    self.sess = tf.Session()
    self.opts = opts
    self.train_size = train_size

    # -- Some of the parameters for future use
    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    # -- Placeholders
    self.add_inputs_placeholders()
    self.add_training_placeholders()
    sample_size = tf.shape(self.sample_points)[0]  # batch_size

    # -- Transformation ops
    # Encode the content of sample_points placeholder
    res = encoder(opts,
                  inputs=self.sample_points,
                  is_training=self.is_training)

    # The structure of the encoder output depends on opts['e_noise'];
    # unpack it into the instance attributes accordingly.
    if opts['e_noise'] in ('deterministic', 'implicit', 'add_noise'):
        self.enc_mean, self.enc_sigmas = None, None
        if opts['e_noise'] == 'implicit':
            self.encoded, self.encoder_A = res
        else:
            self.encoded, _ = res
    elif opts['e_noise'] == 'gaussian':
        # Encoder outputs means and variances of Gaussian
        enc_mean, enc_sigmas = res[0]
        # Clipped to [-50, 50] before being exponentiated below.
        enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
        self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas
        if opts['verbose']:
            self.add_sigmas_debug()
        # Reparameterized sample: mean + eps * sqrt(exp(sigmas) + 1e-8).
        eps = tf.random_normal((sample_size, opts['zdim']),
                               0.,
                               1.,
                               dtype=tf.float32)
        self.encoded = self.enc_mean + tf.multiply(
            eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
        # self.encoded = self.enc_mean + tf.multiply(eps, tf.exp(self.enc_sigmas / 2.))

    # Decode the points encoded above (i.e. reconstruct)
    self.reconstructed, self.reconstructed_logits = decoder(
        opts, noise=self.encoded, is_training=self.is_training)

    # Decode the content of sample_noise
    self.decoded, self.decoded_logits = decoder(
        opts,
        reuse=True,
        noise=self.sample_noise,
        is_training=self.is_training)

    # -- Objectives, losses, penalties
    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)
    # NOTE(review): self.wae_lambda is not set in this method; presumably one
    # of the add_*_placeholders() calls above creates it - confirm.
    self.wae_objective = self.loss_reconstruct + self.wae_lambda * self.penalty

    # Extra costs if any
    if 'w_aef' in opts and opts['w_aef'] > 0:
        improved_wae.add_aefixedpoint_cost(opts, self)

    self.blurriness = self.compute_blurriness()

    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    self.add_least_gaussian2d_ops()

    # -- Optimizers, savers, etc
    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
def get_data_batch():
    """Yield batches from ``data_loader`` endlessly.

    Each batch is modified in place so that its last axis (index 4) becomes
    axis 2; the loader is restarted whenever it is exhausted.
    """
    while True:
        for batch in data_loader:
            # Axes (0, 1, 2, 3, 4) -> (0, 1, 4, 2, 3), done as two in-place
            # swaps.
            batch.transpose_(3, 4)
            batch.transpose_(2, 3)
            yield batch


data_generator = get_data_batch()

# Pick the network family for the dataset, then restore the trained weights.
if args.dataset == "mpi3d_real":
    Ec = vgg_64.encoder(args.g_dims, nc=args.num_channels).cuda()
    Ep = resnet_64.pose_encoder(args.z_dims, nc=args.num_channels).cuda()
    D = vgg_64.drnet_decoder(args.g_dims,
                             args.z_dims,
                             nc=args.num_channels).cuda()
else:
    Ec = models.content_encoder(args.g_dims, nc=args.num_channels).cuda()
    Ep = models.pose_encoder(args.z_dims, nc=args.num_channels).cuda()
    D = models.decoder(args.g_dims,
                       args.z_dims,
                       nc=args.num_channels,
                       skips=args.skips).cuda()

checkpoint = torch.load(args.checkpoint)
content_state = checkpoint["content_encoder"]
Ec.load_state_dict(content_state)
pose_state = checkpoint["position_encoder"]
Ep.load_state_dict(pose_state)
decoder_state = checkpoint["decoder"]
D.load_state_dict(decoder_state)

# Evaluation mode for all three networks.
Ec.eval()
Ep.eval()
D.eval()
def train(args):
    """Train the two-domain image translation model on several devices.

    Builds the graph (per-domain content/style encoders and decoders plus
    discriminators), the generator and discriminator losses and their Adam
    solvers, then alternates one generator update and one discriminator
    update per iteration. Gradients are all-reduced across devices before
    each update. The process with local rank 0 writes monitors and parameter
    snapshots under ``args.monitor_path``.

    Reads from ``args``: type_config, batch_size, image_size, maps,
    lambda_x, lambda_c, lambda_s, lr_g, lr_d, beta1, beta2, monitor_path,
    img_path_a, img_path_b, max_iter, weight_decay_rate, lr_decay_at_every,
    lr_decay_rate, model_save_interval.
    """
    # Create Communicator and Context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, type_config=args.type_config)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_local_rank = comm.local_rank
    device_id = mpi_local_rank
    ctx.device_id = str(device_id)
    nn.set_default_context(ctx)

    # Input
    b, c, h, w = args.batch_size, 3, args.image_size, args.image_size
    x_real_a = nn.Variable([b, c, h, w])
    x_real_b = nn.Variable([b, c, h, w])

    # Model
    # workaround for starting with the same model among devices.
    np.random.seed(412)
    maps = args.maps
    # within-domain reconstruction (domain A)
    x_content_a = content_encoder(x_real_a, maps, name="content-encoder-a")
    x_style_a = style_encoder(x_real_a, maps, name="style-encoder-a")
    x_recon_a = decoder(x_content_a, x_style_a, name="decoder-a")
    # within-domain reconstruction (domain B)
    x_content_b = content_encoder(x_real_b, maps, name="content-encoder-b")
    x_style_b = style_encoder(x_real_b, maps, name="style-encoder-b")
    x_recon_b = decoder(x_content_b, x_style_b, name="decoder-b")
    # generate over domains and reconstruction of content and style (domain A)
    z_style_a = F.randn(shape=x_style_a.shape)
    x_fake_a = decoder(x_content_b, z_style_a, name="decoder-a")
    x_content_rec_b = content_encoder(x_fake_a, maps, name="content-encoder-a")
    x_style_rec_a = style_encoder(x_fake_a, maps, name="style-encoder-a")
    # generate over domains and reconstruction of content and style (domain B)
    z_style_b = F.randn(shape=x_style_b.shape)
    x_fake_b = decoder(x_content_a, z_style_b, name="decoder-b")
    x_content_rec_a = content_encoder(x_fake_b, maps, name="content-encoder-b")
    x_style_rec_b = style_encoder(x_fake_b, maps, name="style-encoder-b")
    # discriminate (domains A and B)
    p_x_fake_a_list = discriminators(x_fake_a)
    p_x_real_a_list = discriminators(x_real_a)
    p_x_fake_b_list = discriminators(x_fake_b)
    p_x_real_b_list = discriminators(x_real_b)

    # Loss
    # within-domain reconstruction
    loss_recon_x_a = recon_loss(x_recon_a, x_real_a).apply(persistent=True)
    loss_recon_x_b = recon_loss(x_recon_b, x_real_b).apply(persistent=True)
    # content and style reconstruction
    loss_recon_x_style_a = recon_loss(x_style_rec_a,
                                      z_style_a).apply(persistent=True)
    loss_recon_x_content_b = recon_loss(x_content_rec_b,
                                        x_content_b).apply(persistent=True)
    loss_recon_x_style_b = recon_loss(x_style_rec_b,
                                      z_style_b).apply(persistent=True)
    loss_recon_x_content_a = recon_loss(x_content_rec_a,
                                        x_content_a).apply(persistent=True)

    # adversarial: sum the per-discriminator losses
    def f(x, y):
        return x + y

    loss_gen_a = reduce(f, [lsgan_loss(p_f)
                            for p_f in p_x_fake_a_list]).apply(persistent=True)
    loss_dis_a = reduce(f, [
        lsgan_loss(p_f, p_r)
        for p_f, p_r in zip(p_x_fake_a_list, p_x_real_a_list)
    ]).apply(persistent=True)
    loss_gen_b = reduce(f, [lsgan_loss(p_f)
                            for p_f in p_x_fake_b_list]).apply(persistent=True)
    loss_dis_b = reduce(f, [
        lsgan_loss(p_f, p_r)
        for p_f, p_r in zip(p_x_fake_b_list, p_x_real_b_list)
    ]).apply(persistent=True)
    # loss for generator-related models
    loss_gen = loss_gen_a + loss_gen_b \
        + args.lambda_x * (loss_recon_x_a + loss_recon_x_b) \
        + args.lambda_c * (loss_recon_x_content_a + loss_recon_x_content_b) \
        + args.lambda_s * (loss_recon_x_style_a + loss_recon_x_style_b)
    # loss for discriminators
    loss_dis = loss_dis_a + loss_dis_b

    # Solver
    lr_g, lr_d, beta1, beta2 = args.lr_g, args.lr_d, args.beta1, args.beta2
    # solver for generator-related models
    solver_gen = S.Adam(lr_g, beta1, beta2)
    with nn.parameter_scope("generator"):
        params_gen = nn.get_parameters()
        solver_gen.set_parameters(params_gen)
    # solver for discriminators
    solver_dis = S.Adam(lr_d, beta1, beta2)
    with nn.parameter_scope("discriminators"):
        params_dis = nn.get_parameters()
        solver_dis.set_parameters(params_dis)

    # Monitor
    monitor = Monitor(args.monitor_path)
    # time
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)

    def _series(title):
        # Every loss series shares the same monitor and reporting interval.
        return MonitorSeries(title, monitor, interval=10)

    monitor_losses = [
        # reconstruction
        (_series("Recon Loss Image A"), loss_recon_x_a),
        (_series("Recon Loss Content B"), loss_recon_x_content_b),
        (_series("Recon Loss Style A"), loss_recon_x_style_a),
        (_series("Recon Loss Image B"), loss_recon_x_b),
        (_series("Recon Loss Content A"), loss_recon_x_content_a),
        (_series("Recon Loss Style B"), loss_recon_x_style_b),
        # adversarial
        (_series("Gen Loss A"), loss_gen_a),
        (_series("Dis Loss A"), loss_dis_a),
        (_series("Gen Loss B"), loss_gen_b),
        (_series("Dis Loss B"), loss_dis_b),
    ]
    # image
    monitor_image_a = MonitorImage("Fake Image B to A Train",
                                   monitor,
                                   interval=1)
    monitor_image_b = MonitorImage("Fake Image A to B Train",
                                   monitor,
                                   interval=1)
    monitor_images = [
        (monitor_image_a, x_fake_a),
        (monitor_image_b, x_fake_b),
    ]

    # DataIterator: a distinct random stream per device and per domain
    rng_a = np.random.RandomState(device_id)
    rng_b = np.random.RandomState(device_id + n_devices)
    di_a = munit_data_iterator(args.img_path_a, args.batch_size, rng=rng_a)
    di_b = munit_data_iterator(args.img_path_b, args.batch_size, rng=rng_b)

    # Per-device schedule. The intervals are clamped to at least one
    # iteration so that an interval smaller than the device count cannot
    # cause a modulo by zero.
    n_iters = args.max_iter // n_devices
    lr_decay_interval = max(1, args.lr_decay_at_every // n_devices)
    save_interval = max(1, args.model_save_interval // n_devices)

    # Train
    for i in range(n_iters):
        ii = i * n_devices
        # Train generator-related models
        x_data_a, x_data_b = di_a.next()[0], di_b.next()[0]
        x_real_a.d, x_real_b.d = x_data_a, x_data_b
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        comm.all_reduce([p.grad for p in params_gen.values()])
        solver_gen.weight_decay(args.weight_decay_rate)
        solver_gen.update()

        # Train discriminators
        x_data_a, x_data_b = di_a.next()[0], di_b.next()[0]
        x_real_a.d, x_real_b.d = x_data_a, x_data_b
        # Gradients are switched off on the fake images for the
        # discriminator step and switched back on afterwards.
        x_fake_a.need_grad, x_fake_b.need_grad = False, False
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        comm.all_reduce([p.grad for p in params_dis.values()])
        solver_dis.weight_decay(args.weight_decay_rate)
        solver_dis.update()
        x_fake_a.need_grad, x_fake_b.need_grad = True, True

        # LR schedule
        if (i + 1) % lr_decay_interval == 0:
            lr_d = solver_dis.learning_rate() * args.lr_decay_rate
            lr_g = solver_gen.learning_rate() * args.lr_decay_rate
            solver_dis.set_learning_rate(lr_d)
            solver_gen.set_learning_rate(lr_g)

        if mpi_local_rank == 0:
            # Monitor
            monitor_time.add(ii)
            for mon, loss in monitor_losses:
                mon.add(ii, loss.d)
            # Save
            if (i + 1) % save_interval == 0:
                for mon, x in monitor_images:
                    mon.add(ii, x.d)
                nn.save_parameters(
                    os.path.join(args.monitor_path,
                                 "param_{:05d}.h5".format(i)))

    # Final report and snapshot. Skipped when no iteration ran: there is
    # nothing to report and the loop indices would be undefined.
    if mpi_local_rank == 0 and n_iters > 0:
        # Monitor
        for mon, loss in monitor_losses:
            mon.add(ii, loss.d)
        # Save
        for mon, x in monitor_images:
            mon.add(ii, x.d)
        nn.save_parameters(
            os.path.join(args.monitor_path, "param_{:05d}.h5".format(i)))
def __init__(self, opts, tag):
    """Open a TF session and assemble the training and evaluation graphs.

    The default graph is reset first, so anything previously built in it
    is discarded. After construction the instance exposes the input
    placeholders, the training tensors (``encoded``, ``reconstructed``,
    ``decoded``, the individual losses and ``objective``) and the
    per-class evaluation probabilities ``eval_probs``.

    Args:
        opts: Option mapping. Keys read directly here are 'dataset',
            'zdim', 'lambda', 'n_classes', 'sampling_size' and
            'e_pretrain'; the mapping is also forwarded to the encoder,
            decoder and classifier builders.
        tag: Stored unchanged on ``self.tag``.

    Raises:
        AssertionError: if ``opts['dataset']`` is not a key of
            ``datashapes``.
    """
    tf.reset_default_graph()
    logging.error('Building the Tensorflow Graph')
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=session_config)
    self.opts = opts

    dataset = opts['dataset']
    assert dataset in datashapes, 'Unknown dataset.'

    # ---- Placeholders ------------------------------------------------
    data_shape = datashapes[dataset]
    self.sample_points = tf.placeholder(
        tf.float32, [None] + data_shape, name='real_points_ph')
    self.labels = tf.placeholder(tf.int64, shape=[None], name='label_ph')
    latent_dim = opts['zdim']
    self.sample_noise = tf.placeholder(
        tf.float32, [None, latent_dim], name='noise_ph')
    self.lr_decay = tf.placeholder(tf.float32, name='rate_decay_ph')
    self.is_training = tf.placeholder(tf.bool, name='is_training_ph')
    # Externally supplied encoder statistics; they feed `encoded2` below.
    self.mean_ph = tf.placeholder(tf.float32, shape=[None, latent_dim])
    self.sigma_ph = tf.placeholder(tf.float32, shape=[None, latent_dim])

    # ---- Training graph ----------------------------------------------
    batch = tf.shape(self.sample_points)[0]
    self.enc_mean, unclipped_sigmas = encoder(
        opts,
        inputs=self.sample_points,
        is_training=self.is_training,
        y=self.labels)
    # Second encoder output is bounded to [-50, 50] before use
    # (presumably a log-scale quantity -- TODO confirm against encoder).
    self.enc_sigmas = tf.clip_by_value(unclipped_sigmas, -50, 50)

    self.encoded = self.get_encoded(opts, self.enc_mean, self.enc_sigmas)
    self.encoded2 = self.get_encoded(opts, self.mean_ph, self.sigma_ph)
    self.reconstructed = decoder(
        opts, noise=self.encoded, is_training=self.is_training)

    self.probs1 = classifier(opts, self.encoded)
    self.probs2 = classifier(opts, self.encoded2)
    predicted = tf.argmax(self.probs1, axis=1)
    hits = tf.cast(tf.equal(predicted, self.labels), tf.float32)
    # Number of examples in the batch whose argmax matches the label.
    self.correct_sum = tf.reduce_sum(hits)

    self.decoded = decoder(
        opts, noise=self.sample_noise, is_training=self.is_training)

    self.loss_cls = self.cls_loss(self.labels, self.probs1)
    self.loss_cls2 = self.cls_loss(self.labels, self.probs2)
    self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
    self.loss_recon = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)
    mmd_weight = opts['lambda']
    self.objective = (
        self.loss_recon + mmd_weight * self.loss_mmd + self.loss_cls)

    # ---- Evaluation graph --------------------------------------------
    # Every candidate class is scored with `n_draws` latent draws per
    # input; the scores are soft-maxed over classes into `eval_probs`.
    n_classes = opts['n_classes']
    n_draws = opts['sampling_size']
    # The 4-entry multiples list means sample_points must be rank 4 here.
    tiled_inputs = tf.tile(self.sample_points, [n_draws, 1, 1, 1])

    def class_bound(label):
        """Return a [batch, 1] score for the hypothesis y == label."""
        y = tf.fill((batch, ), label)
        mu, log_sig = encoder(
            opts, inputs=self.sample_points, is_training=False, y=y)
        mu = tf.tile(mu, [n_draws, 1])
        log_sig = tf.tile(log_sig, [n_draws, 1])
        y = tf.tile(y, [n_draws])
        z = self.get_encoded(opts, mu, log_sig)
        # Standard-normal reference sample with as many rows as z.
        z_sample = tf.random_normal(
            (tf.shape(z)[0], latent_dim),
            mean=0.,
            stddev=1.,
            dtype=tf.float32)
        mu_x = decoder(opts, noise=z, is_training=False)
        logit_y = classifier(opts, z)
        # Negative squared reconstruction error, summed per example.
        logp = -tf.reduce_sum((tiled_inputs - mu_x)**2, axis=[1, 2, 3])
        log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logit_y)
        mmd_loss = self.mmd_penalty(z_sample, z)
        bound = 0.5 * logp + log_pyz + mmd_weight * mmd_loss
        # Rows index draws, columns index examples; self.logsumexp then
        # combines the draws and log(n_draws) is subtracted afterwards.
        bound = tf.reshape(bound, [n_draws, batch])
        bound = self.logsumexp(bound) - tf.log(float(n_draws))
        return tf.expand_dims(bound, 1)

    per_class = [class_bound(label) for label in range(n_classes)]
    self.eval_probs = tf.nn.softmax(tf.concat(per_class, 1))

    # Pretraining loss is only built when requested.
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    self.add_optimizers()
    self.add_savers()
    self.tag = tag