def __init__(self, params, is_train, mode=None):
    self.is_train = is_train
    self.params = params
    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    if params.shared_embedding_softmax_weights:
        print("sharing embedding!!!")
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size, params.hidden_size)
        self.encoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_softmax_layer = self.embedding_softmax_layer
    else:
        print("not sharing embedding!!!")
        self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.source_vocab_size, params.hidden_size, "source_embedding")
        self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, "target_embedding")
        self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, 'soft_max')  # done

    self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
    self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
    self._initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
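A minimal smoke test for make_model, not part of the original snippet: it assumes the remaining Annotated Transformer definitions (EncoderDecoder, subsequent_mask, etc.) are already in scope alongside make_model.

import torch

tmp_model = make_model(src_vocab=11, tgt_vocab=11, N=2)
src = torch.randint(1, 11, (2, 10))             # (batch, src_len) token ids
tgt = torch.randint(1, 11, (2, 9))              # (batch, tgt_len) token ids
src_mask = torch.ones(2, 1, 10)                 # no source padding in this toy batch
tgt_mask = subsequent_mask(tgt.size(1))         # causal mask over target positions
out = tmp_model(src, tgt, src_mask, tgt_mask)   # (2, 9, d_model) decoder states
log_probs = tmp_model.generator(out)            # (2, 9, tgt_vocab) log-probabilities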
def train(train_loader, validate_data, device, gradient_clipping=1, hidden_state=10,
          lr=0.001, opt="adam", epochs=600, batch_size=32):
    model = EncoderDecoder(1, hidden_state, 1, 50).to(device)
    validate_data = validate_data.to(device)
    if opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    # the non-Adam branch is RMSprop, so name it accordingly (was mislabelled 'mse')
    optimizer_name = 'adam' if 'adam' in str(optimizer).lower() else 'rmsprop'
    mse = nn.MSELoss()
    min_loss = float("inf")
    best_loss_global = float("inf")
    min_in, min_out = None, None
    validation_losses = []
    for epoch in range(1, epochs):
        total_loss = 0
        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = mse(output, data)
            total_loss += loss.item()
            if loss.item() < min_loss:
                min_loss = loss.item()
                min_in, min_out = data, output
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=gradient_clipping)
            optimizer.step()
        epoch_loss = total_loss / len(train_loader)
        best_loss_global = min(best_loss_global, epoch_loss)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')
        if epoch % 50 == 0:
            file_name = f'ae_toy_{optimizer_name}_lr={lr}_hidden_size={hidden_state}_' \
                        f'_gradient_clipping={gradient_clipping}'
            path = os.path.join("saved_models", "toy_task", file_name)
            create_folders(path)
            torch.save(model, os.path.join(path, f'epoch={epoch}_bestloss={best_loss_global}.pt'))
        if epoch % 10 == 0:
            validation(model, mse, validate_data, validation_losses)
    plot_validation_loss(epochs, gradient_clipping, lr, optimizer_name,
                         validation_losses, batch_size, hidden_state)
def load_model(model_dir, en_emb_lookup_matrix, target_emb_lookup_matrix):
    save_dict = torch.load(os.path.join(os.path.dirname(cwd), model_dir))
    config = save_dict['config']
    print(' Model config: \n', config)
    model = EncoderDecoder(en_emb_lookup_matrix, target_emb_lookup_matrix,
                           config['h_size'], config['bidirectional'],
                           config['attention'], config['attention_type'],
                           config['decoder_cell_type']).to(device)
    mn.hidden_size = config['h_size']  # `mn` is presumably a module-level reference defined elsewhere
    model.encoder.device = device
    model.load_state_dict(save_dict['state_dict'])
    return model
def __init__(self, params, is_train, mode=None):
    self.is_train = is_train
    self.params = params
    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
    self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
    self._initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
def initialize_model(model_path=None, device=DEVICE):
    """
    Initializes a model and an optimizer.

    If a model_path is given, state_dicts for the EncoderDecoder model and the
    optimizer are loaded in. If a device is given, the model will be moved to
    that device.

    The model is always wrapped in `nn.DataParallel` for consistency when
    loading models across devices. This can, however, be a slowdown when
    running in single-device environments.

    Returns (model, optimizer)
    """
    model = nn.DataParallel(EncoderDecoder())
    optimizer = optim.Adadelta(model.parameters(), rho=RHO)
    if model_path is not None and os.path.isfile(model_path):
        print("Loading model from", model_path)
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    # use doubles since our input tensors use doubles
    model.double()
    model.to(device)
    return model, optimizer
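For completeness, a hedged counterpart that writes a checkpoint with the same keys initialize_model reads back; `save_checkpoint` is a hypothetical helper, not part of the original.

def save_checkpoint(model, optimizer, model_path):
    # hypothetical helper: emits the exact keys initialize_model expects
    torch.save({
        "model_state_dict": model.state_dict(),          # nn.DataParallel-wrapped weights
        "optimizer_state_dict": optimizer.state_dict(),  # Adadelta state (rho=RHO)
    }, model_path)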
def train(data, epochs=100, batchSize=32, learningRate=1e-3):
    data = torch.from_numpy(data).cuda()
    dataset = TensorDataset(data)
    dataLoader = DataLoader(dataset, batch_size=batchSize, shuffle=True)
    model = EncoderDecoder().cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=1e-5)
    for epoch in range(epochs):
        for data in dataLoader:
            data = data[0]
            output = model(data)
            loss = criterion(output, data)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch [%d/%d], loss:%.4f' % (epoch + 1, epochs, loss.item()))
        if epoch % 2 == 0:
            pic = output.cpu().data
            save_image(pic, 'outputs/%d.png' % epoch)
    torch.save(model.state_dict(), 'models/encoderDecoder.pth')
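A minimal inference sketch, not from the original: it assumes the same EncoderDecoder definition and the 'models/encoderDecoder.pth' file written by train() above; `batch` is a hypothetical input tensor.

# Sketch only: reload the state_dict saved at the end of train() and reconstruct a batch.
model = EncoderDecoder().cuda()
model.load_state_dict(torch.load('models/encoderDecoder.pth'))
model.eval()
with torch.no_grad():
    reconstruction = model(batch)  # `batch` is a tensor shaped like one training batch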
def __init__(self, params, is_train, mode=None):
    self.is_train = is_train
    self.params = params
    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    #with tf.device('/cpu:0'):
    #    self.dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_pl")
    #    self.params.layer_postprocess_dropout = self.dropout_pl
    #    self.params.attention_dropout = self.dropout_pl
    #    self.relu_dropout = self.dropout_pl

    if params.shared_embedding_softmax_weights:
        print("sharing embedding!!!")
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size, params.hidden_size)
        self.encoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_softmax_layer = self.embedding_softmax_layer
    else:
        print("not sharing embedding!!!")
        self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.source_vocab_size, params.hidden_size, "source_embedding")
        self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, "target_embedding")
        self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, 'soft_max')  # done

    self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
    self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
    self._initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
def main(args):
    num_frames = 15
    ms_per_frame = 40
    network = EncoderDecoder(args).cuda()
    optimizer = torch.optim.Adam(network.parameters(), lr=args.lr, betas=(0.9, 0.99))
    criterion = nn.MSELoss()
    train_loader, dev_loader, test_loader = fetch_kth_data(args)

    train_loss = []
    dev_loss = []
    for epoch in range(args.epochs):
        epoch_loss = 0
        batch_num = 0
        for item in train_loader:
            item = item['instance'].cuda()
            frames_processed = 0
            batch_loss = 0
            # fit a whole batch for all the different frame offsets (in milliseconds)
            for i in range(num_frames - 1):
                for j in range(i + 1, num_frames):
                    network.zero_grad()
                    frame_diff = j - i
                    time_delta = torch.tensor(frame_diff * ms_per_frame).float().repeat(args.batch_size).cuda()
                    time_delta.requires_grad = True
                    seq = item[:, :, i, :, :]
                    # optional downsample: seq = F.interpolate(seq, size=(64, 64))
                    seq.requires_grad = True
                    seq_targ = item[:, :, j, :, :]
                    # optional downsample: seq_targ = F.interpolate(seq_targ, size=(64, 64))
                    seq_targ.requires_grad = False
                    assert seq.requires_grad and time_delta.requires_grad, 'No Gradients'
                    outputs = network(seq, time_delta)
                    error = criterion(outputs, seq_targ)
                    error.backward()
                    optimizer.step()
                    batch_loss += error.cpu().item()
                    frames_processed += 1
                    if i == 0:
                        save_image(outputs, '/scratch/eecs-share/dinkinst/kth/img/train_output_{}_epoch_{}.png'.format(j, epoch))
            batch_num += 1
            epoch_loss += batch_loss
            print('Epoch {} Batch #{} Total Error {}'.format(epoch, batch_num, batch_loss))
        print('\nEpoch {} Total Loss {} Scaled Loss {}\n'.format(epoch, epoch_loss, epoch_loss / frames_processed))
        train_loss.append(epoch_loss)
        if epoch % 10 == 0:
            torch.save(network.state_dict(), KTH_PATH + '/model_new_{}.pth'.format(epoch))
            torch.save(optimizer.state_dict(), KTH_PATH + '/optim_new_{}.pth'.format(epoch))
        dev_loss.append(eval_model(network, dev_loader, epoch))
        network.train()

    plt.plot(range(args.epochs), train_loss)
    plt.grid()
    plt.savefig('/scratch/eecs-share/dinkinst/kth/img/loss_train.png', dpi=64)
    plt.close('all')
    plt.plot(range(args.epochs), dev_loss)
    plt.grid()
    plt.savefig('/scratch/eecs-share/dinkinst/kth/img/loss_dev.png', dpi=64)
    plt.close('all')
def main():
    torch.manual_seed(10)  # fix seed for reproducibility
    torch.cuda.manual_seed(10)

    train_data, train_source_text, train_target_text = create_data(
        os.path.join(train_data_dir, train_dataset), lang)
    #dev_data, dev_source_text, dev_target_text = create_data(os.path.join(eval_data_dir, 'newstest2012_2013'), lang)
    eval_data, eval_source_text, eval_target_text = create_data(
        os.path.join(dev_data_dir, eval_dataset), lang)

    en_emb_lookup_matrix = train_source_text.vocab.vectors.to(device)
    target_emb_lookup_matrix = train_target_text.vocab.vectors.to(device)

    global en_vocab_size
    global target_vocab_size
    en_vocab_size = train_source_text.vocab.vectors.size(0)
    target_vocab_size = train_target_text.vocab.vectors.size(0)

    if verbose:
        print('English vocab size: ', en_vocab_size)
        print(lang, 'vocab size: ', target_vocab_size)
        print_runtime_metric('Vocabs loaded')

    model = EncoderDecoder(en_emb_lookup_matrix, target_emb_lookup_matrix,
                           hidden_size, bidirectional, attention,
                           attention_type, decoder_cell_type).to(device)
    model.encoder.device = device

    # ignore_index=1 comes from the target_data generation in the data iterator
    criterion = nn.CrossEntropyLoss(ignore_index=1)
    #optimiser = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)  # This is the exact optimiser in the paper; rho=0.95
    optimiser = torch.optim.Adam(model.parameters(), lr=lr)

    best_loss = 10e+10  # dummy variable
    best_bleu = 0
    epoch = 1  # initial epoch id

    if resume:
        print('\n ---------> Resuming training <----------')
        checkpoint_path = os.path.join(save_dir, 'checkpoint.pth')
        checkpoint = torch.load(checkpoint_path)
        epoch = checkpoint['epoch']
        subepoch, num_subepochs = checkpoint['subepoch_num']
        model.load_state_dict(checkpoint['state_dict'])
        best_loss = checkpoint['best_loss']
        optimiser.load_state_dict(checkpoint['optimiser'])
        is_best = checkpoint['is_best']
        metric_store.load(os.path.join(save_dir, 'checkpoint_metrics.pickle'))
        if subepoch == num_subepochs:
            epoch += 1
            subepoch = 1
        else:
            subepoch += 1

    if verbose:
        print_runtime_metric('Model initialised')

    while epoch <= num_epochs:
        is_best = False  # best loss or not
        # Initialise the iterators
        train_iter = BatchIterator(train_data, batch_size, do_train=True, seed=epoch**2)
        num_subepochs = train_iter.num_batches // subepoch_size
        # Train sub-epochs from start_batch; this allows sub-epoch training resumption
        if not resume:
            subepoch = 1
        while subepoch <= num_subepochs:
            if verbose:
                print(' Running code on: ', device)
                print('------> Training epoch {}, sub-epoch {}/{} <------'.format(
                    epoch, subepoch, num_subepochs))
            mean_train_loss = train(model, criterion, optimiser, train_iter,
                                    train_source_text, train_target_text,
                                    subepoch, num_subepochs)
            if verbose:
                print_runtime_metric('Training sub-epoch complete')
                print('------> Evaluating sub-epoch {} <------'.format(subepoch))
            eval_iter = BatchIterator(eval_data, batch_size, do_train=False, seed=325632)
            # here should be the eval data
            mean_eval_loss, mean_eval_bleu, _, mean_eval_sent_bleu, _, _ = evaluate(
                model, criterion, eval_iter, eval_source_text.vocab,
                eval_target_text.vocab, train_source_text.vocab,
                train_target_text.vocab)
            if verbose:
                print_runtime_metric('Evaluating sub-epoch complete')

            if mean_eval_loss < best_loss:
                best_loss = mean_eval_loss
                is_best = True
            if mean_eval_bleu > best_bleu:
                best_bleu = mean_eval_bleu
                is_best = True

            config_dict = {
                'train_dataset': train_dataset,
                'b_size': batch_size,
                'h_size': hidden_size,
                'bidirectional': bidirectional,
                'attention': attention,
                'attention_type': attention_type,
                'decoder_cell_type': decoder_cell_type
            }

            # Save the model and the optimiser state for resumption (after each epoch)
            checkpoint = {
                'epoch': epoch,
                'subepoch_num': (subepoch, num_subepochs),
                'state_dict': model.state_dict(),
                'config': config_dict,
                'best_loss': best_loss,
                'best_BLEU': best_bleu,
                'optimiser': optimiser.state_dict(),
                'is_best': is_best
            }
            torch.save(checkpoint, os.path.join(save_dir, 'checkpoint.pth'))
            metric_store.log(mean_train_loss, mean_eval_loss)
            metric_store.save(os.path.join(save_dir, 'checkpoint_metrics.pickle'))
            if verbose:
                print('Checkpoint.')

            # Save the best model so far
            if is_best:
                save_dict = {
                    'state_dict': model.state_dict(),
                    'config': config_dict,
                    'epoch': epoch
                }
                torch.save(save_dict, os.path.join(save_dir, 'best_model.pth'))
                metric_store.save(os.path.join(save_dir, 'best_model_metrics.pickle'))

            if verbose:
                if is_best:
                    print('Best model saved!')
                print('Ep {} Sub-ep {}/{} Tr loss {} Eval loss {} Eval BLEU {} Eval sent BLEU {}'.format(
                    epoch, subepoch, num_subepochs, round(mean_train_loss, 3),
                    round(mean_eval_loss, 3), round(mean_eval_bleu, 4),
                    round(mean_eval_sent_bleu, 4)))
            subepoch += 1
        epoch += 1
def main():
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=1, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True, help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1000, help='# of iterations for optimizing z')
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='models/blending_gan.npz', help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path', default='models/unsupervised_blending_gan.npz', help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='', help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')
    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')
    args = parser.parse_args()

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Init CNN model
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU

    # Init image list
    if args.list_path:
        print('Load images from {} ...'.format(args.list_path))
        with open(args.list_path) as f:
            test_list = [line.strip().split(';') for line in f]
        print('\t {} images in total ...\n'.format(len(test_list)))
    else:
        test_list = [(args.src_image, args.dst_image, args.mask_image)]

    if not args.blended_image:
        # Init result folder
        if not os.path.isdir(args.result_folder):
            os.makedirs(args.result_folder)
        print('Result will save to {} ...\n'.format(args.result_folder))

    total_size = len(test_list)
    for idx in range(total_size):
        print('Processing {}/{} ...'.format(idx + 1, total_size))
        # load image
        obj = img_as_float(imread(test_list[idx][0]))
        bg = img_as_float(imread(test_list[idx][1]))
        mask = imread(test_list[idx][2]).astype(obj.dtype)

        blended_im = gp_gan(obj, bg, mask, G, args.image_size, args.gpu,
                            color_weight=args.color_weight, sigma=args.sigma,
                            gradient_kernel=args.gradient_kernel,
                            smooth_sigma=args.smooth_sigma,
                            supervised=args.supervised, nz=args.nz,
                            n_iteration=args.n_iteration)

        if args.blended_image:
            imsave(args.blended_image, blended_im)
        else:
            imsave('{}/obj_{}_bg_{}_mask_{}.png'.format(args.result_folder,
                                                        basename(test_list[idx][0]),
                                                        basename(test_list[idx][1]),
                                                        basename(test_list[idx][2])),
                   blended_im)
def on_batch_end(self, batch, logs={}):
    if self.iteration % self.increment == 0:
        output_img = self.model.predict(self.test_img)[0]
        fname = '%d.jpg' % self.iteration
        out_path = os.path.join(self.preview_dir_path, fname)
        imsave(out_path, output_img)
    self.iteration += 1


gen = create_gen(TRAIN_PATH, TARGET_SIZE, BATCH_SIZE)
num_samples = count_num_samples(TRAIN_PATH)
steps_per_epoch = num_samples // BATCH_SIZE

target_layer = 1
encoder_decoder = EncoderDecoder(target_layer=target_layer)
callbacks = [
    OutputPreview(encoder_decoder,
                  r'C:\Users\MillerV\Documents\Masters CS\Machine-Learning-Group\advancedML\lab2\monkey.jpg',
                  5000, './preview-%d' % target_layer)
]
encoder_decoder.model.fit_generator(gen,
                                    steps_per_epoch=steps_per_epoch,
                                    epochs=epochs,
                                    callbacks=callbacks)
encoder_decoder.export_decoder()
def train(train_loader, test_loader, gradient_clipping=1, hidden_state_size=10,
          lr=0.001, epochs=3000, is_prediction=False):
    model = EncoderDecoder(input_size=1, hidden_size=hidden_state_size, output_size=1, labels_num=1) if not is_prediction \
        else EncoderDecoder(input_size=1, hidden_size=hidden_state_size, output_size=1,
                            is_prediction=True, labels_num=1, is_snp=True)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_name = "mse"
    min_loss = float("inf")
    task_name = "classify" if is_prediction else "reconstruct"
    validation_losses = []
    tensorboard_writer = init_writer(lr, is_prediction, hidden_state_size, epochs, task_name)
    for epoch in range(1, epochs):
        total_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data_sequential = data.view(data.shape[0], data.shape[1], 1).to(device)
            target = target.to(device)
            optimizer.zero_grad()
            if is_prediction:
                reconstructed_batch, batch_preds = model(data_sequential)
                batch_preds = batch_preds.view(batch_preds.shape[0], batch_preds.shape[1])
                loss = model.loss(data_sequential, reconstructed_batch, target, batch_preds)
            else:
                reconstructed_batch = model(data_sequential)
                loss = model.loss(data_sequential, reconstructed_batch)
            total_loss += loss.item()
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=gradient_clipping)
            optimizer.step()
        epoch_loss = total_loss / len(train_loader)
        tensorboard_writer.add_scalar('train_loss', epoch_loss, epoch)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')
        validation_loss = validation(model, test_loader, validation_losses, device,
                                     is_prediction, tensorboard_writer, epoch)
        if epoch % 5 == 0 or validation_loss < min_loss:
            file_name = f"ae_s&p500_{loss_name}_lr={lr}_hidden_size={hidden_state_size}_epoch={epoch}_gradient_clipping={gradient_clipping}.pt"
            path = os.path.join(results_path, "saved_models", "s&p500_task", task_name, file_name)
            torch.save(model, path)
        min_loss = min(validation_loss, min_loss)
    plot_validation_loss(epochs, gradient_clipping, lr, loss_name, validation_losses,
                         hidden_state_size, task_name)
def main():
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    parser.add_argument('--nef', type=int, default=64, help='number of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='number of base filters in decoder')
    parser.add_argument('--nc', type=int, default=3, help='number of output channels in decoder')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='number of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='number of base filters in D')
    parser.add_argument('--lr_d', type=float, default=0.0002, help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g', type=float, default=0.002, help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1', type=float, default=0.5, help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight', type=float, default=0.99, help='Weight for l2 loss, default=0.99')
    parser.add_argument('--train_steps', type=int, default=58000, help='Max amount of training cycles')
    parser.add_argument('--batch_size', type=int, default=64, help='Input batch size')
    parser.add_argument('--data_root', default='DataBase/TransientAttributes/cropped_images', help='Path to dataset')
    parser.add_argument('--train_data_root', default='DataBase/TransientAttributes/train.tfrecords', help='Path to train dataset')
    parser.add_argument('--val_data_root', default='DataBase/TransientAttributes/val.tfrecords', help='Path to val dataset')
    parser.add_argument('--image_size', type=int, default=64, help="The height / width of the network's input image")
    parser.add_argument('--d_iters', type=int, default=5, help='# of discriminator iters per each generator iter')
    parser.add_argument('--clamp_lower', type=float, default=-0.01, help='Lower bound for weight clipping')
    parser.add_argument('--clamp_upper', type=float, default=0.01, help='Upper bound for weight clipping')
    parser.add_argument('--experiment', default='blending_gan', help='Where to store samples and models')
    parser.add_argument('--save_folder', default='GP-GAN_training', help='location to save')
    parser.add_argument('--tboard_save_dir', default='tensorboard', help='location to save tboard records')
    parser.add_argument('--val_freq', type=int, default=500, help='frequency of validation')
    parser.add_argument('--snapshot_interval', type=int, default=500, help='Interval of snapshot (steps)')
    parser.add_argument('--weights_path', type=str, default=None, help='path to checkpoint')
    args = parser.parse_args()

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up generator & discriminator
    print('Create & Init models ...')
    print('\tInit Generator network ...')
    generator = EncoderDecoder(encoder_filters=args.nef,
                               encoded_dims=args.nBottleneck,
                               output_channels=args.nc,
                               decoder_filters=args.ngf,
                               is_training=True,
                               image_size=args.image_size,
                               skip=False,
                               scope_name='generator')  #, conv_init=init_conv,
    generator_val = EncoderDecoder(encoder_filters=args.nef,
                                   encoded_dims=args.nBottleneck,
                                   output_channels=args.nc,
                                   decoder_filters=args.ngf,
                                   is_training=False,
                                   image_size=args.image_size,
                                   skip=False,
                                   scope_name='generator')
    print('\tInit Discriminator network ...')
    discriminator = DCGAN_D(image_size=args.image_size,
                            encoded_dims=1,
                            filters=args.ndf,
                            is_training=True,
                            scope_name='discriminator')  #, conv_init=init_conv, bn_init=init_bn)  # D
    discriminator_val = DCGAN_D(image_size=args.image_size,
                                encoded_dims=1,
                                filters=args.ndf,
                                is_training=False,
                                scope_name='discriminator')

    # Set up training graph
    with tf.device('/gpu:0'):
        train_dataset = DataFeeder(tfrecords_path=args.train_data_root, dataset_flag='train')
        composed_image, real_image = train_dataset.inputs(batch_size=args.batch_size, name='train_dataset')
        shape = composed_image.get_shape().as_list()
        composed_image.set_shape([shape[0], args.image_size, args.image_size, shape[3]])
        real_image.set_shape([shape[0], args.image_size, args.image_size, shape[3]])

        validation_dataset = DataFeeder(tfrecords_path=args.val_data_root, dataset_flag='val')
        composed_image_val, real_image_val = validation_dataset.inputs(batch_size=args.batch_size, name='val_dataset')
        composed_image_val.set_shape([shape[0], args.image_size, args.image_size, shape[3]])
        real_image_val.set_shape([shape[0], args.image_size, args.image_size, shape[3]])

        # Compute losses:
        # Train tensors
        fake = generator(composed_image)
        prob_disc_real = discriminator.encode(real_image)
        prob_disc_fake = discriminator.encode(fake)

        # Validation tensors
        # NOTE: these are built on the training inputs as in the original code;
        # composed_image_val / real_image_val appear to be the intended feeds.
        fake_val = generator_val(composed_image)
        prob_disc_real_val = discriminator_val.encode(real_image)
        prob_disc_fake_val = discriminator_val.encode(fake)

        # Calculate losses
        gen_loss, l2_comp, disc_comp, fake_image_train = l2_generator_loss(
            fake=fake, target=real_image, prob_disc_fake=prob_disc_fake, l2_weight=args.l2_weight)
        disc_loss = discriminator_loss(prob_disc_real=prob_disc_real, prob_disc_fake=prob_disc_fake)
        gen_loss_val, _, _, fake_image_val = l2_generator_loss(
            fake=fake_val, target=real_image, prob_disc_fake=prob_disc_fake_val, l2_weight=args.l2_weight)
        disc_loss_val = discriminator_loss(prob_disc_real=prob_disc_real_val, prob_disc_fake=prob_disc_fake_val)

        # Set optimizers
        global_step = tf.Variable(0, name='global_step', trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            discriminator_variables = [v for v in tf.trainable_variables() if v.name.startswith("discriminator")]
            generator_variables = [v for v in tf.trainable_variables() if v.name.startswith("generator")]
            optimizer_gen = tf.train.AdamOptimizer(learning_rate=args.lr_g, beta1=args.beta1).minimize(
                loss=gen_loss, global_step=global_step, var_list=generator_variables)
            optimizer_disc = tf.train.AdamOptimizer(learning_rate=args.lr_d, beta1=args.beta1).minimize(
                loss=disc_loss, global_step=global_step, var_list=discriminator_variables)

        with tf.name_scope("clip_weights"):
            clip_discriminator_var_op = [
                var.assign(tf.clip_by_value(var, args.clamp_lower, args.clamp_upper))
                for var in discriminator_variables
            ]

    # Set summaries for Tensorboard
    model_save_dir = os.path.join(args.save_folder, args.experiment)
    tboard_save_dir = os.path.join(model_save_dir, args.tboard_save_dir)
    os.makedirs(tboard_save_dir, exist_ok=True)
    sum_gen_train = tf.summary.scalar(name='train_gen_loss', tensor=gen_loss)
    sum_gen_disc_comp = tf.summary.scalar(name='train_gen_disc_component', tensor=disc_comp)
    sum_gen_l2_comp = tf.summary.scalar(name='train_gen_l2_component', tensor=l2_comp)
    sum_gen_val = tf.summary.scalar(name='val_gen_loss', tensor=gen_loss_val, collections='')
    sum_disc_train = tf.summary.scalar(name='train_disc_loss', tensor=disc_loss)
    sum_disc_val = tf.summary.scalar(name='val_disc_loss', tensor=disc_loss_val)
    sum_fake_image_train = tf.summary.image(name='train_image_generated', tensor=fake_image_train)
    sum_fake_image_val = tf.summary.image(name='val_image_generated', tensor=fake_image_val)
    sum_disc_real = tf.summary.scalar(name='train_disc_value_real', tensor=tf.reduce_mean(prob_disc_real))
    sum_disc_fake = tf.summary.scalar(name='train_disc_value_fake', tensor=tf.reduce_mean(prob_disc_fake))
    sum_composed = tf.summary.image(name='composed', tensor=composed_image)
    sum_real = tf.summary.image(name='real', tensor=real_image)
    train_merge = tf.summary.merge([
        sum_gen_train, sum_fake_image_train, sum_disc_train, sum_composed, sum_real,
        sum_gen_disc_comp, sum_gen_l2_comp, sum_disc_real, sum_disc_fake
    ])

    # Set saver configuration
    loader = tf.train.Saver()
    saver = tf.train.Saver()
    os.makedirs(model_save_dir, exist_ok=True)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'GP-GAN_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = os.path.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=sess_config)

    # Write graph to tensorboard
    summary_writer = tf.summary.FileWriter(tboard_save_dir)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    with sess.as_default():
        step = 0
        cycle = 0
        if args.weights_path is None:
            print('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            print('Restore model from {:s}'.format(args.weights_path))
            loader.restore(sess=sess, save_path=args.weights_path)
            step_cycle = args.weights_path.split('ckpt-')[-1]
            step, cycle = decode_step_cycle(step_cycle)

        # initialised so the first discriminator log line doesn't raise NameError
        gen_train_loss = float('nan')
        while cycle <= args.train_steps:
            # (1) Update discriminator network
            # train the discriminator Diters times
            if cycle < 25 or cycle % 500 == 0:
                Diters = 100
            else:
                Diters = args.d_iters
            for _ in range(Diters):
                # enforce Lipschitz constraint
                sess.run(clip_discriminator_var_op)
                _, disc_train_loss = sess.run([optimizer_disc, disc_loss])
                print('Step: ' + str(step) + ' Cycle: ' + str(cycle) +
                      ' Train discriminator loss: ' + str(disc_train_loss) +
                      ' Train generator loss: ' + str(gen_train_loss))
                step += 1

            # (2) Update generator network
            _, gen_train_loss, train_merge_value = sess.run([optimizer_gen, gen_loss, train_merge])
            summary_writer.add_summary(summary=train_merge_value, global_step=cycle)

            if cycle != 0 and cycle % args.val_freq == 0:
                _, disc_val_loss, gen_val_value, fake_image_val_value = sess.run(
                    [optimizer_disc, gen_loss_val, sum_gen_val, sum_fake_image_val])
                _, gen_val_loss, disc_val_value = sess.run([optimizer_gen, disc_loss_val, sum_disc_val])
                print('Step: ' + str(step) + ' Cycle: ' + str(cycle) +
                      ' Val discriminator loss: ' + str(disc_val_loss) +
                      ' Val generator loss: ' + str(gen_val_loss))
                summary_writer.add_summary(summary=gen_val_value, global_step=cycle)
                summary_writer.add_summary(summary=disc_val_value, global_step=cycle)
                summary_writer.add_summary(summary=fake_image_val_value, global_step=cycle)

            if cycle != 0 and cycle % args.snapshot_interval == 0:
                saver.save(sess=sess, save_path=model_save_path,
                           global_step=encode_step_cycle(step, cycle))
            cycle += 1
def main():
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=1, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
    parser.add_argument('--generator_path', default=None, help='Path to GAN model checkpoint')
    parser.add_argument('--list_path', default='', help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')
    parser.add_argument('--src_image', default='DataBase/test_images/src.jpg', help='Path for source image')
    parser.add_argument('--dst_image', default='DataBase/test_images/dst.jpg', help='Path for destination image')
    parser.add_argument('--mask_image', default='DataBase/test_images/mask.png', help='Path for mask image')
    parser.add_argument('--blended_image', default='DataBase/test_images/result2.jpg', help='Where to save blended image')
    args = parser.parse_args()

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Init CNN model
    generator = EncoderDecoder(encoder_filters=args.nef,
                               encoded_dims=args.nBottleneck,
                               output_channels=args.nc,
                               decoder_filters=args.ngf,
                               is_training=False,
                               image_size=args.image_size,
                               scope_name='generator')
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, args.image_size, args.image_size, args.nc],
                               name='input')
    gan_im_tens = generator(inputdata)

    loader = tf.train.Saver(tf.all_variables())
    sess = tf.Session()
    with sess.as_default():
        loader.restore(sess=sess, save_path=args.generator_path)

    # Init image list
    if args.list_path:
        print('Load images from {} ...'.format(args.list_path))
        with open(args.list_path) as f:
            test_list = [line.strip().split(';') for line in f]
        print('\t {} images in total ...\n'.format(len(test_list)))
    else:
        test_list = [(args.src_image, args.dst_image, args.mask_image)]

    if not args.blended_image:
        # Init result folder
        if not os.path.isdir(args.result_folder):
            os.makedirs(args.result_folder)
        print('Result will save to {} ...\n'.format(args.result_folder))

    total_size = len(test_list)
    for idx in range(total_size):
        print('Processing {}/{} ...'.format(idx + 1, total_size))
        # load image
        obj = cv2.cvtColor(cv2.imread(test_list[idx][0], 1), cv2.COLOR_BGR2RGB) / 255
        bg = cv2.cvtColor(cv2.imread(test_list[idx][1], 1), cv2.COLOR_BGR2RGB) / 255
        mask = cv2.imread(test_list[idx][2], 0).astype(obj.dtype)

        blended_im = gp_gan(obj, bg, mask, gan_im_tens, inputdata, sess,
                            args.image_size, color_weight=args.color_weight,
                            sigma=args.sigma, gradient_kernel=args.gradient_kernel,
                            smooth_sigma=args.smooth_sigma)

        if args.blended_image:
            cv2.imwrite(args.blended_image, cv2.cvtColor(blended_im, cv2.COLOR_RGB2BGR))
        else:
            cv2.imwrite('{}/obj_{}_bg_{}_mask_{}.png'.format(args.result_folder,
                                                             basename(test_list[idx][0]),
                                                             basename(test_list[idx][1]),
                                                             basename(test_list[idx][2])),
                        blended_im)
from keras.preprocessing import image
import numpy as np
from scipy.misc import imsave
from model import EncoderDecoder
import sys

DECODER_PATH = sys.argv[1]  # e.g. './decoder_3.h5'
INPUT_IMG_PATH = '../doge-256.jpg'
OUTPUT_IMG_PATH = './doge-decoded.jpg'

encoder_decoder = EncoderDecoder(decoder_path=DECODER_PATH,
                                 target_layer=int(sys.argv[2]))

input_img = image.load_img(INPUT_IMG_PATH)
input_img = image.img_to_array(input_img)
input_img = np.expand_dims(input_img, axis=0)

output_img = encoder_decoder.model.predict([input_img])[0]
imsave(OUTPUT_IMG_PATH, output_img)
def train(config):
    # --------- configurations --------- #
    batch_size = config['batch_size']
    save_path = config['save']  # path to store model
    saved_model = config['load']  # None or path
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    # ---------------------------------- #
    write_config(save_path + '/config.txt', config)

    # random seed
    random.seed(config['random_seed'])
    # np.random.seed(config['random_seed'])

    batches, vocab_size, src_word2id, tgt_word2id = construct_training_data_batches(config)
    tgt_id2word = list(tgt_word2id.keys())

    params = {
        'vocab_src_size': vocab_size['src'],
        'vocab_tgt_size': vocab_size['tgt'],
        'go_id': tgt_word2id['<go>'],
        'eos_id': tgt_word2id['</s>']
    }

    model = EncoderDecoder(config, params)
    model.build_network()

    learning_rate = config['learning_rate']
    decay_rate = config['decay_rate']

    tf_variables = tf.trainable_variables()
    for i in range(len(tf_variables)):
        print(tf_variables[i])

    # save & restore model
    saver = tf.train.Saver(max_to_keep=1)

    if config['use_gpu']:
        if 'X_SGE_CUDA_DEVICE' in os.environ:
            print('running on the stack...')
            cuda_device = os.environ['X_SGE_CUDA_DEVICE']
            print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
            os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
        else:
            # development only e.g. air202
            print('running locally...')
            os.environ['CUDA_VISIBLE_DEVICES'] = '1'  # choose the device (GPU) here
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True  # Whether the GPU memory usage can grow dynamically.
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.95  # The fraction of GPU memory that the process can use.
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        sess_config = tf.ConfigProto()

    with tf.Session(config=sess_config) as sess:
        if saved_model is None:
            sess.run(tf.global_variables_initializer())
            # ------------ load pre-trained embeddings ------------ #
            if config['load_embedding_src'] is not None:
                src_embedding = sess.run(model.src_word_embeddings)
                src_embedding_matrix = load_pretrained_embedding(
                    src_word2id, src_embedding, config['load_embedding_src'])
                sess.run(model.src_word_embeddings.assign(src_embedding_matrix))
            if config['load_embedding_tgt'] is not None:
                if config['load_embedding_tgt'] == config['load_embedding_src']:
                    sess.run(model.tgt_word_embeddings.assign(src_embedding_matrix))
                else:
                    tgt_embedding = sess.run(model.tgt_word_embeddings)
                    tgt_embedding_matrix = load_pretrained_embedding(
                        tgt_word2id, tgt_embedding, config['load_embedding_tgt'])
                    sess.run(model.tgt_word_embeddings.assign(tgt_embedding_matrix))
            # ----------------------------------------------------- #
        else:
            new_saver = tf.train.import_meta_graph(saved_model + '.meta')
            new_saver.restore(sess, saved_model)
            print('loaded model...', saved_model)

        # ------------ TensorBoard ------------ #
        # summary_writer = tf.summary.FileWriter(save_path + '/tfboard/', graph_def=sess.graph_def)
        # ------------------------------------- #

        # ------------ To print out some output -------------------- #
        my_sentences = [
            'this is test . </s>',
            'this is confirm my reservation at hotel . </s>',
            'playing tennis good for you . </s>',
            'when talking about successful longterm business relationships customer services are important element </s>'
        ]
        my_sent_ids = []
        for my_sentence in my_sentences:
            ids = []
            for word in my_sentence.split():
                if word in src_word2id:
                    ids.append(src_word2id[word])
                else:
                    ids.append(src_word2id['<unk>'])
            my_sent_ids.append(ids)
        my_sent_len = [len(my_sent) for my_sent in my_sent_ids]
        my_sent_ids = [
            ids + [src_word2id['</s>']] * (config['max_sentence_length'] - len(ids))
            for ids in my_sent_ids
        ]
        infer_dict = {
            model.src_word_ids: my_sent_ids,
            model.src_sentence_lengths: my_sent_len,
            model.dropout: 0.0,
            model.learning_rate: learning_rate
        }
        # ---------------------------------------------------------- #

        num_epochs = config['num_epochs']
        for epoch in range(num_epochs):
            print("num_batches = ", len(batches))
            random.shuffle(batches)
            epoch_loss = 0
            for i, batch in enumerate(batches):
                feed_dict = {
                    model.src_word_ids: batch['src_word_ids'],
                    model.tgt_word_ids: batch['tgt_word_ids'],
                    model.src_sentence_lengths: batch['src_sentence_lengths'],
                    model.tgt_sentence_lengths: batch['tgt_sentence_lengths'],
                    model.dropout: config['dropout'],
                    model.learning_rate: learning_rate
                }
                [_, loss] = sess.run([model.train_op, model.train_loss], feed_dict=feed_dict)
                epoch_loss += loss

                if i % 100 == 0:
                    # to print out training status
                    # if config['decoding_method'] != 'beamsearch':
                    #     [train_loss, infer_loss] = sess.run([model.train_loss, model.infer_loss], feed_dict=feed_dict)
                    #     print("batch: {} --- train_loss: {:.5f} | inf_loss: {:.5f}".format(i, train_loss, infer_loss))
                    # else:  # --- beam search --- #
                    #     [train_loss] = sess.run([model.train_loss], feed_dict=feed_dict)
                    #     print("BEAMSEARCH - batch: {} --- train_loss: {:.5f}".format(i, train_loss))
                    print("batch: {} --- avg train loss: {:.5f}".format(i, epoch_loss / (i + 1)))
                    sys.stdout.flush()

                if i % 500 == 0:
                    [my_translations] = sess.run([model.translations], feed_dict=infer_dict)
                    for my_sent in my_translations:
                        my_words = [tgt_id2word[id] for id in my_sent]
                        print(' '.join(my_words))

            model.increment_counter()
            learning_rate *= decay_rate

            print("---------------------------------------------------")
            print("epoch {} done".format(epoch + 1))
            print("total training loss = {}".format(epoch_loss))
            print("---------------------------------------------------")

            if math.isnan(epoch_loss):
                print("stop training - loss/gradient exploded")
                break

            saver.save(sess, save_path + '/model', global_step=epoch)
if __name__ == '__main__':
    preparation()

    train_data_loader = non_pair_data_loader(args.batch_size)
    train_data_loader.create_batches(args.train_file_list, args.train_label_list, if_shuffle=True)

    # create models
    ae_model = get_cuda(EncoderDecoder(
        vocab_size=args.vocab_size,
        embedding_size=args.embedding_size,
        hidden_size=args.hidden_size,
        num_layers=args.num_layers_AE,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        sos_idx=args.id_bos,
        eos_idx=args.id_eos,
        pad_idx=args.id_pad,
        unk_idx=args.id_unk,
        max_sequence_length=args.max_sequence_length,
        rnn_type=args.rnn_type,
        bidirectional=True,
    ))

    train_iters(ae_model, train_data_loader)
    print("Done!")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-data', help="Path to ar2en dataset.", default='./ar2en_dataset')
    parser.add_argument('-embeddings_size', type=int, default=300)
    parser.add_argument('-layers', type=int, default=2)
    parser.add_argument('-hidden_sizes', type=int, default=300)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-epochs', type=int, default=20)
    parser.add_argument('-optimizer', choices=['sgd', 'adam'], default='adam')
    parser.add_argument('-learning_rate', type=float, default=0.001)
    parser.add_argument('-l2_decay', type=float, default=0.0)
    parser.add_argument('-batch_size', type=int, default=64)
    parser.add_argument('-cuda', action='store_true',
                        help='Whether or not to use cuda for parallelization (if devices available)')
    parser.add_argument('-name', type=str, required=False, default=None,
                        help="Filename for the plot")
    parser.add_argument('-quiet', action='store_true', help='No execution output.')
    parser.add_argument('-tqdm', action='store_true',
                        help='Whether or not to use TQDM progress bar in training.')
    parser.add_argument('-display_vocabularies', action="store_true",
                        help="Only display the vocabularies (no further execution).")
    parser.add_argument('-reverse_source_string', action="store_true",
                        help="Whether or not to reverse the source arabic string.")
    parser.add_argument('-bidirectional', action="store_true",
                        help="Whether or not to use a bidirectional encoder LSTM.")
    parser.add_argument('-attention', type=str, choices=["dot", "general"],
                        required=False, default=None,
                        help="Attention mechanism in the decoder.")
    opt = parser.parse_args()

    # ############# #
    # 1 - Load Data #
    # ############# #
    dataset = Ar2EnDataset(opt.data, opt.reverse_source_string)
    if opt.display_vocabularies:
        sys.exit(0)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)
    X_dev, y_dev = dataset.X_dev, dataset.y_dev
    X_test, y_test = dataset.X_test, dataset.y_test

    # ################ #
    # 2 - Create Model #
    # ################ #
    device = torch.device("cuda:0" if torch.cuda.is_available() and opt.cuda else "cpu")
    if not opt.quiet:
        print(f"Using device '{device}'", flush=True)
    model = EncoderDecoder(dataset.n_inputs, dataset.n_outputs, opt.embeddings_size,
                           opt.attention, opt.bidirectional, opt.hidden_sizes,
                           opt.layers, opt.dropout, dataset.arabic_vocabulary,
                           dataset.english_vocabulary, device)

    # ############# #
    # 3 - Optimizer #
    # ############# #
    optimizer = {
        "adam": torch.optim.Adam,
        "sgd": torch.optim.SGD
    }[opt.optimizer](model.parameters(), lr=opt.learning_rate, weight_decay=opt.l2_decay)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.english_vocabulary["$PAD"])

    # ###################### #
    # 4 - Train and Evaluate #
    # ###################### #
    epochs = torch.arange(1, opt.epochs + 1)
    train_mean_losses = []
    val_word_acc = []
    val_char_acc = []
    train_losses = []
    for epoch in epochs:
        if not opt.quiet:
            print('\nTraining epoch {}'.format(epoch), flush=True)
        if opt.tqdm:
            from tqdm import tqdm
            dataloader = tqdm(dataloader)
        for X_batch, y_batch in dataloader:
            loss = train_batch(X_batch, y_batch, model, optimizer, criterion)
            train_losses.append(loss)

        mean_loss = torch.tensor(train_losses).mean().item()
        word_acc, char_acc = evaluate(model, X_dev, y_dev)
        train_mean_losses.append(mean_loss)
        val_word_acc.append(word_acc)
        val_char_acc.append(char_acc)
        if not opt.quiet:
            print('Training loss: %.4f' % mean_loss, flush=True)
            print('Valid word acc: %.4f' % val_word_acc[-1], flush=True)
            print('Valid char acc: %.4f' % val_char_acc[-1], flush=True)

    final_test_accuracy_words, final_test_accuracy_chars = evaluate(model, X_test, y_test)
    if not opt.quiet:
        print('\nFinal Test Word Acc: %.4f' % final_test_accuracy_words, flush=True)
        print('Final Test Char Acc: %.4f' % final_test_accuracy_chars, flush=True)

    # ######## #
    # 5 - Plot #
    # ######## #
    name = opt.name if opt.name is not None else "encoder_decoder"
    plot(epochs, train_mean_losses, ylabel='Loss', name=name + "_loss", title="Training Loss")
    plot(epochs, val_word_acc, ylabel='Word Val Acc', name=name + "_acc",
         title=f"Word Validation Accuracy\n(Final Word Test Accuracy: {round(final_test_accuracy_words, 3)})")
    return final_test_accuracy_words
use_task = False
task_dim = 15
train_split = [i for i in range(400)]
# train_split = [0]
# num_epochs = 15000

dataset = JIGSAWSegmentsDataset(dataset_path, dataset_tasks)
dataloader = JIGSAWSegmentsDataloader(batch_size, input_length, output_length,
                                      dataset, scale=scale)

model = EncoderDecoder(src_vocab, tgt_vocab, N=num_layers, input_size=feature_dim,
                       hidden_layer=hidden_layer, h=num_heads, dropout=dropout,
                       task_dim=task_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=betas, eps=eps)

running_loss_plot = train_epochs(dataloader, train_split, model, loss_function,
                                 optimizer, n_epochs=num_epochs, use_gpu=use_gpu,
                                 use_task=use_task)
def main():
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--lr_d', type=float, default=0.0002, help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g', type=float, default=0.002, help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1', type=float, default=0.5, help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight', type=float, default=0.999, help='Weight for l2 loss, default=0.999')
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--n_epoch', type=int, default=25, help='# of epochs to train for')
    parser.add_argument('--data_root', help='Path to dataset')
    parser.add_argument('--load_size', type=int, default=64, help='Scale image to load_size')
    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    parser.add_argument('--ratio', type=float, default=0.5, help='Ratio for center square size v.s. image_size')
    parser.add_argument('--val_ratio', type=float, default=0.05, help='Ratio for validation set v.s. data set')
    parser.add_argument('--d_iters', type=int, default=5, help='# of D iters per each G iter')
    parser.add_argument('--clamp_lower', type=float, default=-0.01, help='Lower bound for clipping')
    parser.add_argument('--clamp_upper', type=float, default=0.01, help='Upper bound for clipping')
    parser.add_argument('--experiment', default='encoder_decoder_blending_result', help='Where to store samples and models')
    parser.add_argument('--test_folder', default='samples', help='Where to store test results')
    parser.add_argument('--workers', type=int, default=10, help='# of data loading workers')
    parser.add_argument('--batch_size', type=int, default=64, help='Input batch size')
    parser.add_argument('--test_size', type=int, default=64, help='Batch size for testing')
    parser.add_argument('--train_samples', type=int, default=150000, help='# of training examples')
    parser.add_argument('--test_samples', type=int, default=256, help='# of testing examples')
    parser.add_argument('--manual_seed', type=int, default=5, help='Manual seed')
    parser.add_argument('--resume', default='', help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval', type=int, default=1, help='Interval of snapshot (epochs)')
    parser.add_argument('--print_interval', type=int, default=1, help='Interval of printing log to console (iteration)')
    parser.add_argument('--plot_interval', type=int, default=10, help='Interval of plot (iteration)')
    args = parser.parse_args()

    random.seed(args.manual_seed)

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up G & D
    print('Create & Init models ...')
    print('\tInit G network ...')
    G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck,
                       image_size=args.image_size, conv_init=init_conv, bn_init=init_bn)
    print('\tInit D network ...')
    D = DCGAN_D(args.image_size, args.ndf, conv_init=init_conv, bn_init=init_bn)
    if args.gpu >= 0:
        print('\tCopy models to gpu {} ...'.format(args.gpu))
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
        D.to_gpu()
    print('Init models done ...\n')

    # Setup an optimizer
    optimizer_d = make_optimizer(D, args.lr_d, args.beta1)
    optimizer_g = make_optimizer(G, args.lr_g, args.beta1)

    ############################################################################
    # Setup dataset & iterator
    print('Load images from {} ...'.format(args.data_root))
    folders = sorted([folder for folder in os.listdir(args.data_root)
                      if os.path.isdir(os.path.join(args.data_root, folder))])
    val_end = int(args.val_ratio * len(folders))
    print('\t{} folders in total, {} val folders ...'.format(len(folders), val_end))
    trainset = BlendingDataset(args.train_samples, folders[val_end:], args.data_root,
                               args.ratio, args.load_size, args.image_size)
    valset = BlendingDataset(args.test_samples, folders[:val_end], args.data_root,
                             args.ratio, args.load_size, args.image_size)
    print('\tTrainset contains {} image files'.format(len(trainset)))
    print('\tValset contains {} image files'.format(len(valset)))
    print('')

    train_iter = chainer.iterators.MultiprocessIterator(trainset, args.batch_size,
                                                        n_processes=args.workers,
                                                        n_prefetch=args.workers)

    ############################################################################
    # Set up a trainer
    updater = EncoderDecoderBlendingUpdater(models=(G, D), args=args,
                                            iterator=train_iter,
                                            optimizer={'main': optimizer_g, 'D': optimizer_d},
                                            device=args.gpu)
    trainer = training.Trainer(updater, (args.n_epoch, 'epoch'), out=args.experiment)

    # Snapshot
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(G, 'g_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(D, 'd_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    # Display
    print_interval = (args.print_interval, 'iteration')
    trainer.extend(extensions.LogReport(trigger=print_interval))
    trainer.extend(extensions.PrintReport(['iteration', 'main/loss', 'D/loss', 'main/l2_loss']),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=args.print_interval))
    trainer.extend(extensions.dump_graph('D/loss', out_name='TrainGraph.dot'))

    # Plot
    plot_interval = (args.plot_interval, 'iteration')
    trainer.extend(extensions.PlotReport(['main/loss'], 'iteration',
                                         file_name='loss.png', trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['D/loss'], 'iteration',
                                         file_name='d_loss.png', trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['main/l2_loss'], 'iteration',
                                         file_name='l2_loss.png', trigger=plot_interval),
                   trigger=plot_interval)

    # Eval
    path = os.path.join(args.experiment, args.test_folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving samples to {} ...\n'.format(path))

    train_batch = [trainset[idx][0] for idx in range(args.test_size)]
    train_v = Variable(chainer.dataset.concat_examples(train_batch, args.gpu), volatile='on')
    trainer.extend(sampler(G, path, train_v, 'fake_samples_train_{}.png'), trigger=plot_interval)

    val_batch = [valset[idx][0] for idx in range(args.test_size)]
    val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu), volatile='on')
    trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'), trigger=plot_interval)

    if args.resume:
        # Resume from a snapshot
        print('Resume from {} ... \n'.format(args.resume))
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Training start ...\n')
    trainer.run()
from keras.preprocessing import image
import numpy as np
from scipy.misc import imsave
from model import EncoderDecoder

DECODER_PATH = './models/decoder_5.h5'
INPUT_IMG_PATH = './doge-256.jpg'
OUTPUT_IMG_PATH = './doge-decoded.jpg'

encoder_decoder = EncoderDecoder(decoder_path=DECODER_PATH)

input_img = image.load_img(INPUT_IMG_PATH)
input_img = image.img_to_array(input_img)
input_img = np.expand_dims(input_img, axis=0)

output_img = encoder_decoder.model.predict([input_img])[0]
imsave(OUTPUT_IMG_PATH, output_img)
def train(train_loader, test_loader, gradient_clipping=1, hidden_state_size=10,
          lr=0.001, epochs=100, classify=True):
    model = EncoderDecoder(input_size=28, hidden_size=hidden_state_size, output_size=28, labels_num=10) if not classify \
        else EncoderDecoder(input_size=28, hidden_size=hidden_state_size, output_size=28,
                            is_prediction=True, labels_num=10)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_name = "mse"
    min_loss = float("inf")
    task_name = "classify" if classify else "reconstruct"
    validation_losses = []
    validation_accuracies = []
    tensorboard_writer = init_writer(results_path, lr, classify, hidden_state_size, epochs)
    for epoch in range(1, epochs):
        total_loss = 0
        total_batches = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(device)
            target = target.to(device)
            # treat each image as a sequence of 28 rows of 28 pixels
            data_sequential = data.view(data.shape[0], 28, 28)
            optimizer.zero_grad()
            if classify:
                reconstructed_batch, batch_pred_probs = model(data_sequential)
                loss = model.loss(data_sequential, reconstructed_batch, target, batch_pred_probs)
            else:
                reconstructed_batch = model(data_sequential)
                loss = model.loss(data_sequential, reconstructed_batch)
            total_loss += loss.item()
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=gradient_clipping)
            optimizer.step()
            total_batches += 1
        epoch_loss = total_loss / total_batches
        tensorboard_writer.add_scalar('train_loss', epoch_loss, epoch)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')
        validation_loss = validation(model, test_loader, validation_losses, device, classify,
                                     validation_accuracies, tensorboard_writer, epoch)
        model.train()
        if epoch % 5 == 0 or validation_loss < min_loss:
            file_name = f"ae_toy_{loss_name}_lr={lr}_hidden_size={hidden_state_size}_epoch={epoch}_gradient_clipping={gradient_clipping}.pt"
            path = os.path.join(results_path, "saved_models", "MNIST_task", task_name, file_name)
            torch.save(model, path)
        min_loss = min(validation_loss, min_loss)
    plot_validation_loss(epochs, gradient_clipping, lr, loss_name, validation_losses,
                         hidden_state_size, task_name)
    if classify:
        plot_validation_acc(epochs, gradient_clipping, lr, loss_name, validation_accuracies,
                            hidden_state_size, task_name)
def main():
    # Use CUDA if available
    device = "cuda" if torch.cuda.is_available() else "cpu"

    transform = transforms.Compose([transforms.ToTensor()])

    # Create the train/test split (downloads automatically if missing)
    train_data = datasets.MNIST(root="data", train=True, transform=transform, download=True)
    val_data = datasets.MNIST(root="data", train=False, transform=transform, download=True)

    # Create dataloaders
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

    # Define model, loss function and optimizer
    net = EncoderDecoder()
    net.to(device)
    epochs = 10
    optimizer = Adam(net.parameters(), lr=0.001, weight_decay=1e-7)
    loss_fn = MSELoss(reduction="mean")

    # Training loop
    for i in range(epochs):
        print("Epoch {}/{}".format(i + 1, epochs))
        epoch_loss = []
        counter = 0
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            imgs = imgs.reshape(imgs.shape[0], -1)  # flatten each image to a 784-vector
            counter += 1
            y_pred = net(imgs)
            loss = loss_fn(imgs, y_pred)
            epoch_loss.append(loss.item())
            print("{}/{}. Train Loss: {:.2f}".format(counter, len(train_data) // 32,
                                                     loss.item()), end="\r")
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        epoch_loss = np.array(epoch_loss)
        print()

        print("Checking val loss")
        val_loss = []
        counter = 0
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            imgs = imgs.reshape(imgs.shape[0], -1)
            counter += 1
            with torch.no_grad():
                y_pred = net(imgs)
                loss = loss_fn(imgs, y_pred)
            val_loss.append(loss.item())
            print("{}/{}. Val Loss: {:.2f}".format(counter, len(val_data) // 32,
                                                   loss.item()), end="\r")
        print()
        val_loss = np.array(val_loss)
        print("Training loss epoch: {:.2f}\tValidation loss: {:.2f}".format(
            epoch_loss.mean(), val_loss.mean()))

    # Save model
    torch.save(net, "model.pth")
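# The loop above flattens each image to a 784-vector, so EncoderDecoder here is
# presumably a fully connected autoencoder. A minimal compatible sketch (layer
# sizes are assumptions, not from the source):
import torch.nn as nn

class EncoderDecoder(nn.Module):
    def __init__(self, input_dim=784, bottleneck=32):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(input_dim, 128), nn.ReLU(),
                                     nn.Linear(128, bottleneck), nn.ReLU())
        self.decoder = nn.Sequential(nn.Linear(bottleneck, 128), nn.ReLU(),
                                     nn.Linear(128, input_dim), nn.Sigmoid())

    def forward(self, x):
        return self.decoder(self.encoder(x))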
def __init__(self):
    self.value = 1
    # create an instance of the server attached to some port
    self.server = TCPServer("localhost", 9999)
    # start it listening in a separate control thread
    self.server_thread = Thread(target=self.server.serve_forever)
    self.server_thread.start()
    self.stop = False

    parser = argparse.ArgumentParser(
        description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64,
                        help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64,
                        help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3,
                        help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000,
                        help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64,
                        help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64,
                        help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=0.2,
                        help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5,
                        help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal',
                        help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1,
                        help='Sigma for gaussian smooth of Laplacian pyramid')
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
                        help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=200,
                        help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1500,
                        help='# of iterations for optimizing z')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='../models/blending_gan.npz',
                        help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path',
                        default='../models/unsupervised_blending_gan.npz',
                        help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='',
                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result',
                        help='Name for folder storing results')
    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')
    parser.add_argument('--car_type', default='rangerover', help='specify the car type')
    args = parser.parse_args()

    self.nef = args.nef
    self.ngf = args.ngf
    self.nc = args.nc
    self.nBottleneck = args.nBottleneck
    self.ndf = args.ndf
    self.image_size = args.image_size
    self.color_weight = args.color_weight
    self.sigma = args.sigma
    self.gradient_kernel = args.gradient_kernel
    self.smooth_sigma = args.smooth_sigma
    self.supervised = args.supervised
    self.nz = args.nz
    self.n_iteration = args.n_iteration
    self.gpu = args.gpu
    self.g_path = args.g_path
    self.unsupervised_path = args.unsupervised_path
    self.list_path = args.list_path
    self.result_folder = args.result_folder
    self.src_image = args.src_image
    self.dst_image = args.dst_image
    self.mask_image = args.mask_image
    self.blended_image = args.blended_image

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Init CNN model
    if args.supervised:
        self.G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck,
                                image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, self.G)
    else:
        chainer.config.use_cudnn = 'never'
        self.G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(
            args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, self.G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        self.G.to_gpu()  # Copy the model to the GPU
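# The `--supervised` flag above uses `type=lambda x: x == 'True'`, which maps
# any other spelling ('true', '1', 'yes') to False without warning. A more
# forgiving parser sketch:
import argparse

def str2bool(value):
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))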
def train(train_loader, validate_data, device, gradient_clipping=1,
          hidden_state_size=10, lr=0.001, opt="adam", epochs=1000, batch_size=32):
    model = EncoderDecoder(1, hidden_state_size, 1, 50).to(device)
    validate_data = validate_data.to(device)
    if opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    optimizer_name = 'adam' if 'adam' in str(optimizer).lower() else 'rmsprop'
    mse = nn.MSELoss()
    min_loss = float("inf")
    best_loss_global = float("inf")
    min_in, min_out = None, None
    validation_losses = []
    for epoch in range(0, epochs):
        total_loss = 0
        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = mse(output, data)
            total_loss += loss.item()
            if loss.item() < min_loss:
                min_loss = loss.item()
                min_in, min_out = data, output
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         max_norm=gradient_clipping)
            optimizer.step()
        epoch_loss = total_loss / len(train_loader)
        best_loss_global = min(best_loss_global, epoch_loss)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')
        if epoch % 100 == 0:
            path = f'{results_path}saved_models/ae_toy_{optimizer_name}_lr={lr}_hidden_size={hidden_state_size}_' \
                   f'_gradient_clipping={gradient_clipping}_'
            create_folders(path)
            torch.save(model, path + f"/epoch={epoch}_bestloss={best_loss_global}.pt")
        # run validation
        if epoch % 20 == 0:
            model.eval()
            with torch.no_grad():  # no gradients needed for validation
                output = model(validate_data)
                loss = mse(output, validate_data)
            validation_losses.append(loss.item())
            model.train()
    plot_sequence_examples(epochs, gradient_clipping, lr, min_in, min_out,
                           optimizer_name, batch_size)
    plot_validation_loss(epochs, gradient_clipping, lr, optimizer_name,
                         validation_losses, batch_size)
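# `create_folders` is not defined in this snippet; a plausible one-line helper,
# assuming it only needs to ensure the directory exists:
import os

def create_folders(path):
    os.makedirs(path, exist_ok=True)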
def main():
    parser = argparse.ArgumentParser(
        description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64,
                        help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64,
                        help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3,
                        help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000,
                        help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64,
                        help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64,
                        help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=0.2,
                        help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5,
                        help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal',
                        help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1,
                        help='Sigma for gaussian smooth of Laplacian pyramid')
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
                        help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=200,
                        help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1500,
                        help='# of iterations for optimizing z')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='../models/blending_gan.npz',
                        help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path',
                        default='../models/unsupervised_blending_gan.npz',
                        help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='',
                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result',
                        help='Name for folder storing results')
    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')
    parser.add_argument('--car_type', default='rangerover', help='specify the car type')
    args = parser.parse_args()

    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))

    # Init CNN model
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck,
                           image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        chainer.config.use_cudnn = 'never'
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(
            args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU

    # Load car image based on the name given by the blender script
    car_image = cv2.imread(args.src_image)
    # Load background image based on the name given by the blender script
    cam_im_path = 'original/' + args.src_image.split('/')[2]
    camera_image = cv2.imread(cam_im_path)
    # Create mask
    mask = create_mask(car_image)
    # Create composite
    composite = create_composite(car_image, mask, camera_image)
    cv2.imwrite('composites/' + args.car_type + '/' + args.src_image.split('/')[2],
                composite)
    # Harmonize the composite
    GPGAN_result = harmonize(car_image, camera_image, mask, G, args.image_size,
                             args.gpu, args.color_weight, args.sigma,
                             args.gradient_kernel, args.smooth_sigma,
                             args.supervised, args.nz, args.n_iteration)
    # Save the result
    cv2.imwrite('GPGAN_output/' + args.car_type + '/' + args.src_image.split('/')[2],
                GPGAN_result)
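# `create_mask` and `create_composite` are not shown here. A plausible sketch,
# assuming the rendered car sits on a black background (mask = non-black pixels)
# and that the car and camera images share the same resolution:
import cv2
import numpy as np

def create_mask(car_image, threshold=10):
    gray = cv2.cvtColor(car_image, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return mask

def create_composite(car_image, mask, background):
    mask3 = cv2.merge([mask, mask, mask]) // 255  # 0/1 per channel
    return (car_image * mask3 + background * (1 - mask3)).astype(np.uint8)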
def test(noise_type):
    global test_dataset
    if noise_type == NoiseDataloader.GAUSSIAN:
        test_dataset = NoiseDataloader(dataset_type=NoiseDataloader.TEST,
                                       noisy_per_image=1,
                                       noise_type=NoiseDataloader.GAUSSIAN)
    elif noise_type == NoiseDataloader.TEXT_OVERLAY:
        test_dataset = NoiseDataloader(dataset_type=NoiseDataloader.TEST,
                                       noisy_per_image=1,
                                       noise_type=NoiseDataloader.TEXT_OVERLAY)
    elif noise_type == NoiseDataloader.SALT_PEPPER:
        test_dataset = NoiseDataloader(dataset_type=NoiseDataloader.TEST,
                                       noisy_per_image=1,
                                       noise_type=NoiseDataloader.SALT_PEPPER)
    else:
        return

    # Initializing network
    network = EncoderDecoder()
    network = nn.DataParallel(network)
    instance = '010'
    pretrained_model_folder_path = os.path.join(pp.trained_models_folder_path,
                                                'Instance_' + instance)
    for pretrained_model_file_name in os.listdir(pretrained_model_folder_path):
        try:
            if pretrained_model_file_name.endswith('.pt'):
                network.load_state_dict(torch.load(
                    os.path.join(pretrained_model_folder_path,
                                 pretrained_model_file_name)))
                print('Network weights initialized using file from:',
                      pretrained_model_file_name)
            else:
                continue
        except Exception:
            print('Unable to load network with weights from:',
                  pretrained_model_file_name)
            continue
    network.eval()  # inference mode

    # randrange avoids the off-by-one of randint(0, len(test_dataset))
    idx = random.randrange(len(test_dataset))
    noisy_image, clean_image = test_dataset[idx]
    with torch.no_grad():  # no gradients needed at test time
        predicted_image = network(torch.unsqueeze(torch.as_tensor(noisy_image),
                                                  dim=0))[0]

    clean_image = NoiseDataloader.convert_model_output_to_image(clean_image)
    noisy_image = NoiseDataloader.convert_model_output_to_image(noisy_image)
    predicted_image = NoiseDataloader.convert_model_output_to_image(predicted_image)

    plt.figure(num='Network Performance using weights at {}'.format(
        pretrained_model_file_name), figsize=(20, 20))
    plt.subplot(2, 2, 1)
    plt.imshow(clean_image, cmap='gray')
    plt.colorbar()
    plt.title('Original Image')
    plt.subplot(2, 2, 2)
    plt.imshow(noisy_image, cmap='gray')
    plt.colorbar()
    plt.title('Noisy Image')
    plt.subplot(2, 2, 3)
    plt.imshow(predicted_image, cmap='gray')
    plt.colorbar()
    plt.title('Predicted Image')
    plt.subplot(2, 2, 4)
    plt.imshow(np.sqrt(np.sum((clean_image - predicted_image) ** 2, axis=2)),
               cmap='gray')
    plt.title('Euclidean Distance')
    plt.colorbar()
    plt.show()
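# A small quantitative complement to the qualitative plots above: PSNR between
# the clean and predicted images (a sketch; assumes float images in [0, 1]):
import numpy as np

def psnr(clean, predicted, max_val=1.0):
    mse = np.mean((clean - predicted) ** 2)
    if mse == 0:
        return float('inf')
    return 10.0 * np.log10((max_val ** 2) / mse)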
def adapt(config):
    if 'X_SGE_CUDA_DEVICE' in os.environ:
        print('running on the stack...')
        cuda_device = os.environ['X_SGE_CUDA_DEVICE']
        print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
    else:  # development only e.g. air202
        print('running locally...')
        os.environ['CUDA_VISIBLE_DEVICES'] = '3'  # choose the device (GPU) here

    sess_config = tf.ConfigProto()

    batches, vocab_size, src_word2id, tgt_word2id = \
        construct_training_data_batches(config)
    tgt_id2word = list(tgt_word2id.keys())

    params = {
        'vocab_src_size': len(src_word2id),
        'vocab_tgt_size': len(tgt_word2id),
        'go_id': tgt_word2id['<go>'],
        'eos_id': tgt_word2id['</s>']
    }

    # build the model
    model = EncoderDecoder(config, params)
    model.build_network()

    # -------- Adaptation work -------- #
    bias_name = 'decoder/decode_with_shared_attention/decoder/dense/bias:0'
    weight_name = 'decoder/decode_with_shared_attention/decoder/dense/kernel:0'
    param_names = [bias_name, weight_name]
    # param_names = [var.name for var in tf.trainable_variables()]
    model.adapt_weights(param_names)
    # --------------------------------- #

    new_save_path = config['save']
    if not os.path.exists(new_save_path):
        os.makedirs(new_save_path)
    write_config(new_save_path + '/config.txt', config)

    # save & restore model
    saver = tf.train.Saver(max_to_keep=1)
    save_path = config['load']
    model_number = config['model_number'] if config['model_number'] is not None \
        else config['num_epochs'] - 1
    full_save_path_to_model = save_path + '/model-' + str(model_number)

    with tf.Session(config=sess_config) as sess:
        # Restore variables from disk.
        saver.restore(sess, full_save_path_to_model)
        for epoch in range(10):
            print("num_batches = ", len(batches))
            random.shuffle(batches)
            for i, batch in enumerate(batches):
                feed_dict = {
                    model.src_word_ids: batch['src_word_ids'],
                    model.tgt_word_ids: batch['tgt_word_ids'],
                    model.src_sentence_lengths: batch['src_sentence_lengths'],
                    model.tgt_sentence_lengths: batch['tgt_sentence_lengths'],
                    model.dropout: config['dropout']
                }
                _ = sess.run([model.adapt_op], feed_dict=feed_dict)
                if i % 100 == 0:  # print out training status
                    if config['decoding_method'] != 'beamsearch':
                        [train_loss, infer_loss] = sess.run(
                            [model.train_loss, model.infer_loss],
                            feed_dict=feed_dict)
                        print("batch: {} --- train_loss: {:.5f} | inf_loss: {:.5f}"
                              .format(i, train_loss, infer_loss))
                    else:  # --- beam search --- #
                        [train_loss] = sess.run([model.train_loss],
                                                feed_dict=feed_dict)
                        print("BEAMSEARCH - batch: {} --- train_loss: {:.5f}"
                              .format(i, train_loss))
                    sys.stdout.flush()
            model.increment_counter()
            print("################## EPOCH {} done ##################".format(epoch))
            saver.save(sess, new_save_path + '/model', global_step=epoch)
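# `model.adapt_weights` restricts fine-tuning to the two named output-projection
# variables. A sketch of how it might be implemented with TF1's var_list
# mechanism (an assumption about the model's internals, not the source code;
# the learning rate is hypothetical):
import tensorflow as tf

def adapt_weights(self, param_names):
    adapt_vars = [var for var in tf.trainable_variables()
                  if var.name in param_names]
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    self.adapt_op = optimizer.minimize(self.train_loss, var_list=adapt_vars)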
def translate(config):
    if 'X_SGE_CUDA_DEVICE' in os.environ:
        print('running on the stack...')
        cuda_device = os.environ['X_SGE_CUDA_DEVICE']
        print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device))
        os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device
    else:  # development only e.g. air202
        print('running locally...')
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # choose the device (GPU) here

    sess_config = tf.ConfigProto()

    vocab_paths = {'vocab_src': config['vocab_src'], 'vocab_tgt': config['vocab_tgt']}
    src_word2id, tgt_word2id = load_vocab(vocab_paths)
    tgt_id2word = list(tgt_word2id.keys())

    params = {
        'vocab_src_size': len(src_word2id),
        'vocab_tgt_size': len(tgt_word2id),
        'go_id': tgt_word2id['<go>'],
        'eos_id': tgt_word2id['</s>']
    }

    # build the model
    model = EncoderDecoder(config, params)
    model.build_network()

    # save & restore model
    saver = tf.train.Saver()
    save_path = config['load']
    model_number = config['model_number'] if config['model_number'] is not None \
        else config['num_epochs'] - 1
    full_save_path_to_model = save_path + '/model-' + str(model_number)

    # ------ PPL Parser for Fluency Score (disabled) ------ #
    # parser = PplParser()
    # rnnlm_model = "/home/alta/BLTSpeaking/ged-pm574/gec-lm/train-rnnlm/rnnlms/v3/one-billion/RNN_weight.OOS.cuedrnnlm.rnnlm.300.300/train_LM.wgt.iter9"
    # test_file = config['tgtfile']
    # intermediatefile = "tmp/trans-intermediate.txt"
    # inputwlist = "/home/alta/BLTSpeaking/ged-pm574/gec-lm/train-rnnlm/rnnlms/v3/one-billion/lib/wlists/train.lst.index"
    # outputwlist = "/home/alta/BLTSpeaking/ged-pm574/gec-lm/train-rnnlm/rnnlms/v3/one-billion/lib/wlists/train.lst.index"
    # vocabsize = "64002"
    # parser.make_cmd(rnnlm_model, test_file, inputwlist, outputwlist, vocabsize, intermediatefile)
    # ----------------------------------------------------- #

    with tf.Session(config=sess_config) as sess:
        # Restore variables from disk.
        saver.restore(sess, full_save_path_to_model)
        src_sent_ids, src_sent_len = src_data(config['srcfile'], src_word2id,
                                              config['max_sentence_length'])
        num_sentences = len(src_sent_ids)
        batch_size = 1000
        # ceil division; the original int(n / b) + 1 produced an empty extra
        # batch whenever num_sentences was an exact multiple of batch_size
        num_batches = (num_sentences + batch_size - 1) // batch_size
        print('num_batches =', num_batches)

        beam_width = config['beam_width']
        outputs = []
        for i in range(num_batches):
            i_start = batch_size * i
            i_end = min(i_start + batch_size, num_sentences)
            translate_dict = {
                model.src_word_ids: src_sent_ids[i_start:i_end],
                model.src_sentence_lengths: src_sent_len[i_start:i_end],
                model.dropout: 0.0
            }
            predicted_ids = sess.run(model.predicted_ids, feed_dict=translate_dict)
            for sentence in predicted_ids:
                beam = []
                for k in range(beam_width):
                    translation = sentence[:, k]
                    words = []
                    for token_id in translation:
                        if token_id == params['eos_id']:
                            break
                        words.append(tgt_id2word[token_id])
                    beam.append(words)
                outputs.append(beam)
            print('#', end='')
            sys.stdout.flush()

        print("num outputs: ", len(outputs))

    with open(config['tgtfile'], 'w', encoding="utf8") as file:
        for output in outputs:
            for beam in output:
                x = "<s> " + " ".join(beam[:-1]).upper() + " </s>\n"
                file.write(x)