def __init__(self, batch_size, frequent=50):
    """Hold throughput-tracking state and open a TensorBoard event writer.

    batch_size: samples per batch (used by the speed computation elsewhere).
    frequent: reporting interval, in batches.
    """
    # Event files are always written under ./logs/.
    self.summary_writer = tb.FileWriter('./logs/')
    self.batch_size = batch_size
    self.frequent = frequent
    # Bookkeeping for the speed measurement; armed on the first callback.
    self.init = False
    self.tic = 0
    self.last_count = 0
def __init__(self, batch_size: int, output_folder: str, optimized_metric: str = C.PERPLEXITY, use_tensorboard: bool = False, cp_decoder: Optional[checkpoint_decoder.CheckpointDecoder] = None) -> None: self.output_folder = output_folder # stores dicts of metric names & values for each checkpoint self.metrics = [] # type: List[Dict] self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME) self.best_checkpoint = 0 self.start_tic = time.time() self.summary_writer = None if use_tensorboard: import tensorboard # pylint: disable=import-error log_dir = os.path.join(output_folder, C.TENSORBOARD_NAME) if os.path.exists(log_dir): logger.info("Deleting existing tensorboard log dir %s", log_dir) shutil.rmtree(log_dir) logger.info("Logging training events for Tensorboard at '%s'", log_dir) self.summary_writer = tensorboard.FileWriter(log_dir) self.cp_decoder = cp_decoder self.ctx = mp.get_context('spawn') # type: ignore self.decoder_metric_queue = self.ctx.Queue() self.decoder_process = None # type: Optional[mp.Process] utils.check_condition(optimized_metric in C.METRICS, "Unsupported metric: %s" % optimized_metric) if optimized_metric == C.BLEU: utils.check_condition(self.cp_decoder is not None, "%s requires CheckpointDecoder" % C.BLEU) self.optimized_metric = optimized_metric self.validation_best = C.METRIC_WORST[self.optimized_metric] logger.info("Early stopping by optimizing '%s'", self.optimized_metric) self.tic = 0
def __init__(self, batch_size, frequent=50):
    """Speedometer-style state; TensorBoard events go to <output_path>/tb_logs."""
    self.batch_size = batch_size
    self.frequent = frequent
    # Not yet armed; counters start from zero.
    self.init = False
    self.tic = 0
    self.last_count = 0
    # NOTE(review): relies on a module-level `config` object — confirm it is
    # initialised before this constructor runs.
    tb_dir = os.path.join(config.output_path, 'tb_logs')
    self.summary_writer = tb.FileWriter(tb_dir)
def __init__(self, batch_size: int, output_folder: str, optimized_metric: str = C.PERPLEXITY, use_tensorboard: bool = False, checkpoint_decoder: Optional[ sockeye.checkpoint_decoder.CheckpointDecoder] = None, num_concurrent_decodes: int = 1) -> None: self.metrics = [ ] # stores dicts of metric names & values for each checkpoint self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME) open(self.metrics_filename, 'w').close() # clear metrics file self.best_checkpoint = 0 self.start_tic = time.time() self.summary_writer = None if use_tensorboard: import tensorboard # pylint: disable=import-error log_dir = os.path.join(output_folder, C.TENSORBOARD_NAME) if os.path.exists(log_dir): logger.info("Deleting existing tensorboard log dir %s", log_dir) shutil.rmtree(log_dir) logger.info("Logging training events for Tensorboard at '%s'", log_dir) self.summary_writer = tensorboard.FileWriter(log_dir) self.checkpoint_decoder = checkpoint_decoder self.ctx = mp.get_context('spawn') self.num_concurrent_decodes = num_concurrent_decodes self.decoder_metric_queue = self.ctx.Queue() self.decoder_processes = [] # TODO(fhieber): MXNet Speedometer uses root logger. How to fix this? self.speedometer = mx.callback.Speedometer( batch_size=batch_size, frequent=C.MEASURE_SPEED_EVERY, auto_reset=False) self.optimized_metric = optimized_metric if self.optimized_metric == C.PERPLEXITY: self.minimize = True self.validation_best = np.inf elif self.optimized_metric == C.ACCURACY: self.minimize = False self.validation_best = -np.inf elif self.optimized_metric == C.BLEU: assert self.checkpoint_decoder is not None, "BLEU requires CheckpointDecoder" self.minimize = False self.validation_best = -np.inf else: raise ValueError("No other metrics supported") logger.info("Early stopping by optimizing '%s' (minimize=%s)", self.optimized_metric, self.minimize) self.tic = 0
def __init__(self, logging_dir, logfile_name, print_freq=10):
    """Set up TensorBoard plus terminal/file logging under `logging_dir`.

    NOTE(review): `logfile_name` is accepted but never used — the text log is
    always written to 'log.txt'; confirm whether that is intended.
    """
    self.log_dir = logging_dir
    self.print_freq = print_freq
    # Make sure the target directory exists before any writer touches it.
    if not os.path.isdir(logging_dir):
        os.makedirs(logging_dir)
    self.summary_writer = tensorboard.FileWriter(logdir=logging_dir)
    # standard logger to print to terminal: mirror stdout into log.txt as well.
    sys.stdout = Logger(osp.join(logging_dir, 'log.txt'))
def __init__(self, batch_size: int, output_folder: str, optimized_metric: str = C.PERPLEXITY, use_tensorboard: bool = False, cp_decoder: Optional[checkpoint_decoder.CheckpointDecoder] = None, num_concurrent_decodes: int = 1) -> None:
    """Initialise monitor state: metrics bookkeeping, optional TensorBoard
    writer, decoder-subprocess plumbing, a speedometer, and early-stopping
    state seeded with the metric's worst value.
    """
    self.output_folder = output_folder
    self.metrics = []  # stores dicts of metric names & values for each checkpoint
    self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME)
    self.best_checkpoint = 0
    self.start_tic = time.time()
    self.summary_writer = None
    if use_tensorboard:
        import tensorboard  # pylint: disable=import-error
        log_dir = os.path.join(output_folder, C.TENSORBOARD_NAME)
        # Remove stale event files so this run's logs start clean.
        if os.path.exists(log_dir):
            logger.info("Deleting existing tensorboard log dir %s", log_dir)
            shutil.rmtree(log_dir)
        logger.info("Logging training events for Tensorboard at '%s'", log_dir)
        self.summary_writer = tensorboard.FileWriter(log_dir)
    self.cp_decoder = cp_decoder
    # 'spawn' context for decoder subprocesses (avoids forked framework state
    # — presumably; confirm against the decode-process code).
    self.ctx = mp.get_context('spawn')
    self.num_concurrent_decodes = num_concurrent_decodes
    self.decoder_metric_queue = self.ctx.Queue()
    self.decoder_processes = []
    # TODO(fhieber): MXNet Speedometer uses root logger. How to fix this?
    self.speedometer = mx.callback.Speedometer(batch_size=batch_size, frequent=C.MEASURE_SPEED_EVERY, auto_reset=False)
    utils.check_condition(optimized_metric in C.METRICS, "Unsupported metric: %s" % optimized_metric)
    # BLEU can only be computed by decoding checkpoints.
    if optimized_metric == C.BLEU:
        utils.check_condition(self.cp_decoder is not None, "%s requires CheckpointDecoder" % C.BLEU)
    self.optimized_metric = optimized_metric
    self.validation_best = C.METRIC_WORST[self.optimized_metric]
    logger.info("Early stopping by optimizing '%s'", self.optimized_metric)
    self.tic = 0
def main(argv):
    """Adversarial training loop with periodic classification evaluation on
    the Test_B split; saves images and the best-scoring model snapshot.

    NOTE(review): uses `itertools.izip` — this function is Python 2 code.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)
    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']
    # Trainer class name comes from the config; exec instantiates it.
    # NOTE(review): rebinding a local via exec only works in Python 2.
    trainer = []
    exec("trainer=%s(config.hyperparameters)" % config.hyperparameters['trainer'])
    trainer.cuda(opts.gpu)
    iterations = 0
    # One event-file directory per config file name.
    train_writer = tensorboard.FileWriter("%s/%s" % (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    # NOTE(review): snapshot_directory is immediately overwritten by the next
    # call — the first call appears redundant; confirm for side effects.
    snapshot_directory = prepare_snapshot_folder(config.snapshot_prefix)
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(config.snapshot_prefix, iterations, config.image_save_iterations)
    # Load datasets
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    test_loader_b = get_data_loader(config.datasets['test_b'], batch_size=config.hyperparameters['test_batch_size'])
    best_score = 0
    for ep in range(0, MAX_EPOCHS):
        for it, ((images_a, labels_a), (images_b, labels_b)) in enumerate(itertools.izip(train_loader_a, train_loader_b)):
            # Skip ragged final batches so batch-size-dependent ops stay valid.
            if images_a.size(0) != batch_size or images_b.size(0) != batch_size:
                continue
            trainer.dis.train()
            images_a = Variable(images_a.cuda(opts.gpu))
            labels_a = Variable(labels_a.cuda(opts.gpu)).view(images_a.size(0))
            # NOTE(review): labels_b is loaded but never used here.
            images_b = Variable(images_b.cuda(opts.gpu))
            # Main training code
            trainer.dis_update(images_a, labels_a, images_b, config.hyperparameters)
            x_aa, x_ba, x_ab, x_bb = trainer.gen_update(images_a, images_b, config.hyperparameters)
            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)
            # # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                # Evaluate classification accuracy over the whole Test_B split.
                trainer.dis.eval()
                score = 0
                num_samples = 0
                for tit, (test_images_b, test_labels_b) in enumerate(test_loader_b):
                    test_images_b = Variable(test_images_b.cuda(opts.gpu))
                    test_labels_b = Variable(test_labels_b.cuda(opts.gpu)).view(test_images_b.size(0))
                    cls_outputs = trainer.dis.classify_b(test_images_b)
                    _, cls_predicts = torch.max(cls_outputs.data, 1)
                    cls_acc = (cls_predicts == test_labels_b.data).sum()
                    score += cls_acc
                    num_samples += test_images_b.size(0)
                # 1.0 * forces float division under Python 2.
                score /= 1.0 * num_samples
                print('Classification accuracy for Test_B dataset: %4.4f' % score)
                # Keep only the best-scoring snapshot (saved under iterations=-1).
                if score > best_score:
                    best_score = score
                    trainer.save(config.snapshot_prefix, iterations=-1)
                train_writer.add_summary(summary.scalar('test_b_acc', score), iterations + 1)
                # Dump the current inputs and all four translation outputs;
                # images are de-normalised from [-1, 1] back to [0, 1].
                img_name = image_directory + "/images_a.jpg"
                torchvision.utils.save_image(images_a.data / 2 + 0.5, img_name)
                img_name = image_directory + "/images_b.jpg"
                torchvision.utils.save_image(images_b.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_aa.jpg"
                torchvision.utils.save_image(x_aa.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_ab.jpg"
                torchvision.utils.save_image(x_ab.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_bb.jpg"
                torchvision.utils.save_image(x_bb.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_ba.jpg"
                torchvision.utils.save_image(x_ba.data / 2 + 0.5, img_name)
            iterations += 1
            if iterations == max_iterations:
                return
def train(args):
    """Train an SST sentence classifier: build vocab/loaders, run batched
    training with periodic validation, checkpoint on best validation accuracy.
    """
    text_field = data.Field(lower=args.lower, include_lengths=True, batch_first=True)
    label_field = data.Field(sequential=False)
    # In binary (non-fine-grained) mode, drop 'neutral' examples entirely.
    filter_pred = None
    if not args.fine_grained:
        filter_pred = lambda ex: ex.label != 'neutral'
    dataset_splits = datasets.SST.splits(root='./data/sst', text_field=text_field, label_field=label_field, fine_grained=args.fine_grained, train_subtrees=True, filter_pred=filter_pred)
    text_field.build_vocab(*dataset_splits, vectors=args.pretrained)
    label_field.build_vocab(*dataset_splits)
    logging.info(f'Initialize with pretrained vectors: {args.pretrained}')
    logging.info(f'Number of classes: {len(label_field.vocab)}')
    train_loader, valid_loader, _ = data.BucketIterator.splits(datasets=dataset_splits, batch_size=args.batch_size, device=args.gpu)
    num_classes = len(label_field.vocab)
    model = SSTModel(num_classes=num_classes, num_words=len(text_field.vocab), word_dim=args.word_dim, hidden_dim=args.hidden_dim, clf_hidden_dim=args.clf_hidden_dim, clf_num_layers=args.clf_num_layers, use_leaf_rnn=args.leaf_rnn, bidirectional=args.bidirectional, intra_attention=args.intra_attention, use_batchnorm=args.batchnorm, dropout_prob=args.dropout)
    if args.pretrained:
        model.word_embedding.weight.data.set_(text_field.vocab.vectors)
    if args.fix_word_embedding:
        logging.info('Will not update word embeddings')
        model.word_embedding.weight.requires_grad = False
    if args.gpu > -1:
        logging.info(f'Using GPU {args.gpu}')
        model.cuda(args.gpu)
    # Only optimise parameters that still require gradients (embeddings may be frozen).
    params = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'adagrad':
        optimizer_class = optim.Adagrad
    elif args.optimizer == 'adadelta':
        optimizer_class = optim.Adadelta
    optimizer = optimizer_class(params=params, weight_decay=args.l2reg)
    # LR is halved on validation-accuracy plateaus.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.5, patience=20 * args.halve_lr_every, verbose=True)
    criterion = nn.CrossEntropyLoss()
    train_summary_writer = tensorboard.FileWriter(logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)

    def run_iter(batch, is_training):
        # Runs one forward pass (plus backward/step when training);
        # returns (loss, accuracy) for the batch.
        model.train(is_training)
        words, length = batch.text
        label = batch.label
        # NOTE(review): `length` is immediately re-derived from batch.text[1]
        # and wrapped — the first unpack of `length` is effectively unused.
        length = wrap_with_variable(batch.text[1], volatile=not is_training, gpu=args.gpu)
        logits = model(words=words, length=length)
        label_pred = logits.max(1)[1]
        accuracy = torch.eq(label, label_pred).float().mean()
        loss = criterion(input=logits, target=label)
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(parameters=params, max_norm=5)
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        # Unwrap tensors/Variables to plain scalars before logging.
        value = unwrap_scalar_variable(value)
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    num_train_batches = len(train_loader)
    # Validate 20 times per epoch.
    validate_every = num_train_batches // 20
    best_vaild_accuacy = 0
    iter_count = 0
    for batch_iter, train_batch in enumerate(train_loader):
        train_loss, train_accuracy = run_iter(batch=train_batch, is_training=True)
        iter_count += 1
        add_scalar_summary(summary_writer=train_summary_writer, name='loss', value=train_loss, step=iter_count)
        add_scalar_summary(summary_writer=train_summary_writer, name='accuracy', value=train_accuracy, step=iter_count)
        if (batch_iter + 1) % validate_every == 0:
            # Full pass over the validation split.
            valid_loss_sum = valid_accuracy_sum = 0
            num_valid_batches = len(valid_loader)
            for valid_batch in valid_loader:
                valid_loss, valid_accuracy = run_iter(batch=valid_batch, is_training=False)
                valid_loss_sum += unwrap_scalar_variable(valid_loss)
                valid_accuracy_sum += unwrap_scalar_variable(valid_accuracy)
            valid_loss = valid_loss_sum / num_valid_batches
            valid_accuracy = valid_accuracy_sum / num_valid_batches
            add_scalar_summary(summary_writer=valid_summary_writer, name='loss', value=valid_loss, step=iter_count)
            add_scalar_summary(summary_writer=valid_summary_writer, name='accuracy', value=valid_accuracy, step=iter_count)
            scheduler.step(valid_accuracy)
            progress = train_loader.epoch
            logging.info(f'Epoch {progress:.2f}: ' f'valid loss = {valid_loss:.4f}, ' f'valid accuracy = {valid_accuracy:.4f}')
            # Checkpoint whenever validation accuracy improves.
            if valid_accuracy > best_vaild_accuacy:
                best_vaild_accuacy = valid_accuracy
                model_filename = (f'model-{progress:.2f}' f'-{valid_loss:.4f}' f'-{valid_accuracy:.4f}.pkl')
                model_path = os.path.join(args.save_dir, model_filename)
                torch.save(model.state_dict(), model_path)
                print(f'Saved the new best model to {model_path}')
            if progress > args.max_epoch:
                break
def main(argv): (opts, args) = parser.parse_args(argv) # Load experiment setting assert isinstance(opts, object) config = NetConfig(opts.config) train_writer = tensorboard.FileWriter("%s/%s" % (opts.log,os.path.splitext(os.path.basename(opts.config))[0])) max_iterations = config.hyperparameters['max_iterations'] batch_size = config.hyperparameters['batch_size'] vae_enc_w = config.hyperparameters['vae_enc_w'] vae_ll_w = config.hyperparameters['vae_ll_w'] gan_w = config.hyperparameters['gan_w'] ch = config.hyperparameters['ch'] gen_net = config.hyperparameters['gen'] dis_net = config.hyperparameters['dis'] image_size = config.datasets['a']['image_size'] input_dims = list() input_dims.append(config.datasets['a']['channels']) input_dims.append(config.datasets['b']['channels']) # Load datasets train_loader_a = get_data_loader(config.datasets['a'], batch_size) train_loader_b = get_data_loader(config.datasets['b'], batch_size) train_loader_a2 = get_data_loader(config.datasets['a'], batch_size) train_loader_b2 = get_data_loader(config.datasets['b'], batch_size) trainer = UNITTrainer(gen_net, dis_net, batch_size, ch, input_dims, image_size, opts.lr) iterations = 0 if opts.resume == 1: iterations = resume(trainer, config.snapshot_prefix) trainer.cuda(opts.gpu) directory = os.path.dirname(config.snapshot_prefix) image_directory = directory + "/images" if not os.path.exists(directory): os.makedirs(directory) if not os.path.exists(image_directory): os.makedirs(image_directory) write_html(directory + "/index.html", iterations + 1, config.image_save_iterations, image_directory, image_size) for ep in range(0, MAX_EPOCHS): for it, (images_a, images_b, images_a2, images_b2) in enumerate(itertools.izip(train_loader_a, train_loader_b, train_loader_a2, train_loader_b2)): if images_a.size(0) != batch_size or images_b.size(0) != batch_size: continue images_a = Variable(images_a.cuda(opts.gpu)) images_b = Variable(images_b.cuda(opts.gpu)) images_a2 = Variable(images_a2.cuda(opts.gpu)) 
images_b2 = Variable(images_b2.cuda(opts.gpu)) # Main training code trainer.dis_update(images_a, images_b, images_a2, images_b2) x_aa, x_ba, x_ab, x_bb = trainer.gen_update(images_a, images_b, gan_w, vae_ll_w, vae_enc_w) # Dump training stats in log file if (iterations+1) % config.display == 0: print("Iteration: %08d/%08d" %(iterations+1,max_iterations)) members = [attr for attr in dir(trainer) \ if not callable(getattr(trainer, attr)) and not attr.startswith("__") and 'loss' in attr] for m in members: train_writer.add_summary(summary.scalar(m, getattr(trainer, m)), iterations + 1) members = [attr for attr in dir(trainer) \ if not callable(getattr(trainer, attr)) and not attr.startswith("__") and 'acc' in attr] for m in members: train_writer.add_summary(summary.scalar(m, getattr(trainer, m)), iterations + 1) # Save intermediate visualization results if (iterations+1) % config.image_save_iterations == 0: assembled_images = make_save_image(images_a[0:1,::], x_aa[0:1,::], x_ab[0:1,::], images_b[0:1,::], x_ba[0:1,::], x_bb[0:1,::]) img_filename = '%s/gen_%08d.jpg' % (image_directory, iterations + 1) torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1) write_html(directory + "/index.html", iterations + 1, config.image_save_iterations, image_directory, image_size) else: assembled_images = make_save_image(images_a[0:1,::], x_aa[0:1,::], x_ab[0:1,::], images_b[0:1,::],x_ba[0:1,::], x_bb[0:1,::]) img_filename = '%s/gen.jpg' % (image_directory) torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1) # Save network weights if (iterations+1) % config.snapshot_save_iterations == 0: gen_filename = '%s_gen_%08d.pkl' % (config.snapshot_prefix, iterations + 1) dis_filename = '%s_dis_%08d.pkl' % (config.snapshot_prefix, iterations + 1) torch.save(trainer.gen.state_dict(), gen_filename) torch.save(trainer.dis.state_dict(), dis_filename) iterations += 1 if iterations == max_iterations: return
def main(argv):
    """Generic two-domain image-translation training loop with resume support,
    TensorBoard logging, periodic image dumps, and snapshot saving.

    NOTE(review): uses `itertools.izip` — this function is Python 2 code;
    rebinding `trainer` via exec also only works under Python 2.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)
    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    # Trainer class name comes from the config; exec instantiates it.
    trainer = []
    exec("trainer=%s(config.hyperparameters)" % config.hyperparameters['trainer'])
    # Check if resume training
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)
    ######################################################################################################################
    # Setup logger and repare image outputs
    train_writer = tensorboard.FileWriter("%s/%s" % (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(config.snapshot_prefix, iterations, config.image_save_iterations)
    for ep in range(0, MAX_EPOCHS):
        for it, (images_a, images_b) in enumerate(itertools.izip(train_loader_a, train_loader_b)):
            # Skip ragged final batches.
            if images_a.size(0) != batch_size or images_b.size(0) != batch_size:
                continue
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))
            # Main training code
            trainer.dis_update(images_a, images_b, config.hyperparameters)
            image_outputs = trainer.gen_update(images_a, images_b, config.hyperparameters)
            assembled_images = trainer.assemble_outputs(images_a, images_b, image_outputs)
            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)
            if (iterations + 1) % config.image_save_iterations == 0:
                # Periodic keep-forever image, plus refresh the HTML index.
                img_filename = '%s/gen_%08d.jpg' % (image_directory, iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)
                write_html(snapshot_directory + "/index.html", iterations + 1, config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                # Continuously-overwritten preview image.
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)
            # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)
            iterations += 1
            if iterations >= max_iterations:
                return
def main():
    """CLI entry point: parse hyperparameters, build a self-attentive text
    classifier for the chosen dataset, train with periodic validation, and
    checkpoint (with test accuracy in the name) on validation improvement.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--word-dim', type=int, default=300, help='size of word embeddings')
    parser.add_argument('--hidden-dim', type=int, default=300, help='number of hidden units per layer')
    parser.add_argument('--num-layers', type=int, default=1, help='number of layers in BiLSTM')
    parser.add_argument('--att-dim', type=int, default=350, help='number of attention unit')
    parser.add_argument('--att-hops', type=int, default=4, help='number of attention hops, for multi-hop attention model')
    parser.add_argument('--clf-hidden-dim', type=int, default=512, help='hidden (fully connected) layer size for classifier MLP')
    parser.add_argument('--clip', type=float, default=0.5, help='clip to prevent the too large grad in LSTM')
    parser.add_argument('--lr', type=float, default=.001, help='initial learning rate')
    parser.add_argument('--weight-decay', type=float, default=1e-5, help='weight decay rate per batch')
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--max-epoch', type=int, default=8)
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--cuda', action='store_true', default=True)
    parser.add_argument('--optimizer', default='adam', choices=['adam', 'sgd'])
    parser.add_argument('--batch-size', type=int, default=32, help='batch size for training')
    parser.add_argument('--penalization-coeff', type=float, default=0.1, help='the penalization coefficient')
    parser.add_argument('--fix-word-embedding', action='store_true')
    parser.add_argument('--model-type', required=True, choices=['sa', 'avgblock', 'hard'])
    parser.add_argument('--data-type', required=True, choices=['age2', 'dbpedia', 'yahoo'])
    parser.add_argument('--data', required=True, help='pickle file obtained by dataset dump')
    parser.add_argument('--save-dir', type=str, required=True, help='path to save the final model')
    parser.add_argument('--block-size', type=int, default=-1, help='block size only when model-type is avgblock')
    args = parser.parse_args()
    # Seed both torch and python RNGs for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)
    #######################################
    # a simple log file, the same content as stdout
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s')
    logFormatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    rootLogger = logging.getLogger()
    fileHandler = logging.FileHandler(os.path.join(args.save_dir, 'stdout.log'))
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)
    ########################################
    # Record the full run configuration in the log.
    for k, v in vars(args).items():
        logging.info(k+':'+str(v))
    #####################################################################
    # Dataset selection fixes the number of output classes.
    if args.data_type == 'age2':
        data = AGE2(datapath=args.data, batch_size=args.batch_size)
        num_classes = 5
    elif args.data_type == 'dbpedia':
        data = DBpedia(datapath=args.data, batch_size=args.batch_size)
        num_classes = 14
    elif args.data_type == 'yahoo':
        data = Yahoo(datapath=args.data, batch_size=args.batch_size)
        num_classes = 10
    else:
        raise Exception('Invalid argument data-type')
    #####################################################################
    if args.model_type == 'avgblock':
        assert args.block_size > 0
    #####################################################################
    tic = time.time()
    model = Classifier(
        dictionary=data,
        dropout=args.dropout,
        num_words=data.num_words,
        num_layers=args.num_layers,
        hidden_dim=args.hidden_dim,
        word_dim=args.word_dim,
        att_dim=args.att_dim,
        att_hops=args.att_hops,
        clf_hidden_dim=args.clf_hidden_dim,
        num_classes=num_classes,
        model_type=args.model_type,
        block_size=args.block_size,
    )
    print('It takes %.2f sec to build the model.' % (time.time() - tic))
    logging.info(model)
    # Initialise embeddings from the dataset's pretrained weight matrix.
    model.word_embedding.weight.data.set_(data.weight)
    if args.fix_word_embedding:
        model.word_embedding.weight.requires_grad = False
    if args.cuda:
        model = model.cuda()
    ''' count parameters
    num_params = sum(np.prod(p.size()) for p in model.parameters())
    num_embedding_params = np.prod(model.word_embedding.weight.size())
    print('# of parameters: %d' % num_params)
    print('# of word embedding parameters: %d' % num_embedding_params)
    print('# of parameters (excluding word embeddings): %d' % (num_params - num_embedding_params))
    '''
    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'sgd':
        optimizer_class = optim.SGD
    else:
        raise Exception('For other optimizers, please add it yourself. supported ones are: SGD and Adam.')
    # Optimise only parameters that still require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optimizer_class(params=params, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.5, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()
    # Identity matrix for each batch
    I = Variable(torch.eye(args.att_hops).unsqueeze(0).expand(args.batch_size, -1, -1))
    if args.cuda:
        I = I.cuda()
    # Bundle everything train_iter needs so it can be passed as **kwargs.
    trpack = {
        'model': model,
        'params': params,
        'criterion': criterion,
        'optimizer': optimizer,
        'I': I,
    }
    train_summary_writer = tensorboard.FileWriter(logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)
    tsw, vsw = train_summary_writer, valid_summary_writer
    logging.info('number of train batches: %d' % data.train_num_batch)
    # Validate 10 times per epoch.
    validate_every = data.train_num_batch // 10
    best_vaild_accuacy = 0
    iter_count = 0
    tic = time.time()
    for epoch_num in range(args.max_epoch):
        for batch_iter, train_batch in enumerate(data.train_minibatch_generator()):
            # Fractional epoch position, used in logs and checkpoint names.
            progress = epoch_num + batch_iter / data.train_num_batch
            iter_count += 1
            train_loss, train_accuracy = train_iter(args, train_batch, **trpack)
            add_scalar_summary(tsw, 'loss', train_loss, iter_count)
            add_scalar_summary(tsw, 'acc', train_accuracy, iter_count)
            # Progress heartbeat roughly every 1% of an epoch.
            if (batch_iter + 1) % (data.train_num_batch // 100) == 0:
                tac = (time.time() - tic) / 60
                print(' %.2f minutes\tprogress: %.2f' % (tac, progress))
            if (batch_iter + 1) % validate_every == 0:
                # Full pass over the dev split.
                correct_sum = 0
                for valid_batch in data.dev_minibatch_generator():
                    correct, supplements = eval_iter(args, model, valid_batch)
                    correct_sum += unwrap_scalar_variable(correct)
                valid_accuracy = correct_sum / data.dev_size
                scheduler.step(valid_accuracy)
                add_scalar_summary(vsw, 'acc', valid_accuracy, iter_count)
                logging.info('Epoch %.2f: valid accuracy = %.4f' % (progress, valid_accuracy))
                if valid_accuracy > best_vaild_accuacy:
                    # New best on dev: also measure test accuracy and embed
                    # both numbers in the checkpoint filename.
                    correct_sum = 0
                    for test_batch in data.test_minibatch_generator():
                        correct, supplements = eval_iter(args, model, test_batch)
                        correct_sum += unwrap_scalar_variable(correct)
                    test_accuracy = correct_sum / data.test_size
                    best_vaild_accuacy = valid_accuracy
                    model_filename = ('model-%.2f-%.4f-%.4f.pkl' % (progress, valid_accuracy, test_accuracy))
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    print('Saved the new best model to %s' % model_path)
def main(argv):
    """Multi-domain image-translation training: every ordered pair of domains
    (i, j), i != j, gets a discriminator and generator update per step.

    NOTE(review): uses `itertools.izip`/`xrange` — Python 2 code; rebinding
    `trainer` via exec also only works under Python 2.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)
    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']
    # multi-domain loaders
    train_loaders = []
    for i, train_x in enumerate(config.datasets.keys()):
        print('Domain %d = %s' % (i, train_x))
        train_loader = get_data_loader(config.datasets[train_x], batch_size)
        train_loaders.append(train_loader)
    # exec initialization of trainer
    trainer = []
    exec('trainer = %s(config.hyperparameters)' % config.hyperparameters['trainer'])
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)
    ###### setup logger and repare image outputs
    train_writer = tensorboard.FileWriter("%s/%s" % (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(config.snapshot_prefix, iterations, config.image_save_iterations)
    domain_number = len(train_loaders)
    for ep in range(0, MAX_EPOCHS):
        for it, images in enumerate(itertools.izip(*train_loaders)):
            # Move one batch per domain onto the GPU.
            images_list = []
            for image in images:
                im = Variable(image.cuda(opts.gpu))
                images_list.append(im)
                #print('im shape = ', im.size())
            assembled_list = []
            for i in xrange(domain_number):
                for j in xrange(domain_number):
                    # first: all of them VAE pass
                    if i == j:
                        continue
                        #trainer.vae_update(images_list[i], images_list[j], config.hyperparameters, i, j)
                    # second: all crossing pairs for GAN, let the lambda judge the
                    else:  # i != j
                        trainer.dis_update(images_list[i], images_list[j], config.hyperparameters, i, j)
                        image_outputs = trainer.gen_update(images_list[i], images_list[j], config.hyperparameters, i, j)
                        assembled = trainer.assemble_outputs(images_list[i], images_list[j], image_outputs)
                        assembled_list.append(assembled)
            # Stack one assembled strip per domain pair along dim 2.
            assembled_images = torch.cat(assembled_list, 2)
            # Dump training stats in log file
            # NOTE(review): the loop below advances `iterations` once per
            # domain pair (n*n - n times) per data batch — the original
            # indentation was lost; confirm this nesting against upstream.
            for t in xrange(domain_number * domain_number - domain_number):
                if (iterations + 1) % config.display == 0:
                    write_loss(iterations, max_iterations, trainer, train_writer)
                if (iterations + 1) % config.image_save_iterations == 0:
                    # Periodic keep-forever image, plus refresh the HTML index.
                    img_filename = '%s/gen_%08d.jpg' % (image_directory, iterations + 1)
                    torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)
                    write_html(snapshot_directory + '/index.html', iterations + 1, config.image_save_iterations, image_directory)
                elif (iterations + 1) % config.image_display_iterations == 0:
                    # Continuously-overwritten preview image.
                    img_filename = '%s/gen.jpg' % (image_directory)
                    torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)
                if (iterations + 1) % config.snapshot_save_iterations == 0:
                    trainer.save(config.snapshot_prefix, iterations)
                iterations += 1
                if iterations >= max_iterations:
                    return
def train(args):
    """Train an SNLI sentence-pair (NLI) model; validate and checkpoint once
    per epoch, halving the learning rate every two epochs via StepLR.
    """
    # Encode the key hyperparameters into the experiment directory name.
    experiment_name = (f'w{args.word_dim}_lh{args.lstm_hidden_dims}' f'_mh{args.mlp_hidden_dim}_ml{args.mlp_num_layers}' f'_d{args.dropout_prob}')
    save_dir = os.path.join(args.save_root_dir, experiment_name)
    train_summary_writer = tensorboard.FileWriter(logdir=os.path.join(save_dir, 'log', 'train'))
    valid_summary_writer = tensorboard.FileWriter(logdir=os.path.join(save_dir, 'log', 'valid'))
    # '128,256'-style CLI string -> [128, 256].
    lstm_hidden_dims = [int(d) for d in args.lstm_hidden_dims.split(',')]
    logging.info('Loading data...')
    text_field = data.Field(lower=True, include_lengths=True, batch_first=False)
    label_field = data.Field(sequential=False)
    if not os.path.exists(args.data_dir):
        os.makedirs(args.data_dir)
    dataset_splits = datasets.SNLI.splits(text_field=text_field, label_field=label_field, root=args.data_dir)
    text_field.build_vocab(*dataset_splits, vectors=args.pretrained)
    label_field.build_vocab(*dataset_splits)
    train_loader, valid_loader, _ = data.BucketIterator.splits(datasets=dataset_splits, batch_size=args.batch_size, device=args.gpu)
    logging.info('Building model...')
    num_classes = len(label_field.vocab)
    num_words = len(text_field.vocab)
    model = NLIModel(num_words=num_words, word_dim=args.word_dim, lstm_hidden_dims=lstm_hidden_dims, mlp_hidden_dim=args.mlp_hidden_dim, mlp_num_layers=args.mlp_num_layers, num_classes=num_classes, dropout_prob=args.dropout_prob)
    num_total_params = sum(np.prod(p.size()) for p in model.parameters())
    num_word_embedding_params = np.prod(model.word_embedding.weight.size())
    if args.pretrained:
        model.word_embedding.weight.data.set_(text_field.vocab.vectors)
    model.cuda(args.gpu)
    logging.info(f'# of total parameters: {num_total_params}')
    logging.info(f'# of intrinsic parameters: ' f'{num_total_params - num_word_embedding_params}')
    logging.info(f'# of word embedding parameters: ' f'{num_word_embedding_params}')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=2e-4)
    # Halve LR every two epochs
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.5)

    def run_iter(batch, is_training):
        # One forward pass over a premise/hypothesis batch; backward + step
        # when training. Returns (loss, accuracy).
        pre_input, pre_lengths = batch.premise
        hyp_input, hyp_lengths = batch.hypothesis
        label = batch.label
        model.train(is_training)
        model_output = model(pre_input=pre_input, pre_lengths=pre_lengths, hyp_input=hyp_input, hyp_lengths=hyp_lengths)
        label_pred = model_output.max(1)[1]
        loss = criterion(input=model_output, target=label)
        accuracy = torch.eq(label, label_pred).float().mean()
        if is_training:
            model.zero_grad()
            loss.backward()
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    logging.info('Training starts!')
    cur_epoch = 0
    for iter_count, train_batch in enumerate(train_loader):
        train_loss, train_accuracy = run_iter(batch=train_batch, is_training=True)
        add_scalar_summary(summary_writer=train_summary_writer, name='loss', value=train_loss.data[0], step=iter_count)
        add_scalar_summary(summary_writer=train_summary_writer, name='accuracy', value=train_accuracy.data[0], step=iter_count)
        # Run validation + checkpoint once each time the loader crosses an
        # epoch boundary.
        if int(train_loader.epoch) > cur_epoch:
            cur_epoch = int(train_loader.epoch)
            num_valid_batches = len(valid_loader)
            valid_loss_sum = valid_accracy_sum = 0
            for valid_batch in valid_loader:
                valid_loss, valid_accuracy = run_iter(batch=valid_batch, is_training=False)
                valid_loss_sum += valid_loss.data[0]
                valid_accracy_sum += valid_accuracy.data[0]
            valid_loss = valid_loss_sum / num_valid_batches
            valid_accuracy = valid_accracy_sum / num_valid_batches
            add_scalar_summary(summary_writer=valid_summary_writer, name='loss', value=valid_loss, step=iter_count)
            add_scalar_summary(summary_writer=valid_summary_writer, name='accuracy', value=valid_accuracy, step=iter_count)
            progress = train_loader.epoch
            logging.info(f'Epoch {progress:.2f}: ' f'valid loss = {valid_loss:.4f}, ' f'valid accuracy = {valid_accuracy:.4f}')
            # Model is saved every epoch (not only on improvement); metrics
            # are embedded in the filename.
            model_filename = (f'model-{progress:.2f}' f'-{valid_loss:.4f}' f'-{valid_accuracy:.4f}.pkl')
            model_path = os.path.join(save_dir, model_filename)
            torch.save(model.state_dict(), model_path)
            logging.info(f'Saved the model to: {model_path}')
            scheduler.step()
            logging.info(f'Update learning rate to: {scheduler.get_lr()[0]}')
            if progress > args.max_epoch:
                break
def train(args):
    """Train an SNLI classifier and checkpoint the best validation model.

    Loads pickled train/valid ``SNLIDataset`` objects, builds an ``SNLIModel``
    (optionally initialized with GloVe vectors), then trains with Adam,
    validating roughly 10 times per epoch and saving a checkpoint whenever
    validation accuracy improves.

    Args:
        args: parsed CLI namespace; reads train_data, valid_data, batch_size,
            word_dim, hidden_dim, clf_hidden_dim, clf_num_layers, leaf_rnn,
            batchnorm, intra_attention, dropout, glove, fix_word_embedding,
            gpu, save_dir, max_epoch, anneal_temperature.

    Side effects: writes tensorboard event files under ``save_dir/log`` and
    model checkpoints under ``save_dir``.
    """
    with open(args.train_data, 'rb') as f:
        train_dataset: SNLIDataset = pickle.load(f)
    with open(args.valid_data, 'rb') as f:
        valid_dataset: SNLIDataset = pickle.load(f)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2,
                              collate_fn=train_dataset.collate,
                              pin_memory=True)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=2,
                              collate_fn=valid_dataset.collate,
                              pin_memory=True)
    word_vocab = train_dataset.word_vocab
    label_vocab = train_dataset.label_vocab
    model = SNLIModel(num_classes=len(label_vocab), num_words=len(word_vocab),
                      word_dim=args.word_dim, hidden_dim=args.hidden_dim,
                      clf_hidden_dim=args.clf_hidden_dim,
                      clf_num_layers=args.clf_num_layers,
                      use_leaf_rnn=args.leaf_rnn,
                      use_batchnorm=args.batchnorm,
                      intra_attention=args.intra_attention,
                      dropout_prob=args.dropout)
    if args.glove:
        logging.info('Loading GloVe pretrained vectors...')
        # Zero the embedding first so words missing from GloVe start at 0.
        model.word_embedding.weight.data.zero_()
        glove_weight = load_glove(
            path=args.glove, vocab=word_vocab,
            init_weight=model.word_embedding.weight.data.numpy())
        # Padding token must stay a zero vector.
        glove_weight[word_vocab.pad_id] = 0
        model.word_embedding.weight.data.set_(torch.FloatTensor(glove_weight))
    if args.fix_word_embedding:
        logging.info('Will not update word embeddings')
        model.word_embedding.weight.requires_grad = False
    if args.gpu > -1:
        logging.info(f'Using GPU {args.gpu}')
        model.cuda(args.gpu)
    # Only optimize parameters that still require gradients (embeddings may
    # have been frozen above).
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params=params)
    criterion = nn.CrossEntropyLoss()
    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)

    def run_iter(batch, is_training):
        # One forward pass (plus backward/step when training); returns
        # (loss, accuracy) as 0-dim Variables.
        model.train(is_training)
        pre = wrap_with_variable(batch['pre'], volatile=not is_training,
                                 gpu=args.gpu)
        hyp = wrap_with_variable(batch['hyp'], volatile=not is_training,
                                 gpu=args.gpu)
        pre_length = wrap_with_variable(batch['pre_length'],
                                        volatile=not is_training,
                                        gpu=args.gpu)
        hyp_length = wrap_with_variable(batch['hyp_length'],
                                        volatile=not is_training,
                                        gpu=args.gpu)
        label = wrap_with_variable(batch['label'], volatile=not is_training,
                                   gpu=args.gpu)
        logits = model(pre=pre, pre_length=pre_length, hyp=hyp,
                       hyp_length=hyp_length)
        label_pred = logits.max(1)[1]
        accuracy = torch.eq(label, label_pred).float().mean()
        loss = criterion(input=logits, target=label)
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(parameters=params, max_norm=5)
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        # Accepts either a raw float or a 0-dim Variable.
        value = unwrap_scalar_variable(value)
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    num_train_batches = len(train_loader)
    # FIX: guard against tiny datasets — the original used
    # num_train_batches // 10, which is 0 for < 10 batches and makes the
    # `% validate_every` below raise ZeroDivisionError.
    validate_every = max(1, num_train_batches // 10)
    best_valid_accuracy = 0
    iter_count = 0
    for epoch_num in range(1, args.max_epoch + 1):
        logging.info(f'Epoch {epoch_num}: start')
        for batch_iter, train_batch in enumerate(train_loader):
            if args.anneal_temperature and iter_count % 500 == 0:
                # Exponentially anneal the Gumbel temperature, floored at 0.5.
                gamma = 0.00001
                new_temperature = max([0.5, math.exp(-gamma * iter_count)])
                model.encoder.gumbel_temperature = new_temperature
                logging.info(
                    f'Iter #{iter_count}: '
                    f'Set Gumbel temperature to {new_temperature:.4f}')
            train_loss, train_accuracy = run_iter(batch=train_batch,
                                                  is_training=True)
            iter_count += 1
            add_scalar_summary(summary_writer=train_summary_writer,
                               name='loss', value=train_loss, step=iter_count)
            add_scalar_summary(summary_writer=train_summary_writer,
                               name='accuracy', value=train_accuracy,
                               step=iter_count)
            if (batch_iter + 1) % validate_every == 0:
                valid_loss_sum = valid_accuracy_sum = 0
                num_valid_batches = len(valid_loader)
                for valid_batch in valid_loader:
                    valid_loss, valid_accuracy = run_iter(batch=valid_batch,
                                                          is_training=False)
                    valid_loss_sum += unwrap_scalar_variable(valid_loss)
                    valid_accuracy_sum += unwrap_scalar_variable(
                        valid_accuracy)
                valid_loss = valid_loss_sum / num_valid_batches
                valid_accuracy = valid_accuracy_sum / num_valid_batches
                add_scalar_summary(summary_writer=valid_summary_writer,
                                   name='loss', value=valid_loss,
                                   step=iter_count)
                add_scalar_summary(summary_writer=valid_summary_writer,
                                   name='accuracy', value=valid_accuracy,
                                   step=iter_count)
                # Fractional epoch counter used in log lines and filenames.
                progress = epoch_num + batch_iter / num_train_batches
                logging.info(f'Epoch {progress:.2f}: '
                             f'valid loss = {valid_loss:.4f}, '
                             f'valid accuracy = {valid_accuracy:.4f}')
                if valid_accuracy > best_valid_accuracy:
                    best_valid_accuracy = valid_accuracy
                    model_filename = (f'model-{progress:.2f}'
                                      f'-{valid_loss:.4f}'
                                      f'-{valid_accuracy:.4f}.pkl')
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    # Consistency fix: use logging like every other status
                    # message (was a bare print).
                    logging.info(f'Saved the new best model to {model_path}')
def main(argv):
    """Train a full-image GAN and an ROI-cropped GAN jointly.

    Both trainers are instances of the class named by
    ``config.hyperparameters['trainer']``.  Every iteration the ROI crop of
    each batch is trained by ``roi_trainer``; its generated outputs are then
    pasted back into the full-image generator outputs before the full
    generator update.  Periodically dumps losses, images, and snapshots.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)
    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    # Parse ROI parameters: opts.roi is "x,y,w,h" in pixels.
    roi = [int(val_str) for val_str in opts.roi.split(',')]
    roi_x = roi[0]
    roi_y = roi[1]
    roi_w = roi[2]
    roi_h = roi[3]
    # Instantiate two independent trainers of the same class via exec.
    # NOTE(review): exec on a config-supplied class name runs arbitrary code
    # if the config is untrusted — consider a registry/getattr lookup instead.
    cmd1 = "trainer=%s(config.hyperparameters)" % config.hyperparameters[
        'trainer']
    cmd2 = "roi_trainer=%s(config.hyperparameters)" % config.hyperparameters[
        'trainer']
    local_dict = locals()
    exec(cmd1, globals(), local_dict)
    trainer = local_dict['trainer']
    exec(cmd2, globals(), local_dict)
    roi_trainer = local_dict['roi_trainer']
    # Check if resume training
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
        roi_trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)
    roi_trainer.cuda(opts.gpu)
    ##################################################################################################################
    # Setup logger and prepare image outputs
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)
    for ep in range(0, MAX_EPOCHS):
        for it, (images_a, images_b) in enumerate(
                izip(train_loader_a, train_loader_b)):
            # Skip ragged final batches so both domains stay in lockstep.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size:
                continue
            # Crop images according to ROI (assumes NCHW layout — TODO confirm
            # against get_data_loader).
            roi_images_a = images_a[:, :, roi_y:roi_y + roi_h,
                                    roi_x:roi_x + roi_w].clone()
            roi_images_b = images_b[:, :, roi_y:roi_y + roi_h,
                                    roi_x:roi_x + roi_w].clone()
            roi_images_a = Variable(roi_images_a.cuda(opts.gpu))
            roi_images_b = Variable(roi_images_b.cuda(opts.gpu))
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))
            # Main training code: full-image discriminator + generator update.
            trainer.dis_update(images_a, images_b, config.hyperparameters)
            trainer.gen_update(images_a, images_b, config.hyperparameters)
            # Training code for ROI
            roi_trainer.dis_update(roi_images_a, roi_images_b,
                                   config.hyperparameters)
            roi_image_outputs = roi_trainer.gen_update(roi_images_a,
                                                       roi_images_b,
                                                       config.hyperparameters)
            roi_assembled_images = roi_trainer.assemble_outputs(
                roi_images_a, roi_images_b, roi_image_outputs)
            # Paste ROI generator outputs into the cross-domain translations
            # (indices 1 and 2 of roi_image_outputs — presumably x_ba/x_ab;
            # verify against the trainer's gen_update return order), then run
            # the full generator update on the pasted images.
            x_aa, x_ba, x_ab, x_bb, shared = trainer.gen(images_a, images_b)
            x_ba_paste = x_ba.clone()
            x_ab_paste = x_ab.clone()
            x_ba_paste[:, :, roi_y:roi_y + roi_h,
                       roi_x:roi_x + roi_w] = roi_image_outputs[1].clone()
            x_ab_paste[:, :, roi_y:roi_y + roi_h,
                       roi_x:roi_x + roi_w] = roi_image_outputs[2].clone()
            trainer.gen.zero_grad()
            image_outputs = trainer.gen_update_helper(images_a, images_b,
                                                      x_aa, x_ba_paste,
                                                      x_ab_paste, x_bb,
                                                      shared,
                                                      config.hyperparameters)
            assembled_images = trainer.assemble_outputs(
                images_a, images_b, image_outputs)
            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)
            if (iterations + 1) % config.image_save_iterations == 0:
                # Images are in [-1, 1]; /2 + 0.5 maps them to [0, 1].
                img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                    iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename, nrow=1)
                img_filename = '%s/roi_gen_%08d.jpg' % (image_directory,
                                                        iterations + 1)
                torchvision.utils.save_image(
                    roi_assembled_images.data / 2 + 0.5, img_filename, nrow=1)
                write_html(snapshot_directory + "/index.html", iterations + 1,
                           config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename, nrow=1)
                img_filename = '%s/roi_gen.jpg' % (image_directory)
                torchvision.utils.save_image(
                    roi_assembled_images.data / 2 + 0.5, img_filename, nrow=1)
            # Save network weights
            # NOTE(review): only `trainer` is snapshotted here — roi_trainer's
            # weights are never saved; confirm whether that is intentional.
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)
            iterations += 1
            if iterations >= max_iterations:
                return
import pdb
import numpy as np
import mxnet as mx
from poserecog.bucket_io import BucketSentenceIter
from poserecog.get_lstm_sym import get_lstm
from poserecog.config import lstm_config as lcf
from poserecog.train_script import fit as script_fit
import logging
#head = '%(asctime)-15s %(message)s'
#logging.basicConfig(level=logging.DEBUG, format=head)
import time

# Single timestamp shared by the log file and both tensorboard directories.
tm = time.strftime("%m_%d_%H_%M")
# FIX: the log filename previously called time.strftime(...) a second time;
# if the second call landed in the next minute, the .log file and the
# tensorboard dirs below would get different timestamps. Reuse `tm`.
logging.basicConfig(filename='log/' + tm + '.log',
                    level=logging.DEBUG)
import tensorboard
sw_train = tensorboard.FileWriter('log/%s_train/' % tm)
sw_val = tensorboard.FileWriter('log/%s_val/' % tm)


def Perplexity(label, pred):
    """Return the perplexity of `pred` against integer labels.

    Args:
        label: array of label ids; transposed and flattened so it lines up
            with the row order of `pred` (bucketed time-major batches).
        pred: 2-D array of shape (num_samples, num_classes) with predicted
            class probabilities per row.

    Returns:
        exp(mean negative log-likelihood); probabilities are floored at
        1e-10 to avoid log(0).
    """
    label = label.T.reshape((-1, ))
    loss = 0.
    for i in range(pred.shape[0]):
        loss += -np.log(max(1e-10, pred[i][int(label[i])]))
    return np.exp(loss / label.size)


def monitor_train(param):
    """Batch-end callback: push the current Perplexity metric to tensorboard."""
    metric = dict(param.eval_metric.get_name_value())
    sw_train.add_summary(tensorboard.summary.scalar('perp',
                                                    metric['Perplexity']))
def main(argv):
    """Train a 4-domain translation network, optionally warm-started from
    two separately trained 2-domain (AB and CD) models.

    With --warm_start, the non-shared encoder/decoder/discriminator blocks
    are copied verbatim from the AB and CD models, and the shared blocks are
    initialized to the element-wise mean of the two source models' shared
    blocks.  Then runs the usual dis/gen update loop over four loaders.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)
    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    train_loader_c = get_data_loader(config.datasets['train_c'], batch_size)
    train_loader_d = get_data_loader(config.datasets['train_d'], batch_size)
    trainer = []
    trainer = init_trainer(trainer, config)
    print("============ DISCRIMINATOR ==============")
    print(trainer.dis)
    print("============ GENERATOR ==============")
    print(trainer.gen)
    # Set up for warm start: build empty AB/CD model shells to load into.
    if opts.warm_start == 1:
        gen_ab = None
        gen_cd = None
        dis_ab = None
        dis_cd = None
        (gen_ab, gen_cd, dis_ab, dis_cd) = init_warm_start_models(gen_ab,
                                                                  gen_cd,
                                                                  dis_ab,
                                                                  dis_cd,
                                                                  config)
    # If not warm starting check if resume training
    iterations = 0
    if opts.resume == 1 and opts.warm_start == 0:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)
    # Warm start
    if opts.warm_start == 1:
        print("============ GENERATOR AB ==============")
        print(gen_ab)
        print("============ GENERATOR CD ==============")
        print(gen_cd)
        print("============ DISCRIMINATOR AB ==============")
        print(dis_ab)
        print("============ DISCRIMINATOR CD ==============")
        print(dis_cd)
        dirname = os.path.dirname(config.snapshot_prefix)
        model_path = os.path.join(dirname, opts.gen_ab)
        gen_ab.load_state_dict(torch.load(model_path))
        print("Pre trained generator ab loaded from: {}".format(model_path))
        model_path = os.path.join(dirname, opts.gen_cd)
        gen_cd.load_state_dict(torch.load(model_path))
        print("Pre trained generator cd loaded from: {}".format(model_path))
        gen_ab.cuda(opts.gpu)
        gen_cd.cuda(opts.gpu)
        model_path = os.path.join(dirname, opts.dis_ab)
        dis_ab.load_state_dict(torch.load(model_path))
        print("Pre trained discriminaor ab loaded from: {}".format(model_path))
        model_path = os.path.join(dirname, opts.dis_cd)
        dis_cd.load_state_dict(torch.load(model_path))
        # NOTE(review): message says "generator cd" but this loads the CD
        # discriminator — likely a copy/paste slip in the string.
        print("Pre trained generator cd loaded from: {}".format(model_path))
        dis_ab.cuda(opts.gpu)
        dis_cd.cuda(opts.gpu)
        # Warm start init: graft the non-shared sub-modules directly.
        trainer.dis.model_A = dis_ab.model_A
        trainer.dis.model_B = dis_ab.model_B
        trainer.dis.model_C = dis_cd.model_A
        trainer.dis.model_D = dis_cd.model_B
        trainer.gen.encode_A = gen_ab.encode_A
        trainer.gen.encode_B = gen_ab.encode_B
        trainer.gen.encode_C = gen_cd.encode_A
        trainer.gen.encode_D = gen_cd.encode_B
        trainer.gen.decode_A = gen_ab.decode_A
        trainer.gen.decode_B = gen_ab.decode_B
        trainer.gen.decode_C = gen_cd.decode_A
        trainer.gen.decode_D = gen_cd.decode_B
        # Shared blocks - take mean of two original models
        # Functions inspired from this thread
        # https://discuss.pytorch.org/t/running-average-of-parameters/902/2

        def flatten_params(model1, model2):
            # Flatten each model's parameters into one 1-D tensor;
            # both models must have identical architectures.
            p1 = torch.cat(
                [param.data.view(-1) for param in model1.parameters()], 0)
            p2 = torch.cat(
                [param.data.view(-1) for param in model2.parameters()], 0)
            return (p1, p2)

        def load_params(flattened_params, model):
            # Write the element-wise mean of the two flattened vectors back
            # into `model`'s parameters, slice by slice.
            offset = 0
            for param in model.parameters():
                fp1 = flattened_params[0][offset:offset + param.nelement()]
                fp2 = flattened_params[1][offset:offset + param.nelement()]
                fpjoint = fp1 + fp2
                fpjoint = torch.div(fpjoint, 2.0)
                # NOTE(review): copy_ already fills param.data in place; the
                # trailing .view(param.size()) result is discarded (harmless).
                param.data.copy_(fpjoint).view(param.size())
                offset += param.nelement()

        model_S_new = flatten_params(dis_ab.model_S, dis_cd.model_S)
        load_params(model_S_new, trainer.dis.model_S)
        gen_enc_new = flatten_params(gen_ab.enc_shared, gen_cd.enc_shared)
        load_params(gen_enc_new, trainer.gen.enc_shared)
        gen_dec_new = flatten_params(gen_ab.dec_shared, gen_cd.dec_shared)
        load_params(gen_dec_new, trainer.gen.dec_shared)
        print("Initialized model with params from separately trained models")
        # print("============ DISCRIMINATOR ==============")
        # print(trainer.dis)
        # print("============ GENERATOR ==============")
        # print(trainer.gen)
    ##################################################################################################################
    # Setup logger and prepare image outputs
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)
    for ep in range(0, MAX_EPOCHS):
        for it, (images_a, images_b, images_c, images_d) in enumerate(
                itertools.izip(train_loader_a, train_loader_b, train_loader_c,
                               train_loader_d)):
            # Skip ragged final batches so all four domains stay in lockstep.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size or images_c.size(
                        0) != batch_size or images_d.size(0) != batch_size:
                continue
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))
            images_c = Variable(images_c.cuda(opts.gpu))
            images_d = Variable(images_d.cuda(opts.gpu))
            # Main training code
            trainer.dis_update(images_a, images_b, images_c, images_d,
                               config.hyperparameters)
            image_outputs = trainer.gen_update(images_a, images_b, images_c,
                                               images_d,
                                               config.hyperparameters)
            assembled_images = trainer.assemble_outputs(
                images_a, images_b, images_c, images_d, image_outputs)
            assembled_dbl_loop_images = trainer.assemble_double_loop_outputs(
                images_a, images_b, images_c, images_d, image_outputs)
            # print(assembled_images.data.shape)
            # print(assembled_dbl_loop_images.data.shape)
            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)
            if (iterations + 1) % config.image_save_iterations == 0:
                # Images are in [-1, 1]; /2 + 0.5 maps them to [0, 1].
                img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                    iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename, nrow=2)
                dbl_img_filename = '%s/gen_dbl_%08d.jpg' % (image_directory,
                                                            iterations + 1)
                torchvision.utils.save_image(
                    assembled_dbl_loop_images.data / 2 + 0.5,
                    dbl_img_filename, nrow=2)
                write_html(snapshot_directory + "/index.html", iterations + 1,
                           config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename, nrow=2)
                dbl_img_filename = '%s/gen_dbl.jpg' % (image_directory)
                torchvision.utils.save_image(
                    assembled_dbl_loop_images.data / 2 + 0.5,
                    dbl_img_filename, nrow=2)
            # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)
            iterations += 1
            if iterations >= max_iterations:
                return