Esempio n. 1
0
 def __init__(self, batch_size, frequent=50):
     """Initialize speed-tracking state and a TensorBoard writer.

     Args:
         batch_size: number of samples per processed batch.
         frequent: batches between progress reports (default 50).
     """
     # Progress/timing bookkeeping starts zeroed out.
     self.init, self.tic, self.last_count = False, 0, 0
     # Reporting configuration supplied by the caller.
     self.batch_size, self.frequent = batch_size, frequent
     # NOTE(review): log directory is hard-coded relative to the CWD.
     self.summary_writer = tb.FileWriter('./logs/')
Esempio n. 2
0
 def __init__(self,
              batch_size: int,
              output_folder: str,
              optimized_metric: str = C.PERPLEXITY,
              use_tensorboard: bool = False,
              cp_decoder: Optional[checkpoint_decoder.CheckpointDecoder] = None) -> None:
     """Collect per-checkpoint metrics and track the early-stopping target.

     Args:
         batch_size: Batch size (interface compatibility; not stored).
         output_folder: Directory receiving the metrics file and TB events.
         optimized_metric: Metric name used for early stopping.
         use_tensorboard: When True, (re)create a TensorBoard event writer.
         cp_decoder: Optional checkpoint decoder; required when the
             optimized metric is BLEU.
     """
     self.output_folder = output_folder
     # One dict of {metric name: value} per checkpoint, in order.
     self.metrics = []  # type: List[Dict]
     self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME)
     self.best_checkpoint = 0
     self.start_tic = time.time()
     self.tic = 0

     # TensorBoard is optional; import lazily so the dependency stays soft.
     self.summary_writer = None
     if use_tensorboard:
         import tensorboard  # pylint: disable=import-error
         tb_log_dir = os.path.join(output_folder, C.TENSORBOARD_NAME)
         # Start every run from a clean event directory.
         if os.path.exists(tb_log_dir):
             logger.info("Deleting existing tensorboard log dir %s", tb_log_dir)
             shutil.rmtree(tb_log_dir)
         logger.info("Logging training events for Tensorboard at '%s'", tb_log_dir)
         self.summary_writer = tensorboard.FileWriter(tb_log_dir)

     # Checkpoint decoding runs in a separate 'spawn'-ed process; its
     # metric results come back through a queue.
     self.cp_decoder = cp_decoder
     self.ctx = mp.get_context('spawn')  # type: ignore
     self.decoder_metric_queue = self.ctx.Queue()
     self.decoder_process = None  # type: Optional[mp.Process]

     # Validate the early-stopping configuration up front.
     utils.check_condition(optimized_metric in C.METRICS, "Unsupported metric: %s" % optimized_metric)
     if optimized_metric == C.BLEU:
         utils.check_condition(self.cp_decoder is not None, "%s requires CheckpointDecoder" % C.BLEU)
     self.optimized_metric = optimized_metric
     self.validation_best = C.METRIC_WORST[self.optimized_metric]
     logger.info("Early stopping by optimizing '%s'", self.optimized_metric)
Esempio n. 3
0
 def __init__(self, batch_size, frequent=50):
     """Initialize throughput-reporting state plus a TensorBoard writer.

     Args:
         batch_size: samples per batch, used for throughput reporting.
         frequent: reporting interval in batches (default 50).
     """
     # Counters start from a clean slate.
     self.init, self.tic, self.last_count = False, 0, 0
     self.batch_size, self.frequent = batch_size, frequent
     # Event files live under <output_path>/tb_logs (from the global config).
     self.summary_writer = tb.FileWriter(
         os.path.join(config.output_path, 'tb_logs'))
Esempio n. 4
0
 def __init__(self,
              batch_size: int,
              output_folder: str,
              optimized_metric: str = C.PERPLEXITY,
              use_tensorboard: bool = False,
              checkpoint_decoder: Optional[
                  sockeye.checkpoint_decoder.CheckpointDecoder] = None,
              num_concurrent_decodes: int = 1) -> None:
     """Track per-checkpoint metrics and configure early stopping.

     Args:
         batch_size: Batch size, forwarded to the MXNet Speedometer.
         output_folder: Directory receiving the metrics file and TB events.
         optimized_metric: Metric driving early stopping (perplexity,
             accuracy, or BLEU).
         use_tensorboard: When True, (re)create a TensorBoard event writer.
         checkpoint_decoder: Decoder used for BLEU evaluation; required
             when optimized_metric is BLEU.
         num_concurrent_decodes: Max number of parallel decoder processes.

     Raises:
         ValueError: If the metric is unsupported or BLEU is requested
             without a checkpoint decoder.
     """
     # One dict of metric names & values for each checkpoint.
     self.metrics = []
     self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME)
     open(self.metrics_filename, 'w').close()  # clear metrics file
     self.best_checkpoint = 0
     self.start_tic = time.time()
     self.summary_writer = None
     if use_tensorboard:
         import tensorboard  # pylint: disable=import-error
         log_dir = os.path.join(output_folder, C.TENSORBOARD_NAME)
         # Start every run from a clean event directory.
         if os.path.exists(log_dir):
             logger.info("Deleting existing tensorboard log dir %s",
                         log_dir)
             shutil.rmtree(log_dir)
         logger.info("Logging training events for Tensorboard at '%s'",
                     log_dir)
         self.summary_writer = tensorboard.FileWriter(log_dir)
     # Checkpoint decoding happens in 'spawn'-ed worker processes; results
     # come back through a queue.
     self.checkpoint_decoder = checkpoint_decoder
     self.ctx = mp.get_context('spawn')
     self.num_concurrent_decodes = num_concurrent_decodes
     self.decoder_metric_queue = self.ctx.Queue()
     self.decoder_processes = []
     # TODO(fhieber): MXNet Speedometer uses root logger. How to fix this?
     self.speedometer = mx.callback.Speedometer(
         batch_size=batch_size,
         frequent=C.MEASURE_SPEED_EVERY,
         auto_reset=False)
     self.optimized_metric = optimized_metric
     if self.optimized_metric == C.PERPLEXITY:
         self.minimize = True
         self.validation_best = np.inf
     elif self.optimized_metric == C.ACCURACY:
         self.minimize = False
         self.validation_best = -np.inf
     elif self.optimized_metric == C.BLEU:
         # BUGFIX: was a bare `assert`, which `python -O` strips silently;
         # raise explicitly so the misconfiguration always surfaces.
         if self.checkpoint_decoder is None:
             raise ValueError("BLEU requires CheckpointDecoder")
         self.minimize = False
         self.validation_best = -np.inf
     else:
         raise ValueError("No other metrics supported")
     logger.info("Early stopping by optimizing '%s' (minimize=%s)",
                 self.optimized_metric, self.minimize)
     self.tic = 0
Esempio n. 5
0
    def __init__(self, logging_dir, logfile_name, print_freq=10):
        """Prepare a log directory, a TensorBoard writer, and tee stdout.

        Args:
            logging_dir: directory for TensorBoard events and the text log.
            logfile_name: currently unused; the text log is always named
                'log.txt'. NOTE(review): wire this through or drop it.
            print_freq: iterations between progress prints (default 10).
        """
        self.log_dir = logging_dir
        self.print_freq = print_freq

        # Create the directory tree on first use.
        if not os.path.isdir(logging_dir):
            os.makedirs(logging_dir)

        self.summary_writer = tensorboard.FileWriter(logdir=logging_dir)

        # Route print() output through a file-backed Logger as well.
        sys.stdout = Logger(osp.join(logging_dir, 'log.txt'))
Esempio n. 6
0
 def __init__(self,
              batch_size: int,
              output_folder: str,
              optimized_metric: str = C.PERPLEXITY,
              use_tensorboard: bool = False,
              cp_decoder: Optional[
                  checkpoint_decoder.CheckpointDecoder] = None,
              num_concurrent_decodes: int = 1) -> None:
     """Track per-checkpoint metrics, optionally log to TensorBoard, and
     configure early stopping on a validation metric.

     Args:
         batch_size: Batch size, forwarded to the MXNet Speedometer.
         output_folder: Directory receiving the metrics file and TB events.
         optimized_metric: Metric name used for early stopping.
         use_tensorboard: When True, (re)create a TensorBoard event writer.
         cp_decoder: Optional checkpoint decoder; required for BLEU.
         num_concurrent_decodes: Max number of parallel decoder processes.
     """
     # Output locations.
     self.output_folder = output_folder
     self.metrics_filename = os.path.join(output_folder, C.METRICS_NAME)
     # One dict of {metric name: value} per checkpoint, in order.
     self.metrics = []
     self.best_checkpoint = 0
     self.start_tic = time.time()
     self.tic = 0

     # TensorBoard is optional; import lazily so the dependency stays soft.
     self.summary_writer = None
     if use_tensorboard:
         import tensorboard  # pylint: disable=import-error
         events_dir = os.path.join(output_folder, C.TENSORBOARD_NAME)
         # Start every run from a clean event directory.
         if os.path.exists(events_dir):
             logger.info("Deleting existing tensorboard log dir %s",
                         events_dir)
             shutil.rmtree(events_dir)
         logger.info("Logging training events for Tensorboard at '%s'",
                     events_dir)
         self.summary_writer = tensorboard.FileWriter(events_dir)

     # Checkpoint decoding runs in 'spawn'-ed worker processes; metric
     # results come back through a queue.
     self.cp_decoder = cp_decoder
     self.ctx = mp.get_context('spawn')
     self.num_concurrent_decodes = num_concurrent_decodes
     self.decoder_metric_queue = self.ctx.Queue()
     self.decoder_processes = []

     # TODO(fhieber): MXNet Speedometer uses root logger. How to fix this?
     self.speedometer = mx.callback.Speedometer(
         batch_size=batch_size,
         frequent=C.MEASURE_SPEED_EVERY,
         auto_reset=False)

     # Fail fast on an unsupported or under-configured metric choice.
     utils.check_condition(optimized_metric in C.METRICS,
                           "Unsupported metric: %s" % optimized_metric)
     if optimized_metric == C.BLEU:
         utils.check_condition(self.cp_decoder is not None,
                               "%s requires CheckpointDecoder" % C.BLEU)
     self.optimized_metric = optimized_metric
     self.validation_best = C.METRIC_WORST[self.optimized_metric]
     logger.info("Early stopping by optimizing '%s'", self.optimized_metric)
def main(argv):
    """Train a two-domain GAN and periodically score a classifier on Test_B,
    snapshotting the model whenever that accuracy improves.

    NOTE(review): this is Python 2 code — it relies on `itertools.izip`
    and on `exec` rebinding the local `trainer`, neither of which works
    under Python 3. Confirm the target interpreter before reuse.
    """
    (opts, args) = parser.parse_args(argv)

    # Load experiment setting
    assert isinstance(opts, object)  # always true; effectively a no-op guard
    config = NetConfig(opts.config)

    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']

    # Instantiate the trainer class named in the config. Python 2 `exec`
    # rebinds the local `trainer`; under Python 3 this would leave the
    # placeholder list in place.
    trainer = []
    exec("trainer=%s(config.hyperparameters)" %
         config.hyperparameters['trainer'])
    trainer.cuda(opts.gpu)

    iterations = 0

    # One TensorBoard writer per experiment, keyed by the config file name.
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    # NOTE(review): this first snapshot_directory is immediately overwritten
    # by the next call; the first call is kept only for its side effects.
    snapshot_directory = prepare_snapshot_folder(config.snapshot_prefix)
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)

    # Load datasets
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    test_loader_b = get_data_loader(
        config.datasets['test_b'],
        batch_size=config.hyperparameters['test_batch_size'])

    best_score = 0
    for ep in range(0, MAX_EPOCHS):
        for it, ((images_a, labels_a), (images_b, labels_b)) in enumerate(
                itertools.izip(train_loader_a, train_loader_b)):
            # Skip ragged trailing batches so batch-size assumptions hold.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size:
                continue
            trainer.dis.train()
            images_a = Variable(images_a.cuda(opts.gpu))
            labels_a = Variable(labels_a.cuda(opts.gpu)).view(images_a.size(0))
            images_b = Variable(images_b.cuda(opts.gpu))
            # Main training code: one discriminator step, one generator step.
            trainer.dis_update(images_a, labels_a, images_b,
                               config.hyperparameters)
            x_aa, x_ba, x_ab, x_bb = trainer.gen_update(
                images_a, images_b, config.hyperparameters)

            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)

            # Periodic evaluation on Test_B; save weights/images on new best.
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.dis.eval()
                score = 0
                num_samples = 0
                for tit, (test_images_b,
                          test_labels_b) in enumerate(test_loader_b):
                    test_images_b = Variable(test_images_b.cuda(opts.gpu))
                    test_labels_b = Variable(test_labels_b.cuda(
                        opts.gpu)).view(test_images_b.size(0))
                    cls_outputs = trainer.dis.classify_b(test_images_b)
                    _, cls_predicts = torch.max(cls_outputs.data, 1)
                    cls_acc = (cls_predicts == test_labels_b.data).sum()
                    score += cls_acc
                    num_samples += test_images_b.size(0)
                # `1.0 *` forces float division under Python 2.
                score /= 1.0 * num_samples
                print('Classification accuracy for Test_B dataset: %4.4f' %
                      score)
                if score > best_score:
                    best_score = score
                    trainer.save(config.snapshot_prefix, iterations=-1)
                train_writer.add_summary(summary.scalar('test_b_acc', score),
                                         iterations + 1)
                # Dump sample inputs and all four translation directions.
                img_name = image_directory + "/images_a.jpg"
                torchvision.utils.save_image(images_a.data / 2 + 0.5, img_name)
                img_name = image_directory + "/images_b.jpg"
                torchvision.utils.save_image(images_b.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_aa.jpg"
                torchvision.utils.save_image(x_aa.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_ab.jpg"
                torchvision.utils.save_image(x_ab.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_bb.jpg"
                torchvision.utils.save_image(x_bb.data / 2 + 0.5, img_name)
                img_name = image_directory + "/x_ba.jpg"
                torchvision.utils.save_image(x_ba.data / 2 + 0.5, img_name)

            iterations += 1
            if iterations == max_iterations:
                return
Esempio n. 8
0
def train(args):
    """Train an SST sentence classifier and checkpoint the best model.

    Builds torchtext fields/datasets, constructs an SSTModel, then iterates
    over `train_loader` (a repeating BucketIterator), validating 20 times
    per epoch and saving a checkpoint whenever validation accuracy improves.

    Args:
        args: parsed CLI namespace; must provide the attributes referenced
            below (word_dim, hidden_dim, gpu, optimizer, save_dir, ...).

    Raises:
        ValueError: if args.optimizer is not one of adam/adagrad/adadelta.
    """
    text_field = data.Field(lower=args.lower,
                            include_lengths=True,
                            batch_first=True)
    label_field = data.Field(sequential=False)

    # In the binary (non-fine-grained) setting, drop 'neutral' examples.
    filter_pred = None
    if not args.fine_grained:
        def filter_pred(ex):  # was an assigned lambda (PEP 8 E731)
            return ex.label != 'neutral'
    dataset_splits = datasets.SST.splits(root='./data/sst',
                                         text_field=text_field,
                                         label_field=label_field,
                                         fine_grained=args.fine_grained,
                                         train_subtrees=True,
                                         filter_pred=filter_pred)

    text_field.build_vocab(*dataset_splits, vectors=args.pretrained)
    label_field.build_vocab(*dataset_splits)

    logging.info(f'Initialize with pretrained vectors: {args.pretrained}')
    logging.info(f'Number of classes: {len(label_field.vocab)}')

    train_loader, valid_loader, _ = data.BucketIterator.splits(
        datasets=dataset_splits, batch_size=args.batch_size, device=args.gpu)

    num_classes = len(label_field.vocab)
    model = SSTModel(num_classes=num_classes,
                     num_words=len(text_field.vocab),
                     word_dim=args.word_dim,
                     hidden_dim=args.hidden_dim,
                     clf_hidden_dim=args.clf_hidden_dim,
                     clf_num_layers=args.clf_num_layers,
                     use_leaf_rnn=args.leaf_rnn,
                     bidirectional=args.bidirectional,
                     intra_attention=args.intra_attention,
                     use_batchnorm=args.batchnorm,
                     dropout_prob=args.dropout)
    if args.pretrained:
        model.word_embedding.weight.data.set_(text_field.vocab.vectors)
    if args.fix_word_embedding:
        logging.info('Will not update word embeddings')
        model.word_embedding.weight.requires_grad = False
    if args.gpu > -1:
        logging.info(f'Using GPU {args.gpu}')
        model.cuda(args.gpu)
    # Only optimize parameters that still require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'adagrad':
        optimizer_class = optim.Adagrad
    elif args.optimizer == 'adadelta':
        optimizer_class = optim.Adadelta
    else:
        # BUGFIX: previously fell through with optimizer_class unbound,
        # crashing later with a confusing NameError.
        raise ValueError(f'Unsupported optimizer: {args.optimizer}')
    optimizer = optimizer_class(params=params, weight_decay=args.l2reg)
    # Halve the LR when validation accuracy plateaus.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                               mode='max',
                                               factor=0.5,
                                               patience=20 * args.halve_lr_every,
                                               verbose=True)
    criterion = nn.CrossEntropyLoss()

    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)

    def run_iter(batch, is_training):
        """Run one forward (and, when training, backward) pass on a batch;
        return (loss, accuracy) as variables."""
        model.train(is_training)
        words, length = batch.text
        label = batch.label
        # Re-wrap the length tensor with the right volatility/device
        # (was re-reading batch.text[1], which is the same tensor).
        length = wrap_with_variable(length,
                                    volatile=not is_training,
                                    gpu=args.gpu)
        logits = model(words=words, length=length)
        label_pred = logits.max(1)[1]
        accuracy = torch.eq(label, label_pred).float().mean()
        loss = criterion(input=logits, target=label)
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(parameters=params, max_norm=5)
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        """Log one scalar value to the given TensorBoard writer."""
        value = unwrap_scalar_variable(value)
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    num_train_batches = len(train_loader)
    validate_every = num_train_batches // 20
    best_valid_accuracy = 0  # typo ('best_vaild_accuacy') fixed
    iter_count = 0
    for batch_iter, train_batch in enumerate(train_loader):
        train_loss, train_accuracy = run_iter(batch=train_batch,
                                              is_training=True)
        iter_count += 1
        add_scalar_summary(summary_writer=train_summary_writer,
                           name='loss',
                           value=train_loss,
                           step=iter_count)
        add_scalar_summary(summary_writer=train_summary_writer,
                           name='accuracy',
                           value=train_accuracy,
                           step=iter_count)

        if (batch_iter + 1) % validate_every == 0:
            # Full validation pass; averages over all validation batches.
            valid_loss_sum = valid_accuracy_sum = 0
            num_valid_batches = len(valid_loader)
            for valid_batch in valid_loader:
                valid_loss, valid_accuracy = run_iter(batch=valid_batch,
                                                      is_training=False)
                valid_loss_sum += unwrap_scalar_variable(valid_loss)
                valid_accuracy_sum += unwrap_scalar_variable(valid_accuracy)
            valid_loss = valid_loss_sum / num_valid_batches
            valid_accuracy = valid_accuracy_sum / num_valid_batches
            add_scalar_summary(summary_writer=valid_summary_writer,
                               name='loss',
                               value=valid_loss,
                               step=iter_count)
            add_scalar_summary(summary_writer=valid_summary_writer,
                               name='accuracy',
                               value=valid_accuracy,
                               step=iter_count)
            scheduler.step(valid_accuracy)
            progress = train_loader.epoch
            logging.info(f'Epoch {progress:.2f}: '
                         f'valid loss = {valid_loss:.4f}, '
                         f'valid accuracy = {valid_accuracy:.4f}')
            if valid_accuracy > best_valid_accuracy:
                best_valid_accuracy = valid_accuracy
                model_filename = (f'model-{progress:.2f}'
                                  f'-{valid_loss:.4f}'
                                  f'-{valid_accuracy:.4f}.pkl')
                model_path = os.path.join(args.save_dir, model_filename)
                torch.save(model.state_dict(), model_path)
                print(f'Saved the new best model to {model_path}')
            if progress > args.max_epoch:
                break
Esempio n. 9
0
def main(argv):
    """Train a UNIT image-to-image translation model between domains a/b.

    NOTE(review): Python 2 code — uses `itertools.izip`; confirm the
    target interpreter before reuse.
    """
    (opts, args) = parser.parse_args(argv)
    # Load experiment setting
    assert isinstance(opts, object)  # always true; effectively a no-op guard
    config = NetConfig(opts.config)
    # One TensorBoard writer per experiment, keyed by the config file name.
    train_writer = tensorboard.FileWriter("%s/%s" % (opts.log,os.path.splitext(os.path.basename(opts.config))[0]))

    # Hyperparameters: loss weights for the VAE encoder / likelihood terms
    # and the GAN term, plus network architecture selections.
    max_iterations = config.hyperparameters['max_iterations']
    batch_size = config.hyperparameters['batch_size']
    vae_enc_w = config.hyperparameters['vae_enc_w']
    vae_ll_w = config.hyperparameters['vae_ll_w']
    gan_w = config.hyperparameters['gan_w']
    ch = config.hyperparameters['ch']
    gen_net = config.hyperparameters['gen']
    dis_net = config.hyperparameters['dis']

    image_size = config.datasets['a']['image_size']
    input_dims = list()
    input_dims.append(config.datasets['a']['channels'])
    input_dims.append(config.datasets['b']['channels'])

    # Load datasets: two independent loaders per domain so the discriminator
    # and generator updates can draw different samples.
    train_loader_a = get_data_loader(config.datasets['a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['b'], batch_size)
    train_loader_a2 = get_data_loader(config.datasets['a'], batch_size)
    train_loader_b2 = get_data_loader(config.datasets['b'], batch_size)
    trainer = UNITTrainer(gen_net, dis_net, batch_size, ch, input_dims, image_size, opts.lr)

    # Optionally resume from the latest snapshot.
    iterations = 0
    if opts.resume == 1:
        iterations = resume(trainer, config.snapshot_prefix)

    trainer.cuda(opts.gpu)

    # Prepare the snapshot and image output directories.
    directory = os.path.dirname(config.snapshot_prefix)
    image_directory = directory + "/images"
    if not os.path.exists(directory):
        os.makedirs(directory)
    if not os.path.exists(image_directory):
        os.makedirs(image_directory)

    write_html(directory + "/index.html", iterations + 1, config.image_save_iterations, image_directory, image_size)

    for ep in range(0, MAX_EPOCHS):
        for it, (images_a, images_b, images_a2, images_b2) in enumerate(itertools.izip(train_loader_a, train_loader_b, train_loader_a2, train_loader_b2)):
            # Skip ragged trailing batches so batch-size assumptions hold.
            if images_a.size(0) != batch_size or images_b.size(0) != batch_size:
                continue
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))
            images_a2 = Variable(images_a2.cuda(opts.gpu))
            images_b2 = Variable(images_b2.cuda(opts.gpu))
            # Main training code: one discriminator step, one generator step.
            trainer.dis_update(images_a, images_b, images_a2, images_b2)
            x_aa, x_ba, x_ab, x_bb = trainer.gen_update(images_a, images_b, gan_w, vae_ll_w, vae_enc_w)

            # Dump training stats in log file: every non-callable, non-dunder
            # trainer attribute containing 'loss' or 'acc' goes to TensorBoard.
            if (iterations+1) % config.display == 0:
                print("Iteration: %08d/%08d" %(iterations+1,max_iterations))
                members = [attr for attr in dir(trainer) \
                           if not callable(getattr(trainer, attr)) and not attr.startswith("__") and 'loss' in attr]
                for m in members:
                    train_writer.add_summary(summary.scalar(m, getattr(trainer, m)), iterations + 1)

                members = [attr for attr in dir(trainer) \
                           if not callable(getattr(trainer, attr)) and not attr.startswith("__") and 'acc' in attr]
                for m in members:
                    train_writer.add_summary(summary.scalar(m, getattr(trainer, m)), iterations + 1)

            # Save intermediate visualization results (a numbered frame at the
            # save interval; otherwise overwrite a rolling preview image).
            if (iterations+1) % config.image_save_iterations == 0:
                assembled_images = make_save_image(images_a[0:1,::], x_aa[0:1,::], x_ab[0:1,::], images_b[0:1,::], x_ba[0:1,::], x_bb[0:1,::])
                img_filename = '%s/gen_%08d.jpg' % (image_directory, iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)
                write_html(directory + "/index.html", iterations + 1, config.image_save_iterations, image_directory, image_size)
            else:
                assembled_images = make_save_image(images_a[0:1,::], x_aa[0:1,::], x_ab[0:1,::], images_b[0:1,::],x_ba[0:1,::], x_bb[0:1,::])
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5, img_filename, nrow=1)

            # Save network weights
            if (iterations+1) % config.snapshot_save_iterations == 0:
                gen_filename = '%s_gen_%08d.pkl' % (config.snapshot_prefix, iterations + 1)
                dis_filename = '%s_dis_%08d.pkl' % (config.snapshot_prefix, iterations + 1)
                torch.save(trainer.gen.state_dict(), gen_filename)
                torch.save(trainer.dis.state_dict(), dis_filename)

            iterations += 1
            if iterations == max_iterations:
                return
Esempio n. 10
0
def main(argv):
    """Train a config-selected image translation model, logging losses to
    TensorBoard and periodically saving snapshots and sample images.

    NOTE(review): Python 2 code — relies on `itertools.izip` and on `exec`
    rebinding the local `trainer`; neither works under Python 3.
    """
    (opts, args) = parser.parse_args(argv)

    # Load experiment setting
    assert isinstance(opts, object)  # always true; effectively a no-op guard
    config = NetConfig(opts.config)

    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']

    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)

    # Instantiate the trainer class named in the config (Python 2 exec
    # semantics rebind the local `trainer`).
    trainer = []
    exec("trainer=%s(config.hyperparameters)" %
         config.hyperparameters['trainer'])
    # Check if resume training
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)

    ######################################################################################################################
    # Setup logger and prepare image outputs
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)

    for ep in range(0, MAX_EPOCHS):
        for it, (images_a, images_b) in enumerate(
                itertools.izip(train_loader_a, train_loader_b)):
            # Skip ragged trailing batches so batch-size assumptions hold.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size:
                continue
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))

            # Main training code: discriminator step, then generator step.
            trainer.dis_update(images_a, images_b, config.hyperparameters)
            image_outputs = trainer.gen_update(images_a, images_b,
                                               config.hyperparameters)
            assembled_images = trainer.assemble_outputs(
                images_a, images_b, image_outputs)

            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)

            # Numbered sample image at the save interval; otherwise refresh
            # a rolling preview image at the display interval.
            if (iterations + 1) % config.image_save_iterations == 0:
                img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                    iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=1)
                write_html(snapshot_directory + "/index.html", iterations + 1,
                           config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=1)

            # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)

            iterations += 1
            if iterations >= max_iterations:
                return
Esempio n. 11
0
def main():
    """CLI entry point: parse arguments, build the dataset wrapper and the
    attention-based classifier, then train with periodic validation and
    best-checkpoint saving.

    Raises:
        ValueError: on an unknown data-type/optimizer or a non-positive
            --block-size with model-type 'avgblock'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--word-dim', type=int, default=300, help='size of word embeddings')
    parser.add_argument('--hidden-dim', type=int, default=300, help='number of hidden units per layer')
    parser.add_argument('--num-layers', type=int, default=1, help='number of layers in BiLSTM')
    parser.add_argument('--att-dim', type=int, default=350, help='number of attention unit')
    parser.add_argument('--att-hops', type=int, default=4, help='number of attention hops, for multi-hop attention model')
    parser.add_argument('--clf-hidden-dim', type=int, default=512, help='hidden (fully connected) layer size for classifier MLP')
    parser.add_argument('--clip', type=float, default=0.5, help='clip to prevent the too large grad in LSTM')
    parser.add_argument('--lr', type=float, default=.001, help='initial learning rate')
    parser.add_argument('--weight-decay', type=float, default=1e-5, help='weight decay rate per batch')
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--max-epoch', type=int, default=8)
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--cuda', action='store_true', default=True)
    parser.add_argument('--optimizer', default='adam', choices=['adam', 'sgd'])
    parser.add_argument('--batch-size', type=int, default=32, help='batch size for training')
    parser.add_argument('--penalization-coeff', type=float, default=0.1, help='the penalization coefficient')
    parser.add_argument('--fix-word-embedding', action='store_true')
    parser.add_argument('--model-type', required=True, choices=['sa', 'avgblock', 'hard'])
    parser.add_argument('--data-type', required=True, choices=['age2', 'dbpedia', 'yahoo'])
    parser.add_argument('--data', required=True, help='pickle file obtained by dataset dump')
    parser.add_argument('--save-dir', type=str, required=True, help='path to save the final model')
    parser.add_argument('--block-size', type=int, default=-1, help='block size only when model-type is avgblock')
    args = parser.parse_args()

    # Seed all RNGs for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)

    # Mirror everything logged to stdout into <save_dir>/stdout.log.
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s')
    logFormatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    rootLogger = logging.getLogger()
    fileHandler = logging.FileHandler(os.path.join(args.save_dir, 'stdout.log'))
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)
    # Record the full run configuration.
    for k, v in vars(args).items():
        logging.info(k + ':' + str(v))

    # Pick the dataset wrapper and its class count.
    if args.data_type == 'age2':
        data = AGE2(datapath=args.data, batch_size=args.batch_size)
        num_classes = 5
    elif args.data_type == 'dbpedia':
        data = DBpedia(datapath=args.data, batch_size=args.batch_size)
        num_classes = 14
    elif args.data_type == 'yahoo':
        data = Yahoo(datapath=args.data, batch_size=args.batch_size)
        num_classes = 10
    else:
        # BUGFIX: was `raise Exception`; ValueError is more specific and
        # still caught by any handler of Exception.
        raise ValueError('Invalid argument data-type')

    if args.model_type == 'avgblock' and args.block_size <= 0:
        # BUGFIX: was a bare `assert`, which `python -O` strips silently.
        raise ValueError('--block-size must be > 0 when model-type is avgblock')

    tic = time.time()
    model = Classifier(
        dictionary=data,
        dropout=args.dropout,
        num_words=data.num_words,
        num_layers=args.num_layers,
        hidden_dim=args.hidden_dim,
        word_dim=args.word_dim,
        att_dim=args.att_dim,
        att_hops=args.att_hops,
        clf_hidden_dim=args.clf_hidden_dim,
        num_classes=num_classes,
        model_type=args.model_type,
        block_size=args.block_size,
    )
    print('It takes %.2f sec to build the model.' % (time.time() - tic))
    logging.info(model)

    # Initialize embeddings from the dataset's pretrained weight matrix.
    model.word_embedding.weight.data.set_(data.weight)
    if args.fix_word_embedding:
        model.word_embedding.weight.requires_grad = False
    if args.cuda:
        model = model.cuda()

    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'sgd':
        optimizer_class = optim.SGD
    else:
        # BUGFIX: was `raise Exception`; narrowed to ValueError.
        raise ValueError('For other optimizers, please add it yourself. supported ones are: SGD and Adam.')
    # Only optimize parameters that still require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optimizer_class(params=params, lr=args.lr, weight_decay=args.weight_decay)
    # Halve the LR when validation accuracy plateaus.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.5, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()

    # Identity matrix, one per batch element, for the attention penalty term.
    I = Variable(torch.eye(args.att_hops).unsqueeze(0).expand(args.batch_size, -1, -1))
    if args.cuda:
        I = I.cuda()
    trpack = {
        'model': model,
        'params': params,
        'criterion': criterion,
        'optimizer': optimizer,
        'I': I,
    }

    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)
    tsw, vsw = train_summary_writer, valid_summary_writer

    logging.info('number of train batches: %d' % data.train_num_batch)
    validate_every = data.train_num_batch // 10
    best_valid_accuracy = 0  # typo ('best_vaild_accuacy') fixed throughout
    iter_count = 0
    tic = time.time()

    for epoch_num in range(args.max_epoch):
        for batch_iter, train_batch in enumerate(data.train_minibatch_generator()):
            progress = epoch_num + batch_iter / data.train_num_batch
            iter_count += 1

            train_loss, train_accuracy = train_iter(args, train_batch, **trpack)
            add_scalar_summary(tsw, 'loss', train_loss, iter_count)
            add_scalar_summary(tsw, 'acc', train_accuracy, iter_count)

            # Lightweight progress print roughly every 1% of an epoch.
            if (batch_iter + 1) % (data.train_num_batch // 100) == 0:
                tac = (time.time() - tic) / 60
                print('   %.2f minutes\tprogress: %.2f' % (tac, progress))
            if (batch_iter + 1) % validate_every == 0:
                # Validation pass on the dev split.
                correct_sum = 0
                for valid_batch in data.dev_minibatch_generator():
                    correct, supplements = eval_iter(args, model, valid_batch)
                    correct_sum += unwrap_scalar_variable(correct)
                valid_accuracy = correct_sum / data.dev_size
                scheduler.step(valid_accuracy)
                add_scalar_summary(vsw, 'acc', valid_accuracy, iter_count)
                logging.info('Epoch %.2f: valid accuracy = %.4f' % (progress, valid_accuracy))
                if valid_accuracy > best_valid_accuracy:
                    # Measure test accuracy only for new validation bests,
                    # then checkpoint the model.
                    correct_sum = 0
                    for test_batch in data.test_minibatch_generator():
                        correct, supplements = eval_iter(args, model, test_batch)
                        correct_sum += unwrap_scalar_variable(correct)
                    test_accuracy = correct_sum / data.test_size
                    best_valid_accuracy = valid_accuracy
                    model_filename = ('model-%.2f-%.4f-%.4f.pkl' % (progress, valid_accuracy, test_accuracy))
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    print('Saved the new best model to %s' % model_path)
Esempio n. 12
0
def main(argv):
    """Entry point: train a multi-domain image-to-image translation network.

    Parses CLI options, builds one data loader per configured domain,
    instantiates the trainer class named in the config, then runs
    adversarial training over every ordered cross-domain pair (i, j),
    periodically logging losses, saving sample images and snapshots.
    """
    (opts, args) = parser.parse_args(argv)

    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)

    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']

    # One data loader per domain listed in the config.
    train_loaders = []
    for i, train_x in enumerate(config.datasets.keys()):
        print('Domain %d = %s' % (i, train_x))
        train_loader = get_data_loader(config.datasets[train_x], batch_size)
        train_loaders.append(train_loader)

    # Instantiate the trainer class named in the config via exec().
    # NOTE(review): exec on a config-supplied string runs arbitrary code;
    # also, under Python 3 this statement would NOT rebind the local
    # 'trainer' (it relies on Python 2 exec semantics -- consistent with
    # the izip/xrange usage below).  Compare the locals()-dict workaround
    # used by the other main() in this file.
    trainer = []
    exec('trainer = %s(config.hyperparameters)' %
         config.hyperparameters['trainer'])

    # Optionally resume iteration counter and weights from the last snapshot.
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)

    ###### Set up the tensorboard logger and prepare image output folders.
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)

    domain_number = len(train_loaders)
    for ep in range(0, MAX_EPOCHS):
        # Draw one batch per domain in lockstep (Python 2 izip).
        for it, images in enumerate(itertools.izip(*train_loaders)):
            images_list = []
            for image in images:
                im = Variable(image.cuda(opts.gpu))
                images_list.append(im)
                #print('im shape = ', im.size())

            assembled_list = []
            for i in xrange(domain_number):
                for j in xrange(domain_number):
                    # first:  all of them VAE pass
                    # Same-domain pair: skipped (the VAE update is disabled).
                    if i == j:
                        continue
                        #trainer.vae_update(images_list[i], images_list[j], config.hyperparameters, i, j)
                    # second: all crossing pairs for GAN, let the lambda judge the
                    else:  # i != j
                        # One discriminator + one generator update per
                        # ordered cross-domain pair (i, j).
                        trainer.dis_update(images_list[i], images_list[j],
                                           config.hyperparameters, i, j)
                        image_outputs = trainer.gen_update(
                            images_list[i], images_list[j],
                            config.hyperparameters, i, j)

                        assembled = trainer.assemble_outputs(
                            images_list[i], images_list[j], image_outputs)
                        assembled_list.append(assembled)

            # Concatenate every pair's assembled panel into a single image.
            assembled_images = torch.cat(assembled_list, 2)
            # Dump training stats in log file

            # One bookkeeping step per ordered cross-domain pair, so the
            # iteration counter advances by n*(n-1) per batch.
            for t in xrange(domain_number * domain_number - domain_number):
                if (iterations + 1) % config.display == 0:
                    write_loss(iterations, max_iterations, trainer,
                               train_writer)
                if (iterations + 1) % config.image_save_iterations == 0:
                    # Persist a numbered sample image and refresh the HTML index.
                    img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                        iterations + 1)
                    torchvision.utils.save_image(assembled_images.data / 2 +
                                                 0.5,
                                                 img_filename,
                                                 nrow=1)
                    write_html(snapshot_directory + '/index.html',
                               iterations + 1, config.image_save_iterations,
                               image_directory)
                elif (iterations + 1) % config.image_display_iterations == 0:
                    # Overwrite a fixed-name preview image for quick monitoring.
                    img_filename = '%s/gen.jpg' % (image_directory)
                    torchvision.utils.save_image(assembled_images.data / 2 +
                                                 0.5,
                                                 img_filename,
                                                 nrow=1)

                if (iterations + 1) % config.snapshot_save_iterations == 0:
                    trainer.save(config.snapshot_prefix, iterations)

                iterations += 1
                if iterations >= max_iterations:
                    return
Esempio n. 13
0
def train(args):
    """Train an LSTM-based SNLI sentence-pair classifier (torchtext version).

    Builds vocabularies and bucketed iterators over the SNLI splits,
    trains with Adam (LR halved every two epochs), validates at every
    epoch boundary, and writes a checkpoint after each validation pass.

    Attributes read from ``args``: word_dim, lstm_hidden_dims
    (comma-separated string), mlp_hidden_dim, mlp_num_layers,
    dropout_prob, save_root_dir, data_dir, pretrained, batch_size,
    gpu, max_epoch.
    """
    experiment_name = (f'w{args.word_dim}_lh{args.lstm_hidden_dims}'
                       f'_mh{args.mlp_hidden_dim}_ml{args.mlp_num_layers}'
                       f'_d{args.dropout_prob}')
    save_dir = os.path.join(args.save_root_dir, experiment_name)
    # Separate writers so train and validation curves show as two runs.
    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(save_dir, 'log', 'train'))
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(save_dir, 'log', 'valid'))

    # lstm_hidden_dims arrives as a comma-separated string, e.g. "300,300".
    lstm_hidden_dims = [int(d) for d in args.lstm_hidden_dims.split(',')]

    logging.info('Loading data...')
    text_field = data.Field(lower=True,
                            include_lengths=True,
                            batch_first=False)
    label_field = data.Field(sequential=False)
    if not os.path.exists(args.data_dir):
        os.makedirs(args.data_dir)
    dataset_splits = datasets.SNLI.splits(text_field=text_field,
                                          label_field=label_field,
                                          root=args.data_dir)
    text_field.build_vocab(*dataset_splits, vectors=args.pretrained)
    label_field.build_vocab(*dataset_splits)
    train_loader, valid_loader, _ = data.BucketIterator.splits(
        datasets=dataset_splits, batch_size=args.batch_size, device=args.gpu)

    logging.info('Building model...')
    num_classes = len(label_field.vocab)
    num_words = len(text_field.vocab)
    model = NLIModel(num_words=num_words,
                     word_dim=args.word_dim,
                     lstm_hidden_dims=lstm_hidden_dims,
                     mlp_hidden_dim=args.mlp_hidden_dim,
                     mlp_num_layers=args.mlp_num_layers,
                     num_classes=num_classes,
                     dropout_prob=args.dropout_prob)
    # Parameter counts reported with and without the embedding matrix.
    num_total_params = sum(np.prod(p.size()) for p in model.parameters())
    num_word_embedding_params = np.prod(model.word_embedding.weight.size())
    if args.pretrained:
        model.word_embedding.weight.data.set_(text_field.vocab.vectors)
    model.cuda(args.gpu)

    logging.info(f'# of total parameters: {num_total_params}')
    logging.info(f'# of intrinsic parameters: '
                 f'{num_total_params - num_word_embedding_params}')
    logging.info(f'# of word embedding parameters: '
                 f'{num_word_embedding_params}')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=2e-4)
    # Halve LR every two epochs
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=2,
                                    gamma=0.5)

    def run_iter(batch, is_training):
        # One forward pass (plus backward/step when training); returns
        # (loss, accuracy) for the batch.
        pre_input, pre_lengths = batch.premise
        hyp_input, hyp_lengths = batch.hypothesis
        label = batch.label
        model.train(is_training)
        model_output = model(pre_input=pre_input,
                             pre_lengths=pre_lengths,
                             hyp_input=hyp_input,
                             hyp_lengths=hyp_lengths)
        label_pred = model_output.max(1)[1]
        loss = criterion(input=model_output, target=label)
        accuracy = torch.eq(label, label_pred).float().mean()
        if is_training:
            model.zero_grad()
            loss.backward()
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        # Write one scalar data point to the given tensorboard writer.
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    logging.info('Training starts!')
    cur_epoch = 0
    for iter_count, train_batch in enumerate(train_loader):
        train_loss, train_accuracy = run_iter(batch=train_batch,
                                              is_training=True)
        # NOTE(review): `.data[0]` is pre-0.4 PyTorch scalar access.
        add_scalar_summary(summary_writer=train_summary_writer,
                           name='loss',
                           value=train_loss.data[0],
                           step=iter_count)
        add_scalar_summary(summary_writer=train_summary_writer,
                           name='accuracy',
                           value=train_accuracy.data[0],
                           step=iter_count)

        # torchtext's BucketIterator cycles forever; an epoch boundary is
        # detected by watching the integer part of .epoch advance.
        if int(train_loader.epoch) > cur_epoch:
            cur_epoch = int(train_loader.epoch)
            num_valid_batches = len(valid_loader)
            valid_loss_sum = valid_accracy_sum = 0
            for valid_batch in valid_loader:
                valid_loss, valid_accuracy = run_iter(batch=valid_batch,
                                                      is_training=False)
                valid_loss_sum += valid_loss.data[0]
                valid_accracy_sum += valid_accuracy.data[0]
            valid_loss = valid_loss_sum / num_valid_batches
            valid_accuracy = valid_accracy_sum / num_valid_batches
            add_scalar_summary(summary_writer=valid_summary_writer,
                               name='loss',
                               value=valid_loss,
                               step=iter_count)
            add_scalar_summary(summary_writer=valid_summary_writer,
                               name='accuracy',
                               value=valid_accuracy,
                               step=iter_count)
            progress = train_loader.epoch
            logging.info(f'Epoch {progress:.2f}: '
                         f'valid loss = {valid_loss:.4f}, '
                         f'valid accuracy = {valid_accuracy:.4f}')
            # A checkpoint is written after every validation pass, not
            # only on improvement.
            model_filename = (f'model-{progress:.2f}'
                              f'-{valid_loss:.4f}'
                              f'-{valid_accuracy:.4f}.pkl')
            model_path = os.path.join(save_dir, model_filename)
            torch.save(model.state_dict(), model_path)
            logging.info(f'Saved the model to: {model_path}')
            scheduler.step()
            logging.info(f'Update learning rate to: {scheduler.get_lr()[0]}')

            if progress > args.max_epoch:
                break
Esempio n. 14
0
def train(args):
    """Train a Gumbel tree-LSTM SNLI classifier from pickled datasets.

    Loads pre-processed train/valid ``SNLIDataset`` pickles, builds an
    ``SNLIModel`` (optionally initialized from GloVe vectors), and trains
    with Adam plus gradient clipping.  Validation runs roughly ten times
    per epoch; whenever validation accuracy improves, the model state
    dict is saved under ``args.save_dir``.

    Attributes read from ``args``: train_data, valid_data, batch_size,
    word_dim, hidden_dim, clf_hidden_dim, clf_num_layers, leaf_rnn,
    batchnorm, intra_attention, dropout, glove, fix_word_embedding,
    gpu, save_dir, anneal_temperature, max_epoch.
    """
    # NOTE(review): pickle.load runs arbitrary code if the file is
    # untrusted; these are assumed to be locally produced dataset dumps.
    with open(args.train_data, 'rb') as f:
        train_dataset: SNLIDataset = pickle.load(f)
    with open(args.valid_data, 'rb') as f:
        valid_dataset: SNLIDataset = pickle.load(f)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2,
                              collate_fn=train_dataset.collate,
                              pin_memory=True)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=2,
                              collate_fn=valid_dataset.collate,
                              pin_memory=True)
    word_vocab = train_dataset.word_vocab
    label_vocab = train_dataset.label_vocab

    model = SNLIModel(num_classes=len(label_vocab),
                      num_words=len(word_vocab),
                      word_dim=args.word_dim,
                      hidden_dim=args.hidden_dim,
                      clf_hidden_dim=args.clf_hidden_dim,
                      clf_num_layers=args.clf_num_layers,
                      use_leaf_rnn=args.leaf_rnn,
                      use_batchnorm=args.batchnorm,
                      intra_attention=args.intra_attention,
                      dropout_prob=args.dropout)
    if args.glove:
        logging.info('Loading GloVe pretrained vectors...')
        # Zero first so words missing from GloVe start from zero vectors.
        model.word_embedding.weight.data.zero_()
        glove_weight = load_glove(
            path=args.glove,
            vocab=word_vocab,
            init_weight=model.word_embedding.weight.data.numpy())
        glove_weight[word_vocab.pad_id] = 0
        model.word_embedding.weight.data.set_(torch.FloatTensor(glove_weight))
    if args.fix_word_embedding:
        logging.info('Will not update word embeddings')
        model.word_embedding.weight.requires_grad = False
    if args.gpu > -1:
        logging.info(f'Using GPU {args.gpu}')
        model.cuda(args.gpu)
    # Optimize only parameters that still require gradients (the word
    # embedding may have been frozen above).
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params=params)
    criterion = nn.CrossEntropyLoss()

    train_summary_writer = tensorboard.FileWriter(logdir=os.path.join(
        args.save_dir, 'log', 'train'),
                                                  flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(logdir=os.path.join(
        args.save_dir, 'log', 'valid'),
                                                  flush_secs=10)

    def run_iter(batch, is_training):
        # One forward pass (plus backward/clip/step when training);
        # returns (loss, accuracy) for the batch.
        model.train(is_training)
        pre = wrap_with_variable(batch['pre'],
                                 volatile=not is_training,
                                 gpu=args.gpu)
        hyp = wrap_with_variable(batch['hyp'],
                                 volatile=not is_training,
                                 gpu=args.gpu)
        pre_length = wrap_with_variable(batch['pre_length'],
                                        volatile=not is_training,
                                        gpu=args.gpu)
        hyp_length = wrap_with_variable(batch['hyp_length'],
                                        volatile=not is_training,
                                        gpu=args.gpu)
        label = wrap_with_variable(batch['label'],
                                   volatile=not is_training,
                                   gpu=args.gpu)
        logits = model(pre=pre,
                       pre_length=pre_length,
                       hyp=hyp,
                       hyp_length=hyp_length)
        label_pred = logits.max(1)[1]
        accuracy = torch.eq(label, label_pred).float().mean()
        loss = criterion(input=logits, target=label)
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            clip_grad_norm(parameters=params, max_norm=5)
            optimizer.step()
        return loss, accuracy

    def add_scalar_summary(summary_writer, name, value, step):
        # Unwrap a 0-dim variable/tensor before writing the scalar point.
        value = unwrap_scalar_variable(value)
        summ = summary.scalar(name=name, scalar=value)
        summary_writer.add_summary(summary=summ, global_step=step)

    num_train_batches = len(train_loader)
    # BUGFIX: with fewer than 10 training batches the original
    # `num_train_batches // 10` was 0, making the `% validate_every`
    # check below raise ZeroDivisionError.  Clamp to at least 1.
    validate_every = max(1, num_train_batches // 10)
    best_valid_accuracy = 0
    iter_count = 0
    for epoch_num in range(1, args.max_epoch + 1):
        logging.info(f'Epoch {epoch_num}: start')
        for batch_iter, train_batch in enumerate(train_loader):
            if args.anneal_temperature and iter_count % 500 == 0:
                # Exponentially anneal the Gumbel-softmax temperature,
                # floored at 0.5.
                gamma = 0.00001
                new_temperature = max([0.5, math.exp(-gamma * iter_count)])
                model.encoder.gumbel_temperature = new_temperature
                logging.info(
                    f'Iter #{iter_count}: '
                    f'Set Gumbel temperature to {new_temperature:.4f}')
            train_loss, train_accuracy = run_iter(batch=train_batch,
                                                  is_training=True)
            iter_count += 1
            add_scalar_summary(summary_writer=train_summary_writer,
                               name='loss',
                               value=train_loss,
                               step=iter_count)
            add_scalar_summary(summary_writer=train_summary_writer,
                               name='accuracy',
                               value=train_accuracy,
                               step=iter_count)

            if (batch_iter + 1) % validate_every == 0:
                valid_loss_sum = valid_accuracy_sum = 0
                num_valid_batches = len(valid_loader)
                for valid_batch in valid_loader:
                    valid_loss, valid_accuracy = run_iter(batch=valid_batch,
                                                          is_training=False)
                    valid_loss_sum += unwrap_scalar_variable(valid_loss)
                    valid_accuracy_sum += unwrap_scalar_variable(
                        valid_accuracy)
                valid_loss = valid_loss_sum / num_valid_batches
                valid_accuracy = valid_accuracy_sum / num_valid_batches
                add_scalar_summary(summary_writer=valid_summary_writer,
                                   name='loss',
                                   value=valid_loss,
                                   step=iter_count)
                add_scalar_summary(summary_writer=valid_summary_writer,
                                   name='accuracy',
                                   value=valid_accuracy,
                                   step=iter_count)
                progress = epoch_num + batch_iter / num_train_batches
                logging.info(f'Epoch {progress:.2f}: '
                             f'valid loss = {valid_loss:.4f}, '
                             f'valid accuracy = {valid_accuracy:.4f}')
                if valid_accuracy > best_valid_accuracy:
                    best_valid_accuracy = valid_accuracy
                    model_filename = (f'model-{progress:.2f}'
                                      f'-{valid_loss:.4f}'
                                      f'-{valid_accuracy:.4f}.pkl')
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    # Use logging for consistency with the rest of this
                    # function (was a bare print).
                    logging.info(f'Saved the new best model to {model_path}')
Esempio n. 15
0
def main(argv):
    """Entry point: jointly train a full-image and an ROI-cropped
    image translation network, pasting the ROI generator's output back
    into the full image before the main generator update.
    """
    (opts, args) = parser.parse_args(argv)

    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)

    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']

    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)

    # Parse ROI parameters given as "x,y,w,h" on the command line.
    roi = [int(val_str) for val_str in opts.roi.split(',')]
    roi_x = roi[0]
    roi_y = roi[1]
    roi_w = roi[2]
    roi_h = roi[3]

    # Instantiate two independent trainers of the class named in the
    # config (one full-frame, one ROI).  The explicit locals() dict makes
    # the exec() assignment visible under Python 3.
    # NOTE(review): exec on a config-supplied string runs arbitrary code.
    cmd1 = "trainer=%s(config.hyperparameters)" % config.hyperparameters[
        'trainer']
    cmd2 = "roi_trainer=%s(config.hyperparameters)" % config.hyperparameters[
        'trainer']
    local_dict = locals()
    exec(cmd1, globals(), local_dict)
    trainer = local_dict['trainer']
    exec(cmd2, globals(), local_dict)
    roi_trainer = local_dict['roi_trainer']

    # Check if resume training
    iterations = 0
    if opts.resume == 1:
        iterations = trainer.resume(config.snapshot_prefix)
        roi_trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)
    roi_trainer.cuda(opts.gpu)

    ######################################################################################################################
    # Setup logger and prepare image output folders.
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)

    for ep in range(0, MAX_EPOCHS):
        # Draw one batch per domain in lockstep (Python 2 izip).
        for it, (images_a,
                 images_b) in enumerate(izip(train_loader_a, train_loader_b)):
            # Skip ragged final batches so tensor shapes stay fixed.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size:
                continue

            # Crop images according to ROI
            roi_images_a = images_a[:, :, roi_y:roi_y + roi_h,
                                    roi_x:roi_x + roi_w].clone()
            roi_images_b = images_b[:, :, roi_y:roi_y + roi_h,
                                    roi_x:roi_x + roi_w].clone()

            roi_images_a = Variable(roi_images_a.cuda(opts.gpu))
            roi_images_b = Variable(roi_images_b.cuda(opts.gpu))

            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))

            # Main training code
            trainer.dis_update(images_a, images_b, config.hyperparameters)
            trainer.gen_update(images_a, images_b, config.hyperparameters)

            # Training code for ROI
            roi_trainer.dis_update(roi_images_a, roi_images_b,
                                   config.hyperparameters)
            roi_image_outputs = roi_trainer.gen_update(roi_images_a,
                                                       roi_images_b,
                                                       config.hyperparameters)
            roi_assembled_images = roi_trainer.assemble_outputs(
                roi_images_a, roi_images_b, roi_image_outputs)

            # Paste ROI to original images to update generator.
            # NOTE(review): roi_image_outputs[1]/[2] are presumably the
            # cross-domain translations (b->a, a->b) -- confirm against
            # the trainer's gen_update return order.
            x_aa, x_ba, x_ab, x_bb, shared = trainer.gen(images_a, images_b)
            x_ba_paste = x_ba.clone()
            x_ab_paste = x_ab.clone()
            x_ba_paste[:, :, roi_y:roi_y + roi_h,
                       roi_x:roi_x + roi_w] = roi_image_outputs[1].clone()
            x_ab_paste[:, :, roi_y:roi_y + roi_h,
                       roi_x:roi_x + roi_w] = roi_image_outputs[2].clone()
            trainer.gen.zero_grad()
            image_outputs = trainer.gen_update_helper(images_a, images_b, x_aa,
                                                      x_ba_paste, x_ab_paste,
                                                      x_bb, shared,
                                                      config.hyperparameters)
            assembled_images = trainer.assemble_outputs(
                images_a, images_b, image_outputs)

            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)

            if (iterations + 1) % config.image_save_iterations == 0:
                # Persist numbered sample images (full-frame and ROI) and
                # refresh the HTML index.
                img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                    iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=1)
                img_filename = '%s/roi_gen_%08d.jpg' % (image_directory,
                                                        iterations + 1)
                torchvision.utils.save_image(roi_assembled_images.data / 2 +
                                             0.5,
                                             img_filename,
                                             nrow=1)
                write_html(snapshot_directory + "/index.html", iterations + 1,
                           config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                # Overwrite fixed-name preview images for quick monitoring.
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=1)
                img_filename = '%s/roi_gen.jpg' % (image_directory)
                torchvision.utils.save_image(roi_assembled_images.data / 2 +
                                             0.5,
                                             img_filename,
                                             nrow=1)

            # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)

            iterations += 1
            if iterations >= max_iterations:
                return
Esempio n. 16
0
import pdb
import numpy as np
import mxnet as mx
from poserecog.bucket_io import BucketSentenceIter
from poserecog.get_lstm_sym import get_lstm
from poserecog.config import lstm_config as lcf
from poserecog.train_script import fit as script_fit
import logging
#head = '%(asctime)-15s %(message)s'
#logging.basicConfig(level=logging.DEBUG, format=head)
import time
# Timestamp used to name both the log file and the tensorboard run dirs.
tm = time.strftime("%m_%d_%H_%M")
logging.basicConfig(filename='log/' + time.strftime("%m_%d_%H_%M") + '.log',\
                    level=logging.DEBUG)
import tensorboard
# Separate writers so train and validation metrics plot as two runs.
sw_train = tensorboard.FileWriter('log/%s_train/' % tm)
sw_val = tensorboard.FileWriter('log/%s_val/' % tm)


def Perplexity(label, pred):
    """Perplexity of a batch of softmax predictions.

    Args:
        label: array of true class indices; a 2-D (batch-major) array is
            flattened time-major (transposed first) to match the row
            order of ``pred``.
        pred: 2-D array of shape (num_samples, num_classes) holding
            predicted class probabilities.

    Returns:
        exp(mean negative log-likelihood) over the first
        ``pred.shape[0]`` samples, with each picked probability clamped
        at 1e-10 to avoid log(0).
    """
    label = label.T.reshape((-1, ))
    # Vectorized gather of the probability assigned to each true label
    # (replaces the original per-sample Python loop; same clamping).
    idx = label[:pred.shape[0]].astype(int)
    picked = pred[np.arange(pred.shape[0]), idx]
    loss = -np.sum(np.log(np.maximum(picked, 1e-10)))
    return np.exp(loss / label.size)


def monitor_train(param):
    """Batch-end callback: push the running Perplexity metric to the
    training tensorboard writer."""
    name_values = dict(param.eval_metric.get_name_value())
    perp_summary = tensorboard.summary.scalar('perp', name_values['Perplexity'])
    sw_train.add_summary(perp_summary)
def main(argv):
    """Entry point: train a four-domain (A/B/C/D) shared-latent image
    translation network, optionally warm-starting from two separately
    trained two-domain models (AB and CD) whose shared blocks are
    initialized to the element-wise mean of the two source models.
    """
    (opts, args) = parser.parse_args(argv)

    # Load experiment setting
    assert isinstance(opts, object)
    config = NetConfig(opts.config)

    batch_size = config.hyperparameters['batch_size']
    max_iterations = config.hyperparameters['max_iterations']

    # One data loader per domain.
    train_loader_a = get_data_loader(config.datasets['train_a'], batch_size)
    train_loader_b = get_data_loader(config.datasets['train_b'], batch_size)
    train_loader_c = get_data_loader(config.datasets['train_c'], batch_size)
    train_loader_d = get_data_loader(config.datasets['train_d'], batch_size)

    trainer = []
    trainer = init_trainer(trainer, config)

    print("============ DISCRIMINATOR ==============")
    print(trainer.dis)
    print("============ GENERATOR ==============")
    print(trainer.gen)

    # Set up for warm start
    if opts.warm_start == 1:
        gen_ab = None
        gen_cd = None
        dis_ab = None
        dis_cd = None
        (gen_ab, gen_cd, dis_ab,
         dis_cd) = init_warm_start_models(gen_ab, gen_cd, dis_ab, dis_cd,
                                          config)
    # If not warm starting check if resume training
    iterations = 0
    if opts.resume == 1 and opts.warm_start == 0:
        iterations = trainer.resume(config.snapshot_prefix)
    trainer.cuda(opts.gpu)

    # Warm start: load the two pre-trained two-domain models and copy
    # their per-domain sub-networks into the four-domain trainer.
    if opts.warm_start == 1:
        print("============ GENERATOR AB ==============")
        print(gen_ab)
        print("============ GENERATOR CD ==============")
        print(gen_cd)
        print("============ DISCRIMINATOR AB ==============")
        print(dis_ab)
        print("============ DISCRIMINATOR CD ==============")
        print(dis_cd)
        dirname = os.path.dirname(config.snapshot_prefix)
        model_path = os.path.join(dirname, opts.gen_ab)
        gen_ab.load_state_dict(torch.load(model_path))
        print("Pre trained generator ab loaded from: {}".format(model_path))
        model_path = os.path.join(dirname, opts.gen_cd)
        gen_cd.load_state_dict(torch.load(model_path))
        print("Pre trained generator cd loaded from: {}".format(model_path))
        gen_ab.cuda(opts.gpu)
        gen_cd.cuda(opts.gpu)
        model_path = os.path.join(dirname, opts.dis_ab)
        dis_ab.load_state_dict(torch.load(model_path))
        print("Pre trained discriminaor ab loaded from: {}".format(model_path))
        model_path = os.path.join(dirname, opts.dis_cd)
        dis_cd.load_state_dict(torch.load(model_path))
        # NOTE(review): this message says "generator cd" but the weights
        # loaded are the cd discriminator's -- likely a copy-paste slip.
        print("Pre trained generator cd loaded from: {}".format(model_path))
        dis_ab.cuda(opts.gpu)
        dis_cd.cuda(opts.gpu)

        # Warm start init: per-domain (non-shared) sub-networks are
        # copied over directly.
        trainer.dis.model_A = dis_ab.model_A
        trainer.dis.model_B = dis_ab.model_B
        trainer.dis.model_C = dis_cd.model_A
        trainer.dis.model_D = dis_cd.model_B

        trainer.gen.encode_A = gen_ab.encode_A
        trainer.gen.encode_B = gen_ab.encode_B
        trainer.gen.encode_C = gen_cd.encode_A
        trainer.gen.encode_D = gen_cd.encode_B
        trainer.gen.decode_A = gen_ab.decode_A
        trainer.gen.decode_B = gen_ab.decode_B
        trainer.gen.decode_C = gen_cd.decode_A
        trainer.gen.decode_D = gen_cd.decode_B

        # Shared blocks - take mean of two original models
        # Functions inspired from this thread
        # https://discuss.pytorch.org/t/running-average-of-parameters/902/2
        def flatten_params(model1, model2):
            # Flatten each model's parameters into one 1-D tensor so the
            # two can be averaged position-by-position.
            p1 = torch.cat(
                [param.data.view(-1) for param in model1.parameters()], 0)
            p2 = torch.cat(
                [param.data.view(-1) for param in model2.parameters()], 0)
            return (p1, p2)

        def load_params(flattened_params, model):
            # Write the element-wise mean of the two flattened parameter
            # vectors back into `model`, slice by slice.
            offset = 0
            for param in model.parameters():
                fp1 = flattened_params[0][offset:offset + param.nelement()]
                fp2 = flattened_params[1][offset:offset + param.nelement()]
                fpjoint = fp1 + fp2
                fpjoint = torch.div(fpjoint, 2.0)
                # NOTE(review): the .view() result here is discarded;
                # copy_ already fills `param` element-wise.
                param.data.copy_(fpjoint).view(param.size())
                offset += param.nelement()

        model_S_new = flatten_params(dis_ab.model_S, dis_cd.model_S)
        load_params(model_S_new, trainer.dis.model_S)
        gen_enc_new = flatten_params(gen_ab.enc_shared, gen_cd.enc_shared)
        load_params(gen_enc_new, trainer.gen.enc_shared)
        gen_dec_new = flatten_params(gen_ab.dec_shared, gen_cd.dec_shared)
        load_params(gen_dec_new, trainer.gen.dec_shared)
        print("Initialized model with params from separately trained models")

    # print("============ DISCRIMINATOR ==============")
    # print(trainer.dis)
    # print("============ GENERATOR ==============")
    # print(trainer.gen)

    ######################################################################################################################
    # Setup logger and prepare image output folders.
    train_writer = tensorboard.FileWriter(
        "%s/%s" %
        (opts.log, os.path.splitext(os.path.basename(opts.config))[0]))
    image_directory, snapshot_directory = prepare_snapshot_and_image_folder(
        config.snapshot_prefix, iterations, config.image_save_iterations)

    for ep in range(0, MAX_EPOCHS):
        # Draw one batch per domain in lockstep (Python 2 izip).
        for it, (images_a, images_b, images_c, images_d) in enumerate(
                itertools.izip(train_loader_a, train_loader_b, train_loader_c,
                               train_loader_d)):
            # Skip ragged final batches so tensor shapes stay fixed.
            if images_a.size(0) != batch_size or images_b.size(
                    0) != batch_size or images_c.size(
                        0) != batch_size or images_d.size(0) != batch_size:
                continue
            images_a = Variable(images_a.cuda(opts.gpu))
            images_b = Variable(images_b.cuda(opts.gpu))
            images_c = Variable(images_c.cuda(opts.gpu))
            images_d = Variable(images_d.cuda(opts.gpu))

            # Main training code
            trainer.dis_update(images_a, images_b, images_c, images_d,
                               config.hyperparameters)
            image_outputs = trainer.gen_update(images_a, images_b, images_c,
                                               images_d,
                                               config.hyperparameters)
            assembled_images = trainer.assemble_outputs(
                images_a, images_b, images_c, images_d, image_outputs)
            assembled_dbl_loop_images = trainer.assemble_double_loop_outputs(
                images_a, images_b, images_c, images_d, image_outputs)
            # print(assembled_images.data.shape)
            # print(assembled_dbl_loop_images.data.shape)

            # Dump training stats in log file
            if (iterations + 1) % config.display == 0:
                write_loss(iterations, max_iterations, trainer, train_writer)

            if (iterations + 1) % config.image_save_iterations == 0:
                # Persist numbered sample images (single- and double-loop
                # panels) and refresh the HTML index.
                img_filename = '%s/gen_%08d.jpg' % (image_directory,
                                                    iterations + 1)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=2)
                dbl_img_filename = '%s/gen_dbl_%08d.jpg' % (image_directory,
                                                            iterations + 1)
                torchvision.utils.save_image(
                    assembled_dbl_loop_images.data / 2 + 0.5,
                    dbl_img_filename,
                    nrow=2)
                write_html(snapshot_directory + "/index.html", iterations + 1,
                           config.image_save_iterations, image_directory)
            elif (iterations + 1) % config.image_display_iterations == 0:
                # Overwrite fixed-name preview images for quick monitoring.
                img_filename = '%s/gen.jpg' % (image_directory)
                torchvision.utils.save_image(assembled_images.data / 2 + 0.5,
                                             img_filename,
                                             nrow=2)
                dbl_img_filename = '%s/gen_dbl.jpg' % (image_directory)
                torchvision.utils.save_image(
                    assembled_dbl_loop_images.data / 2 + 0.5,
                    dbl_img_filename,
                    nrow=2)

            # Save network weights
            if (iterations + 1) % config.snapshot_save_iterations == 0:
                trainer.save(config.snapshot_prefix, iterations)

            iterations += 1
            if iterations >= max_iterations:
                return