Example #1
    def test_create_restore_delete(self):
        # Create the hyperparameters and objects to save.
        hp = models.registry.get_default_hparams('cifar_resnet_20')
        model = models.registry.get(hp.model_hparams)
        optimizer = optimizers.get_optimizer(hp.training_hparams, model)
        dataloader = datasets.registry.get(hp.dataset_hparams)
        step = Step.from_epoch(13, 27, 400)

        # Run one step of SGD.
        examples, labels = next(iter(dataloader))
        optimizer.zero_grad()
        model.train()
        model.loss_criterion(model(examples), labels).backward()
        optimizer.step()

        # Create a fake logger.
        logger = MetricLogger()
        logger.add('test_accuracy', Step.from_epoch(0, 0, 400), 0.1)
        logger.add('test_accuracy', Step.from_epoch(10, 0, 400), 0.5)
        logger.add('test_accuracy', Step.from_epoch(100, 0, 400), 0.8)

        # Save a checkpoint.
        checkpointing.save_checkpoint_callback(self.root, step, model, optimizer, logger)
        self.assertTrue(os.path.exists(paths.checkpoint(self.root)))

        # Create new models.
        model2 = models.registry.get(hp.model_hparams)
        optimizer2 = optimizers.get_optimizer(hp.training_hparams, model2)

        # Ensure the new model has different weights.
        sd1, sd2 = model.state_dict(), model2.state_dict()
        for k in model.prunable_layer_names:
            self.assertFalse(np.array_equal(sd1[k].numpy(), sd2[k].numpy()))

        self.assertIn('momentum_buffer', optimizer.state[optimizer.param_groups[0]['params'][0]])
        self.assertNotIn('momentum_buffer', optimizer2.state[optimizer2.param_groups[0]['params'][0]])

        # Restore the checkpoint.
        step2, logger2 = checkpointing.restore_checkpoint(self.root, model2, optimizer2, 400)

        self.assertTrue(os.path.exists(paths.checkpoint(self.root)))
        self.assertEqual(step, step2)
        self.assertEqual(str(logger), str(logger2))

        # Ensure the new model is now the same.
        sd1, sd2 = model.state_dict(), model2.state_dict()
        self.assertEqual(set(sd1.keys()), set(sd2.keys()))
        for k in sd1:
            self.assertTrue(np.array_equal(sd1[k].numpy(), sd2[k].numpy()))

        # Ensure the new optimizer is now the same.
        mom1 = optimizer.state[optimizer.param_groups[0]['params'][0]]['momentum_buffer']
        mom2 = optimizer2.state[optimizer2.param_groups[0]['params'][0]]['momentum_buffer']
        self.assertTrue(np.array_equal(mom1.numpy(), mom2.numpy()))
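The callback this test exercises takes the same five arguments (output location, step, model, optimizer, logger) that the train() loop in Example #5 passes to each callback, so it can be registered directly. A minimal sketch of that wiring, assuming the registries and train() from the later examples are importable and that '/tmp/ckpt_demo' is a writable path:

hp = models.registry.get_default_hparams('cifar_resnet_20')
model = models.registry.get(hp.model_hparams)
train_loader = datasets.registry.get(hp.dataset_hparams)
# save_checkpoint_callback matches the (output_location, step, model, optimizer, logger)
# callback contract, so train() will invoke it at every step.
train(hp.training_hparams, model, train_loader, '/tmp/ckpt_demo',
      callbacks=[checkpointing.save_checkpoint_callback])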
Example #2
    def test_adam_weight_decay(self):
        self.hp.optimizer_name = 'adam'
        self.hp.weight_decay = 1e-4
        optimizer = optimizers.get_optimizer(self.hp, self.model)
        self.assertIsInstance(optimizer, torch.optim.Adam)
        self.assertEqual(optimizer.param_groups[0]['lr'], 0.1)
        self.assertEqual(optimizer.param_groups[0]['weight_decay'], 1e-4)
Example #3
    def test_sgd_momentum(self):
        self.hp.momentum = 0.9
        optimizer = optimizers.get_optimizer(self.hp, self.model)
        self.assertIsInstance(optimizer, torch.optim.SGD)
        self.assertEqual(optimizer.param_groups[0]['momentum'], 0.9)
        self.assertEqual(optimizer.param_groups[0]['lr'], 0.1)
        self.assertEqual(optimizer.param_groups[0]['weight_decay'], 0.0)
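Together, the optimizer tests in Examples #2, #3, and #9 pin down the behavior of optimizers.get_optimizer: SGD with optional momentum and weight decay, Adam with weight decay, and a ValueError for unknown names. A plausible sketch of that dispatch follows; it is not the library's implementation, and the handling of unset momentum/weight_decay is an assumption.

import torch

def get_optimizer_sketch(hp, model):
    # Dispatch on hp.optimizer_name, mirroring what the tests above expect.
    if hp.optimizer_name == 'sgd':
        return torch.optim.SGD(model.parameters(), lr=hp.lr,
                               momentum=hp.momentum or 0.0,
                               weight_decay=hp.weight_decay or 0.0)
    elif hp.optimizer_name == 'adam':
        return torch.optim.Adam(model.parameters(), lr=hp.lr,
                                weight_decay=hp.weight_decay or 0.0)
    raise ValueError('No such optimizer: {}'.format(hp.optimizer_name))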
Example #4
    def setUp(self):
        super(TestLrScheduler, self).setUp()
        self.hp = hparams.TrainingHparams(optimizer_name='sgd',
                                          lr=0.1,
                                          training_steps='160ep')

        h = models.registry.get_default_hparams('cifar_resnet_20')
        self.model = models.registry.get(h.model_hparams)
        self.optimizer = optimizers.get_optimizer(self.hp, self.model)
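This setUp relies on the Step helpers that recur throughout these examples ('160ep' training_steps strings, Step.from_epoch, per-iteration counters). Illustrative only, based on how train() in Example #5 uses them:

step = Step.from_epoch(13, 27, 400)   # epoch 13, iteration 27, at 400 iterations/epoch
# step.ep == 13, step.it == 27, and step.iteration appears to be the global
# iteration count (13 * 400 + 27), which train() uses to fast-forward the lr schedule.
end = Step.from_str('160ep', 400)     # parses training_steps strings such as '160ep'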
Example #5
def train(training_hparams: hparams.TrainingHparams,
          model: Model,
          train_loader: DataLoader,
          output_location: str,
          callbacks: typing.List[typing.Callable] = [],
          start_step: Step = None,
          end_step: Step = None):
    """The main training loop for this framework.

    Args:
      * training_hparams: The training hyperparameters whose schema is specified in hparams.py.
      * model: The model to train. Must be a models.base.Model.
      * train_loader: The training data. Must be a datasets.base.DataLoader.
      * output_location: The string path where all outputs should be stored.
      * callbacks: A list of functions that are called before each training step and once more
        after the last training step. Each function takes five arguments: the output location,
        the current step, the model, the optimizer, and the logger.
        Callbacks are used for running the test set, saving the logger, saving the state of the
        model, etc. They provide hooks into the training loop for customization so that the
        training loop itself can remain simple.
      * start_step: The step at which the training data and learning rate schedule should begin.
        Defaults to step 0.
      * end_step: The step at which training should cease. Defaults to the full
        `training_hparams.training_steps` steps.
    """

    # Create the output location if it doesn't already exist.
    if not get_platform().exists(output_location) and get_platform().is_primary_process:
        get_platform().makedirs(output_location)

    # Get the optimizer and learning rate schedule.
    model.to(get_platform().torch_device)
    optimizer = optimizers.get_optimizer(training_hparams, model)
    step_optimizer = optimizer
    lr_schedule = optimizers.get_lr_schedule(training_hparams, optimizer,
                                             train_loader.iterations_per_epoch)

    # Adapt for FP16.
    if training_hparams.apex_fp16:
        if NO_APEX:
            raise ImportError('Must install nvidia apex to use this model.')
        model, step_optimizer = apex.amp.initialize(model,
                                                    optimizer,
                                                    loss_scale='dynamic',
                                                    verbosity=0)

    # Handle parallelism if applicable.
    if get_platform().is_distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[get_platform().rank])
    elif get_platform().is_parallel:
        model = DataParallel(model)

    # Get the random seed for the data order.
    data_order_seed = training_hparams.data_order_seed

    # Restore the model from a saved checkpoint if the checkpoint exists.
    cp_step, cp_logger = restore_checkpoint(output_location, model, optimizer,
                                            train_loader.iterations_per_epoch)
    start_step = cp_step or start_step or Step.zero(
        train_loader.iterations_per_epoch)
    logger = cp_logger or MetricLogger()
    with warnings.catch_warnings():  # Filter unnecessary warning.
        warnings.filterwarnings("ignore", category=UserWarning)
        for _ in range(start_step.iteration):
            lr_schedule.step()

    # Determine when to end training.
    end_step = end_step or Step.from_str(training_hparams.training_steps,
                                         train_loader.iterations_per_epoch)
    if end_step <= start_step: return

    # The training loop.
    for ep in range(start_step.ep, end_step.ep + 1):

        # Ensure the data order is different for each epoch.
        train_loader.shuffle(None if data_order_seed is None else (
            data_order_seed + ep))

        for it, (examples, labels) in enumerate(train_loader):

            # Advance the data loader until the start epoch and iteration.
            if ep == start_step.ep and it < start_step.it: continue

            # Run the callbacks.
            step = Step.from_epoch(ep, it, train_loader.iterations_per_epoch)
            for callback in callbacks:
                callback(output_location, step, model, optimizer, logger)

            # Exit at the end step.
            if ep == end_step.ep and it == end_step.it: return

            # Otherwise, train.
            examples = examples.to(device=get_platform().torch_device)
            labels = labels.to(device=get_platform().torch_device)

            step_optimizer.zero_grad()
            model.train()
            loss = model.loss_criterion(model(examples), labels)
            if training_hparams.apex_fp16:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Step forward. Ignore extraneous warnings that the lr_schedule generates.
            step_optimizer.step()
            with warnings.catch_warnings():  # Filter unnecessary warning.
                warnings.filterwarnings("ignore", category=UserWarning)
                lr_schedule.step()

    get_platform().barrier()
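Any function with the documented five-argument signature can be passed in callbacks. Below is a minimal custom callback sketch that records the current learning rate with MetricLogger.add (as used in Example #1); log_lr_callback is a hypothetical helper, not part of the framework.

def log_lr_callback(output_location, step, model, optimizer, logger):
    # train() calls this before every step and once more after the last one.
    logger.add('learning_rate', step, optimizer.param_groups[0]['lr'])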
Example #6
def distill(
    training_hparams: hparams.TrainingHparams,
    distill_hparams: hparams.DistillHparams,
    student: Model,
    teacher: Model,
    train_loader: DataLoader,
    output_location: str,
    callbacks: typing.List[typing.Callable] = [],
    start_step: Step = None,
    end_step: Step = None
):

    """The main training loop for this framework.

    Args:
      * training_hparams: The training hyperparameters whose schema is specified in hparams.py.
      * distll_hparams: The knowledge distillation hyperparameters whose schema is specified in hparams.py.
      * student: The student model to train. Must be a models.base.Model
      * teacher: The teacher model to distill the knowledge. Must be a models.base.Model
      * train_loader: The training data. Must be a datasets.base.DataLoader
      * output_location: The string path where all outputs should be stored.
      * callbacks: A list of functions that are called before each training step and once more
        after the last training step. Each function takes five arguments: the current step,
        the output location, the model, the optimizer, and the logger.
        Callbacks are used for running the test set, saving the logger, saving the state of the
        model, etc. The provide hooks into the training loop for customization so that the
        training loop itself can remain simple.
      * start_step: The step at which the training data and learning rate schedule should begin.
        Defaults to step 0.
      * end_step: The step at which training should cease. Otherwise, training will go for the
        full `training_hparams.training_steps` steps.
    """

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    # Create the output location if it doesn't already exist.
    if not get_platform().exists(output_location) and get_platform().is_primary_process:
        get_platform().makedirs(output_location)

    # Get the optimizer and learning rate schedule.
    student.to(get_platform().torch_device)
    teacher.to(get_platform().torch_device)
    optimizer = optimizers.get_optimizer(training_hparams, student)
    step_optimizer = optimizer
    lr_schedule = optimizers.get_lr_schedule(training_hparams, optimizer, train_loader.iterations_per_epoch)

    ce_loss_fct = nn.KLDivLoss(reduction="batchmean")
    if distill_hparams.alpha_mse > 0.0:
        mse_loss_fct = nn.MSELoss(reduction='sum')
    if distill_hparams.alpha_cos > 0.0:
        cos_loss_fct = nn.CosineEmbeddingLoss(reduction='mean')
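    # Note: cos_loss_fct is constructed when alpha_cos > 0.0, but no cosine term is
    # added to the loss below; the logit-only outputs used here do not expose the
    # hidden states a CosineEmbeddingLoss would compare.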

    # Adapt for FP16.
    if training_hparams.apex_fp16:
        if NO_APEX: raise ImportError('Must install nvidia apex to use this model.')
        (student, teacher), step_optimizer = apex.amp.initialize(
            [student, teacher], optimizer, loss_scale='dynamic', verbosity=0
        )

    # Handle parallelism if applicable.
    if get_platform().is_distributed:
        student = DistributedDataParallel(student, device_ids=[get_platform().rank])
        teacher = DistributedDataParallel(teacher, device_ids=[get_platform().rank])
    elif get_platform().is_parallel:
        student = DataParallel(student)
        teacher = DataParallel(teacher)

    # Get the random seed for the data order.
    data_order_seed = training_hparams.data_order_seed

    # Restore the model from a saved checkpoint if the checkpoint exists.
    cp_step, cp_logger = restore_checkpoint(output_location, student, optimizer, train_loader.iterations_per_epoch)
    start_step = cp_step or start_step or Step.zero(train_loader.iterations_per_epoch)
    logger = cp_logger or MetricLogger()
    with warnings.catch_warnings():  # Filter unnecessary warning.
        warnings.filterwarnings("ignore", category=UserWarning)
        for _ in range(start_step.iteration): lr_schedule.step()

    # Determine when to end training.
    end_step = end_step or Step.from_str(training_hparams.training_steps, train_loader.iterations_per_epoch)
    if end_step <= start_step: return

    # The training loop.
    for ep in range(start_step.ep, end_step.ep + 1):

        # Ensure the data order is different for each epoch.
        train_loader.shuffle(None if data_order_seed is None else (data_order_seed + ep))

        for it, (examples, labels) in enumerate(train_loader):

            # Advance the data loader until the start epoch and iteration.
            if ep == start_step.ep and it < start_step.it: continue

            # Run the callbacks.
            step = Step.from_epoch(ep, it, train_loader.iterations_per_epoch)
            for callback in callbacks: callback(output_location, step, student, optimizer, logger)

            # Exit at the end step.
            if ep == end_step.ep and it == end_step.it: return

            # Otherwise, train.
            examples = examples.to(device=get_platform().torch_device)
            labels = labels.to(device=get_platform().torch_device)

            loss = 0.0
            step_optimizer.zero_grad()
            student.train()
            teacher.eval()

            student_outputs = student(examples)
            with torch.no_grad():
                teacher_outputs = teacher(examples)

            s_logits = student_outputs
            t_logits = teacher_outputs

            # KL Divergence loss for the knowledge distillation
            loss_ce = ce_loss_fct(
                F.log_softmax(s_logits / distill_hparams.temperature, dim=-1),
                F.softmax(t_logits / distill_hparams.temperature, dim=-1),
            ) * distill_hparams.temperature**2
            loss += distill_hparams.alpha_ce * loss_ce

            if distill_hparams.alpha_cls > 0.0:
                loss_cls = student.loss_criterion(student_outputs, labels)
                loss += distill_hparams.alpha_cls * loss_cls

            if distill_hparams.alpha_mse > 0.0:
                loss_mse = mse_loss_fct(s_logits, t_logits) / s_logits.size(0)
                loss += distill_hparams.alpha_mse * loss_mse

            if training_hparams.apex_fp16:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Step forward. Ignore extraneous warnings that the lr_schedule generates.
            step_optimizer.step()
            with warnings.catch_warnings():  # Filter unnecessary warning.
                warnings.filterwarnings("ignore", category=UserWarning)
                lr_schedule.step()

    get_platform().barrier()
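For reference, the per-batch loss that distill() blends can be written as a standalone function. A sketch assuming the same distill_hparams fields used above (alpha_ce, alpha_cls, alpha_mse, temperature); distillation_loss is a hypothetical helper, not part of the framework.

import torch.nn.functional as F

def distillation_loss(s_logits, t_logits, labels, dh, loss_criterion):
    T = dh.temperature
    # Temperature-scaled KL divergence between student and teacher distributions.
    loss = dh.alpha_ce * F.kl_div(F.log_softmax(s_logits / T, dim=-1),
                                  F.softmax(t_logits / T, dim=-1),
                                  reduction='batchmean') * T ** 2
    if dh.alpha_cls > 0.0:
        # Ordinary classification loss on the ground-truth labels.
        loss = loss + dh.alpha_cls * loss_criterion(s_logits, labels)
    if dh.alpha_mse > 0.0:
        # Mean-squared error between the raw logits, averaged over the batch.
        loss = loss + dh.alpha_mse * F.mse_loss(s_logits, t_logits,
                                                reduction='sum') / s_logits.size(0)
    return loss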
Example #7
def build_runner(conf, cuda, mode, resume=False):
  gen_model_conf = Configuration.from_dict(conf.generator_model)
  gen_model = construct_model(gen_model_conf, gen_model_conf.name)

  val_metric_transform = get_output_transform(conf, conf.application, 'test')
  val_metric_fns = {name: get_metric_fn(name)
                    for name in conf.get_attr('validation_metrics',
                                              default=[])}
  output_transform = get_output_transform(conf, conf.application, 'output')

  if mode == 'train':
    disc_model_conf = Configuration.from_dict(conf.discriminator_model)
    disc_model = construct_model(disc_model_conf, disc_model_conf.name)

    gen_adv_criteria = {loss_name: get_criterion(conf, loss_name, cuda, 'gen')
                        for loss_name in conf.generator_adversarial_losses}
    gen_criteria = {loss_name: get_criterion(conf, loss_name, cuda)
                    for loss_name in conf.generator_losses}
    disc_adv_criteria = {loss_name: get_criterion(conf, loss_name, cuda,
                                                  'disc')
                         for loss_name in conf.discriminator_losses}

    if cuda != '':
      utils.cudaify([gen_model, disc_model] +
                    list(gen_adv_criteria.values()) +
                    list(gen_criteria.values()) +
                    list(disc_adv_criteria.values()))

    # Important: construct optimizers after moving model to GPU!
    gen_opt_conf = Configuration.from_dict(conf.generator_optimizer)
    gen_optimizer = get_optimizer(gen_opt_conf, gen_opt_conf.name,
                                  gen_model.parameters())
    gen_lr_scheduler = None
    if gen_opt_conf.has_attr('lr_scheduler'):
      gen_lr_scheduler = get_lr_scheduler(gen_opt_conf,
                                          gen_opt_conf.lr_scheduler,
                                          gen_optimizer)

    disc_opt_conf = Configuration.from_dict(conf.discriminator_optimizer)
    disc_optimizer = get_optimizer(disc_opt_conf, disc_opt_conf.name,
                                   disc_model.parameters())
    disc_lr_scheduler = None
    if disc_opt_conf.has_attr('lr_scheduler'):
      disc_lr_scheduler = get_lr_scheduler(disc_opt_conf,
                                           disc_opt_conf.lr_scheduler,
                                           disc_optimizer)

    train_disc_metrics = conf.get_attr('train_discriminator_metrics',
                                       default=[])
    train_disc_metric_fns = {name: get_metric_fn(name)
                             for name in train_disc_metrics}

    train_gen_metric_transform = get_output_transform(conf, conf.application,
                                                      'train')
    train_gen_metrics = conf.get_attr('train_generator_metrics', default=[])
    train_gen_metric_fns = {name: get_metric_fn(name)
                            for name in train_gen_metrics}

    input_method = disc_model_conf.get_attr('input_method',
                                            default=DEFAULT_INPUT_METHOD)

    runner = AdversarialRunner(gen_model, disc_model,
                               gen_optimizer, disc_optimizer,
                               gen_lr_scheduler, disc_lr_scheduler,
                               gen_adv_criteria, gen_criteria,
                               disc_adv_criteria,
                               conf.get_attr('generator_loss_weights', {}),
                               conf.get_attr('discriminator_loss_weights', {}),
                               cuda,
                               train_gen_metric_fns,
                               train_gen_metric_transform,
                               train_disc_metric_fns,
                               val_metric_fns,
                               val_metric_transform,
                               output_transform,
                               input_method)
    if gen_model_conf.has_attr('pretrained_weights') and not resume:
      runner.initialize_pretrained_model(gen_model_conf, runner.gen,
                                         cuda, conf.file)

    if disc_model_conf.has_attr('pretrained_weights') and not resume:
      runner.initialize_pretrained_model(disc_model_conf, runner.disc,
                                         cuda, conf.file)
  else:
    if cuda != '':
      utils.cudaify(gen_model)
    runner = AdversarialRunner(gen_model,
                               cuda=cuda,
                               val_metric_fns=val_metric_fns,
                               val_metric_transform=val_metric_transform,
                               output_transform=output_transform)

  return runner
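The attribute accesses above imply roughly the following configuration shape. Illustrative only: every name and value below is a placeholder, not something taken from the source configuration files.

example_conf = {
    'application': 'example_application',
    'generator_model': {'name': 'example_generator'},
    'discriminator_model': {'name': 'example_discriminator'},
    'generator_adversarial_losses': ['example_gan_loss'],
    'generator_losses': ['example_reconstruction_loss'],
    'discriminator_losses': ['example_gan_loss'],
    'generator_optimizer': {'name': 'adam', 'lr_scheduler': 'example_schedule'},
    'discriminator_optimizer': {'name': 'adam'},
    'validation_metrics': ['example_metric'],
    'train_generator_metrics': [],
    'train_discriminator_metrics': [],
}
# build_runner() expects a Configuration wrapping a structure like this,
# e.g. (assumption) Configuration.from_dict(example_conf).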
Example #8
def build_runner(conf, cuda, mode='train', resume=False):
    model_conf = Configuration.from_dict(conf.model)

    model = construct_model(model_conf, model_conf.name)

    val_metric_transform = get_output_transform(conf, conf.application, 'test')
    val_metric_fns = {
        name: get_metric_fn(name)
        for name in conf.get_attr('validation_metrics', default=[])
    }
    output_transform = get_output_transform(conf, conf.application, 'output')

    if mode == 'train':
        criteria = {}
        if conf.has_attr('loss_name'):
            criteria[conf.loss_name] = get_criterion(conf, conf.loss_name,
                                                     cuda)
        else:
            for loss_name in conf.losses:
                criteria[loss_name] = get_criterion(conf, loss_name, cuda)

        assert len(
            criteria) > 0, 'Need at least one loss to optimize something!'

        if cuda != '':
            utils.cudaify([model] + list(criteria.values()))

        # Important: construct optimizer after moving model to GPU!
        opt_conf = Configuration.from_dict(conf.optimizer)
        optimizer = get_optimizer(opt_conf, opt_conf.name, model.parameters())

        lr_scheduler = None
        if opt_conf.has_attr('lr_scheduler'):
            lr_scheduler = get_lr_scheduler(opt_conf, opt_conf.lr_scheduler,
                                            optimizer)

        train_metric_transform = get_output_transform(conf, conf.application,
                                                      'train')
        train_metric_fns = {
            name: get_metric_fn(name)
            for name in conf.get_attr('train_metrics', default=[])
        }

        runner = Runner(model, criteria, conf.get_attr('loss_weights', {}),
                        optimizer, lr_scheduler, cuda, train_metric_fns,
                        train_metric_transform, val_metric_fns,
                        val_metric_transform, output_transform)

        if model_conf.has_attr('pretrained_weights') and not resume:
            runner.initialize_pretrained_model(model_conf, runner.model, cuda,
                                               conf.file)
    else:
        if cuda != '':
            utils.cudaify(model)
        runner = Runner(model,
                        cuda=cuda,
                        val_metric_fns=val_metric_fns,
                        val_metric_transform=val_metric_transform,
                        output_transform=output_transform)

    return runner
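This variant accepts losses in two forms, as the branch on loss_name versus losses above shows. A sketch of both, with placeholder loss names:

single_loss = {'loss_name': 'example_l1'}                        # one criterion
multiple_losses = {'losses': ['example_l1', 'example_perceptual'],
                   'loss_weights': {'example_l1': 1.0,
                                    'example_perceptual': 0.1}}  # optional weights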
Example #9
    def test_nonexistent_optimizer(self):
        self.hp.optimizer_name = 'metagrad'
        with self.assertRaises(ValueError):
            optimizers.get_optimizer(self.hp, self.model)
Example #10
def build_runner(conf, cuda, mode):
    gen_model_conf = Configuration.from_dict(conf.generator_model, conf)
    gen_model = construct_model(gen_model_conf, gen_model_conf.name, cuda)

    val_metric_fns = {
        name: get_metric_fn(conf, name, cuda, 'test')
        for name in conf.get_attr('validation_metrics', default=[])
    }
    output_transform = get_output_transform(conf, conf.application,
                                            'inference')
    test_input_batch_transform = get_input_batch_transform(
        conf, conf.application, 'test')

    if mode == 'train':
        disc_model_conf = Configuration.from_dict(conf.discriminator_model,
                                                  conf)
        disc_model = construct_model(disc_model_conf, disc_model_conf.name,
                                     cuda)

        gen_adv_criteria = {
            loss_name: get_criterion(conf, loss_name, cuda, loss_type='gen')
            for loss_name in conf.generator_adversarial_losses
        }
        gen_criteria = {
            loss_name: get_criterion(conf, loss_name, cuda)
            for loss_name in conf.generator_losses
        }
        disc_adv_criteria = {
            loss_name: get_criterion(conf, loss_name, cuda, loss_type='disc')
            for loss_name in conf.discriminator_losses
        }

        if cuda != '':
            # Potentially split models over GPUs
            gen_model, disc_model = utils.cudaify([gen_model, disc_model],
                                                  cuda)
            utils.cudaify(
                list(gen_adv_criteria.values()) + list(gen_criteria.values()) +
                list(disc_adv_criteria.values()))

        # Important: construct optimizers after moving model to GPU!
        gen_opt_conf = Configuration.from_dict(conf.generator_optimizer, conf)
        gen_optimizer = get_optimizer(gen_opt_conf, gen_opt_conf.name,
                                      gen_model.parameters())
        gen_lr_scheduler = None
        if gen_opt_conf.has_attr('lr_scheduler'):
            gen_lr_scheduler = get_lr_scheduler(gen_opt_conf,
                                                gen_opt_conf.lr_scheduler,
                                                gen_optimizer)

        disc_opt_conf = Configuration.from_dict(conf.discriminator_optimizer,
                                                conf)
        disc_optimizer = get_optimizer(disc_opt_conf, disc_opt_conf.name,
                                       disc_model.parameters())
        disc_lr_scheduler = None
        if disc_opt_conf.has_attr('lr_scheduler'):
            disc_lr_scheduler = get_lr_scheduler(disc_opt_conf,
                                                 disc_opt_conf.lr_scheduler,
                                                 disc_optimizer)

        train_input_batch_transform = get_input_batch_transform(
            conf, conf.application, 'train')
        train_disc_metrics = conf.get_attr('train_discriminator_metrics',
                                           default=[])
        train_disc_metric_fns = {
            name: get_metric_fn(conf, name, cuda, 'train')
            for name in train_disc_metrics
        }
        val_disc_metric_key = 'validation_discriminator_metrics'
        val_disc_metric_fns = {
            name: get_metric_fn(conf, name, cuda, 'test')
            for name in conf.get_attr(val_disc_metric_key, default=[])
        }

        train_gen_metrics = conf.get_attr('train_generator_metrics',
                                          default=[])
        train_gen_metric_fns = {
            name: get_metric_fn(conf, name, cuda, 'train')
            for name in train_gen_metrics
        }

        disc_input_fn = get_discriminator_input_fn(conf, disc_model_conf)
        val_disc_input_fn = get_discriminator_input_fn(conf,
                                                       disc_model_conf,
                                                       no_pool=True)

        pretr_generator_epochs = conf.get_attr('pretrain_generator_epochs')
        pretr_discriminator_epochs = conf.get_attr(
            'pretrain_discriminator_epochs')

        runner = AdversarialRunner(
            gen_model, disc_model, gen_optimizer, disc_optimizer,
            gen_lr_scheduler, disc_lr_scheduler, gen_adv_criteria,
            gen_criteria, disc_adv_criteria,
            conf.get_attr('generator_loss_weights', {}),
            conf.get_attr('discriminator_loss_weights', {}), cuda,
            train_gen_metric_fns, train_disc_metric_fns, val_metric_fns,
            val_disc_metric_fns, output_transform, train_input_batch_transform,
            test_input_batch_transform,
            gen_opt_conf.get_attr('updates_per_step', 1),
            disc_opt_conf.get_attr('updates_per_step',
                                   1), disc_input_fn, val_disc_input_fn,
            pretr_generator_epochs, pretr_discriminator_epochs)
        if gen_model_conf.has_attr('pretrained_weights'):
            initialize_pretrained_model(gen_model_conf, runner.gen, cuda,
                                        conf.file)

        if disc_model_conf.has_attr('pretrained_weights'):
            initialize_pretrained_model(disc_model_conf, runner.disc, cuda,
                                        conf.file)
    else:
        if cuda != '':
            utils.cudaify(gen_model)
        runner = AdversarialRunner(
            gen_model,
            cuda=cuda,
            val_metric_fns=val_metric_fns,
            output_transform=output_transform,
            test_input_batch_transform=test_input_batch_transform)

    return runner