Beispiel #1
0
def main(args):
    """Main train and evaluation function.

    Unless ``args.is_eval_only`` is set, a model is built, trained and saved
    under ``RES_DIR/<args.name>``. Afterwards, if ``args.is_metrics`` is set
    or ``args.no_test`` is not set, the saved model is reloaded from that
    directory and run through the evaluator.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments. Attributes read here: ``log_level``, ``seed``, ``no_cuda``,
        ``name``, ``is_eval_only``, ``loss``, ``batch_size``, ``epochs``,
        ``dataset``, ``model_type``, ``latent_dim``, ``lr``,
        ``no_progress_bar``, ``checkpoint_every``, ``is_metrics``, ``no_test``
        and ``eval_batchsize``. The namespace is mutated in place:
        ``img_size`` is added when training, and ``batch_size`` / ``epochs``
        are doubled when ``loss == "factor"``. The whole namespace is also
        forwarded as keyword arguments to ``get_loss_f`` and stored as the
        model metadata.
    """
    log_level = args.log_level.upper()
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)

    # Reuse the handler installed by a previous call instead of stacking a new
    # one each time; otherwise every record is printed once per call to main.
    handler_name = "{}.main.stream".format(__name__)
    stream = next((h for h in logger.handlers if h.name == handler_name), None)
    if stream is None:
        stream = logging.StreamHandler()
        stream.name = handler_name
        stream.setFormatter(
            logging.Formatter(
                '%(asctime)s %(levelname)s - %(funcName)s: %(message)s',
                "%H:%M:%S"))
        logger.addHandler(stream)
    stream.setLevel(log_level)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    logger.info("Root directory for saving and loading experiments: %s",
                exp_dir)

    if not args.is_eval_only:

        create_safe_directory(exp_dir, logger=logger)

        if args.loss == "factor":
            logger.info(
                "FactorVae needs 2 batches per iteration. To replicate this behavior while being consistent, we double the batch size and the number of epochs."
            )
            # Mutates args on purpose: the doubled values are what gets used
            # below and what ends up in the saved metadata.
            args.batch_size *= 2
            args.epochs *= 2

        # PREPARES DATA
        train_loader = get_dataloaders(args.dataset,
                                       batch_size=args.batch_size,
                                       logger=logger)
        logger.info("Train %s with %s samples", args.dataset,
                    len(train_loader.dataset))

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        model = init_specific_model(args.model_type, args.img_size,
                                    args.latent_dim)
        logger.info('Num parameters in model: %s', get_n_param(model))

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        model = model.to(device)  # make sure trainer and viz on same device
        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
        loss_f = get_loss_f(args.loss,
                            n_data=len(train_loader.dataset),
                            device=device,
                            **vars(args))
        trainer = Trainer(model,
                          optimizer,
                          loss_f,
                          device=device,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer)
        trainer(
            train_loader,
            epochs=args.epochs,
            checkpoint_every=args.checkpoint_every,
        )

        # SAVE MODEL AND EXPERIMENT INFORMATION
        save_model(trainer.model, exp_dir, metadata=vars(args))

    if args.is_metrics or not args.no_test:
        # Always evaluate the model as persisted in exp_dir, not the
        # in-memory one, so eval-only runs and fresh runs share this path.
        model = load_model(exp_dir, is_gpu=not args.no_cuda)
        metadata = load_metadata(exp_dir)
        # TO-DO: currently uses train dataset

        test_loader = get_dataloaders(metadata["dataset"],
                                      batch_size=args.eval_batchsize,
                                      shuffle=False,
                                      logger=logger)
        loss_f = get_loss_f(args.loss,
                            n_data=len(test_loader.dataset),
                            device=device,
                            **vars(args))

        # wandb reporting is switched off in code; the branch below only runs
        # if this flag is flipped by hand.
        use_wandb = False
        if use_wandb:
            loss = args.loss
            wandb.init(project="atmlbetavae", config={"VAE_loss": args.loss})
            if loss == "betaH":
                beta = loss_f.beta
                wandb.config["Beta"] = beta
        evaluator = Evaluator(model,
                              loss_f,
                              device=device,
                              logger=logger,
                              save_dir=exp_dir,
                              is_progress_bar=not args.no_progress_bar,
                              use_wandb=use_wandb)

        evaluator(test_loader,
                  is_metrics=args.is_metrics,
                  is_losses=not args.no_test)
                            loss_name=args.loss,
                            device=device,
                            logger=logger,
                            save_dir=exp_dir,
                            is_progress_bar=not args.no_progress_bar,
                            gif_visualizer=gif_visualizer)
            trainer(train_loader,
                    epochs=args.epochs,
                    checkpoint_every=args.checkpoint_every)

            # SAVE MODEL AND EXPERIMENT INFORMATION
            save_model(trainer.model, exp_dir, metadata=vars(args))

        if args.is_metrics or not args.no_test:
            model = load_model(exp_dir, is_gpu=not args.no_cuda)
            metadata = load_metadata(exp_dir)
            # TO-DO: currently uses train datatset
            test_loader = get_dataloaders(metadata["dataset"],
                                        batch_size=args.eval_batchsize,
                                        shuffle=False,
                                        logger=logger)
            loss_f = get_loss_f(args.loss,
                                n_data=len(test_loader.dataset),
                                device=device,
                                **vars(args))
            evaluator = Evaluator(model, loss_f,
                                device=device,
                                logger=logger,
                                save_dir=exp_dir,
                                is_progress_bar=not args.no_progress_bar)