def run_trials():
    """Run one TPE meta optimisation step and save its results."""
    max_evals = nb_evals = 15

    logger = get_logger(save_path + '/trials.log', name='trials')

    logger.info("Attempt to resume a past training if it exists:")

    try:
        # Resume a previous optimisation run if a pickled Trials object exists.
        # https://github.com/hyperopt/hyperopt/issues/267
        with open(save_path + "/results.pkl", "rb") as f:
            trials = pickle.load(f)
        logger.info("Found saved Trials! Loading...")
        max_evals = len(trials.trials) + nb_evals
        logger.info("Rerunning from {} trials to add {} more.".format(
            len(trials.trials), nb_evals))
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        trials = Trials()
        logger.info("Starting from scratch: new trials.")

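    # fmin drives the TPE search over `space`; passing the (possibly resumed)
    # Trials object lets hyperopt continue from the stored search history.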
    best = fmin(
        train_challenge2020,
        space,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals,
    )

    logger.info("Best: {}".format(best))
    with open(save_path + "/results.pkl", "wb") as f:
        pickle.dump(trials, f)
    logger.info("\nOPTIMIZATION STEP COMPLETE.\n")
    logger.info("Trials:")

    for trial in trials:
        logger.info(trial)


def train_challenge2020(hype_space):
    """Train, validate and test one model for a sampled hyperparameter set.

    Returns the negated challenge test metric so hyperopt can minimise it.
    """
    # Paths to save log, checkpoint, tensorboard logs and results
    run_id = datetime.now().strftime(r'%m%d_%H%M%S')
    base_path = save_path + '/' + run_id
    os.makedirs(base_path)
    write_json(hype_space, base_path + '/hype_space.json')

    checkpoint_dir = base_path + '/checkpoints'
    log_dir = base_path + '/log'
    tb_dir = base_path + '/tb_log'
    result_dir = base_path + '/results'

    os.makedirs(result_dir)
    os.makedirs(log_dir)
    os.makedirs(checkpoint_dir)
    os.makedirs(tb_dir)

    # Logger for train
    logger = get_logger(log_dir + '/info.log', name='train' + run_id)
    logger.info(hype_space)

    # Tensorboard
    train_writer = SummaryWriter(tb_dir + '/train')
    val_writer = SummaryWriter(tb_dir + '/valid')

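    # The whole pipeline below is wrapped in try/except so that a single
    # failed trial reports a penalty score instead of aborting the sweep.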
    try:
        # Hyper Parameters
        split_index = "../process/data_split/" + hype_space['data_split']

        # Setup Cuda
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")

        # Data_loader
        train_loader = ChallengeDataLoader2(
            label_dir,
            data_dir,
            split_index,
            batch_size=hype_space['trainer']['batch_size'],
            normalization=hype_space['data_normalization'],
            augmentations=hype_space['augmentation']['method'],
            p=hype_space['augmentation']['prob'])
        valid_loader = train_loader.valid_data_loader
        test_loader = train_loader.test_data_loader
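        # The validation and test loaders are derived from the training loader,
        # so all three splits share the same split file and preprocessing
        # settings.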

        # Build model architecture
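        # files_models maps each architecture module suffix to the model names
        # it provides; the module whose list contains the sampled 'arch' type
        # is resolved by name (module_arch_<file>) and instantiated via
        # init_obj with the sampled constructor args.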
        global model
        for file, types in files_models.items():
            for arch_type in types:
                if hype_space["arch"]["type"] == arch_type:
                    model = init_obj(hype_space, 'arch',
                                     eval("module_arch_" + file))

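        # Trace the model graph for TensorBoard with a dummy batch (assumed
        # input shape: 16 recordings x 12 leads x 3000 samples).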
        dummy_input = torch.rand(16, 12, 3000)
        train_writer.add_graph(model, (dummy_input, ))

        model.to(device)

        # Get function handles of loss and metrics
        criterion = getattr(module_loss, hype_space['loss']['type'])

        # Get function handles of metrics
        challenge_metrics = ChallengeMetric(label_dir)
        metric = challenge_metrics.challenge_metric

        # Get indices of the scored labels
        if hype_space['only_scored']:
            indices = challenge_metrics.indices
        else:
            indices = None
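        # indices is passed to the train/valid loops so that, when only_scored
        # is enabled, evaluation can be restricted to the officially scored
        # classes; None means all labels are used.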

        # Build optimizer, learning rate scheduler
        trainable_params = filter(lambda p: p.requires_grad,
                                  model.parameters())
        optimizer = init_obj(hype_space, 'optimizer', torch.optim,
                             trainable_params)
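        # GradualWarmupScheduler (as in the common warmup-scheduler
        # implementation) ramps the learning rate up to multiplier * base_lr
        # over total_epoch epochs, then hands control to the configured
        # 'after_scheduler'; every other scheduler type is built directly from
        # its args.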
        if hype_space['lr_scheduler']['type'] == 'GradualWarmupScheduler':
            params = hype_space["lr_scheduler"]["args"]
            scheduler_steplr_args = dict(params["after_scheduler"]["args"])
            scheduler_steplr = getattr(torch.optim.lr_scheduler,
                                       params["after_scheduler"]["type"])(
                                           optimizer, **scheduler_steplr_args)
            lr_scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=params["multiplier"],
                total_epoch=params["total_epoch"],
                after_scheduler=scheduler_steplr)
        else:
            lr_scheduler = init_obj(hype_space, 'lr_scheduler',
                                    torch.optim.lr_scheduler, optimizer)

        # Begin training process
        trainer = hype_space['trainer']
        epochs = trainer['epochs']

        # Full train and valid logic
        mnt_metric_name, mnt_mode, mnt_best, early_stop = get_mnt_mode(trainer)
        not_improved_count = 0

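        # Main loop: train one epoch, validate, step the LR scheduler, then
        # update the early-stopping and best-checkpoint bookkeeping.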
        for epoch in range(epochs):
            best = False
            train_loss, train_metric = train(model,
                                             optimizer,
                                             train_loader,
                                             criterion,
                                             metric,
                                             indices,
                                             epoch,
                                             device=device)
            val_loss, val_metric = valid(model,
                                         valid_loader,
                                         criterion,
                                         metric,
                                         indices,
                                         device=device)

            scheduler_type = hype_space['lr_scheduler']['type']
            if scheduler_type == 'ReduceLROnPlateau':
                # Could also be stepped on train_loss/train_metric depending
                # on the scheduler's 'mode'; the validation loss is used here.
                lr_scheduler.step(val_loss)
            elif scheduler_type == 'GradualWarmupScheduler':
                # The (epoch, metrics) signature matches warmup-scheduler
                # style implementations; the metric is only consumed when the
                # wrapped after_scheduler is ReduceLROnPlateau.
                lr_scheduler.step(epoch, val_loss)
            else:
                lr_scheduler.step()

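            # Log the per-epoch training/validation summary and the current
            # learning rate of the first parameter group.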
            logger.info(
                'Epoch:[{}/{}]\t {:10s}: {:.5f}\t {:10s}: {:.5f}'.format(
                    epoch, epochs, 'loss', train_loss, 'metric', train_metric))
            logger.info(
                '             \t {:10s}: {:.5f}\t {:10s}: {:.5f}'.format(
                    'val_loss', val_loss, 'val_metric', val_metric))
            logger.info('             \t learning_rate: {}'.format(
                optimizer.param_groups[0]['lr']))

            # check whether model performance improved or not, according to specified metric(mnt_metric)
            if mnt_mode != 'off':
                mnt_metric = val_loss if mnt_metric_name == 'val_loss' else val_metric
                improved = (mnt_mode == 'min' and mnt_metric <= mnt_best) or \
                           (mnt_mode == 'max' and mnt_metric >= mnt_best)
                if improved:
                    mnt_best = mnt_metric
                    not_improved_count = 0
                    best = True
                else:
                    not_improved_count += 1

                if not_improved_count > early_stop:
                    logger.info(
                        "Validation performance didn't improve for {} epochs. "
                        "Training stops.".format(early_stop))
                    break

            if best:
                save_checkpoint(model,
                                epoch,
                                optimizer,
                                mnt_best,
                                hype_space,
                                checkpoint_dir,
                                save_best=True)
                logger.info("Saving current best: model_best.pth ...")

            # Tensorboard log
            train_writer.add_scalar('loss', train_loss, epoch)
            train_writer.add_scalar('metric', train_metric, epoch)
            train_writer.add_scalar('learning_rate',
                                    optimizer.param_groups[0]['lr'], epoch)

            val_writer.add_scalar('loss', val_loss, epoch)
            val_writer.add_scalar('metric', val_metric, epoch)

        # Logger for test
        logger = get_logger(result_dir + '/info.log', name='test' + run_id)
        logger.propagate = False

        # Load model_best checkpoint
        model = load_checkpoint(model, checkpoint_dir + '/model_best.pth',
                                logger)

        # Testing
        test_loss, test_metric = test(model,
                                      test_loader,
                                      criterion,
                                      metric,
                                      device=device)
        logger.info('    {:10s}: {:.5f}\t {:10s}: {:.5f}'.format(
            'loss', test_loss, 'metric', test_metric))

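        # return_metric_list() presumably prepares per-label metrics before
        # analyze() writes the detailed test-set analysis into result_dir.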
        challenge_metrics.return_metric_list()
        analyze(model,
                test_loader,
                criterion,
                challenge_metrics,
                logger,
                result_dir,
                device=device)

        write_json(hype_space,
                   '{}/{}_{:.5f}.json'.format(save_path, run_id, test_metric))

    except Exception:
        # A failed trial must not abort the whole hyperopt sweep: log the
        # traceback and fall back to a heavily penalised score.
        logger.exception("Trial failed:")
        test_metric = -10

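    # hyperopt minimises the returned value, so the higher-is-better challenge
    # metric is negated; failed trials therefore report a score of +10.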
    return -test_metric