Example #1
0
def get_xp(args, optimizer):
    """Create the experiment logger and register the metrics it tracks.

    Stores the command line, process id, working directory and hostname on
    ``args``, builds a ``logger.Experiment`` named ``args.xp_name`` and logs
    ``vars(args)`` as the experiment configuration.

    ``optimizer`` is accepted but not used here.

    Returns the configured experiment.
    """
    # bookkeeping information saved together with the hyper-parameters
    args.command_line = 'python {}'.format(' '.join(sys.argv))
    args.pid = os.getpid()
    args.cwd = os.getcwd()
    args.hostname = socket.gethostname()

    xp = logger.Experiment(
        args.xp_name,
        use_visdom=args.visdom,
        visdom_opts=dict(server='http://localhost', port=args.port),
        time_indexing=False,
        xlabel='Epoch',
    )

    xp.SumMetric(name='epoch', to_plot=False)

    splits = ('train', 'val', 'test')

    # one averaged error and one timer per data split
    for split in splits:
        xp.AvgMetric(name='error', tag=split)
    for split in splits:
        xp.TimeMetric(name='timer', tag=split)

    # the objective is only tracked on the training split
    xp.AvgMetric(name='obj', tag='train')

    xp.log_config(vars(args))
    return xp
Example #2
0
def get_xp(args, model, optimizer):
    """Create the experiment logger, its metrics and its saving hooks.

    Stores the command line, process id, working directory and hostname on
    ``args``, builds a ``logger.Experiment`` named ``args.xp_name`` and logs
    ``vars(args)`` as the experiment configuration.

    When ``args.log`` is truthy, hooks are attached so that results are
    written to JSON and ``save_state(model, optimizer, path)`` is called
    whenever the epoch metric, the test accuracy or the best validation
    accuracy is updated.

    Returns the configured experiment.
    """
    # bookkeeping information saved together with the hyper-parameters
    args.command_line = 'python {}'.format(' '.join(sys.argv))
    args.pid = os.getpid()
    args.cwd = os.getcwd()
    args.hostname = socket.gethostname()

    xp = logger.Experiment(
        args.xp_name,
        use_visdom=args.visdom,
        visdom_opts=dict(server=args.server, port=args.port),
        time_indexing=False,
        xlabel='Epoch',
    )

    xp.SumMetric(name='epoch', to_plot=False)

    splits = ('train', 'val', 'test')

    for split in splits:
        xp.AvgMetric(name='acc', tag=split)
    xp.BestMetric(name='acc', tag='valbest')

    for split in splits:
        xp.TimeMetric(name='timer', tag=split)

    for train_metric in ('loss', 'obj'):
        xp.AvgMetric(name=train_metric, tag='train')
    xp.AvgMetric(name='reg')

    xp.log_config(vars(args))

    xp.AvgMetric(name="gamma")
    xp.SimpleMetric(name='eta')

    if not args.log:
        return xp

    def dump_results():
        return xp.to_json('{}/results.json'.format(xp.name_and_dir))

    def dump_best_results():
        return xp.to_json('{}/best_results.json'.format(xp.name_and_dir))

    def save_best_model():
        return save_state(model, optimizer,
                          '{}/best_model.pkl'.format(xp.name_and_dir))

    # write the results at each epoch ...
    xp.Epoch.add_hook(dump_results)
    # ... and once more after the final evaluation on the test set
    xp.Acc_Test.add_hook(dump_results)

    # the model file gets a per-epoch directory suffix only when requested
    if args.dump_epoch:
        def model_path():
            return '{}-{}/model.pkl'.format(xp.name_and_dir,
                                            int(xp.Epoch.value))
    else:
        def model_path():
            return '{}/model.pkl'.format(xp.name_and_dir)

    def save_model():
        return save_state(model, optimizer, model_path())

    xp.Epoch.add_hook(save_model)

    # keep results and model of the best validation performance
    xp.Acc_Valbest.add_hook(dump_best_results)
    xp.Acc_Valbest.add_hook(save_best_model)

    return xp
Example #3
0
def logger_from_dict(config, use_visdom=True):
    """
    Create a logger from a dictionary.

    The experiment name is the current date (``YYYY-MM-DD``) followed by one
    ``key_value`` segment per entry of ``config`` whose value is not ``None``,
    with keys taken in sorted order and all parts joined by dashes. Keys and
    string values are passed through ``helpers.to_camel_case`` after spaces
    are replaced by underscores. The ``Transforms`` and ``trainType`` keys are
    left out of the name.

    :param config: Dictionary containing the experiment hyper-parameters
    :param use_visdom: Whether to use Visdom
    :return: Logger
    """
    name_parts = [datetime.datetime.now().strftime("%Y-%m-%d")]

    for key, value in sorted(config.items()):
        key = helpers.to_camel_case(key.replace(' ', '_'))

        if key == 'Transforms' or key == 'trainType':
            continue

        # `value is str` only matched the type object itself, so string
        # values were never normalised; check the instance type instead.
        if isinstance(value, str):
            value = helpers.to_camel_case(value.replace(' ', '_'))

        if value is not None and key is not None:
            name_parts.append("{}_{}".format(key, value))

    # Joining avoids having to strip a trailing dash afterwards
    experiment_name = "-".join(name_parts)

    # Create logger
    log = logger.Experiment(name=experiment_name,
                            use_visdom=use_visdom,
                            visdom_opts={
                                'server': 'http://localhost',
                                'port': 8097
                            },
                            time_indexing=False,
                            xlabel='Epoch')

    log.log_config(config)

    # create parent metric for training metrics (easier interface)
    log.ParentWrapper(
        tag='train',
        name='parent',
        children=[log.AvgMetric(name='loss'),
                  log.AvgMetric(name='acc')])
    # same for validation metrics (note all children inherit tag from parent)
    log.ParentWrapper(
        tag='val',
        name='parent',
        children=[log.AvgMetric(name='loss'),
                  log.AvgMetric(name='acc')])
    # Add a best metric for the validation accuracy
    log.ParentWrapper(tag='best',
                      name='parent',
                      children=[log.BestMetric(name='acc')])

    return log
Example #4
0
File: utils.py Project: galsina/lml
def create_experiment(args):
    """Build the experiment logger with train/val/test metric groups.

    Creates a ``logger.Experiment`` named ``args.out_name``, registers one
    parent wrapper per split, a few standalone metrics and the best
    validation accuracies, sets the Visdom window titles when
    ``args.visdom`` is truthy, and logs ``vars(args)`` as the configuration.

    Returns the configured experiment.
    """
    xp = logger.Experiment(
        args.out_name,
        use_visdom=args.visdom,
        visdom_opts={'server': args.server, 'port': args.port},
        time_indexing=False,
        xlabel='Epoch',
    )

    def averaged(*metric_names):
        # averaged metrics, created in the order given
        return tuple(xp.AvgMetric(name=metric_name)
                     for metric_name in metric_names)

    train_children = averaged('loss', 'acc1', 'acck') + (
        xp.SimpleMetric(name='obj'),
        xp.TimeMetric(name='timer'),
    )
    xp.ParentWrapper(tag='train', name='parent', children=train_children)

    val_children = averaged('acck', 'acc1') + (xp.TimeMetric(name='timer'),)
    xp.ParentWrapper(tag='val', name='parent', children=val_children)

    test_children = averaged('acc1', 'acck') + (xp.TimeMetric(name='timer'),)
    xp.ParentWrapper(tag='test', name='parent', children=test_children)

    xp.SumMetric(name='epoch', to_plot=False)
    xp.DynamicMetric(name='learning_rate')
    for hidden_metric in ('temperature', 'mu'):
        xp.DynamicMetric(name=hidden_metric, to_plot=False)

    for accuracy in ('acc1', 'acck'):
        xp.BestMetric(tag='val_best', name=accuracy, mode='max')

    if args.visdom:
        # NOTE(review): 'Timer' is capitalised while the metric is registered
        # as 'timer' -- confirm the plotter matches window names as intended.
        window_titles = (
            ('acc1', 'Accuracy@1'),
            ('acck', 'Accuracy@k'),
            ('loss', 'Loss Function'),
            ('obj', 'Objective Function'),
            ('learning_rate', 'Learning Rate'),
            ('Timer', 'Time (s) / epoch'),
        )
        for window, title in window_titles:
            xp.plotter.set_win_opts(name=window, opts={'title': title})

    xp.log_config(vars(args))

    return xp
Example #5
0
def init_visdom(env_name, config):
    """Set up a Visdom-backed experiment and return three helper callables.

    ``config`` must be a plain ``dict``; it is logged as the experiment
    configuration.

    Returns ``(update_metrics, log_metrics, plot_norm)``:

    * ``update_metrics(loss, acc, key='train')`` updates the training
      metrics for ``key == 'train'`` and the validation metrics for
      ``key == 'val'``; any other key is ignored.
    * ``log_metrics()`` logs both metric groups, then resets them.
    * ``plot_norm(val)`` updates, logs and resets the norm metric.
    """
    assert type(config) == dict

    stats = logger.Experiment(
        env_name,
        log_git_hash=False,
        use_visdom=True,
        visdom_opts=dict(server='http://localhost', port=8787),
        time_indexing=False,
    )

    validation_children = (stats.SimpleMetric(name='loss'),
                           stats.SimpleMetric(name="accuracy"))
    val_metrics = stats.ParentWrapper(tag="validation",
                                      name="parent",
                                      children=validation_children)

    training_children = (stats.AvgMetric(name='loss'),
                         stats.AvgMetric(name="accuracy"))
    train_metrics = stats.ParentWrapper(tag="training",
                                        name="parent",
                                        children=training_children)

    stats.log_config(config)

    norm_children = (stats.SimpleMetric(name="norm"), )
    norm = stats.ParentWrapper(tag="norm",
                               name="parent2",
                               children=norm_children)

    def update_metrics(loss, acc, key='train'):
        if key == 'train':
            target = train_metrics
        elif key == "val":
            target = val_metrics
        else:
            # unknown keys are silently ignored
            return
        target.update(loss=loss, accuracy=acc)

    def log_metrics():
        groups = (train_metrics, val_metrics)
        # log everything first, reset afterwards
        for group in groups:
            stats.log_metric(group)
        for group in groups:
            group.reset()

    def plot_norm(val):
        norm.update(norm=val)
        stats.log_metric(norm)
        norm.reset()

    return update_metrics, log_metrics, plot_norm
Example #6
0
def set_defaults(args):
    """Fill in mode-specific and dataset-specific defaults on ``args``.

    Debug mode and evaluation mode both switch off Visdom and logging and
    set ``args.out_name``. Evaluation mode additionally reads ``topk`` and
    ``model`` from the JSON file that sits next to ``args.load_model``.
    Every option of the dataset's default dictionary that is still ``None``
    on ``args`` is then replaced by its default, and the command line,
    working directory and hostname are recorded.

    ``args`` is modified in place; nothing is returned.
    """
    if args.debug:
        # debug mode never logs
        args.visdom, args.log = False, False
        args.out_name = '../xp/debug'
        # start from a clean debug log
        debug_log = '../xp/debug_log.txt'
        if os.path.exists(debug_log):
            os.remove(debug_log)

    elif args.eval:
        args.visdom, args.log = False, False
        args.out_name = '../xp/{}'.format(args.dataset)
        args.epochs = 0
        # recover the settings of the experiment that produced the model
        stored_xp = logger.Experiment("")
        stored_xp.from_json(args.load_model.replace(".pkl", ".json"))
        args.topk = stored_xp.config['topk']
        args.model = stored_xp.config['model']
        if args.multiple_crops:
            args.test_batch_size = 32

    assert args.dataset in ('imagenet', 'cifar100')
    if args.dataset == 'imagenet':
        my_dict = imagenet_defaults
    elif args.dataset == 'cifar100':
        my_dict = cifar100_defaults

    # number of classes of the selected dataset
    args.num_classes = my_dict['num_classes']

    # options left unset (None) fall back to the dataset defaults
    for option, default in my_dict.items():
        if getattr(args, option) is not None:
            continue
        setattr(args, option, default)

    # keep the full command line in args
    args.command_line = ' '.join(sys.argv)

    # keep the current directory and hostname in args
    args.cwd = os.getcwd()
    args.hostname = socket.gethostname()
Example #7
0
def create_xp(args):
    """Create an experiment whose name encodes the optimizer settings.

    The name is ``<opt>_<lr>_<weight_decay>``, with ``_proj<proj>`` appended
    when ``args.proj`` is not ``None``. It is also used as the process title
    and as the Visdom environment. ``vars(args)`` is recorded as the
    configuration and three ``SimpleMetric`` objects (``gamma``,
    ``gamma_unclipped``, ``epsilon``) are attached to the experiment.

    Returns the configured experiment.
    """
    logger.set_default_indexing('increment')

    base_name = "{}_{}_{}".format(args.opt, args.lr, args.weight_decay)
    proj_suffix = "" if args.proj is None else "_proj{}".format(args.proj)
    xp_name = base_name + proj_suffix
    setproctitle.setproctitle(xp_name)

    visdom_opts = dict(server='http://atlas.robots.ox.ac.uk',
                       port=9006,
                       env=xp_name)
    plotter = logger.Plotter(visdom_opts=visdom_opts, mode='automatic')
    xp = logger.Experiment(name=xp_name, plotter=plotter, track_git=True)

    # record the hyper-parameters of the experiment
    xp.config.update(**vars(args))
    xp.config.record()

    for metric_attr in ('gamma', 'gamma_unclipped', 'epsilon'):
        setattr(xp, metric_attr, logger.SimpleMetric())
    return xp
Example #8
0
    acck = np.random.rand() + 90

    return loss, acc1, acck


# hyper-parameters of the experiment
lr, n_epochs = 0.01, 10

#----------------------------------------------------------
# Prepare logging
#----------------------------------------------------------

# the experiment plots to a local Visdom server, indexed by epoch
xp = logger.Experiment(
    "xp_name",
    use_visdom=True,
    visdom_opts=dict(server='http://localhost', port=8097),
    time_indexing=False,
    xlabel='Epoch',
)
# record the hyper-parameters of the experiment
xp.log_config(dict(lr=lr, n_epochs=n_epochs))
# training metrics are grouped under one parent (easier interface)
xp.ParentWrapper(
    tag='train',
    name='parent',
    children=tuple(xp.AvgMetric(name=metric_name)
                   for metric_name in ('loss', 'acc1', 'acck')),
)
# validation metrics mirror them (children inherit the parent's tag)
xp.ParentWrapper(
    tag='val',
    name='parent',
    children=tuple(xp.AvgMetric(name=metric_name)
                   for metric_name in ('loss', 'acc1', 'acck')),
)
# best validation accuracies seen so far
best1 = xp.BestMetric(tag="val-best", name="acc1")
bestk = xp.BestMetric(tag="val-best", name="acck")
Example #9
0
# seed numpy and torch from the same option
np.random.seed(opt.seed)
torch.manual_seed(opt.seed)

if opt.cuda:
    cudnn.benchmark = True
    torch.cuda.manual_seed_all(opt.seed)

# directory of the logger inside the experiment folder
LOG_DIR = '{}/logger'.format(opt.experiment)

# hyperparameters saved with this experiment
hyperparameters = {'regularization': 1, 'n_epochs': opt.epochs}
# where the remote visualization backend listens
visdom_opts = {'server': 'http://localhost', 'port': 8097}
# experiment plotting to visdom
xp = logger.Experiment('xp_name', use_visdom=True, visdom_opts=visdom_opts)
# record the hyperparameters of the experiment
xp.log_config(hyperparameters)
# both training losses are grouped under one parent (easier interface)
train_metrics = xp.ParentWrapper(
    tag='train',
    name='parent',
    children=tuple(xp.AvgMetric(name=loss_name)
                   for loss_name in ('lossD', 'lossG')),
)

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, "
          "so you should probably run with --cuda")

# setup transformations
transforms = transforms.Compose([
Example #10
0
                if len(real_paths[0]) > 0:
                    sg = model.graph.subgraph(real_paths[0])

                    drawer.draw(sg, vis_opts={'title': 'Real', **draw_ops}, weights=1.0, vis_win='Clean')

                    for ce_name, ce in cost_evaluators.items():
                        if ce_name.startswith('parallel'):
                            n_steps = max(ce.node_steps[0].values())
                            drawer.draw(sg, vis_opts={'title': 'Alloc ' + ce_name, **draw_ops},
                                        weights=ce.node_steps[0],
                                        colormap=drawing_utils.get_colormap(n_steps),
                                        vis_win='Alloc ' + ce_name)

                drawer.draw(model.graph, vis_opts={'title': 'Mean', **draw_ops},
                            weights=path_recorder.get_posterior_weights(), vis_win='mean')

        logger.info('EPOCH DONE')


if __name__ == '__main__':
    # direct launch: build the experiment logger, then hand over to main()
    logger.info('Executing main from {}'.format(os.getcwd()))

    args = vars(argument_parser())
    exp_name = 'Direct Launch'
    vis_conf = external.get_visdom_conf()

    xp_logger = data_logger.Experiment(
        exp_name,
        use_visdom=True,
        visdom_opts=dict(server=vis_conf['url'],
                         port=vis_conf['port'],
                         env=args['draw_env']),
        time_indexing=False,
        xlabel='Epoch',
    )
    main(args, xp_logger)
Example #11
0
import logger
import os


# Replay every saved experiment under ./runs to a local Visdom server,
# leaving out anything stored in a directory whose path contains 'archive'.
for root, dirs, files in os.walk("./runs"):
    if 'archive' in root:
        continue
    for filename in files:
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(root, filename)
        print('Loading: ', file_path)
        xp = logger.Experiment('dummy_name')
        xp.from_json(file_path)
        # drop the stored legends before plotting
        for win_opts in xp.visdom_win_opts.values():
            win_opts.pop('legend', None)
        xp.to_visdom(visdom_opts=dict(server='http://localhost', port=8098))
        print('Success!')
Example #12
0
def main(_run, nepochs, device, use_visdom, visdom_conf, n_classes,
         lambda_reward, r_beta, r_gamma, _config):
    """Train and evaluate the adaptive model for ``nepochs`` epochs.

    Each epoch calls ``evaluate_model`` on the train split (with an
    optimisation closure), then on the validation and test splits, pushes
    the results to the experiment logger (and to Visdom when ``use_visdom``
    is set), recomputes the Pareto front of validation cost versus accuracy
    and checkpoints the model when the current epoch is on that front, or
    every 50 epochs otherwise.

    :param _run: Run object; its ``_id``, ``info`` dict and ``log_scalar``
        are used here (presumably a sacred run -- TODO confirm)
    :param nepochs: Number of epochs; epochs are numbered from 1
    :param device: Device the model is moved to; also given to
        ``evaluate_model``
    :param use_visdom: Whether to plot to Visdom
    :param visdom_conf: Visdom options; ``env`` is overwritten with the
        experiment name when ``use_visdom`` is set
    :param n_classes: Forwarded to ``evaluate_model``
    :param lambda_reward: Forwarded to ``evaluate_model``
    :param r_beta: Argument of the ``EMA`` built for the reward
    :param r_gamma: Forwarded to ``evaluate_model``
    :param _config: Configuration used to format the experiment name and
        printed to Visdom
    :return: Nothing; results are logged and stored in ``_run.info``
    """
    exp_name = format_exp_name(_run._id, _config)
    if use_visdom:
        visdom_conf.update(env=exp_name)
        _run.info['visdom_server'] = "{server}:{port}/env/{env}".format(
            **visdom_conf)
    else:
        _run.info['visdom_server'] = "No visdom"

    _run.info['exp_name'] = exp_name
    # `front` is the same dict object as _run.info['front'], so updating it
    # later also updates the run info
    front = _run.info['front'] = {}

    xp_logger = data_logger.Experiment(exp_name,
                                       use_visdom=use_visdom,
                                       visdom_opts=visdom_conf,
                                       time_indexing=False,
                                       xlabel='Epoch',
                                       log_git_hash=False)
    # every value logged through xp_logger is also passed to _run.log_scalar
    xp_logger.add_log_hook(_run.log_scalar)
    if use_visdom:
        xp_logger.plotter.windows_opts = defaultdict(
            lambda: dict(showlegend=True))

    # direct Visdom handle used for the heatmaps and custom plots below
    viz = Visdom(**visdom_conf) if use_visdom else None

    # Dataset creation
    logger.info('### Dataset ###')

    ds, batch_first, class_w = create_dataset()
    _run.info['class_weights'] = class_w.tolist()

    # row/column labels shared by all the heatmaps
    confusion_matrix_opts = {
        'columnnames': ds['train'].dataset.ordered_class_names,
        'rownames': ds['train'].dataset.ordered_class_names
    }

    # Model Creation
    logger.info('### Model ###')

    adaptive_model = create_model()
    adaptive_model.loss = torch.nn.CrossEntropyLoss(weight=class_w,
                                                    reduction='none',
                                                    ignore_index=-7)

    path_recorder = PathRecorder(adaptive_model.stochastic_model)
    cost_evaluator = ComputationCostEvaluator(
        node_index=path_recorder.node_index, bw=False)
    # cost_evaluator = SimpleEdgeCostEvaluator(node_index=path_recorder.node_index, bw=False)

    cost_evaluator.init_costs(adaptive_model.stochastic_model)
    logger.info('Cost: {:.5E}'.format(cost_evaluator.total_cost))

    adaptive_model.to(device)

    # Optim Creation
    logger.info('### Optim ###')
    optimizer, schedulder = create_optim(
        params=adaptive_model.get_param_groups())

    # Check the param_groups order, to be sure to get the learning rates in the right order for logging
    assert [pg['name'] for pg in optimizer.param_groups
            ] == ['arch_params', 'pred_params']

    # one optimisation step for a given loss; only handed to the training pass
    def optim_closure(loss):
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Logger creation
    splits = ['train', 'validation', 'test']
    metrics = [
        'classif_loss', 'arch_loss', 'reward', 'lambda_reward',
        'silence_ratio', 'accuracy', 'average_cost', 'learning_rate_pred',
        'learning_rate_arch'
    ]

    # one parent wrapper per split, holding one SimpleMetric per metric name
    # NOTE: 'parent' has no placeholder, so .format(split) leaves it unchanged
    for split in splits:
        xp_logger.ParentWrapper(tag=split,
                                name='parent'.format(split),
                                children=[
                                    xp_logger.SimpleMetric(name=metric)
                                    for metric in metrics
                                ])

    # per-class cost metrics, keyed by the class position in
    # ordered_class_names
    train_cost_loggers = dict(
        (i, xp_logger.AvgMetric(name='train_cost', tag=name))
        for i, name in enumerate(ds['train'].dataset.ordered_class_names))
    train_cost_loggers_perc = dict(
        (i, xp_logger.AvgMetric(name='train_cost_perceived', tag=name))
        for i, name in enumerate(ds['train'].dataset.ordered_class_names))

    # per-node metrics, keyed by the node position in ordered_node_names
    node_names = adaptive_model.stochastic_model.ordered_node_names
    # entropy_loggers = [xp_logger.SimpleMetric(name='entropy', tag=name) for name in node_names]
    entropy_loggers = OrderedDict(
        (i, xp_logger.SimpleMetric(name='entropy_per_node', tag=name))
        for i, name in enumerate(node_names))
    # proba_loggers = [xp_logger.SimpleMetric(name='proba', tag=name) for name in node_names]
    proba_loggers = OrderedDict(
        (i, xp_logger.SimpleMetric(name='proba_per_node', tag=name))
        for i, name in enumerate(node_names))

    val_cost_loggers = dict(
        (i, xp_logger.AvgMetric(name='val_cost', tag=name))
        for i, name in enumerate(ds['validation'].dataset.ordered_class_names))
    val_cost_loggers_perc = dict(
        (i, xp_logger.AvgMetric(name='val_cost_perceived', tag=name))
        for i, name in enumerate(ds['validation'].dataset.ordered_class_names))

    test_cost_loggers = dict(
        (i, xp_logger.AvgMetric(name='test_cost', tag=name))
        for i, name in enumerate(ds['test'].dataset.ordered_class_names))
    test_cost_loggers_perc = dict(
        (i, xp_logger.AvgMetric(name='test_cost_perceived', tag=name))
        for i, name in enumerate(ds['test'].dataset.ordered_class_names))

    if use_visdom:
        print_properties(viz, _config)
        print_properties(viz, _run.info)

    ema_reward = EMA(r_beta)  # Init the exponential moving average
    for n in range(1, nepochs + 1):
        logger.info('### Sarting epoch n°{} ### {}'.format(
            n, _run.info['visdom_server']))
        logger.info(' '.join(sys.argv))

        if schedulder:
            schedulder.step(n)
            # unpacked in the param_groups order asserted above
            # (arch_params first, then pred_params)
            arch_lr, pred_lr = schedulder.get_lr()
            xp_logger.Parent_Train.update(learning_rate_pred=pred_lr,
                                          learning_rate_arch=arch_lr)

        # Training
        adaptive_model.train()
        train_cm, train_costcm, train_costcm_norm, train_cost_per_step, logs, train_cost_per_signal_level, train_stats = evaluate_model(
            adaptive_model,
            ds['train'],
            batch_first,
            device,
            path_recorder,
            cost_evaluator,
            train_cost_loggers,
            train_cost_loggers_perc,
            n_classes,
            lambda_reward,
            ema_reward,
            r_gamma,
            optim_closure,
            name='Train')

        xp_logger.Parent_Train.update(**dict(
            (k, v.value()[0]) for k, v in logs.items()))

        for node_idx, ent in train_stats['en'].items():
            entropy_loggers[node_idx].update(ent.value()[0])

        for node_idx, prob in train_stats['pn'].items():
            proba_loggers[node_idx].update(prob.value()[0])

        # Evaluation
        # the model stays in eval mode for both the validation and the test
        # pass; neither call receives the optimisation closure
        adaptive_model.eval()
        val_cm, val_costcm, val_costcm_norm, val_cost_per_step, logs, val_cost_per_signal_level, val_stats = evaluate_model(
            adaptive_model,
            ds['validation'],
            batch_first,
            device,
            path_recorder,
            cost_evaluator,
            val_cost_loggers,
            val_cost_loggers_perc,
            n_classes,
            lambda_reward,
            ema_reward,
            r_gamma,
            name='Validation')

        xp_logger.Parent_Validation.update(**dict(
            (k, v.value()[0]) for k, v in logs.items()))

        test_cm, test_costcm, test_costcm_norm, test_cost_per_step, logs, test_cost_per_signal_level, test_stats = evaluate_model(
            adaptive_model,
            ds['test'],
            batch_first,
            device,
            path_recorder,
            cost_evaluator,
            test_cost_loggers,
            test_cost_loggers_perc,
            n_classes,
            lambda_reward,
            ema_reward,
            r_gamma,
            name='Test')
        xp_logger.Parent_Test.update(**dict(
            (k, v.value()[0]) for k, v in logs.items()))

        if use_visdom:
            # Log
            plot_(viz,
                  train_stats['es'],
                  node_names,
                  f'Entropy per step {n} - Train',
                  'train_eps',
                  log_func=_run.log_scalar)
            plot_(viz,
                  train_stats['ps'],
                  node_names,
                  f'Probability per step {n} - Train',
                  'train_pps',
                  log_func=_run.log_scalar)
            # a connection error while drawing the heatmaps is logged and
            # does not stop the training loop
            try:
                viz.heatmap(train_cm,
                            win='train_cm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Train Confusion matrix'
                            })
                viz.heatmap(val_cm,
                            win='val_cm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Val Confusion matrix'
                            })
                viz.heatmap(test_cm,
                            win='test_cm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Test Confusion matrix'
                            })

                # viz.heatmap(train_costcm, win='train_cost_matrix',
                #             opts={**confusion_matrix_opts, 'title': 'Train cost matrix'})
                # viz.heatmap(val_costcm, win='val_cost_matrix', opts={**confusion_matrix_opts, 'title': 'Val cost matrix'})
                # viz.heatmap(test_costcm, win='test_cost_matrix',
                #             opts={**confusion_matrix_opts, 'title': 'Test cost matrix'})

                viz.heatmap(train_costcm_norm,
                            win='train_cost_matrix_norm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Train cost matrix Normalized'
                            })
                viz.heatmap(val_costcm_norm,
                            win='val_cost_matrix_norm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Val cost matrix Normalized'
                            })
                viz.heatmap(test_costcm_norm,
                            win='test_cost_matrix_norm',
                            opts={
                                **confusion_matrix_opts, 'title':
                                'Test cost matrix Normalized'
                            })

            except ConnectionError as err:
                logger.warning('Error in heatmaps:')
                logger.warning(err)
                traceback.print_exc()

            # the three splits share the 'cps' window; only the train call
            # passes a title
            plot_meters(viz,
                        train_cost_per_step,
                        'train_cps',
                        'Cost per step {}'.format(n),
                        win='cps',
                        log_func=_run.log_scalar)
            plot_meters(viz,
                        val_cost_per_step,
                        'val_cps',
                        win='cps',
                        log_func=_run.log_scalar)
            plot_meters(viz,
                        test_cost_per_step,
                        'test_cps',
                        win='cps',
                        log_func=_run.log_scalar)

            # same layout for the 'cpsig' window
            plot_meters(viz,
                        train_cost_per_signal_level,
                        'cost/sig_train',
                        'Cost per signal {}'.format(n),
                        win='cpsig',
                        error_bars=False,
                        log_func=_run.log_scalar)
            plot_meters(viz,
                        val_cost_per_signal_level,
                        'cost/sig_val',
                        win='cpsig',
                        error_bars=False,
                        log_func=_run.log_scalar)
            plot_meters(viz,
                        test_cost_per_signal_level,
                        'cost/sig_test',
                        win='cpsig',
                        error_bars=False,
                        log_func=_run.log_scalar)

        # log the metrics of every tag and reset them for the next epoch
        xp_logger.log_with_tag(tag='*', reset=True)

        # the first loss field is followed by the train reward in parentheses
        msg = 'Losses: {:.3f}({:.3E})-{:.3f}-{:.3f}, Accuracies: {:.3f}-{:.3f}-{:.3f}, Avg cost: {:.3E}-{:.3E}-{:.3E}'
        msg = msg.format(xp_logger.classif_loss_train, xp_logger.reward_train,
                         xp_logger.classif_loss_validation,
                         xp_logger.classif_loss_test, xp_logger.accuracy_train,
                         xp_logger.accuracy_validation,
                         xp_logger.accuracy_test, xp_logger.average_cost_train,
                         xp_logger.average_cost_validation,
                         xp_logger.average_cost_test)
        logger.info(msg)

        # history of validation cost and accuracy; '_orig_' holds the keys of
        # the logged values (presumably the epoch indices -- TODO confirm)
        pareto_data = {
            'cost': xp_logger.logged['average_cost_validation'].values(),
            'acc': xp_logger.logged['accuracy_validation'].values(),
            '_orig_': xp_logger.logged['average_cost_validation'].keys()
        }

        pareto = paretize_exp(pareto_data, x_name='cost', crit_name='acc')

        # NOTE: `ex` is not defined in this function; it has to come from the
        # enclosing module
        if n in pareto['_orig_']:
            logger.info('New on front !')
            front.update(**pareto)
            save_checkpoint(adaptive_model, ex, n)
        # n starts at 1, so `n > 0` always holds here
        elif n > 0 and n % 50 == 0:
            logger.info('Checkpointing')
            save_checkpoint(adaptive_model, ex, n)

        logger.info(pareto['_orig_'])
        # the last entry of the front is reported as the best epoch
        best_epoch = pareto['_orig_'][-1]
        logger.info('Best \tVal: {:.3f} - Test: {:.3f} (Epoch {})\n'.format(
            xp_logger.logged['accuracy_validation'][best_epoch],
            xp_logger.logged['accuracy_test'][best_epoch], best_epoch))
Example #13
0
def train(cfg, writer, logger_old, args):
    """Run the training / validation loop and return the best validation IoU.

    The loop trains with gradient accumulation, evaluates on the validation
    loader at the end of every epoch, logs metrics to ``writer``,
    ``logger_old`` and a ``logger.Experiment`` (visdom), and saves a
    checkpoint whenever the validation mean IoU does not decrease.

    Args:
        cfg: Nested configuration mapping. Reads the optional top-level
            ``'seed'`` plus the ``'data'``, ``'model'`` and ``'training'``
            sections. ``cfg['data']['train_split']`` / ``['val_split']`` are
            overwritten in place when superpixels are enabled.
        writer: Summary writer; must provide ``add_scalar(tag, value, step)``
            and ``file_writer.get_logdir()``.
        logger_old: Logger-like object; only ``info`` is used.
        args: Namespace; ``args.name`` labels the experiment / output
            directory and ``args.evaluate`` enables dumping predictions and
            inputs as images for every training batch.

    Returns:
        The best validation ``'Mean IoU'`` score seen (restored from the
        checkpoint when resuming, otherwise starting from ``-100.0``).

    Raises:
        Exception: If ``cfg['training']['loss']['superpixels']`` is neither
            an ``int`` nor ``None``.
    """

    # Setup seeds (read the configured seed once and reuse it everywhere)
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Setup Augmentations
    augmentations = cfg['training'].get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    # An integer `superpixels` setting switches to the superpixel splits and
    # superpixel-level loss; None disables it; anything else is rejected.
    if isinstance(cfg['training']['loss']['superpixels'], int):
        use_superpixels = True
        cfg['data']['train_split'] = 'train_super'
        cfg['data']['val_split'] = 'val_super'
        setup_superpixels(cfg['training']['loss']['superpixels'])
    elif cfg['training']['loss']['superpixels'] is not None:
        raise Exception(
            "cfg['training']['loss']['superpixels'] is of the wrong type")
    else:
        use_superpixels = False

    t_loader = data_loader(data_path,
                           is_transform=True,
                           split=cfg['data']['train_split'],
                           superpixels=cfg['training']['loss']['superpixels'],
                           img_size=(cfg['data']['img_rows'],
                                     cfg['data']['img_cols']),
                           augmentations=data_aug)

    # Validation data is loaded without augmentations.
    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        superpixels=cfg['training']['loss']['superpixels'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=cfg['training']['batch_size'],
                                  num_workers=cfg['training']['n_workers'],
                                  shuffle=True)

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=cfg['training']['n_workers'])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)
    running_metrics_train = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    # Every optimizer setting except its 'name' is forwarded as a kwarg.
    optimizer_params = {
        k: v
        for k, v in cfg['training']['optimizer'].items() if k != 'name'
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger_old.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg['training']['lr_schedule'])

    loss_fn = get_loss_function(cfg)
    logger_old.info("Using loss {}".format(loss_fn))

    start_iter = 0
    # Initialised before the resume block so a checkpoint can override it.
    best_iou = -100.0
    resume_path = cfg['training']['resume']
    if resume_path is not None:
        if os.path.isfile(resume_path):
            logger_old.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            # map_location lets a checkpoint written on GPU be resumed on a
            # CPU-only host (and vice versa).
            checkpoint = torch.load(resume_path, map_location=device)
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            # Despite its name, "epoch" holds the iteration counter (i + 1)
            # at the time the checkpoint was written.
            start_iter = checkpoint["epoch"]
            # Restore the best score so that resuming does not overwrite the
            # best-model file with a worse model on the first evaluation.
            best_iou = checkpoint.get("best_iou", best_iou)
            logger_old.info("Loaded checkpoint '{}' (iter {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger_old.info("No checkpoint found at '{}'".format(
                resume_path))

    val_loss_meter = averageMeter()
    train_loss_meter = averageMeter()
    time_meter = averageMeter()

    train_len = t_loader.train_len
    val_static = 0  # consecutive evaluations without a new best mean IoU
    i = start_iter  # global batch counter (continues across resumes)
    j = 0  # batch counter since the last end-of-epoch evaluation
    flag = True  # cleared to leave the outer while loop

    # Prepare logging
    xp_name = cfg['model']['arch'] + '_' + \
        cfg['training']['loss']['name'] + '_' + args.name
    xp = logger.Experiment(xp_name,
                           use_visdom=True,
                           visdom_opts={
                               'server': 'http://localhost',
                               'port': 8098
                           },
                           time_indexing=False,
                           xlabel='Epoch')
    # log the hyperparameters of the experiment
    xp.log_config(flatten(cfg))
    # create parent metric for training metrics (easier interface)
    xp.ParentWrapper(tag='train',
                     name='parent',
                     children=(xp.AvgMetric(name="loss"),
                               xp.AvgMetric(name='acc'),
                               xp.AvgMetric(name='acccls'),
                               xp.AvgMetric(name='fwavacc'),
                               xp.AvgMetric(name='meaniu')))
    xp.ParentWrapper(tag='val',
                     name='parent',
                     children=(xp.AvgMetric(name="loss"),
                               xp.AvgMetric(name='acc'),
                               xp.AvgMetric(name='acccls'),
                               xp.AvgMetric(name='fwavacc'),
                               xp.AvgMetric(name='meaniu')))
    best_loss = xp.BestMetric(tag='val-best', name='loss', mode='min')
    best_acc = xp.BestMetric(tag='val-best', name='acc')
    best_acccls = xp.BestMetric(tag='val-best', name='acccls')
    best_fwavacc = xp.BestMetric(tag='val-best', name='fwavacc')
    best_meaniu = xp.BestMetric(tag='val-best', name='meaniu')

    xp.plotter.set_win_opts(name="loss", opts={'title': 'Loss'})
    xp.plotter.set_win_opts(name="acc", opts={'title': 'Micro-Average'})
    xp.plotter.set_win_opts(name="acccls", opts={'title': 'Macro-Average'})
    xp.plotter.set_win_opts(name="fwavacc", opts={'title': 'FreqW Accuracy'})
    xp.plotter.set_win_opts(name="meaniu", opts={'title': 'Mean IoU'})

    # Number of batches whose gradients are accumulated per optimizer step.
    it_per_step = cfg['training']['acc_batch_size']
    eff_batch_size = cfg['training']['batch_size'] * it_per_step
    while i <= train_len * (cfg['training']['epochs']) and flag:
        for (images, labels, labels_s, masks) in trainloader:
            i += 1
            j += 1
            start_ts = time.time()
            # NOTE(review): the scheduler is stepped once per batch and
            # before optimizer.step(); recent PyTorch versions warn about
            # this ordering. Left unchanged to keep the LR schedule as is.
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)
            labels_s = labels_s.to(device)
            masks = masks.to(device)

            outputs = model(images)
            if use_superpixels:
                # Loss is computed on superpixels; predictions are mapped
                # back to pixels so the running metrics stay pixel-level.
                outputs_s, labels_s, sizes = convert_to_superpixels(
                    outputs, labels_s, masks)
                loss = loss_fn(input=outputs_s, target=labels_s, size=sizes)
                outputs = convert_to_pixels(outputs_s, outputs, masks)
            else:
                loss = loss_fn(input=outputs, target=labels)

            # accumulate train metrics during train
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.data.cpu().numpy()
            running_metrics_train.update(gt, pred)
            train_loss_meter.update(loss.item())

            if args.evaluate:
                # np.squeeze(axis=0) requires a batch dimension of size 1.
                decoded = t_loader.decode_segmap(np.squeeze(pred, axis=0))
                misc.imsave("./{}.png".format(i), decoded)
                image_save = np.transpose(
                    np.squeeze(images.data.cpu().numpy(), axis=0), (1, 2, 0))
                misc.imsave("./{}.jpg".format(i), image_save)

            # accumulate gradients based on the accumulation batch size
            # (first batch of an accumulation window: clear old gradients)
            if i % it_per_step == 1 or it_per_step == 1:
                optimizer.zero_grad()

            # Scale each backward pass so the accumulated gradient is the
            # mean over the window rather than the sum.
            grad_rescaling = torch.tensor(1. / it_per_step).type_as(loss)
            loss.backward(grad_rescaling)
            # (last batch of an accumulation window: apply the update)
            if (i + 1) % it_per_step == 1 or it_per_step == 1:
                optimizer.step()
                optimizer.zero_grad()

            time_meter.update(time.time() - start_ts)
            # training logs
            if (j + 1) % (cfg['training']['print_interval'] *
                          it_per_step) == 0:
                fmt_str = "Epoch [{}/{}] Iter [{}/{:d}] Loss: {:.4f}  Time/Image: {:.4f}"
                total_iter = int(train_len / eff_batch_size)
                total_epoch = int(cfg['training']['epochs'])
                current_epoch = ceil((i + 1) / train_len)
                current_iter = int((j + 1) / it_per_step)
                print_str = fmt_str.format(
                    current_epoch, total_epoch, current_iter, total_iter,
                    loss.item(),
                    time_meter.avg / cfg['training']['batch_size'])

                print(print_str)
                logger_old.info(print_str)
                writer.add_scalar('loss/train_loss', loss.item(), i + 1)
                time_meter.reset()
            # end of epoch evaluation
            if (i + 1) % train_len == 0 or \
               (i + 1) == train_len * (cfg['training']['epochs']):
                # Flush any gradients still pending from an unfinished
                # accumulation window before evaluating.
                optimizer.step()
                optimizer.zero_grad()
                model.eval()
                with torch.no_grad():
                    # Iterate the loader directly so tqdm can read its
                    # length and display a total.
                    for (images_val, labels_val, labels_val_s,
                         masks_val) in tqdm(valloader):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)
                        labels_val_s = labels_val_s.to(device)
                        masks_val = masks_val.to(device)

                        outputs = model(images_val)
                        if use_superpixels:
                            outputs_s, labels_val_s, sizes_val = convert_to_superpixels(
                                outputs, labels_val_s, masks_val)
                            val_loss = loss_fn(input=outputs_s,
                                               target=labels_val_s,
                                               size=sizes_val)
                            outputs = convert_to_pixels(
                                outputs_s, outputs, masks_val)
                        else:
                            val_loss = loss_fn(input=outputs,
                                               target=labels_val)
                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar('loss/val_loss', val_loss_meter.avg, i + 1)
                writer.add_scalar('loss/train_loss', train_loss_meter.avg,
                                  i + 1)
                logger_old.info("Epoch %d Val Loss: %.4f" % (int(
                    (i + 1) / train_len), val_loss_meter.avg))
                logger_old.info("Epoch %d Train Loss: %.4f" % (int(
                    (i + 1) / train_len), train_loss_meter.avg))

                score, class_iou = running_metrics_train.get_scores()
                print("Training metrics:")
                for k, v in score.items():
                    print(k, v)
                    logger_old.info('{}: {}'.format(k, v))
                    writer.add_scalar('train_metrics/{}'.format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger_old.info('{}: {}'.format(k, v))
                    writer.add_scalar('train_metrics/cls_{}'.format(k), v,
                                      i + 1)

                xp.Parent_Train.update(loss=train_loss_meter.avg,
                                       acc=score['Overall Acc: \t'],
                                       acccls=score['Mean Acc : \t'],
                                       fwavacc=score['FreqW Acc : \t'],
                                       meaniu=score['Mean IoU : \t'])

                # From here on `score` / `class_iou` hold validation values.
                score, class_iou = running_metrics_val.get_scores()
                print("Validation metrics:")
                for k, v in score.items():
                    print(k, v)
                    logger_old.info('{}: {}'.format(k, v))
                    writer.add_scalar('val_metrics/{}'.format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger_old.info('{}: {}'.format(k, v))
                    writer.add_scalar('val_metrics/cls_{}'.format(k), v, i + 1)

                xp.Parent_Val.update(loss=val_loss_meter.avg,
                                     acc=score['Overall Acc: \t'],
                                     acccls=score['Mean Acc : \t'],
                                     fwavacc=score['FreqW Acc : \t'],
                                     meaniu=score['Mean IoU : \t'])

                xp.Parent_Val.log_and_reset()
                xp.Parent_Train.log_and_reset()
                best_loss.update(xp.loss_val).log()
                best_acc.update(xp.acc_val).log()
                best_acccls.update(xp.acccls_val).log()
                best_fwavacc.update(xp.fwavacc_val).log()
                best_meaniu.update(xp.meaniu_val).log()

                visdir = os.path.join('runs', cfg['training']['loss']['name'],
                                      args.name, 'plots.json')
                xp.to_json(visdir)

                # Start the next epoch with clean meters and counters.
                val_loss_meter.reset()
                train_loss_meter.reset()
                running_metrics_val.reset()
                running_metrics_train.reset()
                j = 0

                # Checkpoint on ties as well (>=), and reset the patience
                # counter; otherwise count one more stagnant evaluation.
                if score["Mean IoU : \t"] >= best_iou:
                    val_static = 0
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg['model']['arch'],
                                                      cfg['data']['dataset']))
                    torch.save(state, save_path)
                else:
                    val_static += 1

            # Stop when the iteration budget is exhausted or after 10
            # consecutive evaluations without a new best mean IoU.
            if (i + 1) == train_len * (
                    cfg['training']['epochs']) or val_static == 10:
                flag = False
                break
    return best_iou