Example #1
def __init__(self, with_val=True):
    meters = {
        'config': ValueMeter(skip_reset=True),
        'host_info': ValueMeter(skip_reset=True),
        'epoch': ValueMeter(),
        'data_load_time': MeanValueMeter(),
        'data_transfer_time': MeanValueMeter(),
        'forward_time': MeanValueMeter(),
        'backward_time': MeanValueMeter(),
        'optim_time': MeanValueMeter(),
        'eval_time': MeanValueMeter(),
        'train_loss': MeanValueMeter(),
        'train_mpjpe': MeanValueMeter(),
        'train_pck': MeanValueMeter(),
        'train_examples': ValueMeter(),
    }
    if with_val:
        meters.update({
            'val_loss': MeanValueMeter(),
            'val_mpjpe': MeanValueMeter(),
            'val_pck': MeanValueMeter(),
            'val_examples': ValueMeter(),
        })
    self.with_val = with_val
    self.telemetry = tele.Telemetry(meters)
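
The constructor above (its owning class isn't shown in this snippet) only registers the meters; a sketch of how they might be fed during training follows. It assumes MeanValueMeter exposes an add() method, analogous to torchnet's AverageValueMeter, and that Telemetry supports item access and step() as seen in Examples #3 and #4; the reporter name, loop, and train_step() helper are hypothetical.

# Hypothetical usage sketch for the meters registered above.
reporter = EpochReporter(with_val=True)             # hypothetical owner class
reporter.telemetry['config'].set_value(config)      # skip_reset: survives step()
for epoch in range(num_epochs):
    reporter.telemetry['epoch'].set_value(epoch)
    for batch in train_loader:
        loss = train_step(batch)                    # hypothetical helper
        reporter.telemetry['train_loss'].add(loss)  # accumulates a running mean
    reporter.telemetry.step()                       # report and reset non-skip meters
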
Example #2
def __init__(self, train_eval, val_eval):
    self.telemetry = tele.Telemetry({
        'experiment_id': tele.meter.ValueMeter(skip_reset=True),
        'epoch': tele.meter.ValueMeter(),
        'train_loss': torchnet.meter.AverageValueMeter(),
        'val_loss': torchnet.meter.AverageValueMeter(),
        'epoch_time': torchnet.meter.TimeMeter(unit=False),
        'train_data_load_time': torchnet.meter.AverageValueMeter(),
        'train_data_transfer_time': torchnet.meter.AverageValueMeter(),
        'train_forward_time': torchnet.meter.AverageValueMeter(),
        'train_criterion_time': torchnet.meter.AverageValueMeter(),
        'train_backward_time': torchnet.meter.AverageValueMeter(),
        'train_optim_time': torchnet.meter.AverageValueMeter(),
        'train_eval_time': torchnet.meter.AverageValueMeter(),
        'train_sample': tele.meter.ValueMeter(),
        'val_sample': tele.meter.ValueMeter(),
        'train_heatmaps': tele.meter.ValueMeter(),
        'val_heatmaps': tele.meter.ValueMeter(),
        'args': tele.meter.ValueMeter(skip_reset=True),
        'train_pckh_total': train_eval.meters['total_mpii'],
        'val_pckh_total': val_eval.meters['total_mpii'],
        'train_pckh_all': train_eval.meters['all'],
        'val_pckh_all': val_eval.meters['all'],
        'val_preds': tele.meter.ValueMeter(),
        'best_val_preds': tele.meter.ValueMeter(skip_reset=True),
        'model_graph': tele.meter.ValueMeter(skip_reset=True),
    })
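
Entries such as 'train_pckh_total' register meter objects owned by the evaluators rather than fresh ones, so accuracy values accumulated inside train_eval and val_eval are reported by Telemetry without any copying. A minimal sketch of that sharing pattern, using a hypothetical evaluator class:

# Hypothetical sketch: sharing one meter between an evaluator and Telemetry.
import torchnet

class PCKhEvaluator:
    def __init__(self):
        self.meters = {'total_mpii': torchnet.meter.AverageValueMeter()}

    def add(self, pckh):
        self.meters['total_mpii'].add(pckh)

train_eval = PCKhEvaluator()
telemetry = tele.Telemetry({'train_pckh_total': train_eval.meters['total_mpii']})
train_eval.add(0.87)  # the shared meter now holds the value Telemetry will report
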
Example #3
def main():
    args = parse_args()

    seed_random_number_generators(args.seed)

    model_desc = {
        'base': args.base_model,
        'dilate': args.dilate,
        'truncate': args.truncate,
        'output_strat': args.output_strat,
        'preact': args.preact,
        'reg': args.reg,
        'reg_coeff': args.reg_coeff,
        'hm_sigma': args.hm_sigma,
    }
    model = build_mpii_pose_model(**model_desc)
    model.cuda()

    train_data = MPIIDataset('/datasets/mpii',
                             'train',
                             use_aug=True,
                             image_specs=model.image_specs)
    sampler = make_data_sampler(args.max_iters * args.batch_size,
                                len(train_data))
    train_loader = DataLoader(train_data,
                              args.batch_size,
                              num_workers=4,
                              drop_last=True,
                              sampler=sampler)
    data_iter = iter(train_loader)

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    def do_training_iteration(optimiser):
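        # NOTE: this example predates PyTorch 0.4; the Variable wrapping and
        # loss.data[0] below correspond to the .to(device) and loss.item()
        # calls used in Example #4.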
        batch = next(data_iter)

        in_var = Variable(batch['input'].cuda(), requires_grad=False)
        target_var = Variable(batch['part_coords'].cuda(), requires_grad=False)
        mask_var = Variable(batch['part_mask'].type(torch.cuda.FloatTensor),
                            requires_grad=False)

        # Calculate predictions and loss
        out_var = model(in_var)
        loss = model.forward_loss(out_var, target_var, mask_var)

        # Calculate gradients
        optimiser.zero_grad()
        loss.backward()

        # Update parameters
        optimiser.step()

        return loss.data[0]

    optimiser = SGD(model.parameters(),
                    lr=1,
                    weight_decay=args.weight_decay,
                    momentum=args.momentum)

    tel = tele.Telemetry({
        'cli_args': ValueMeter(skip_reset=True),
        'loss_lr': ValueMeter(),
    })

    tel['cli_args'].set_value(vars(args))

    if args.showoff:
        client = pyshowoff.Client('http://' + args.showoff)
        notebook = client.add_notebook(
            'Hyperparameter search ({}-d{}-t{}, {}, reg={})'.format(
                args.base_model, args.dilate, args.truncate, args.output_strat,
                args.reg)).result()

        tel.sink(tele.showoff.Conf(notebook), [
            Inspect(['cli_args'], 'CLI arguments', flatten=True),
            XYGraph(['loss_lr'], 'Loss vs learning rate graph'),
        ])

    lrs = np.geomspace(args.lr_min, args.lr_max, args.max_iters)
    avg_loss = 0
    min_loss = np.inf
    for i, lr in enumerate(tqdm(lrs, ascii=True)):
        for param_group in optimiser.param_groups:
            param_group['lr'] = lr
        loss = do_training_iteration(optimiser)
        avg_loss = args.ema_beta * avg_loss + (1 - args.ema_beta) * loss
        smoothed_loss = avg_loss / (1 - args.ema_beta**(i + 1))
        if min_loss > 0 and smoothed_loss > 4 * min_loss:
            break
        min_loss = min(smoothed_loss, min_loss)

        tel['loss_lr'].set_value((lr, smoothed_loss))

        tel.step()
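
The smoothing inside the loop is an exponential moving average with bias correction (the same correction used by Adam): avg_loss is initialised at zero, and dividing by 1 - ema_beta ** (i + 1) removes the resulting startup bias. A self-contained sketch of just the smoothing:

# Bias-corrected exponential moving average, as used in the loop above.
def ema_smooth(losses, beta=0.98):
    avg, smoothed = 0.0, []
    for i, loss in enumerate(losses):
        avg = beta * avg + (1 - beta) * loss
        smoothed.append(avg / (1 - beta ** (i + 1)))  # correct for the zero init
    return smoothed

print(ema_smooth([2.0, 2.0, 2.0]))  # [2.0, 2.0, 2.0]: a constant loss stays constant
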
Example #4
def sacred_main(_run: Run, seed, showoff, batch_size, model_desc, deterministic, train_datasets,
                lr_min, lr_max, max_iters, ema_beta, weight_decay, momentum):
    seed_all(seed)
    init_algorithms(deterministic=deterministic)

    model = create_model(model_desc).to(global_opts['device'])
    data_loader = create_train_dataloader(train_datasets, model.data_specs, batch_size,
                                          examples_per_epoch=(max_iters * batch_size))
    data_iter = iter(data_loader)

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    def do_training_iteration(optimiser):
        batch = next(data_iter)

        in_var = batch['input'].to(global_opts['device'], torch.float32)
        target_var = batch['target'].to(global_opts['device'], torch.float32)
        mask_var = batch['joint_mask'].to(global_opts['device'], torch.float32)

        # Calculate predictions and loss
        out_var = model(in_var)
        loss = forward_loss(model, out_var, target_var, mask_var, batch['valid_depth'])

        # Calculate gradients
        optimiser.zero_grad()
        loss.backward()

        # Update parameters
        optimiser.step()

        return loss.item()

    optimiser = SGD(model.parameters(), lr=1, weight_decay=weight_decay, momentum=momentum)

    tel = tele.Telemetry({
        'config': ValueMeter(skip_reset=True),
        'host_info': ValueMeter(skip_reset=True),
        'loss_lr_fig': ValueMeter(),
    })

    notebook = None
    if showoff:
        title = 'Hyperparameter search ({}@{})'.format(model_desc['type'], model_desc['version'])
        notebook = create_showoff_notebook(title, ['lrfinder'])

        from tele.showoff import views

        tel.sink(tele.showoff.Conf(notebook), [
            views.Inspect(['config'], 'Experiment configuration', flatten=True),
            views.Inspect(['host_info'], 'Host information', flatten=True),
            views.FrameContent(['loss_lr_fig'], 'Loss vs learning rate graph', 'plotly'),
        ])

    def set_progress(value):
        if notebook is not None:
            notebook.set_progress(value)

    tel['config'].set_value(_run.config)
    tel['host_info'].set_value(get_host_info())

    lrs = np.geomspace(lr_min, lr_max, max_iters)
    losses = []
    avg_loss = 0
    min_loss = np.inf
    for i, lr in enumerate(tqdm(lrs, ascii=True)):
        set_progress(i / len(lrs))

        for param_group in optimiser.param_groups:
            param_group['lr'] = lr
        loss = do_training_iteration(optimiser)
        avg_loss = ema_beta * avg_loss + (1 - ema_beta) * loss
        smoothed_loss = avg_loss / (1 - ema_beta ** (i + 1))
        if min_loss > 0 and smoothed_loss > 4 * min_loss:
            break
        min_loss = min(smoothed_loss, min_loss)
        losses.append(smoothed_loss)

        if i % 10 == 0:
            fig = go.Figure(
                data=[go.Scatter(x=lrs[:len(losses)].tolist(), y=losses, mode='lines')],
                layout=go.Layout(
                    margin=go.Margin(l=60, r=40, b=80, t=20, pad=4),
                    xaxis=go.XAxis(title='Learning rate', type='log', exponentformat='power'),
                    yaxis=go.YAxis(title='Training loss'),
                )
            )
            tel['loss_lr_fig'].set_value(fig)
            tel.step()

    set_progress(1)
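
Neither example picks a learning rate automatically; the loss-versus-rate curve is inspected by hand. A common heuristic for this kind of range test (popularised by Leslie Smith's work and the fastai LR finder, not part of the code above) is to take a rate roughly an order of magnitude below the one that minimised the smoothed loss. A hypothetical post-processing step:

import numpy as np

def suggest_lr(lrs, losses, margin=10.0):
    # Hypothetical heuristic: back off from the loss minimum by `margin`.
    best = int(np.argmin(losses))
    return lrs[best] / margin

# e.g. suggest_lr(lrs[:len(losses)], losses) after the loop in Example #4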