def test_len(self):
    test_set = MPIIDataset('/datasets/mpii', 'test')
    test_set_len = 11731
    self.assertEqual(test_set_len, len(test_set))

    train_set = MPIIDataset('/datasets/mpii', 'train')
    train_set_len = 25925
    self.assertEqual(train_set_len, len(train_set))
def main(): """Main benchmark entrypoint function.""" args = parse_args() seed_random_number_generators(args.seed) model_file = args.model preds_file = args.output subset = args.subset model_state = torch.load(model_file) model = build_mpii_pose_model(**model_state['model_desc']) model.load_state_dict(model_state['state_dict']) print(model_state['model_desc']) use_flipped = not args.disable_flip print('Use flip augmentations: {}'.format(use_flipped)) dataset = MPIIDataset('/datasets/mpii', subset, use_aug=False, image_specs=model.image_specs) inference_time_meter = MedianValueMeter() preds = generate_predictions(model, dataset, use_flipped=use_flipped, time_meter=inference_time_meter, batch_size=1) # Save predictions to file if preds_file: with h5py.File(preds_file, 'w') as f: f.create_dataset('preds', data=preds.float().numpy()) # PyCharm console output breaks unless we pause here briefly time.sleep(0.2) # Print inference time per image time_mean, time_std = inference_time_meter.value() print() print('Inference time: {:0.2f}±{:0.2f} ms'.format(time_mean * 1000, time_std * 1000)) # Calculate and print PCKh accuracy evaluator = PCKhEvaluator() evaluate_mpii_predictions(preds, subset, evaluator) print() print('# Accuracy (PCKh)') print('all: {:0.6f}'.format(evaluator.meters['all'].value()[0])) print('total_mpii: {:0.6f}'.format( evaluator.meters['total_mpii'].value()[0])) print('total_anewell: {:0.6f}'.format( evaluator.meters['total_anewell'].value()[0]))
def test_image_specs(self):
    """Check that sample preprocessing honours the requested ImageSpecs."""
    image_specs = ImageSpecs(size=128, subtract_mean=True, divide_stddev=False)
    dataset = MPIIDataset(
        '/datasets/mpii', 'train', use_aug=False, image_specs=image_specs)
    sample = dataset[42]
    self.assertEqual((3, 128, 128), sample['input'].size())
    # Compare floating point extrema approximately rather than exactly
    self.assertAlmostEqual(-0.444027, sample['input'].min(), places=6)
    self.assertAlmostEqual(0.567317, sample['input'].max(), places=6)
def main(): """Main evaluation entrypoint function.""" args = parse_args() seed_random_number_generators(args.seed) model_file = args.model preds_file = args.preds subset = args.subset visualize = args.visualize batch_size = 6 model = None if model_file: model_state = torch.load(model_file) model = build_mpii_pose_model(**model_state['model_desc']) model.load_state_dict(model_state['state_dict']) model = model.cuda() print(model_state['model_desc']) if preds_file: # Load predictions from file with h5py.File(preds_file, 'r') as f: preds = torch.from_numpy(f['preds'][:]).double() elif model: # Generate predictions with the model use_flipped = not args.disable_flip print('Use flip augmentations: {}'.format(use_flipped)) dataset = MPIIDataset('/datasets/mpii', subset, use_aug=False, image_specs=model.image_specs) preds = generate_predictions(model, dataset, use_flipped=use_flipped, batch_size=batch_size) else: # We need to get predictions from somewhere! raise Exception( 'at least one of "--preds" and "--model" must be present') # Calculate PCKh accuracies evaluator = PCKhEvaluator() evaluate_mpii_predictions(preds, subset, evaluator) # Print PCKh accuracies for meter_name in sorted(evaluator.meters.keys()): meter = evaluator.meters[meter_name] mean, _ = meter.value() print(meter_name, mean) # Visualise predictions if visualize: dsnt.gui.run_gui(preds, subset, model)
def main(): """Main training entrypoint function.""" args = parse_args() seed_random_number_generators(args.seed) epochs = args.epochs batch_size = args.batch_size use_train_aug = not args.no_aug out_dir = args.out_dir base_model = args.base_model dilate = args.dilate truncate = args.truncate initial_lr = args.lr schedule_milestones = args.schedule_milestones schedule_gamma = args.schedule_gamma experiment_id = datetime.datetime.now().strftime('%Y%m%d-%H%M%S%f') exp_out_dir = os.path.join(out_dir, experiment_id) if out_dir else None print('Experiment ID: {}'.format(experiment_id)) #### # Model #### model_desc = { 'base': base_model, 'dilate': dilate, 'truncate': truncate, 'output_strat': args.output_strat, 'preact': args.preact, 'reg': args.reg, 'reg_coeff': args.reg_coeff, 'hm_sigma': args.hm_sigma, } model = build_mpii_pose_model(**model_desc) model.cuda() input_size = model.image_specs.size #### # Data #### train_data = MPIIDataset('/datasets/mpii', 'train', use_aug=use_train_aug, image_specs=model.image_specs, max_length=args.train_samples) train_loader = DataLoader(train_data, batch_size, num_workers=4, pin_memory=True, shuffle=True) val_data = MPIIDataset('/datasets/mpii', 'val', use_aug=False, image_specs=model.image_specs) val_loader = DataLoader(val_data, batch_size, num_workers=4, pin_memory=True) #### # Metrics and visualisation #### train_eval = PCKhEvaluator() val_eval = PCKhEvaluator() def eval_metrics_for_batch(evaluator, batch, norm_out): """Evaluate and accumulate performance metrics for batch.""" norm_out = norm_out.type(torch.DoubleTensor) # Coords in original MPII dataset space orig_out = torch.bmm(norm_out, batch['transform_m']).add_( batch['transform_b'].expand_as(norm_out)) norm_target = batch['part_coords'].double() orig_target = torch.bmm(norm_target, batch['transform_m']).add_( batch['transform_b'].expand_as(norm_target)) head_lengths = batch['normalize'].double() evaluator.add(orig_out, orig_target, batch['part_mask'], head_lengths) reporting = Reporting(train_eval, val_eval) tel = reporting.telemetry reporting.setup_console_output() if exp_out_dir: reporting.setup_folder_output(exp_out_dir) with open(os.path.join(exp_out_dir, 'cli_args.json'), 'w') as f: json.dump(vars(args), f, sort_keys=True, indent=2) if args.showoff: import pyshowoff with open('/etc/hostname', 'r') as f: hostname = f.read().strip() client = pyshowoff.Client('http://' + args.showoff) notebook = client.add_notebook( '[{}] Human pose ({}-d{}-t{}, {}, {}@{:.1e}, reg={})'.format( hostname, base_model, dilate, truncate, args.output_strat, args.optim, args.lr, args.reg)).result() for tag_name in args.tags: notebook.add_tag(tag_name) reporting.setup_showoff_output(notebook) progress_frame = notebook.add_frame('Progress', bounds={ 'x': 0, 'y': 924, 'width': 1920, 'height': 64 }).result() else: progress_frame = None # Set constant values tel['experiment_id'].set_value(experiment_id) tel['args'].set_value(vars(args)) # Generate a Graphviz graph to visualise the model dummy_data = torch.cuda.FloatTensor(1, 3, input_size, input_size).uniform_(0, 1) out_var = model(Variable(dummy_data, requires_grad=False)) if isinstance(out_var, list): out_var = out_var[-1] tel['model_graph'].set_value( make_dot(out_var, dict(model.named_parameters()))) del dummy_data best_val_acc_meter = tele.meter.MaxValueMeter(skip_reset=True) #### # Optimiser #### # Initialize optimiser and learning rate scheduler if args.optim == '1cycle': optimizer = optim.SGD(model.parameters(), lr=0) scheduler = make_1cycle(optimizer, epochs * 
len(train_loader), lr_max=initial_lr, momentum=0.9) else: if args.optim == 'sgd': optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=0.9) elif args.optim == 'rmsprop': optimizer = optim.RMSprop(model.parameters(), lr=initial_lr) else: raise Exception('unrecognised optimizer: {}'.format(args.optim)) scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=schedule_milestones, gamma=schedule_gamma) # `vis` will hold a few samples for visualisation vis = {} #### # Training #### def train(epoch): """Do a full pass over the training set, updating model parameters.""" if hasattr(scheduler, 'step'): scheduler.step(epoch) model.train() samples_processed = 0 with progressbar.ProgressBar(max_value=len(train_data)) as bar: for i, batch in generator_timer(enumerate(train_loader), tel['train_data_load_time']): if hasattr(scheduler, 'batch_step'): scheduler.batch_step() with timer(tel['train_data_transfer_time']): in_var = Variable(batch['input'].cuda(), requires_grad=False) target_var = Variable(batch['part_coords'].cuda(), requires_grad=False) mask_var = Variable(batch['part_mask'].type( torch.cuda.FloatTensor), requires_grad=False) with timer(tel['train_forward_time']): out_var = model(in_var) with timer(tel['train_criterion_time']): loss = model.forward_loss(out_var, target_var, mask_var) if np.isnan(loss.data[0]): state = { 'state_dict': model.state_dict(), 'model_desc': model_desc, 'optimizer': optimizer.state_dict(), 'epoch': epoch, 'input': in_var.data, 'target': target_var.data, 'mask': mask_var.data, } torch.save(state, 'model_dump.pth') raise Exception('training loss should not be nan') tel['train_loss'].add(loss.data[0]) with timer(tel['train_eval_time']): coords = model.compute_coords(out_var) eval_metrics_for_batch(train_eval, batch, coords) with timer(tel['train_backward_time']): optimizer.zero_grad() loss.backward() with timer(tel['train_optim_time']): optimizer.step() samples_processed += batch['input'].size(0) bar.update(samples_processed) if i == 0: vis['train_images'] = batch['input'] vis['train_preds'] = coords vis['train_masks'] = batch['part_mask'] vis['train_coords'] = batch['part_coords'] vis['train_heatmaps'] = model.heatmaps.data.cpu() if progress_frame is not None: so_far = epoch * len(train_data) + samples_processed total = epochs * len(train_data) notebook.set_progress(so_far / total) progress_frame.progress(so_far, total) def validate(epoch): '''Do a full pass over the validation set, evaluating model performance.''' model.eval() val_preds = torch.DoubleTensor(len(val_data), 16, 2) samples_processed = 0 with progressbar.ProgressBar(max_value=len(val_data)) as bar: for i, batch in enumerate(val_loader): in_var = Variable(batch['input'].cuda(), volatile=True) target_var = Variable(batch['part_coords'].cuda(), volatile=True) mask_var = Variable(batch['part_mask'].type( torch.cuda.FloatTensor), volatile=True) out_var = model(in_var) loss = model.forward_loss(out_var, target_var, mask_var) tel['val_loss'].add(loss.data[0]) coords = model.compute_coords(out_var) eval_metrics_for_batch(val_eval, batch, coords) preds = coords.double() pos = i * batch_size orig_preds = val_preds[pos:(pos + preds.size(0))] torch.baddbmm(batch['transform_b'], preds, batch['transform_m'], out=orig_preds) samples_processed += batch['input'].size(0) bar.update(samples_processed) if i == 0: vis['val_images'] = batch['input'] vis['val_preds'] = coords vis['val_masks'] = batch['part_mask'] vis['val_coords'] = batch['part_coords'] vis['val_heatmaps'] = model.heatmaps.data.cpu() 
tel['val_preds'].set_value(val_preds.numpy()) print('Entering the main training loop') for epoch in range(epochs): print('> Epoch {:3d}/{:3d}'.format(epoch + 1, epochs)) tel['epoch'].set_value(epoch) tel['epoch_time'].reset() print('Training pass...') train(epoch) print('Validation pass...') validate(epoch) train_sample = [] for i in range(min(16, vis['train_images'].size(0))): img = model.image_specs.unconvert(vis['train_images'][i], train_data) coords = (vis['train_preds'][i] + 1) * (input_size / 2) draw_skeleton(img, coords, vis['train_masks'][i]) train_sample.append(img) tel['train_sample'].set_value(train_sample) val_sample = [] for i in range(min(16, vis['val_images'].size(0))): img = model.image_specs.unconvert(vis['val_images'][i], val_data) coords = (vis['val_preds'][i] + 1) * (input_size / 2) draw_skeleton(img, coords, vis['val_masks'][i]) val_sample.append(img) tel['val_sample'].set_value(val_sample) def visualise_heatmaps(key): heatmap_images = [] for i in range(min(16, vis[key].size(0))): lwrist_hm = vis[key][i, PCKhEvaluator.JOINT_NAMES.index('lwrist')] rwrist_hm = vis[key][i, PCKhEvaluator.JOINT_NAMES.index('rwrist')] lwrist_hm = (lwrist_hm / lwrist_hm.max()).clamp_(0, 1) rwrist_hm = (rwrist_hm / rwrist_hm.max()).clamp_(0, 1) img = ToPILImage()(torch.stack( [rwrist_hm, lwrist_hm.clone().zero_(), lwrist_hm], 0)) heatmap_images.append(img) tel[key].set_value(heatmap_images) visualise_heatmaps('train_heatmaps') visualise_heatmaps('val_heatmaps') val_acc = val_eval.meters['total_mpii'].value()[0] is_best = best_val_acc_meter.add(val_acc) if exp_out_dir: state = { 'state_dict': model.state_dict(), 'model_desc': model_desc, 'optimizer': optimizer.state_dict(), 'epoch': epoch + 1, 'val_acc': val_acc, } torch.save(state, os.path.join(exp_out_dir, 'model.pth')) if is_best: torch.save(state, os.path.join(exp_out_dir, 'model-best.pth')) tel['best_val_preds'].set_value(tel['val_preds'].value()) tel.step() train_eval.reset() val_eval.reset() print()
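# Illustrative sketch (not part of the original script): the coordinate
# denormalisation used in eval_metrics_for_batch() and validate() above is a
# per-sample affine map, orig = norm x transform_m + transform_b. The helper
# below isolates that mapping; tensor shapes are assumed to be
# (batch, joints, 2) for coordinates and batched matrices/offsets for the
# transforms, matching how torch.bmm is called in the training script.

import torch


def denormalise_coords(norm_coords, transform_m, transform_b):
    """Map normalised [-1, 1] joint coordinates back to original MPII image space."""
    orig = torch.bmm(norm_coords.double(), transform_m)
    return orig.add_(transform_b.expand_as(orig))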
def main():
    """Main hyperparameter search entrypoint function."""
    args = parse_args()
    seed_random_number_generators(args.seed)

    model_desc = {
        'base': args.base_model,
        'dilate': args.dilate,
        'truncate': args.truncate,
        'output_strat': args.output_strat,
        'preact': args.preact,
        'reg': args.reg,
        'reg_coeff': args.reg_coeff,
        'hm_sigma': args.hm_sigma,
    }
    model = build_mpii_pose_model(**model_desc)
    model.cuda()

    train_data = MPIIDataset('/datasets/mpii', 'train', use_aug=True,
                             image_specs=model.image_specs)
    sampler = make_data_sampler(args.max_iters * args.batch_size, len(train_data))
    train_loader = DataLoader(train_data, args.batch_size, num_workers=4,
                              drop_last=True, sampler=sampler)
    data_iter = iter(train_loader)

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    def do_training_iteration(optimiser):
        batch = next(data_iter)

        in_var = Variable(batch['input'].cuda(), requires_grad=False)
        target_var = Variable(batch['part_coords'].cuda(), requires_grad=False)
        mask_var = Variable(batch['part_mask'].type(torch.cuda.FloatTensor),
                            requires_grad=False)

        # Calculate predictions and loss
        out_var = model(in_var)
        loss = model.forward_loss(out_var, target_var, mask_var)

        # Calculate gradients
        optimiser.zero_grad()
        loss.backward()

        # Update parameters
        optimiser.step()

        return loss.data[0]

    optimiser = SGD(model.parameters(), lr=1, weight_decay=args.weight_decay,
                    momentum=args.momentum)

    tel = tele.Telemetry({
        'cli_args': ValueMeter(skip_reset=True),
        'loss_lr': ValueMeter(),
    })

    tel['cli_args'].set_value(vars(args))

    if args.showoff:
        client = pyshowoff.Client('http://' + args.showoff)
        notebook = client.add_notebook(
            'Hyperparameter search ({}-d{}-t{}, {}, reg={})'.format(
                args.base_model, args.dilate, args.truncate, args.output_strat,
                args.reg)).result()

        tel.sink(tele.showoff.Conf(notebook), [
            Inspect(['cli_args'], 'CLI arguments', flatten=True),
            XYGraph(['loss_lr'], 'Loss vs learning rate graph'),
        ])

    lrs = np.geomspace(args.lr_min, args.lr_max, args.max_iters)
    avg_loss = 0
    min_loss = np.inf

    for i, lr in enumerate(tqdm(lrs, ascii=True)):
        for param_group in optimiser.param_groups:
            param_group['lr'] = lr

        loss = do_training_iteration(optimiser)
        avg_loss = args.ema_beta * avg_loss + (1 - args.ema_beta) * loss
        smoothed_loss = avg_loss / (1 - args.ema_beta ** (i + 1))

        if min_loss > 0 and smoothed_loss > 4 * min_loss:
            break
        min_loss = min(smoothed_loss, min_loss)

        tel['loss_lr'].set_value((lr, smoothed_loss))
        tel.step()
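# Illustrative sketch (not part of the original script): the sweep above tries
# geometrically spaced learning rates and smooths the loss with a
# bias-corrected exponential moving average, as in learning rate range tests.
# The numbers below only demonstrate the two formulas; beta=0.98 and the loss
# values are assumed example inputs, not taken from the original code.

import numpy as np

lrs = np.geomspace(1e-5, 1e0, num=6)  # [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0]

beta = 0.98
avg_loss = 0.0
for i, loss in enumerate([2.0, 1.5, 1.2]):
    avg_loss = beta * avg_loss + (1 - beta) * loss
    smoothed_loss = avg_loss / (1 - beta ** (i + 1))  # bias-corrected EMA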
def main(): """Main benchmark entrypoint function.""" args = parse_args() in_dir = Path(args.search_dir) subset = args.subset seed_random_number_generators(12345) exp_dirs = [ candidate.parent for candidate in in_dir.rglob('model.pth') if candidate.is_file() ] for exp_dir in sorted(exp_dirs): model_file = exp_dir / 'model.pth' preds_file = exp_dir / 'infer-{}.h5'.format(subset) metrics_file = exp_dir / 'infer-{}-metrics.json'.format(subset) if not model_file.is_file(): print('cannot find model.pth') continue if preds_file.is_file(): print('predictions found, skipping') continue model_state = torch.load(str(model_file)) model_desc = model_state['model_desc'] model = build_mpii_pose_model(**model_desc) model.load_state_dict(model_state['state_dict']) print(model_desc) dataset = MPIIDataset('/datasets/mpii', subset, use_aug=False, image_specs=model.image_specs) inference_time_meter = MedianValueMeter() preds = generate_predictions(model, dataset, use_flipped=False, time_meter=inference_time_meter, batch_size=1) # Save predictions to file with h5py.File(str(preds_file), 'w') as f: f.create_dataset('preds', data=preds.float().numpy()) time_median, time_err = inference_time_meter.value() print('Inference time: {:0.2f}±{:0.2f} ms'.format(time_median * 1000, time_err * 1000)) evaluator = PCKhEvaluator() evaluate_mpii_predictions(preds, subset, evaluator) metrics = { 'inference_time_ms': { 'median': time_median * 1000, 'error': time_err * 1000, # Median absolute deviation }, 'accuracy_pckh': { 'all': evaluator.meters['all'].value()[0], 'total_mpii': evaluator.meters['total_mpii'].value()[0], 'total_anewell': evaluator.meters['total_anewell'].value()[0], }, } with metrics_file.open('w') as f: json.dump(metrics, f, sort_keys=True, indent=2, separators=(',', ': '))
def test_getitem(self):
    dataset = MPIIDataset('/datasets/mpii', 'train', use_aug=False)
    sample = dataset[543]
    self.assertIn('input', sample)
    self.assertIn('part_coords', sample)