def loadDataset():
    """Load the NTU 2D pose dataset and package it into the standard dict
    (dimensions, raw dataset handle, train/val/test pose arrays + masks).

    Note: the held-out split is used for both 'val' and 'test'.
    """
    # frame rate is 30fps, but we skip frames so that we go down to 15fps
    effective_fps = 15
    # keep a couple of seconds of motion; some sequences are shorter than
    # others, so there's a definite tradeoff in this choice
    seq_length = 2 * effective_fps
    # drop sequences shorter than ~800ms (0.8s at the effective frame rate)
    discard_shorter = int(round(0.8 * effective_fps))
    # stride between successive extracted subsequences
    gap = 23

    data = P2DDataset(
        data_file_path='./ntu_data.h5', seq_length=seq_length, gap=gap)

    dataset = {
        'dim_observations': data.dim_obs,
        'data_type': 'real',
        'p2d': data,
    }
    dataset['train'], dataset['mask_train'] = data.get_ds_for_train(
        train=True,
        seq_length=seq_length,
        discard_shorter=discard_shorter,
        gap=gap)
    # fetch the held-out split once and alias it as both val and test
    heldout_pair = data.get_ds_for_train(
        train=False,
        seq_length=seq_length,
        discard_shorter=discard_shorter,
        gap=gap)
    dataset['val'], dataset['mask_val'] = heldout_pair
    dataset['test'], dataset['mask_test'] = heldout_pair

    print('Shapes of various things:')
    for key in ('train', 'val', 'test'):
        print('%s: %s' % (key, dataset[key].shape))

    return dataset
def load_data(data_file, seq_length, seq_skip):
    """Load a 2D pose dataset and return (train_X, train_A, val_X, val_A, db),
    where the A arrays are integer action labels derived from one-hot vectors.
    """
    db = P2DDataset(data_file, seq_length, seq_skip, gap=13, remove_head=True)

    def split(is_train):
        # middle element of the tuple (mask) is unused here;
        # collapse one-hot action vectors to integer class indices
        poses, _, actions_oh = db.get_pose_ds(train=is_train)
        return poses, np.argmax(actions_oh, axis=-1)

    train_X, train_A = split(True)
    val_X, val_A = split(False)
    return train_X, train_A, val_X, val_A, db
def loadDataset():
    """Load the MPII Cooking Activities 2 pose dataset and package it into
    the standard dataset dict (poses/masks/conditioning values, plus
    action-classification and sequence-completion subsets).

    Note: the held-out split is used for both 'val' and 'test'.
    """
    seq_length = 32
    # skip of 1 because the data is already downsampled aggressively
    seq_skip = 1
    gap = 3
    data = P2DDataset(
        './mpii_ca2.h5',
        seq_length,
        seq_skip,
        gap=gap,
        val_frac=0.2,
        have_actions=True,
        completion_length=256,
        aclass_full_length=96,
        aclass_act_length=8,
        head_vel=False)

    # TODO: factor this out into common code (it's shared with IkeaDB and will
    # probably end up shared with Penn)
    dataset = {
        'dim_observations': data.dim_obs,
        'data_type': 'real',
        'p2d': data,
    }
    dataset['train'], dataset['mask_train'], dataset['train_cond_vals'] \
        = data.get_pose_ds(train=True)
    # fetch the held-out split once; alias as both val and test
    heldout = data.get_pose_ds(train=False)
    dataset['val'], dataset['mask_val'], dataset['val_cond_vals'] = heldout
    dataset['test'], dataset['mask_test'], dataset['test_cond_vals'] = heldout
    # for action prediction
    dataset['train_aclass_ds'] = data.get_aclass_ds(train=True)
    dataset['val_aclass_ds'] = data.get_aclass_ds(train=False)
    # for sequence completion
    dataset['train_completions'] = data.get_completion_ds(train=True)
    dataset['val_completions'] = data.get_completion_ds(train=False)

    print('Shapes of various things:')
    for key in ('train', 'val', 'test', 'train_cond_vals', 'val_cond_vals',
                'test_cond_vals'):
        print('%s: %s' % (key, dataset[key].shape))
    for key in ('train_aclass_ds', 'val_aclass_ds', 'train_completions',
                'val_completions'):
        print('%s: %d (list)' % (key, len(dataset[key])))

    return dataset
def loadDataset():
    """Load the IkeaDB 2D pose dataset and package it into the standard
    dataset dict (poses/masks/conditioning values, plus action-classification
    and sequence-completion subsets).

    Note: the held-out split is used for both 'val' and 'test'.
    """
    seq_length = 64
    # NOTE: earlier revisions also defined seq_skip = 3 and gap = 4 here, but
    # neither was ever passed to P2DDataset, so they had no effect; removed
    # (along with a commented-out remove_head=True argument).
    data = P2DDataset(
        './ikea_action_data.h5',
        seq_length,
        completion_length=256,
        have_actions=True,
        head_vel=True)

    dataset = {}
    dataset['dim_observations'] = data.dim_obs
    dataset['data_type'] = 'real'
    dataset['p2d'] = data
    dataset['train'], dataset['mask_train'], dataset['train_cond_vals'] \
        = data.get_pose_ds(train=True)
    # the held-out split doubles as both validation and test set
    dataset['val'], dataset['mask_val'], dataset['val_cond_vals'] \
        = dataset['test'], dataset['mask_test'], dataset['test_cond_vals'] \
        = data.get_pose_ds(train=False)
    # for action prediction
    dataset['train_aclass_ds'] = data.get_aclass_ds(train=True)
    dataset['val_aclass_ds'] = data.get_aclass_ds(train=False)
    # for sequence completion
    dataset['train_completions'] = data.get_completion_ds(train=True)
    dataset['val_completions'] = data.get_completion_ds(train=False)

    print('Shapes of various things:')
    to_check_shape = [
        'train', 'val', 'test', 'train_cond_vals', 'val_cond_vals',
        'test_cond_vals'
    ]
    for to_shape in to_check_shape:
        print('%s: %s' % (to_shape, dataset[to_shape].shape))
    to_check_len = [
        'train_aclass_ds', 'val_aclass_ds', 'train_completions',
        'val_completions'
    ]
    for name in to_check_len:
        print('%s: %d (list)' % (name, len(dataset[name])))

    return dataset
action='store_true',
dest='is_3d',
default=False,
help='treat this as a 3D dataset')
# NOTE(review): the lines above are the trailing keyword arguments of a
# parser.add_argument(...) call whose opening lies outside this chunk.

if __name__ == '__main__':
    args = parser.parse_args()
    # all cached baseline results are written under the user-chosen directory
    cache_dir = args.output_dir
    mkdir_p(cache_dir)
    if args.is_3d:
        dataset = P3DDataset(args.dataset_path)
    else:
        if args.polar:
            # XXX: should move "polar" parameter into the .h5 file once I
            # decide whether polar is a good idea.
            kwargs = dict(relative=True, polar=True)
            print('using polar representation')
        else:
            print('using relative displacement representation (not polar)')
            kwargs = {}
        dataset = P2DDataset(args.dataset_path, 32, **kwargs)
    # will be passed to other fns to train
    identifier = args.model_type
    # look up the network module implementing the chosen baseline type
    module = NETWORK_MODULES[identifier]
    baseline = generic_caching_baseline(module, identifier, cache_dir, dataset)
    # write_baseline(cache_dir, dataset, steps_to_predict, method):
    write_baseline(cache_dir, dataset, dataset.eval_test_length, baseline)
fp['/extra_data'] = json.dumps(extra_data) if __name__ == '__main__': args = parser.parse_args() extra_data = {} if args.is_3d: dataset = P3DDataset(args.dataset_path) cond_on, pred_on = dataset.get_ds_for_eval(train=False) cond_on_orig = f32(dataset.reconstruct_skeletons(cond_on)) pred_on_orig = f32(dataset.reconstruct_skeletons(pred_on)) pred_val = pred_scales = None cond_actions = pred_actions = action_names = None else: dataset = P2DDataset(args.dataset_path, 32) evds = dataset.get_ds_for_eval(train=False) cond_on = evds['conditioning'] pred_on = evds['prediction'] pred_scales = evds['prediction_scales'] if dataset.has_sparse_annos: pred_val = evds['prediction_valids'] else: pred_val = None extra_data['pck_joints'] = dataset.pck_joints seq_ids = evds['seq_ids'] pred_frame_numbers = evds['prediction_frame_nums'] cond_frame_numbers = evds['conditioning_frame_nums'] cond_on_orig = f32( dataset.reconstruct_poses(cond_on, seq_ids, cond_frame_numbers)) pred_on_orig = f32(
def eval(args):
    """Evaluate a trained action classifier on predicted vs. ground-truth
    pose sequences stored in an HDF5 results file, and write an
    accuracy/F1/MNLL report next to the work dir.

    NOTE(review): this function shadows the builtin ``eval``; callers
    presumably invoke it by this name, so it is kept as-is.
    """
    print('Looking for model')
    checkpoint_dir = os.path.join(args.work_dir, 'chkpt-aclass')
    orig_model = get_best_model(checkpoint_dir)
    # input_shape is batch*time*channels
    _, _, num_chans = orig_model.input_shape
    _, _, num_acts = orig_model.output_shape

    print("Fetching results from '%s'" % args.results_file)
    with h5py.File(args.results_file, 'r') as fp:
        # .value is the (old) h5py API for reading a whole dataset into memory
        pred_poses = fp['/poses_2d_pred'].value
        true_poses = fp['/poses_2d_true'].value
        # need to reprocess the actions :/
        raw_actions = fp['/pred_actions_2d'].value

    # assumes pred_poses is (seqs, samples, time, joints, 2) — TODO confirm
    assert pred_poses.ndim == 5, "expected 5D, got %s" % (pred_poses.shape)
    # we need to flatten out separate samples for pred_poses, then double
    # up true_poses, raw_actions, etc. as appropriate
    sample_dim = pred_poses.shape[1]
    new_pp_shape = (pred_poses.shape[0] * pred_poses.shape[1],
                    ) + pred_poses.shape[2:]
    pred_poses = pred_poses.reshape(new_pp_shape)
    # wooohoo broadcast hacks
    # dupe_marker repeats each sequence index sample_dim times so that
    # true_poses/raw_actions line up with the flattened pred_poses
    dupe_marker \
        = (np.arange(len(raw_actions)).reshape((-1, 1))
           + np.zeros((1, sample_dim), dtype='int')).flatten()
    assert dupe_marker.size == len(pred_poses), \
        "dupe_marker.shape %s, pred_poses.shape %s" \
        % (dupe_marker.shape, pred_poses.shape)
    raw_actions = raw_actions[dupe_marker]
    true_poses = true_poses[dupe_marker]
    new_seq_len = pred_poses.shape[1]
    assert pred_poses.shape == true_poses.shape, \
        "pred_poses.shape %s, true_poses.shape %s" \
        % (pred_poses.shape, true_poses.shape)

    print('Creating new model')
    # rebuild the classifier for the (possibly different) sequence length,
    # then transplant the trained weights into it
    new_model = make_model(new_seq_len, num_chans, num_acts)
    print('Initialising weights')
    copy_weights(orig_model, new_model)

    print('Loading dataset')
    dataset = P2DDataset(args.dataset_path, 32)
    merged_actions, merged_act_names = merge_actions(
        raw_actions, dataset.action_names, args.dataset_name)
    # actions outside [0, num_merged_actions) are treated as invalid and
    # excluded from all metrics below
    valid_acts = (0 <= merged_actions) \
        & (merged_actions < len(merged_act_names))
    print('%d/%d actions invalid (will be ignored)' %
          ((~valid_acts).flatten().sum(), valid_acts.size))

    def make_predictions(poses):
        # Classify `poses` with the rebuilt model and return a text report
        # of accuracy, weighted F1 and mean NLL over valid frames only.
        postproc = postprocess_poses(poses)
        one_hot = new_model.predict(postproc)
        labels = one_hot_argmax(one_hot)
        # now do MNLL, accuracy
        flat_valid_flags = valid_acts.flatten()
        flat_labels = labels.flatten()[flat_valid_flags]
        flat_true_labels = merged_actions.flatten()[flat_valid_flags]
        flat_dists = one_hot.reshape(
            (-1, one_hot.shape[-1]))[flat_valid_flags]
        accuracy = accuracy_score(flat_true_labels, flat_labels)
        f1 = f1_score(flat_true_labels, flat_labels, average='weighted')
        # NOTE(review): NLL is taken at the *predicted* label, not the true
        # label — looks intentional but worth confirming.
        nll = -np.log(flat_dists[np.arange(len(flat_dists)), flat_labels])
        report = '\n'.join([
            'Accuracy: %f' % accuracy, 'F1: %f' % f1,
            'MNLL: %f' % nll.mean()
        ])
        return report

    # TODO: maybe a better metric would show how much *worse* the predicted
    # actions are? Could measure "extra bits required to encode" or something
    # like that.
    print('How good do the predictions look?')
    pred_report = make_predictions(pred_poses)
    print(pred_report)
    print('How good do the originals look?')
    orig_report = make_predictions(true_poses)
    print(orig_report)

    dest_path = os.path.join(
        args.work_dir, os.path.basename(args.results_file) + '-results.txt')
    print("Writing results to '%s'" % dest_path)
    with open(dest_path, 'w') as fp:
        print("# Results for %s" % args.results_file, file=fp)
        print('\n## Predictions', file=fp)
        print(pred_report, file=fp)
        print('\n## Originals', file=fp)
        print(orig_report, file=fp)
def train(args):
    """Train an action classifier on reconstructed training pose sequences.

    Loads the 2D pose dataset, reconstructs each (masked) training sequence
    back into original coordinates, one-hot encodes the merged action labels,
    and fits the classifier with early stopping + checkpointing.
    """
    dataset = P2DDataset(args.dataset_path, 32)
    train_length = dataset.eval_condition_length + dataset.eval_test_length
    # NTU sequences are extracted with a larger stride than the other datasets
    if args.dataset_name == 'ntu':
        train_gap = max(dataset.eval_seq_gap, train_length)
    else:
        train_gap = max(dataset.eval_seq_gap, train_length // 2)
    train_ds = dataset.get_ds_for_train_extra(train=True,
                                              seq_length=train_length,
                                              gap=train_gap,
                                              discard_shorter=False)

    # now we've got to reconstruct (painful thanks to masks)
    orig_poses = train_ds['poses']
    dest_pose_blocks = []
    for seq_idx in range(len(train_ds['poses'])):
        orig_pose_block = orig_poses[seq_idx]
        # we will truncate the current sequence so that the crappy stuff is
        # masked: find the first fully-masked step (unmask_end)
        mask = train_ds['masks'][seq_idx]
        amask = mask.reshape((len(mask), -1)).all(axis=-1)
        for unmask_end in range(len(mask)):
            if not amask[unmask_end]:
                break
        else:
            # no masked step found: the whole sequence is usable
            unmask_end = len(mask)
        if not unmask_end:
            # sometimes all steps are masked out for god knows what reason
            # ergo, just throw some empty crap into the queue
            padded = np.full((len(orig_pose_block),
                              2 * len(dataset.parents)), MASK_VALUE)
        else:
            frame_numbers = train_ds['frame_numbers'][seq_idx, :unmask_end]
            vid_name = train_ds['vid_names'][seq_idx]
            reconst_block = dataset.reconstruct_poses(
                orig_pose_block[None, :unmask_end],
                vid_names=[vid_name],
                frame_inds=frame_numbers[None])
            # now postprocess and pad back up to the full block length
            postprocessed, = postprocess_poses(reconst_block)
            assert postprocessed.ndim == 2, \
                "should be 2D, got %s" % (postprocessed.shape,)
            pad_spec = [(0,
                         orig_pose_block.shape[0] - postprocessed.shape[0]),
                        (0, 0)]
            padded = np.pad(postprocessed,
                            pad_spec,
                            'constant',
                            constant_values=MASK_VALUE)
        dest_pose_blocks.append(padded)
    train_poses = np.stack(dest_pose_blocks, axis=0)

    # oh, and we can deal with actions
    merged_actions, merged_act_names = merge_actions(train_ds['actions'],
                                                     dataset.action_names,
                                                     args.dataset_name)
    oh_merged_actions = one_hot_cat(merged_actions, len(merged_act_names))
    assert oh_merged_actions.shape[:2] == train_poses.shape[:2]
    # NOTE: earlier revisions balanced action classes here and dumped a
    # meta.json with the action names; that code was commented out and has
    # been removed.
    n_actions = len(merged_act_names)
    print('Number of actions: %d' % n_actions)
    print('Actions: ' + ', '.join(merged_act_names))

    checkpoint_dir = os.path.join(args.work_dir, 'chkpt-aclass')
    os.makedirs(checkpoint_dir, exist_ok=True)

    # gotta shuffle so that validation split is random
    data_perm = np.random.permutation(len(train_poses))
    train_poses = train_poses[data_perm]
    oh_merged_actions = oh_merged_actions[data_perm]
    seq_len, num_channels = train_poses.shape[1:]
    model = make_model(seq_len, num_channels, n_actions)
    # nb_epoch is the Keras 1.x spelling (renamed to `epochs` in Keras 2)
    model.fit(train_poses,
              oh_merged_actions,
              batch_size=256,
              nb_epoch=1000,
              validation_split=0.1,
              callbacks=[
                  EarlyStopping(monitor='val_acc', patience=50),
                  ModelCheckpoint(os.path.join(
                      checkpoint_dir,
                      'action-classifier-{epoch:02d}-{val_loss:.2f}.hdf5'),
                                  save_best_only=True)
              ],
              shuffle=True)