Ejemplo n.º 1
0
def loadDataset():
    """Build the NTU dataset dict consumed by downstream training code."""
    # source videos are 30fps; frames are skipped to an effective 15fps
    effective_fps = 15
    # keep two seconds per sequence — some clips are shorter than others,
    # so this is a definite tradeoff
    seq_length = 2 * effective_fps
    # drop anything under roughly 800ms
    discard_shorter = int(round(0.8 * effective_fps))
    # stride between the starts of successive chosen sequences
    gap = 23

    data = P2DDataset(data_file_path='./ntu_data.h5',
                      seq_length=seq_length,
                      gap=gap)

    dataset = {
        'dim_observations': data.dim_obs,
        'data_type': 'real',
        'p2d': data,
    }

    dataset['train'], dataset['mask_train'] = data.get_ds_for_train(
        train=True, seq_length=seq_length,
        discard_shorter=discard_shorter, gap=gap)
    # validation and test share the same held-out split
    held_out = data.get_ds_for_train(
        train=False, seq_length=seq_length,
        discard_shorter=discard_shorter, gap=gap)
    dataset['val'], dataset['mask_val'] = held_out
    dataset['test'], dataset['mask_test'] = held_out

    print('Shapes of various things:')
    for split in ('train', 'val', 'test'):
        print('%s: %s' % (split, dataset[split].shape))

    return dataset
Ejemplo n.º 2
0
def load_data(data_file, seq_length, seq_skip):
    """Load train/val pose arrays plus integer action labels and the dataset.

    Returns (train_X, train_A, val_X, val_A, db) where the action arrays are
    class indices obtained by argmax over the dataset's one-hot labels.
    """
    db = P2DDataset(data_file, seq_length, seq_skip, gap=13, remove_head=True)
    # get_pose_ds yields (poses, mask, one-hot actions); the mask is unused
    train_X, _, train_onehot = db.get_pose_ds(train=True)
    val_X, _, val_onehot = db.get_pose_ds(train=False)
    # collapse one-hot vectors down to integer class labels
    train_A = np.argmax(train_onehot, axis=-1)
    val_A = np.argmax(val_onehot, axis=-1)
    return train_X, train_A, val_X, val_A, db
Ejemplo n.º 3
0
def loadDataset():
    """Build the dataset dict for the MPII Cooking Activities 2 data."""
    seq_length = 32
    # skip of 1 because the data is already downsampled aggressively
    seq_skip = 1
    gap = 3
    data = P2DDataset(
        './mpii_ca2.h5',
        seq_length,
        seq_skip,
        gap=gap,
        val_frac=0.2,
        have_actions=True,
        completion_length=256,
        aclass_full_length=96,
        aclass_act_length=8,
        head_vel=False)

    # TODO: factor this out into common code (it's shared with IkeaDB and will
    # probably end up shared with Penn)
    dataset = {
        'dim_observations': data.dim_obs,
        'data_type': 'real',
        'p2d': data,
    }

    train_triple = data.get_pose_ds(train=True)
    dataset['train'], dataset['mask_train'], dataset['train_cond_vals'] \
        = train_triple
    # validation and test share the same held-out split
    eval_triple = data.get_pose_ds(train=False)
    dataset['val'], dataset['mask_val'], dataset['val_cond_vals'] \
        = eval_triple
    dataset['test'], dataset['mask_test'], dataset['test_cond_vals'] \
        = eval_triple
    # for action prediction
    dataset['train_aclass_ds'] = data.get_aclass_ds(train=True)
    dataset['val_aclass_ds'] = data.get_aclass_ds(train=False)
    # for sequence completion
    dataset['train_completions'] = data.get_completion_ds(train=True)
    dataset['val_completions'] = data.get_completion_ds(train=False)

    print('Shapes of various things:')
    shape_keys = ('train', 'val', 'test', 'train_cond_vals', 'val_cond_vals',
                  'test_cond_vals')
    for key in shape_keys:
        print('%s: %s' % (key, dataset[key].shape))
    length_keys = ('train_aclass_ds', 'val_aclass_ds', 'train_completions',
                   'val_completions')
    for key in length_keys:
        print('%s: %d (list)' % (key, len(dataset[key])))

    return dataset
Ejemplo n.º 4
0
def loadDataset():
    """Build the dataset dict for the IkeaDB action data.

    Returns a dict holding the raw P2DDataset plus train/val/test pose
    arrays, masks, conditioning values, action-classification datasets, and
    sequence-completion datasets (val and test share the same held-out
    split).
    """
    seq_length = 64
    # NOTE(review): the previous seq_skip=3 and gap=4 locals were never
    # passed to P2DDataset (unlike the sibling loaders), so they were dead
    # code and have been removed; the constructor defaults apply here.
    data = P2DDataset(
        './ikea_action_data.h5',
        seq_length,
        completion_length=256,
        have_actions=True,
        head_vel=True)

    dataset = {}

    dataset['dim_observations'] = data.dim_obs
    dataset['data_type'] = 'real'
    dataset['p2d'] = data

    dataset['train'], dataset['mask_train'], dataset['train_cond_vals'] \
        = data.get_pose_ds(train=True)
    dataset['val'], dataset['mask_val'], dataset['val_cond_vals'] \
        = dataset['test'], dataset['mask_test'], dataset['test_cond_vals'] \
        = data.get_pose_ds(train=False)
    # for action prediction
    dataset['train_aclass_ds'] = data.get_aclass_ds(train=True)
    dataset['val_aclass_ds'] = data.get_aclass_ds(train=False)
    # for sequence completion
    dataset['train_completions'] = data.get_completion_ds(train=True)
    dataset['val_completions'] = data.get_completion_ds(train=False)

    print('Shapes of various things:')
    to_check_shape = [
        'train', 'val', 'test', 'train_cond_vals', 'val_cond_vals',
        'test_cond_vals'
    ]
    for to_shape in to_check_shape:
        print('%s: %s' % (to_shape, dataset[to_shape].shape))
    to_check_len = [
        'train_aclass_ds', 'val_aclass_ds', 'train_completions',
        'val_completions'
    ]
    for name in to_check_len:
        print('%s: %d (list)' % (name, len(dataset[name])))

    return dataset
Ejemplo n.º 5
0
                    action='store_true',
                    dest='is_3d',
                    default=False,
                    help='treat this as a 3D dataset')

if __name__ == '__main__':
    args = parser.parse_args()
    cache_dir = args.output_dir
    mkdir_p(cache_dir)

    if args.is_3d:
        dataset = P3DDataset(args.dataset_path)
    else:
        kwargs = {}
        if args.polar:
            # XXX: the "polar" switch probably belongs in the .h5 file once
            # it's settled whether polar is a good idea.
            kwargs = dict(relative=True, polar=True)
            print('using polar representation')
        else:
            print('using relative displacement representation (not polar)')
        dataset = P2DDataset(args.dataset_path, 32, **kwargs)

    # will be passed to other fns to train
    identifier = args.model_type
    network_module = NETWORK_MODULES[identifier]
    baseline = generic_caching_baseline(network_module, identifier, cache_dir,
                                        dataset)

    #  write_baseline(cache_dir, dataset, steps_to_predict, method):
    write_baseline(cache_dir, dataset, dataset.eval_test_length, baseline)
Ejemplo n.º 6
0
        fp['/extra_data'] = json.dumps(extra_data)


if __name__ == '__main__':
    args = parser.parse_args()
    extra_data = {}

    if args.is_3d:
        dataset = P3DDataset(args.dataset_path)
        cond_on, pred_on = dataset.get_ds_for_eval(train=False)
        cond_on_orig = f32(dataset.reconstruct_skeletons(cond_on))
        pred_on_orig = f32(dataset.reconstruct_skeletons(pred_on))
        pred_val = pred_scales = None
        cond_actions = pred_actions = action_names = None
    else:
        dataset = P2DDataset(args.dataset_path, 32)
        evds = dataset.get_ds_for_eval(train=False)
        cond_on = evds['conditioning']
        pred_on = evds['prediction']
        pred_scales = evds['prediction_scales']
        if dataset.has_sparse_annos:
            pred_val = evds['prediction_valids']
        else:
            pred_val = None
        extra_data['pck_joints'] = dataset.pck_joints
        seq_ids = evds['seq_ids']
        pred_frame_numbers = evds['prediction_frame_nums']
        cond_frame_numbers = evds['conditioning_frame_nums']
        cond_on_orig = f32(
            dataset.reconstruct_poses(cond_on, seq_ids, cond_frame_numbers))
        pred_on_orig = f32(
Ejemplo n.º 7
0
def eval(args):
    """Score predicted pose sequences with a pretrained action classifier.

    Loads the best classifier checkpoint from <work_dir>/chkpt-aclass,
    rebuilds a model of matching channel/action width for the sequence length
    found in args.results_file, then reports accuracy / weighted F1 / MNLL
    for both the predicted poses and the ground-truth poses, writing the
    reports next to the results file.

    NOTE(review): this function shadows the builtin ``eval``; presumably it
    is dispatched by name from a CLI elsewhere in the file — confirm.
    """
    print('Looking for model')
    checkpoint_dir = os.path.join(args.work_dir, 'chkpt-aclass')
    orig_model = get_best_model(checkpoint_dir)
    # input_shape is batch*time*channels
    _, _, num_chans = orig_model.input_shape
    _, _, num_acts = orig_model.output_shape

    print("Fetching results from '%s'" % args.results_file)
    with h5py.File(args.results_file, 'r') as fp:
        # NOTE(review): .value is the legacy h5py accessor (removed in h5py
        # 3.0); fp[name][()] is the modern equivalent — kept as-is here.
        pred_poses = fp['/poses_2d_pred'].value
        true_poses = fp['/poses_2d_true'].value
        # need to reprocess the actions :/
        raw_actions = fp['/pred_actions_2d'].value

    # 5D implies (num_seqs, num_samples, ...) — several predicted samples
    # per ground-truth sequence
    assert pred_poses.ndim == 5, "expected 5D, got %s" % (pred_poses.shape)
    # we need to flatten out separate samples for pred_poses, then double
    # up true_poses, raw_actions, etc. as appropriate
    sample_dim = pred_poses.shape[1]
    new_pp_shape = (pred_poses.shape[0] *
                    pred_poses.shape[1], ) + pred_poses.shape[2:]
    pred_poses = pred_poses.reshape(new_pp_shape)
    # wooohoo broadcast hacks
    # dupe_marker repeats each sequence index sample_dim times, so the
    # ground-truth arrays line up with the flattened samples
    dupe_marker \
        = (np.arange(len(raw_actions)).reshape((-1, 1))
            + np.zeros((1, sample_dim), dtype='int')).flatten()
    assert dupe_marker.size == len(pred_poses), \
        "dupe_marker.shape %s, pred_poses.shape %s" \
        % (dupe_marker.shape, pred_poses.shape)
    raw_actions = raw_actions[dupe_marker]
    true_poses = true_poses[dupe_marker]
    new_seq_len = pred_poses.shape[1]
    assert pred_poses.shape == true_poses.shape, \
        "pred_poses.shape %s, true_poses.shape %s" \
        % (pred_poses.shape, true_poses.shape)

    print('Creating new model')
    # same channel/action widths as the checkpoint, but resized to the
    # sequence length found in the results file
    new_model = make_model(new_seq_len, num_chans, num_acts)
    print('Initialising weights')
    copy_weights(orig_model, new_model)

    print('Loading dataset')
    dataset = P2DDataset(args.dataset_path, 32)
    merged_actions, merged_act_names = merge_actions(raw_actions,
                                                     dataset.action_names,
                                                     args.dataset_name)
    # labels outside [0, len(merged_act_names)) are invalid and get masked
    # out of every metric below
    valid_acts = (0 <= merged_actions) \
        & (merged_actions < len(merged_act_names))
    print('%d/%d actions invalid (will be ignored)' %
          ((~valid_acts).flatten().sum(), valid_acts.size))

    def make_predictions(poses):
        # classify the given pose sequences and format a small text report
        # with accuracy, weighted F1, and mean negative log-likelihood
        postproc = postprocess_poses(poses)
        one_hot = new_model.predict(postproc)
        labels = one_hot_argmax(one_hot)

        # now do MNLL, accuracy
        flat_valid_flags = valid_acts.flatten()
        flat_labels = labels.flatten()[flat_valid_flags]
        flat_true_labels = merged_actions.flatten()[flat_valid_flags]
        flat_dists = one_hot.reshape((-1, one_hot.shape[-1]))[flat_valid_flags]

        accuracy = accuracy_score(flat_true_labels, flat_labels)
        f1 = f1_score(flat_true_labels, flat_labels, average='weighted')
        # NLL of the *predicted* label under the model's own distribution
        nll = -np.log(flat_dists[np.arange(len(flat_dists)), flat_labels])
        report = '\n'.join([
            'Accuracy: %f' % accuracy,
            'F1: %f' % f1,
            'MNLL: %f' % nll.mean()
        ])

        return report

    # TODO: maybe a better metric would show how much *worse* the predicted
    # actions are? Could measure "extra bits required to encode" or something
    # like that.

    print('How good do the predictions look?')
    pred_report = make_predictions(pred_poses)
    print(pred_report)

    print('How good do the originals look?')
    orig_report = make_predictions(true_poses)
    print(orig_report)

    dest_path = os.path.join(
        args.work_dir,
        os.path.basename(args.results_file) + '-results.txt')
    print("Writing results to '%s'" % dest_path)
    with open(dest_path, 'w') as fp:
        print("# Results for %s" % args.results_file, file=fp)
        print('\n## Predictions', file=fp)
        print(pred_report, file=fp)
        print('\n## Originals', file=fp)
        print(orig_report, file=fp)
Ejemplo n.º 8
0
def train(args):
    """Train an action classifier on reconstructed training pose sequences.

    Samples overlapping subsequences from the dataset, reconstructs each one
    back into original pose coordinates (truncating at the first masked frame
    and padding with MASK_VALUE), merges the raw action labels with
    merge_actions, shuffles, and fits a classifier, checkpointing the best
    model into <work_dir>/chkpt-aclass.
    """
    dataset = P2DDataset(args.dataset_path, 32)
    train_length = dataset.eval_condition_length + dataset.eval_test_length
    if args.dataset_name == 'ntu':
        train_gap = max(dataset.eval_seq_gap, train_length)
    else:
        # sample more densely on the smaller datasets
        train_gap = max(dataset.eval_seq_gap, train_length // 2)
    train_ds = dataset.get_ds_for_train_extra(train=True,
                                              seq_length=train_length,
                                              gap=train_gap,
                                              discard_shorter=False)

    # now we've got to reconstruct (painful thanks to masks)
    orig_poses = train_ds['poses']
    dest_pose_blocks = []
    for seq_idx in range(len(train_ds['poses'])):
        orig_pose_block = orig_poses[seq_idx]

        # truncate the current sequence at the first time step where any
        # joint is masked, so that the crappy stuff is hidden
        mask = train_ds['masks'][seq_idx]
        amask = mask.reshape((len(mask), -1)).all(axis=-1)
        for unmask_end in range(len(mask)):
            if not amask[unmask_end]:
                break
        else:
            # no masked steps: keep the whole sequence
            unmask_end = len(mask)
        if not unmask_end:
            # sometimes all steps are masked out for god knows what reason
            # ergo, just throw some empty crap into the queue
            padded = np.full((len(orig_pose_block), 2 * len(dataset.parents)),
                             MASK_VALUE)
        else:
            frame_numbers = train_ds['frame_numbers'][seq_idx, :unmask_end]
            vid_name = train_ds['vid_names'][seq_idx]
            reconst_block = dataset.reconstruct_poses(
                orig_pose_block[None, :unmask_end],
                vid_names=[vid_name],
                frame_inds=frame_numbers[None])
            # now postprocess and pad back up to the full sequence length
            postprocessed, = postprocess_poses(reconst_block)
            assert postprocessed.ndim == 2, \
                "should be 2D, got %s" % (postprocessed.shape,)
            pad_spec = [(0, orig_pose_block.shape[0] - postprocessed.shape[0]),
                        (0, 0)]
            padded = np.pad(postprocessed,
                            pad_spec,
                            'constant',
                            constant_values=MASK_VALUE)
        dest_pose_blocks.append(padded)

    train_poses = np.stack(dest_pose_blocks, axis=0)

    # merge raw action labels down to this dataset's reduced action set
    # (a redundant pre-assignment of merged_actions was removed here)
    merged_actions, merged_act_names = merge_actions(train_ds['actions'],
                                                     dataset.action_names,
                                                     args.dataset_name)
    oh_merged_actions = one_hot_cat(merged_actions, len(merged_act_names))

    assert oh_merged_actions.shape[:2] == train_poses.shape[:2]

    n_actions = len(merged_act_names)
    print('Number of actions: %d' % n_actions)
    print('Actions: ' + ', '.join(merged_act_names))

    checkpoint_dir = os.path.join(args.work_dir, 'chkpt-aclass')
    try:
        os.makedirs(checkpoint_dir)
    except FileExistsError:
        pass

    # gotta shuffle so that validation split is random
    data_perm = np.random.permutation(len(train_poses))
    train_poses = train_poses[data_perm]
    oh_merged_actions = oh_merged_actions[data_perm]

    seq_len, num_channels = train_poses.shape[1:]
    model = make_model(seq_len, num_channels, n_actions)
    # NOTE(review): nb_epoch is the Keras 1 spelling (Keras 2 uses epochs);
    # kept as-is for compatibility with whatever version this project pins
    model.fit(train_poses,
              oh_merged_actions,
              batch_size=256,
              nb_epoch=1000,
              validation_split=0.1,
              callbacks=[
                  EarlyStopping(monitor='val_acc', patience=50),
                  ModelCheckpoint(os.path.join(
                      checkpoint_dir,
                      'action-classifier-{epoch:02d}-{val_loss:.2f}.hdf5'),
                                  save_best_only=True)
              ],
              shuffle=True)