Example #1
def test_cached():
    def f(x):
        return x

    cache_size = 3
    arr = [random.random() for _ in range(25)]
    z = add_cache(arr, cache_size)

    assert list(z) == arr
    assert list(z[10:]) == arr[10:]
    assert [z[i] for i in range(10)] == arr[:10]

    z[:10] = list(range(0, -10, -1))
    assert list(z[10:]) == arr[10:]
    assert list(z[:10]) == list(range(0, -10, -1))

    y = smap(f, arr)
    z = add_cache(y, cache_size)

    t1 = time()
    for i in range(len(arr)):
        assert z[i] == arr[i]
        for j in range(max(0, i - cache_size + 1), i + 1):
            assert z[j] == arr[j]
    t2 = time()

    duration = t2 - t1
    assert duration < .28
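For reference, a minimal sketch of what this test exercises: add_cache wraps a lazily mapped sequence so that repeated reads of recently accessed indices are served from the cache instead of re-running the mapped function. The counter-based check below is an illustration under that assumption, not part of the test suite.

calls = []

def g(x):
    # record every actual evaluation so cache hits become visible
    calls.append(x)
    return x

cached = add_cache(smap(g, list(range(5))), cache_size=3)
for idx in (0, 0, 1, 0):  # repeated reads within the cache window
    cached[idx]
assert calls == [0, 1]    # g ran only once per distinct index (assumed recency-based cache)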
Example #2
    def predict_fn(feature_sequences):
        durations = np.array([len(s) for s in feature_sequences[0]])
        step = max_time - 2 * warmup

        # turn sequences into chunks
        chunks = [(i, k, min(d, k + max_time)) for i, d in enumerate(durations)
                  for k in range(0, d - warmup, step)]
        chunked_sequences = []
        for feat in feature_sequences:

            # the default argument binds the current `feat` (avoids the
            # late-binding closure pitfall inside the loop)
            def get_chunk(i, t1, t2, feat_=feat):
                return adjust_length(feat_[i][t1:t2], size=max_time, pad=0)

            chunked_sequences.append(seqtools.starmap(get_chunk, chunks))
        chunked_sequences.append([np.int32(t2 - t1) for _, t1, t2 in chunks])
        chunked_sequences = seqtools.collate(chunked_sequences)

        # turn into minibatches
        null_sample = chunked_sequences[0]
        n_features = len(null_sample)

        def collate(b):
            return [
                np.array([b[i][c] for i in range(batch_size)])
                for c in range(n_features)
            ]

        minibatches = seqtools.batch(chunked_sequences,
                                     batch_size,
                                     pad=null_sample,
                                     collate_fn=collate)
        # minibatches = seqtools.prefetch(
        #     minibatches, max_cached=nworkers * 5, nworkers=nworkers)

        # process
        batched_predictions = seqtools.starmap(predict_batch_fn, minibatches)
        batched_predictions = seqtools.add_cache(batched_predictions)
        chunked_predictions = seqtools.unbatch(batched_predictions, batch_size)

        # recompose
        out = [
            np.empty((d, ) + l_out.output_shape[2:], dtype=np.float32)
            for d in durations
        ]

        for v, (s, start, stop) in zip(chunked_predictions, chunks):
            skip = warmup if start > 0 else 0
            out[s][start + skip:stop] = v[skip:stop - start]

        return out
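In the chunking above, consecutive windows of max_time frames overlap by 2 * warmup frames, so the warmup frames at each chunk border can be dropped when the predictions are stitched back together (the skip logic in the recomposition loop). A toy illustration with made-up values (max_time=10, warmup=2, one sequence of 25 frames):

max_time, warmup = 10, 2
step = max_time - 2 * warmup      # 6: each window starts 6 frames after the previous one
durations = [25]                  # a single sequence of 25 frames

chunks = [(i, k, min(d, k + max_time))
          for i, d in enumerate(durations)
          for k in range(0, d - warmup, step)]
print(chunks)  # [(0, 0, 10), (0, 6, 16), (0, 12, 22), (0, 18, 25)]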
Example #3
def transform_frames(frame_seq, t: Transformation):
    # shorthand notations
    duration = len(frame_seq)
    sx, sy = t.xscale, t.yscale
    rx, ry = t.ref2d
    rz = t.ref3d[2]

    # generate affine transformation matrix
    # triangles_src = np.array([[rx, rx + 1, rx], [ry, ry, ry + 1]], dtype=np.float32)
    triangles_src = np.array([[rx, ry], [rx + 10, ry], [rx, ry + 10]],
                             dtype=np.float32)
    triangles_dst = np.copy(triangles_src)
    x = triangles_dst[:, 0]
    y = triangles_dst[:, 1]

    z_corrections = 1 + t.zshift / (rz + .0001)
    x[...] = (x - rx) / z_corrections + rx
    y[...] = (y - ry) / z_corrections + ry

    x[...] = (x - rx) * sx + rx
    y[...] = (y - ry) * sy + ry

    # rotate around (rx, ry); use copies so the y update sees the pre-rotation x
    x0, y0 = x.copy(), y.copy()
    x[...] = rx + np.cos(t.tilt) * (x0 - rx) - np.sin(t.tilt) * (y0 - ry)
    y[...] = ry + np.sin(t.tilt) * (x0 - rx) + np.cos(t.tilt) * (y0 - ry)

    tmatrix = cv2.getAffineTransform(triangles_src, triangles_dst)

    # affine frame-wise transformations
    output = seqtools.smap(lambda f: cv2.warpAffine(f, tmatrix, (640, 480)),
                           frame_seq)

    # fliplr
    if t.fliplr:
        output = seqtools.smap(np.fliplr, output)

    # time scale
    if t.tscale != 1:
        output_duration = transform_durations(duration, t)
        indices = np.round(np.linspace(0, duration - 1,
                                       output_duration)).astype(int)
        output = seqtools.gather(output, indices)
        if t.tscale > 1:
            output = seqtools.add_cache(output, cache_size=1)

    return output
Example #4
def test_cached_timing():
    def f(x):
        sleep(.01)
        return x

    cache_size = 3
    arr = [random.random() for _ in range(100)]

    y = smap(f, arr)
    z = add_cache(y, cache_size)

    t1 = time()
    for i in range(len(arr)):
        assert z[i] == arr[i]
        for j in range(max(0, i - cache_size + 1), i + 1):
            assert z[j] == arr[j]
    t2 = time()

    duration = t2 - t1
    print("test_cached_timing {:.2f}s".format(duration))

    assert duration < 1.2
Example #5
import time
import seqtools

files = [
    'file1', 'file2', 'file3', 'file4', 'file5', 'file6', 'file7', 'file8',
    'file9', 'file10'
]


def load(some_file):
    time.sleep(.1)
    return list(range(10) if some_file == 'file10' else range(200))


loaded_files = seqtools.smap(load, files)
loaded_files = seqtools.add_cache(loaded_files, 2)
all_samples = seqtools.unbatch(loaded_files, 200, 10)


def preprocess(x):
    # busy-wait for ~5 ms to simulate CPU-bound preprocessing
    t = time.perf_counter()
    while time.perf_counter() - t < 0.005:
        pass
    return x


preprocessed_samples = seqtools.smap(preprocess, all_samples)
minibatches = seqtools.batch(preprocessed_samples, 64, collate_fn=list)

t1 = time.time()
for batch in minibatches:
    pass  # consuming the batches forces evaluation of the whole lazy pipeline
t2 = time.time()
print("sequential: {:.2f}s".format(t2 - t1))
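The pipeline above runs entirely in the main process. As a hedged variant (not part of the original snippet), the minibatches could be evaluated by background workers with seqtools.prefetch, reusing the arguments of the commented-out call in Example #7; the actual speed-up depends on the machine.

# Assumed API, taken from the commented-out prefetch call in Example #7.
prefetched = seqtools.prefetch(minibatches, nworkers=2, max_buffered=10)

t1 = time.time()
for batch in prefetched:
    pass  # consume the batches
print("with prefetch: {:.2f}s".format(time.time() - t1))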
Example #6
def prepare():
    global train_subset, val_subset, test_subset, \
        durations, gloss_seqs, pose2d_seqs, pose3d_seqs

    # Create temporary directory
    if not os.path.exists(cachedir):
        os.mkdir(cachedir)

    # Load data
    train_subset, val_subset, test_subset = dataset.default_splits()
    pose2d_seqs = [dataset.positions(i) for i in range(len(dataset))]
    pose3d_seqs = [dataset.positions_3d(i) for i in range(len(dataset))]

    # Eliminate strange gloss annotations
    gloss_seqs_train = [dataset.glosses(r) for r in train_subset]
    rejected = set()
    for r, gseq in zip(train_subset, gloss_seqs_train):
        for i in range(len(gseq) - 1):
            if gseq[i + 1, 1] - gseq[i, 2] < 0:
                rejected.add(r)
    train_subset = np.setdiff1d(train_subset, rejected)
    if len(rejected) > 0:
        logging.warning(
            "Eliminated sequences with invalid glosses: {}".format(rejected))

    # Interpolate missing poses and eliminate deteriorated training sequences
    invalid_masks = seqtools.smap(detect_invalid_pts, pose2d_seqs)
    pose2d_seqs = seqtools.smap(interpolate_positions, pose2d_seqs,
                                invalid_masks)
    pose3d_seqs = seqtools.smap(interpolate_positions, pose3d_seqs,
                                invalid_masks)

    rejected = np.where(
        [np.mean(im[:, important_joints]) > .15 for im in invalid_masks])[0]
    train_subset = np.setdiff1d(train_subset, rejected)
    if len(rejected) > 0:
        logging.warning(
            "eliminated {} sequences with missing positions".format(
                len(rejected)))

    # Default preprocessing
    ref2d = seqtools.add_cache(seqtools.smap(get_ref_pts, pose2d_seqs),
                               cache_size=1)
    ref3d = seqtools.add_cache(seqtools.smap(get_ref_pts, pose3d_seqs),
                               cache_size=1)

    transformations = np.rec.array(
        [(r2, r3, False, 0, tgt_dist - r3[2], 1., 1., 1., 1.)
         for r2, r3 in zip(ref2d, ref3d)],
        dtype=TransformationType)

    # Precompute transformations for augmentation of the training set
    original_train_subset = train_subset

    rec_mapping = np.arange(len(dataset))
    for _ in range(5 - 1):
        offset = len(rec_mapping)
        new_subset = np.arange(offset, offset + len(original_train_subset))

        newt = np.repeat(transformations[0], len(new_subset),
                         axis=0).view(np.recarray)
        newt.fliplr = uniform(size=len(newt)) < 0.15
        newt.tilt += uniform(-7, 7, size=len(newt)) * np.pi / 180
        newt.xscale *= uniform(.85, 1.15, size=len(newt))
        newt.yscale *= uniform(.85, 1.15, size=len(newt))
        newt.zscale *= uniform(.85, 1.15, size=len(newt))
        newt.tscale *= uniform(.85, 1.15, size=len(newt))

        rec_mapping = np.concatenate([rec_mapping, original_train_subset])
        transformations = np.concatenate([transformations,
                                          newt]).view(np.recarray)
        train_subset = np.concatenate([train_subset, new_subset])

    # Apply transformations (if they are cheap to compute)
    durations = np.array([
        transform_durations(dataset.durations(r), t)
        for r, t in zip(rec_mapping, transformations)
    ])

    gloss_seqs = [
        transform_glosses(dataset.glosses(r), dataset.durations(r), t)
        for r, t in zip(rec_mapping, transformations)
    ]

    pose2d_seqs = seqtools.gather(pose2d_seqs, rec_mapping)
    pose2d_seqs = seqtools.smap(
        partial(transform_pose2d, flip_mapping=flip_mapping, frame_width=640),
        pose2d_seqs, transformations)

    pose3d_seqs = seqtools.gather(pose3d_seqs, rec_mapping)
    pose3d_seqs = seqtools.smap(
        partial(transform_pose3d, flip_mapping=flip_mapping), pose3d_seqs,
        transformations)

    # Export
    np.save(os.path.join(cachedir, "pose2d_seqs.npy"),
            seqtools.concatenate(pose2d_seqs))
    np.save(os.path.join(cachedir, "pose3d_seqs.npy"),
            seqtools.concatenate(pose3d_seqs))

    with open(os.path.join(cachedir, "data.pkl"), 'wb') as f:
        pkl.dump((durations, gloss_seqs, rec_mapping, transformations,
                  train_subset, val_subset, test_subset), f)
Example #7
def transfer_feat_seqs(transfer_from, freeze_at):
    import theano
    import theano.tensor as T
    import lasagne
    from sltools.nn_utils import adjust_length
    from experiments.utils import reload_best_hmm, reload_best_rnn

    report = shelve.open(os.path.join(cachedir, transfer_from))

    if report['meta']['modality'] == "skel":
        source_feat_seqs = [skel_feat_seqs]
    elif report['meta']['modality'] == "bgr":
        source_feat_seqs = [bgr_feat_seqs]
    elif report['meta']['modality'] == "fusion":
        source_feat_seqs = [skel_feat_seqs, bgr_feat_seqs]
    else:
        raise ValueError()

    # no computation required
    if freeze_at == "inputs":
        return source_feat_seqs

    # reuse cached features
    dump_file = os.path.join(
        cachedir,
        report['meta']['experiment_name'] + "_" + freeze_at + "feats.npy")
    if os.path.exists(dump_file):
        boundaries = np.stack(
            (np.cumsum(durations) - durations, np.cumsum(durations)), axis=1)
        return [split_seq(np.load(dump_file, mmap_mode='r'), boundaries)]

    # reload model
    if report['meta']['model'] == "hmm":
        _, recognizer, _ = reload_best_hmm(report)
        l_in = recognizer.posterior.l_in
        if freeze_at == "embedding":
            l_feats = recognizer.posterior.l_feats
        elif freeze_at == "logits":
            l_feats = recognizer.posterior.l_raw
        elif freeze_at == "posteriors":
            l_feats = lasagne.layers.NonlinearityLayer(
                recognizer.posterior.l_out, T.exp)
        else:
            raise ValueError()
        batch_size, max_time, *_ = l_in[0].output_shape  # TODO: fragile
        warmup = recognizer.posterior.warmup

    else:
        _, model_dict, _ = reload_best_rnn(report)
        l_in = model_dict['l_in']
        l_feats = model_dict['l_feats']
        batch_size, max_time, *_ = l_in[0].output_shape  # TODO: fragile
        warmup = model_dict['warmup']

    feats_var = lasagne.layers.get_output(l_feats, deterministic=True)
    predict_batch_fn = theano.function([l.input_var for l in l_in], feats_var)

    step = max_time - 2 * warmup

    # turn sequences into chunks
    chunks = [(i, k, min(d, k + max_time)) for i, d in enumerate(durations)
              for k in range(0, d - warmup, step)]
    chunked_sequences = []
    for feat in source_feat_seqs:

        def get_chunk(i, t1, t2, feat_=feat):
            return adjust_length(feat_[i][t1:t2], size=max_time, pad=0)

        chunked_sequences.append(seqtools.starmap(get_chunk, chunks))
    chunked_sequences = seqtools.collate(chunked_sequences)

    # turn into minibatches
    null_sample = chunked_sequences[0]
    n_features = len(null_sample)

    def collate(b):
        return [
            np.array([b[i][c] for i in range(batch_size)])
            for c in range(n_features)
        ]

    minibatches = seqtools.batch(chunked_sequences,
                                 batch_size,
                                 pad=null_sample,
                                 collate_fn=collate)
    # minibatches = seqtools.prefetch(minibatches, nworkers=2, max_buffered=10)

    # process
    batched_predictions = seqtools.starmap(predict_batch_fn, minibatches)
    batched_predictions = seqtools.add_cache(batched_predictions)
    chunked_predictions = seqtools.unbatch(batched_predictions, batch_size)

    # recompose
    feat_size = l_feats.output_shape[2:]
    storage = open_memmap(dump_file,
                          'w+',
                          dtype=np.float32,
                          shape=(sum(durations), ) + feat_size)
    subsequences = np.stack(
        [np.cumsum(durations) - durations,
         np.cumsum(durations)], axis=1)
    out_view = seqtools.split(storage, subsequences)

    for v, (s, start, stop) in zip(chunked_predictions, chunks):
        skip = warmup if start > 0 else 0
        out_view[s][start + skip:stop] = v[skip:stop - start]

    return [out_view]