def get_threat_heatmaps(mode):
    """Build, or load from cache, the hand-labelled threat masks for `mode`.

    When no ``done`` marker exists in the working directory, the first hand
    segmentation image matching each sample name is read and turned into
    three masks (one per entry of ``SEGMENTATION_COLORS[0..2]``). Any image
    found in the ``revision_v0`` directory replaces the mask of the threat it
    refers to. The result is stored as dataset ``th`` in ``data.hdf5``.
    When the marker exists, the cached file is opened read-only instead.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.

    Returns:
        The h5py dataset ``th`` of shape ``x.shape + (3,)``.

    Raises:
        AssertionError: if a sample has no hand segmentation file.
        ValueError: if a revision file names a label that is not set for
            the sample (raised by ``list.index``).
    """
    if not os.path.exists('done'):
        names, labels, x = get_aps_data_hdf5(mode)
        f = h5py.File('data.hdf5', 'w')
        th = f.create_dataset('th', x.shape + (3, ))

        with read_input_dir('hand_labeling/threat_segmentation/base'):
            # The scan data itself is not needed here, so `x` is only used
            # for its shape/length and never iterated (avoids reading it).
            for i, (name, label) in tqdm.tqdm(enumerate(zip(names, labels)),
                                              total=len(x)):
                files = glob.glob(name + '*')
                assert files, 'missing hand segmentation for %s' % name

                image = imageio.imread(files[0])
                masks = [
                    _get_mask(image, SEGMENTATION_COLORS[ci])
                    for ci in range(3)
                ]
                with read_input_dir(
                        'hand_labeling/threat_segmentation/revision_v0'):
                    for revision in glob.glob(name + '*'):
                        # Revision files are named "<name>_<label>.<ext>".
                        rlabel = int(revision.split('_')[1].split('.')[0])
                        # 1-based indices of the labels set for this sample;
                        # the position of `rlabel` in that list selects the
                        # mask to replace. `k` (not `i`) keeps the sample
                        # index intact even where comprehension variables
                        # leak into the enclosing scope (Python 2).
                        active = [k + 1 for k in range(17) if label[k]]
                        rci = active.index(rlabel)
                        rimage = imageio.imread(revision)
                        masks[rci] = _get_mask(rimage, SEGMENTATION_COLORS[0])

                th[i] = np.stack(masks, axis=-1)

        # Push the data to disk before publishing the marker, so a crash can
        # never leave a `done` marker next to an incomplete cache file.
        f.flush()
        open('done', 'w').close()
    else:
        f = h5py.File('data.hdf5', 'r')
        th = f['th']
    return th
# Example #2
# 0
def get_a3d_projection_data(mode, percentile):
    """Build, or load from cache, multi-angle projections of the a3d volumes.

    When no ``done`` marker exists, every volume yielded by
    ``get_data(mode, 'a3d')`` is average-pooled by 2 along each axis, rotated
    to 16 angles and reduced along axis 1 into four projection channels
    (depth map, max, mean, std). A fifth channel is filled from the data
    returned by ``get_aps_data_hdf5``. Results go to dataset ``dset`` in
    ``data.hdf5``; names and labels are pickled to ``pkl``.

    Args:
        mode: dataset split identifier, forwarded to ``get_data`` and
            ``get_aps_data_hdf5``.
        percentile: percentile (along axis 1 of the rotated volume) used as
            the threshold for the depth-map channel.

    Returns:
        Tuple ``(names, labels, dset)`` where ``dset`` is the h5py dataset
        opened read-only, of shape
        ``(n, 16, 330, 256, 5)`` as created below.
    """
    if not os.path.exists('done'):
        angles, width, height = 16, 512, 660
        tf.reset_default_graph()

        # Input is the half-resolution volume: (256, 256, 330).
        data_in = tf.placeholder(tf.float32,
                                 [width // 2, width // 2, height // 2])
        # Index of the view angle (0 .. angles - 1), fed as a float scalar.
        angle = tf.placeholder(tf.float32, [])

        # Rotation by -2*pi*angle/angles radians is pinned to the CPU.
        with tf.device('/cpu:0'):
            image = tf.contrib.image.rotate(data_in,
                                            -2 * math.pi * angle / angles)
        # Reduce along axis 1 to get max / mean / std projections.
        max_proj = tf.reduce_max(image, axis=1)
        mean_proj, var_proj = tf.nn.moments(image, axes=[1])
        std_proj = tf.sqrt(var_proj)

        # Boolean mask of entries above the per-line percentile threshold.
        surf = image > tf.contrib.distributions.percentile(
            image, percentile, axis=1, keep_dims=True)
        # argmax over the 0/1 mask along axis 1, scaled by 1/width.
        # NOTE(review): presumably this is the first above-threshold index
        # (a depth map); argmax tie-breaking should be confirmed.
        dmap = tf.cast(
            tf.argmax(tf.cast(surf, tf.int32), axis=1) / width, tf.float32)
        # Stack the four channels last and rotate the result by 90 degrees.
        proj = tf.image.rot90(
            tf.stack([dmap, max_proj, mean_proj, std_proj], axis=-1))

        # NOTE(review): `gen` is only used for its length; the volumes are
        # fetched again by a second get_data() call in the loop below.
        gen = get_data(mode, 'a3d')
        f = h5py.File('data.hdf5', 'w')
        # 5 channels: the 4 projection channels plus one filled from dset_in.
        dset = f.create_dataset('dset',
                                (len(gen), angles, height // 2, width // 2, 5))
        names, labels, dset_in = get_aps_data_hdf5(mode)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i, (_, _, data) in enumerate(tqdm.tqdm(get_data(mode, 'a3d'))):
                # 2x2x2 average pooling: mean of the eight strided sub-volumes.
                data = (data[::2, ::2, ::2] + data[::2, ::2, 1::2] +
                        data[::2, 1::2, ::2] + data[::2, 1::2, 1::2] +
                        data[1::2, ::2, ::2] + data[1::2, ::2, 1::2] +
                        data[1::2, 1::2, ::2] + data[1::2, 1::2, 1::2]) / 8
                for j in tqdm.trange(angles):
                    # Channels 0..3: projections of the volume at angle j.
                    dset[i, j, ..., :-1] = sess.run(proj,
                                                    feed_dict={
                                                        data_in: data,
                                                        angle: j
                                                    })
                    # Last channel: 2x2 block sum of channel j of dset_in.
                    # NOTE(review): unlike the volume pooling above this sum
                    # is not divided by 4 -- confirm that is intended.
                    dset[i, j, ..., -1] = (dset_in[i, ::2, ::2, j] +
                                           dset_in[i, ::2, 1::2, j] +
                                           dset_in[i, 1::2, ::2, j] +
                                           dset_in[i, 1::2, 1::2, j])

        # Close the HDF5 file before the name `f` is reused for the pickle.
        f.close()
        with open('pkl', 'wb') as f:
            pickle.dump((names, labels), f)
        open('done', 'w').close()

    # Both the fresh and the cached path reload the results from disk.
    with open('pkl', 'rb') as f:
        names, labels = pickle.load(f)
    f = h5py.File('data.hdf5', 'r')
    dset = f['dset']
    return names, labels, dset
def get_passenger_groups(mode):
    """Return, for every sample of `mode`, the index of its passenger cluster.

    Args:
        mode: dataset split identifier; must not start with ``'test'``.

    Returns:
        A list with one entry per sample name returned by
        ``get_aps_data_hdf5``: the index of the cluster (as enumerated from
        ``get_passenger_clusters()``) containing that name, or ``None`` when
        no cluster contains it. If a name appears in several clusters the
        highest cluster index wins.

    Raises:
        AssertionError: if `mode` starts with ``'test'``.
    """
    assert not mode.startswith('test')

    # Invert the clusters once instead of scanning every cluster for every
    # name. Later clusters overwrite earlier ones, which keeps the
    # last-match-wins result of a full scan.
    cluster_of = {}
    for j, cluster in enumerate(get_passenger_clusters()):
        for member in cluster:
            cluster_of[member] = j

    names, _, _ = get_aps_data_hdf5(mode)
    return [cluster_of.get(name) for name in names]
def train_clustering_model(mode, duration):
    """Train, or reuse, a logistic same-passenger classifier on distances.

    The model is a single dense unit applied to the 27 distance features of
    each sample pair, after normalising the features with the mean and
    variance of the fed batch. When a ``done`` marker exists in the working
    directory nothing is trained and the saved checkpoint is used as is.

    Args:
        mode: dataset split identifier, forwarded to the data helpers.
        duration: training time budget in hours.

    Returns:
        A function ``predict(x)`` that restores ``model.ckpt`` from the
        working directory this function was called in and returns the
        flattened log-probabilities (``log sigmoid(logit)``) for the
        ``[N, M, 27]`` distance tensor `x`.

    Raises:
        KeyError: if a cluster contains a name that is absent from the
            names of `mode`.
    """
    # Read the marker first: the helpers below may create files themselves.
    already_trained = os.path.exists('done')

    if not already_trained:
        # get_distance_matrix() resets the default TF graph when its cache
        # is cold. The training inputs are therefore gathered *before* this
        # function builds its own graph; otherwise the session below would
        # be bound to a graph that no longer contains `train_step`.
        dmat_train = get_distance_matrix(mode)
        clusters = get_passenger_clusters()
        names, _, _ = get_aps_data_hdf5(mode)
        name_idx = {name: pos for pos, name in enumerate(names)}

        # labels_train[a, b] == 1 iff samples a and b share a cluster.
        labels_train = np.zeros(dmat_train.shape[:2])
        for cluster in clusters:
            members = [name_idx[name] for name in cluster]
            for i1 in members:
                for i2 in members:
                    labels_train[i1, i2] = 1

    tf.reset_default_graph()

    dmat_in = tf.placeholder(tf.float32, [None, None, 27])
    labels_in = tf.placeholder(tf.float32, [None, None])

    # One row per sample pair; normalise with scalar batch statistics.
    dmat = tf.reshape(dmat_in, [-1, 27])
    mean, var = tf.nn.moments(dmat, [0, 1])
    dmat = (dmat - mean) / tf.sqrt(var)
    labels = tf.reshape(labels_in, [-1])
    logits = tf.squeeze(tf.layers.dense(dmat, 1))
    # -softplus(-z) == log(sigmoid(z))
    logprob = -tf.nn.softplus(-logits)
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_step = optimizer.minimize(loss)

    saver = tf.train.Saver()
    model_path = os.getcwd() + '/model.ckpt'

    def predict(x):
        with tf.Session() as sess:
            saver.restore(sess, model_path)
            return sess.run(logprob, feed_dict={dmat_in: x})

    if already_trained:
        return predict

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Full-batch steps until the wall-clock budget is used up.
        t0 = time.time()
        while time.time() - t0 < duration * 3600:
            sess.run(train_step, feed_dict={
                dmat_in: dmat_train, labels_in: labels_train
            })
        saver.save(sess, model_path)
    open('done', 'w').close()

    return predict
def naive_cluster_passengers(mode, n_clusters):
    """Cluster samples with PCA + k-means and dump one image per sample.

    Channel 0 of every sample is subsampled by 8 in both spatial axes,
    flattened, reduced to 128 PCA components and clustered with k-means.
    Channel 0 of each sample (scaled by its maximum) is then written to
    ``<cluster>/<name>.png`` in the working directory.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.
        n_clusters: number of k-means clusters (and output directories).
    """
    names, _, x = get_aps_data_hdf5(mode)
    images = x[:, ::8, ::8, 0].reshape((len(x), -1))
    reduced_data = sklearn.decomposition.PCA(n_components=128).fit_transform(images)
    kmeans = sklearn.cluster.KMeans(n_clusters).fit(reduced_data)
    clusters = kmeans.predict(reduced_data)

    for i in range(n_clusters):
        # Tolerate directories left over from a previous run instead of
        # failing in os.mkdir.
        if not os.path.exists(str(i)):
            os.mkdir(str(i))
    for name, cluster, data in tqdm.tqdm(zip(names, clusters, x), total=len(x)):
        imageio.imsave('%s/%s.png' % (cluster, name), data[..., 0]/data[..., 0].max())
def get_distance_matrix(mode):
    """Return the pairwise multi-scale distance tensor for all samples.

    For three views of the input (full image, rows ``:330``, columns
    ``128:384``), each resized to 512x512 and then repeatedly halved, the
    root-mean-square difference between every pair of samples is computed at
    nine resolutions (512 down to 2), giving 27 features per pair. The
    result is cached in ``dmat.npy`` and guarded by a ``done`` marker.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.

    Returns:
        numpy array of shape ``(n, n, 27)``.
    """
    # Fast path: a previous run already produced the matrix.
    if os.path.exists('done'):
        return np.load('dmat.npy')

    batch_size = 32

    tf.reset_default_graph()
    x1_in = tf.placeholder(tf.float32, [None, 660, 512, 16])
    x2_in = tf.placeholder(tf.float32, [None, 660, 512, 16])
    dist_mats = []

    for feat in range(3):
        side = 512
        # Pick the view of the input this feature group works on.
        if feat == 0:
            lhs, rhs = x1_in, x2_in
        elif feat == 1:
            lhs, rhs = x1_in[:, :330, :, :], x2_in[:, :330, :, :]
        else:
            lhs, rhs = x1_in[:, :, 128:384, :], x2_in[:, :, 128:384, :]
        lhs = tf.image.resize_images(lhs, [side, side])
        rhs = tf.image.resize_images(rhs, [side, side])

        for _ in range(9):
            n = 16 * side**2
            lhs_flat = tf.reshape(lhs, [-1, n])
            rhs_flat = tf.transpose(tf.reshape(rhs, [-1, n]))
            dots = tf.matmul(lhs_flat, rhs_flat)
            # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 for every pair (a, b).
            lhs_sq = tf.reduce_sum(tf.square(lhs_flat), axis=1, keep_dims=True)
            sq_dist = lhs_sq - 2*dots
            rhs_sq = tf.reduce_sum(tf.square(rhs_flat), axis=0, keep_dims=True)
            sq_dist = sq_dist + rhs_sq

            # Clamp tiny negatives from rounding before the square root.
            dist_mats.append(tf.sqrt(tf.maximum(sq_dist/n, 0)))

            # Halve the resolution for the next scale.
            side //= 2
            lhs = tf.image.resize_images(lhs, [side, side])
            rhs = tf.image.resize_images(rhs, [side, side])

    dist_mat = tf.stack(dist_mats, axis=-1)

    _, _, dset = get_aps_data_hdf5(mode)
    n_samples = len(dset)
    dmat = np.zeros((n_samples, n_samples, 27))
    with tf.Session() as sess:
        # Evaluate the graph on every pair of batches and fill the tile.
        for row in tqdm.trange(0, n_samples, batch_size):
            for col in tqdm.trange(0, n_samples, batch_size):
                tile = sess.run(dist_mat, feed_dict={
                    x1_in: dset[row:row+batch_size],
                    x2_in: dset[col:col+batch_size]
                })
                dmat[row:row+batch_size, col:col+batch_size, :] = tile

    np.save('dmat.npy', dmat)
    open('done', 'w').close()
    return dmat
def get_data_and_threat_heatmaps(mode):
    """Return the samples of `mode` with their threat heatmaps appended.

    When no ``done`` marker exists, every sample is concatenated with its
    three-channel heatmap from ``get_threat_heatmaps`` along a new last
    axis (index 0: data, indices 1-3: heatmaps) and stored as dataset
    ``dset`` in ``data.hdf5``. Otherwise the cached file is opened read-only.

    Args:
        mode: dataset split identifier, forwarded to the data helpers.

    Returns:
        Tuple ``(names, labels, dset)`` where ``dset`` is an h5py dataset of
        shape ``x.shape + (4,)``.
    """
    names, labels, x = get_aps_data_hdf5(mode)
    if not os.path.exists('done'):
        th = get_threat_heatmaps(mode)
        f = h5py.File('data.hdf5', 'w')
        dset = f.create_dataset('dset', x.shape + (4, ))
        for i, (data, hmap) in tqdm.tqdm(enumerate(zip(x, th)), total=len(x)):
            dset[i] = np.concatenate([data[..., np.newaxis], hmap], axis=-1)
        # Make sure the data is on disk before the marker declares the cache
        # complete; otherwise a crash could leave a marker next to a
        # truncated file.
        f.flush()
        open('done', 'w').close()
    else:
        f = h5py.File('data.hdf5', 'r')
        dset = f['dset']
    return names, labels, dset
def plot_nearest_neighbors(mode, max_near):
    """Write one montage per sample showing its `max_near` nearest neighbours.

    Neighbour thumbnails (channel 0, subsampled by 4) are tiled four per
    row. The file is named ``<n_wrong>_<name>.png`` where ``n_wrong`` counts
    the neighbours whose passenger group differs from the sample's own.

    Args:
        mode: dataset split identifier, forwarded to the data helpers.
        max_near: number of neighbours to show for each sample.
    """
    perm = passenger_clustering.get_nearest_neighbors(mode)
    group = passenger_clustering.get_passenger_groups(mode)
    names, _, dset = get_aps_data_hdf5(mode)

    for i, name in enumerate(names):
        neighbours = [perm[i][j] for j in range(max_near)]
        n_wrong = sum(group[nb] != group[i] for nb in neighbours)

        thumbs = [dset[nb, ::4, ::4, 0] for nb in neighbours]
        # Tile the thumbnails: four side by side, rows stacked vertically.
        rows = []
        for start in range(0, max_near, 4):
            rows.append(np.concatenate(thumbs[start:start+4], axis=1))
        montage = np.concatenate(rows, axis=0)

        imageio.imsave('%s_%s.png' % (n_wrong, name), montage / montage.max())
def write_aps_hand_labeling_revision_v0(mode):
    """Write montage images for the samples listed in ``revision_v0.txt``.

    Each non-blank line of the revision file holds a 5-character name
    prefix, one separator character and a ``', '``-separated list of
    integer labels. For every sample whose name starts with a listed prefix,
    one RGB montage (all slices along axis 2 side by side, scaled by the
    sample maximum) is written per label as ``<name>_<label>.png``.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.

    Raises:
        AssertionError: if a name prefix occurs twice in the revision file.
        ValueError: if a label in the revision file is not an integer.
    """
    names, _, x = get_aps_data_hdf5(mode)
    todo = {}
    with read_input_dir('hand_labeling/threat_segmentation'):
        with open('revision_v0.txt', 'r') as f:
            for line in f:
                # Skip blank lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue
                name, labels = line[:5], line[6:]
                assert name not in todo, "duplicate revision names"
                # `token`, not `x`: keeps the dataset `x` from being shadowed.
                todo[name] = [int(token) for token in labels.split(', ')]
    for name, data in tqdm.tqdm(zip(names, x), total=len(x)):
        revised = todo.get(name[:5], [])
        if not revised:
            continue
        # The montage does not depend on the label, so build it once.
        images = np.concatenate(np.rollaxis(data, 2), axis=1) / data.max()
        rgb = np.repeat(images[..., np.newaxis], 3, axis=-1)
        for label in revised:
            filename = '%s_%s' % (name, label)
            skimage.io.imsave('%s.png' % filename, rgb)
def plot_image_registration_samples(mode, n_samples):
    """Render registration results for a grid of pyelastix settings.

    For every combination of grid spacing, number of resolutions and
    iteration count, `n_samples` random same-group image pairs (same view
    index) are registered with ``passenger_clustering.register_images``.
    Each result is saved to ``<spacing>/<num_res>/<num_iter>/<i>.png`` as a
    2x2 panel: the two inputs on top, the registered image bottom-left and,
    bottom-right, an overlay with the second input in the red channel and
    the registered image in the green channel.

    The random seed is reset for every setting, so all settings see the
    same sample pairs.

    Args:
        mode: dataset split identifier, forwarded to the data helpers.
        n_samples: number of image pairs per parameter combination.
    """
    _, _, dset = get_aps_data_hdf5(mode)
    group = passenger_clustering.get_passenger_groups(mode)
    for spacing in tqdm.tqdm([8, 16, 32, 64]):
        for num_res in tqdm.tqdm([2, 3, 4]):
            for num_iter in tqdm.tqdm([8, 16, 32, 64, 128]):
                np.random.seed(0)
                im1, im2 = [], []
                for _ in range(n_samples):
                    # Rejection-sample until both images share a group.
                    while True:
                        i1, i2, angle = np.random.randint(len(dset)), np.random.randint(len(dset)), \
                                        np.random.randint(16)
                        if group[i1] == group[i2]:
                            break
                    d1, d2 = dset[i1, ..., angle], dset[i2, ..., angle]
                    d1 /= d1.max()
                    d2 /= d2.max()
                    im1.append(d1)
                    im2.append(d2)

                params = common.pyelastix.get_default_params()
                params.FinalGridSpacingInPhysicalUnits = spacing
                params.NumberOfResolutions = num_res
                params.MaximumNumberOfIterations = num_iter
                reg = passenger_clustering.register_images(im1, im2, params)

                # The output directory is the same for the whole setting, so
                # create it once rather than checking for every image.
                path = '%s/%s/%s' % (spacing, num_res, num_iter)
                if not os.path.exists(path):
                    os.makedirs(path)

                for i, (d1, d2, im) in enumerate(zip(im1, im2, reg)):
                    im /= im.max()
                    image = np.concatenate([
                        np.concatenate([d1, d2], axis=1),
                        np.concatenate([im, np.zeros(d1.shape)], axis=1)
                    ], axis=0)
                    image = np.repeat(image[..., np.newaxis], 3, axis=-1)
                    # Bottom-right quadrant starts at the size of one input
                    # image; derive it from the data instead of assuming
                    # 660x512.
                    rows, cols = d1.shape
                    image[rows:, cols:, 0] = d2
                    image[rows:, cols:, 1] = im

                    imageio.imsave('%s/%s.png' % (path, i), image)
def write_passenger_id_images(mode):
    """Save channel 0 of every sample, scaled by its maximum, as ``<name>.png``.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.
    """
    names, _, x = get_aps_data_hdf5(mode)
    progress = tqdm.tqdm(zip(names, x), total=len(x))
    for name, data in progress:
        first_channel = data[..., 0]
        scaled = first_channel / np.max(first_channel)
        imageio.imsave('%s.png' % name, scaled)
def write_aps_hand_labeling_images(mode):
    """Write one RGB montage per sample for hand labelling.

    The slices along axis 2 of each sample are placed side by side, scaled
    by the sample maximum and replicated to three channels. The file name is
    the sample name followed by ``_<k>`` for every 1-based index ``k`` (out
    of 17) whose label entry is truthy.

    Args:
        mode: dataset split identifier, forwarded to ``get_aps_data_hdf5``.
    """
    names, labels, x = get_aps_data_hdf5(mode)
    for name, label, data in tqdm.tqdm(zip(names, labels, x), total=len(x)):
        montage = np.concatenate(np.rollaxis(data, 2), axis=1) / data.max()

        # Collect the file-name parts: name first, then the set label indices.
        parts = [name]
        for k in range(17):
            if label and label[k]:
                parts.append(str(k+1))
        filename = '_'.join(parts)

        rgb = np.repeat(montage[..., np.newaxis], 3, axis=-1)
        skimage.io.imsave('%s.png' % filename, rgb)