Example #1
0
def view_mistakes():
    """Display the ``myval`` images whose binary prediction was wrong.

    Predictions come from ``get_decisions`` run with the ``binary13b`` model
    directory.  The reference label for each image is 1 when
    ``sd.salt_pixel_num`` reports more than zero pixels and 0 otherwise.
    Every image whose label differs from its prediction is collected and the
    collection is handed to ``ScrollThruPlot``, whose ``onscroll`` handler is
    wired to the figure's scroll event.
    """
    foldername = 'myval'
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary13b'
    id_to_pred = get_decisions(foldername, model_dir)

    image_pattern = (
        '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/%s/images/*.png'
        % foldername)
    fileids = sb.clean_glob(glob.glob(image_pattern))

    imgs = sd.get_salt_images(folder='myval')
    val_pix_num = sd.salt_pixel_num(folder='myval')
    valY = np.array(val_pix_num > 0).astype(int)

    # Pack the misclassified images at the front of ``imgs``.  The write
    # position never runs ahead of the read position, so no image is
    # overwritten before it has been examined.
    n_wrong = 0
    for pos, name in enumerate(fileids):
        stem = os.path.splitext(name)[0]
        if valY[pos] != id_to_pred[stem]:
            imgs[n_wrong, :, :] = imgs[pos, :, :]
            n_wrong += 1

    print('made %i mistakes' % n_wrong)

    # Two axis swaps move the leading (image index) axis behind the next two
    # axes: (n, a, b) -> (a, b, n).
    wrong_imgs = imgs[:n_wrong, :, :]
    X = np.swapaxes(wrong_imgs, 0, 2)
    X = np.swapaxes(X, 0, 1).astype(float)

    fig, ax = plt.subplots(1, 1)
    tracker = ScrollThruPlot(ax, X, fig)
    fig.canvas.mpl_connect('scroll_event', tracker.onscroll)
    plt.show()
Example #2
0
def eval_masks(
    outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/myval/'
):
    """Predict a mask for every ``myval`` image and save it as a grayscale image.

    The images are pushed through the ``binarypix1`` estimator in two passes:
    a head made only of full batches, then one final batch taken from the end
    of the set to cover the remainder.  The split is derived from the number
    of files and ``batch_size`` rather than being hard-coded for one specific
    set size.  Images covered by both passes are simply written twice.

    Args:
        outpath: Prefix for the output files.  Each file id is appended to it
            by plain string concatenation, so it should end with a path
            separator.
    """
    valX = get_salt_images(folder='myval')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/myval/images/*.png'
        ))
    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    setsize = len(fileids)
    # Largest multiple of batch_size that fits in the set.
    headsz = (setsize // batch_size) * batch_size
    # The tail is the last batch_size images (or the whole set if smaller).
    tail_start = max(setsize - batch_size, 0)

    def _save_range(start, stop, show_progress=False):
        # Predict images [start, stop) and save each mask under its file id.
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[start:stop, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        if show_progress:
            gen = tqdm(gen, total=stop - start)
        for offset, prediction in enumerate(gen):
            pred = np.array(prediction['mask']).reshape((101, 101))
            plt.imsave(outpath + fileids[start + offset], pred, cmap=cm.gray)

    if headsz > 0:
        _save_range(0, headsz, show_progress=True)

    # now get the tail
    # NOTE(review): the tail is fed as one full batch from the end of the set,
    # presumably because the model expects full batches - confirm against
    # model_fn.
    if headsz < setsize:
        _save_range(tail_start, setsize)
Example #3
0
def kaggle_test(
        outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/'
):
    """Write a run-length-encoded submission CSV for the ``test`` images.

    Each image is pushed through the ``binarypix1`` estimator; its predicted
    mask is reshaped to 101x101, flattened in column-major order, multiplied
    by the stored per-image value from ``binary1/test_bin_pred.npy`` and
    encoded with ``myrlestring``.  One ``id,rle_mask`` row is written per
    image to ``<outpath>binarypix2.csv``.

    Prediction runs in two passes: a head made only of full batches, then a
    final batch taken from the end of the set.  Rows already written by the
    head are skipped in the tail so every image appears exactly once.  The
    split is derived from the number of files and ``batch_size`` rather than
    being hard-coded for one specific set size.

    The CSV is opened in write mode: the header is emitted unconditionally,
    so appending to an existing file would produce a second header and
    duplicate rows.

    Args:
        outpath: Prefix for the CSV file.  The file name is appended by plain
            string concatenation, so it should end with a path separator.
    """
    testX = get_salt_images(folder='test')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/test/images/*.png'
        ))

    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    # The file holds a pickled dict (it is indexed by file id below), which
    # newer NumPy refuses to load unless allow_pickle=True is given.
    # SECURITY: unpickling executes arbitrary code - only load trusted files.
    id_to_pred = np.load(
        '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1/test_bin_pred.npy',
        allow_pickle=True,
    ).tolist()

    setsize = len(fileids)
    # Largest multiple of batch_size that fits in the set.
    headsz = (setsize // batch_size) * batch_size
    # The tail is the last batch_size images (or the whole set if smaller).
    tail_start = max(setsize - batch_size, 0)

    def _write_rows(fd, start, stop, first_new, show_progress=False):
        # Predict images [start, stop); write rows for indices >= first_new.
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': testX[start:stop, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        if show_progress:
            gen = tqdm(gen, total=stop - start)
        for offset, prediction in enumerate(gen):
            idx = start + offset
            if idx < first_new:
                # Already written by the head pass.
                continue
            fileid, _ = os.path.splitext(fileids[idx])
            mask = np.array(prediction['mask']).reshape((101, 101))
            # Transpose-then-flatten yields column-major pixel order.
            pred = mask.transpose().reshape(101**2) * id_to_pred[fileid]
            fd.write('%s,%s\n' % (fileid, myrlestring(pred)))

    with open(outpath + 'binarypix2.csv', 'w') as fd:
        fd.write('id,rle_mask\n')
        if headsz > 0:
            _write_rows(fd, 0, headsz, 0, show_progress=True)

        # now get the tail
        if headsz < setsize:
            _write_rows(fd, tail_start, setsize, headsz)
Example #4
0
def get_decisions(foldername='test', model_dir=None, outfile=None):
    """Run an estimator over one image folder and collect its predictions.

    Prediction runs in two passes: a head made only of full batches, then a
    final batch taken from the end of the set to cover the remainder.
    Entries covered by both passes are overwritten by the tail pass.  When
    the set divides evenly into batches the tail pass is skipped, and when
    the set is smaller than one batch the whole set is predicted as the tail.

    Args:
        foldername: Dataset sub-folder to run on; it is passed to
            ``get_salt_images`` and substituted into the image glob pattern.
        model_dir: Model directory handed to ``tf.estimator.Estimator``.
        outfile: Optional path; when truthy the result dict is stored there
            with ``np.save``.

    Returns:
        Dict mapping each file id (file name without its extension) to the
        value the estimator's ``predict`` yielded for that image.
    """
    valX = get_salt_images(folder=foldername)
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/%s/images/*.png'
            % foldername))

    setsize = len(fileids)
    # Largest multiple of batch_size that fits in the set.
    headsz = (setsize // batch_size) * batch_size
    # Clamp at 0 so a set smaller than one batch is not indexed negatively.
    tail_start = max(setsize - batch_size, 0)

    # '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1'
    bin_model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    id_to_pred = {}

    def _collect(start, stop, show_progress=False):
        # Predict images [start, stop) and record each result by file id.
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[start:stop, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = bin_model.predict(input_fn)
        if show_progress:
            gen = tqdm(gen, total=stop - start)
        for offset, prediction in enumerate(gen):
            fileid, _ = os.path.splitext(fileids[start + offset])
            id_to_pred[fileid] = prediction

    if headsz > 0:
        _collect(0, headsz, show_progress=True)

    # now get the tail
    if headsz < setsize:
        _collect(tail_start, setsize)

    # '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary1/test_bin_pred'
    if outfile:
        np.save(outfile, id_to_pred)

    return id_to_pred
Example #5
0
def kaggle_summary(
    outpath='/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/predictions/myval/'
):
    """Print TP/FP/FN counts and precision at ten IoU thresholds on ``myval``.

    Each image is pushed through the ``binarypix1`` estimator, its predicted
    mask is multiplied by the stored per-image value from
    ``binary13b/val_bin_pred.npy`` and compared with the label from
    ``get_salt_labels``.  Counts are accumulated per threshold (0.5 to 0.95
    in steps of 0.05), precision is ``tp / (tp + fp + fn)``, and the mean
    over the thresholds is printed as the average precision.

    Prediction runs in two passes: a head made only of full batches, then a
    final batch taken from the end of the set.  Images the head already
    scored are skipped in the tail so that no image is counted twice.  The
    split is derived from the number of files and ``batch_size`` rather than
    being hard-coded for one specific set size.

    Args:
        outpath: Not used by this function; kept so existing callers that
            pass it keep working.
    """
    valX = get_salt_images(folder='myval')
    valY = get_salt_labels(folder='myval')
    fileids = sb.clean_glob(
        glob.glob(
            '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/myval/images/*.png'
        ))

    model_dir = '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binarypix1'
    model = tf.estimator.Estimator(model_fn, model_dir=model_dir)

    # The file holds a pickled dict (it is indexed by file id below), which
    # newer NumPy refuses to load unless allow_pickle=True is given.
    # SECURITY: unpickling executes arbitrary code - only load trusted files.
    id_to_pred = np.load(
        '/scratch0/ilya/locDoc/data/kaggle-seismic-dataset/models/binary13b/val_bin_pred.npy',
        allow_pickle=True,
    ).tolist()
    threshes = np.array(
        [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95])
    tps = np.zeros(threshes.shape[0])
    fps = np.zeros(threshes.shape[0])
    fns = np.zeros(threshes.shape[0])

    def tp_fp_fn_calc(gt, predicted, tps, fps, fns):
        # Update the per-threshold counters in place for one image.
        component1 = np.array(predicted).astype(bool)
        component2 = gt.astype(bool)

        overlap = component1 * component2  # Logical AND
        union = component1 + component2  # Logical OR

        # The small epsilon avoids 0/0 when both masks are empty.
        iou = overlap.sum() / float(union.sum() + 1e-5)

        mask_present_gt = np.any(gt > 0)
        # NOTE(review): a non-empty prediction whose IoU is below a threshold
        # is counted as neither a false positive nor a false negative at that
        # threshold - confirm this is the intended metric.
        if mask_present_gt:
            if np.all(predicted < 1):
                fns += np.ones(threshes.shape[0])
            else:
                tps += (threshes < iou).astype(int)
        else:
            if np.any(predicted > 0):
                fps += np.ones(threshes.shape[0])

    setsize = len(fileids)
    # Largest multiple of batch_size that fits in the set.
    headsz = (setsize // batch_size) * batch_size
    # The tail is the last batch_size images (or the whole set if smaller).
    tail_start = max(setsize - batch_size, 0)

    def _score_range(start, stop, first_new, show_progress=False):
        # Predict images [start, stop); score only indices >= first_new.
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'images': valX[start:stop, :, :, :]},
            batch_size=batch_size,
            shuffle=False)
        gen = model.predict(input_fn)
        if show_progress:
            gen = tqdm(gen, total=stop - start)
        for offset, prediction in enumerate(gen):
            idx = start + offset
            if idx < first_new:
                # Already scored by the head pass; skip to avoid counting
                # the same image twice.
                continue
            fileid, _ = os.path.splitext(fileids[idx])
            p_label = np.array(prediction['mask']) * id_to_pred[fileid]
            tp_fp_fn_calc(valY[idx, :], p_label, tps, fps, fns)

    if headsz > 0:
        _score_range(0, headsz, 0, show_progress=True)

    # now get the tail
    if headsz < setsize:
        _score_range(tail_start, setsize, headsz)

    # NOTE: yields NaN for a threshold whose three counters are all zero.
    precisions = tps / (tps + fps + fns)
    avg_precision = precisions.sum() / threshes.shape[0]

    print('%d masks in dataset' % np.sum(valY.sum(axis=1) > 0))
    for idx, thresh in enumerate(threshes):
        print('tp at %f: %f' % (thresh, tps[idx]))
        print('fp at %f: %f' % (thresh, fps[idx]))
        print('fn at %f: %f' % (thresh, fns[idx]))
        print('precisions at %f: %f' % (thresh, precisions[idx]))
    print('avg precision: %f' % (avg_precision))