Beispiel #1
0
def threshold(hfile, resultsfile):
    """Apply each non-adaptive scikit-image threshold filter to every image.

    Reads images from hdf5 HFILE via data.load_dataset and stores each binary
    prediction in RESULTSFILE under /<key>/<method>.
    """
    images, labels, keys = data.load_dataset(hfile)

    # Collect candidate filter names once -- this list is invariant across
    # images, so there is no reason to rebuild it inside the per-image loop.
    # Adaptive methods are skipped for now (they need extra parameters).
    methods = [f for f in filters.__all__
               if 'threshold' in f and 'adaptive' not in f]

    for image, label, key in zip(images, labels, keys):
        for method in methods:
            # Bind to a distinct name: the original rebound `threshold`,
            # shadowing this very function.
            threshold_fn = getattr(filters, method)

            # Binarize: pixels above the computed threshold become 1.
            pred = (image > threshold_fn(image)).astype(int)

            # Append to an existing results file, or create a new one.
            mode = 'r+' if os.path.isfile(resultsfile) else 'w'

            with h5py.File(resultsfile, mode) as f:
                # require_group gets-or-creates the per-image group,
                # replacing the try/except KeyError boilerplate.
                g = f.require_group(key)
                g[method] = pred
Beispiel #2
0
def crossval(hfile, resultsfile, crop):
    """Run LOOCV with reduced hypercolumn features and SGD with linear SVM loss.
    Read data from hdf5 HFILE with input images and annotations.

    Predictions are written to RESULTSFILE under /<key>/validation (held-out
    image) and /<key>/train... (training images, tagged by CV iteration).
    """
    cv = LeaveOneOut()
    images, labels, keys = data.load_dataset(hfile, cropbar=crop)

    for train_idx, val_idx in cv.split(images):
        print('CV iteration {}'.format(val_idx[0]))

        hc = hypercolumn.ReducedHyperColumn()
        clf = tensorsgd.TensorSGD()

        # Fit the feature reducer and classifier on the training fold.
        X_train = hc.fit(images[train_idx], verbose=True)
        clf.fit(X_train, labels[train_idx])

        train_pred = clf.predict(X_train)

        X_val = hc.predict(images[val_idx], verbose=True)
        val_pred = clf.predict(X_val)

        # Append to an existing results file, or create a new one.
        mode = 'r+' if os.path.isfile(resultsfile) else 'w'

        with h5py.File(resultsfile, mode) as f:
            # save validation predictions
            for pred, key in zip(val_pred, keys[val_idx]):
                # require_group gets-or-creates the group, replacing the
                # duplicated try/except KeyError blocks.
                g = f.require_group(key)
                g['validation'] = pred

            # save training predictions
            for pred, key in zip(train_pred, keys[train_idx]):
                g = f.require_group(key)
                # NOTE(review): val_idx is an index *array*, so this key is
                # e.g. 'train[0]' -- probably val_idx[0] was intended; kept
                # as-is to preserve existing result-file layout. Confirm.
                g['train{}'.format(val_idx)] = pred
Beispiel #3
0
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps, run_id, bottleneck):
    """Train a hypercolumn + PixelNet segmentation model for one CV run.

    Loads images/labels from data/<dataset>.h5, trains with a frozen VGG16
    base under a focal cross-entropy loss, then fine-tunes the whole network
    at a lower learning rate. Logs and weight checkpoints are written to
    models/crossval/<dataset>/run<run_id>/.

    Raises:
        ValueError: if `dataset` is not a recognized dataset name.
    """
    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))

    validation_set_path = os.path.join(datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    # Dataset-specific settings: class count and micrograph scale-bar crop.
    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None
    else:
        # Previously an unknown dataset fell through to a NameError on
        # `nclasses`; fail explicitly instead.
        raise ValueError('unknown dataset: {}'.format(dataset))

    model_dir = os.path.join('models', 'crossval', dataset, 'run{:02d}'.format(run_id))
    os.makedirs(model_dir, exist_ok=True)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)

    images = data.preprocess_images(images, equalize=True, tf=False)

    # add a channel axis (of size 1 since these are grayscale inputs),
    # then replicate to 3 channels for the VGG16 preprocessing pipeline
    images = images[:, :, :, np.newaxis]
    images = np.repeat(images, 3, axis=-1)
    images = applications.vgg16.preprocess_input(images)

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)

    X_train, y_train, names_train = images[train_idx], labels[train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    # sqrt-damped inverse-frequency class weights, L1-normalized
    inv_freq = y_train.size / np.bincount(y_train.flat)
    class_weights = np.squeeze(normalize(np.sqrt(inv_freq), order=1))

    # don't use alpha-balanced version of focal loss...
    # class_weights = None
    focus_param = 2.0

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape

    steps_per_epoch = int(ntrain / batchsize)
    print('steps_per_epoch: {}'.format(steps_per_epoch))

    # NOTE: the original clobbered max_epochs and validation_steps here with
    # hard-coded values (25 and 10), silently ignoring the function arguments;
    # the caller-supplied values are now honored.

    base_model = vgg.fully_conv_model()

    # hypercolumn taps: one activation per VGG block plus the fc2 head
    layernames = [
        'block1_conv2_relu', 'block2_conv2_relu', 'block3_conv3_relu', 'block4_conv3_relu', 'block5_conv3_relu', 'fc2_relu'
    ]

    hc = hypercolumn.build_model(base_model, layernames, batchnorm=True, mode='sparse', relu=False)
    model = pixelnet.build_model(hc, nclasses=nclasses, width=1024, mode='sparse', dropout_rate=0.1, l2_reg=0.0)

    # Phase 1: train only the head -- freeze the VGG base.
    opt = adamw.AdamW(lr=1e-3, weight_decay=5e-4, amsgrad=True)
    for layer in base_model.layers:
        layer.trainable = False

    # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(
            model_dir,
            'weights.{epoch:03d}-{val_loss:.4f}.hdf5'
        ),
        save_best_only=False,
        save_weights_only=True,
        period=25
    )

    # Augmented stream of random pixel samples from the training images.
    training_data = px_utils.random_pixel_samples(
        X_train, y_train, nclasses=nclasses,
        replace_samples=False, horizontal_flip=True, vertical_flip=True,
        rotation_range=360, zoom_range=0.5, intensity_shift=0.05
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )

    # Phase 2: fine-tune the whole network at a lower learning rate.
    for layer in base_model.layers:
        layer.trainable = True

    opt = adamw.AdamW(lr=1e-5, weight_decay=5e-4, amsgrad=True)
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'finetune-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(
            model_dir,
            'weights-finetune.{epoch:03d}-{val_loss:.4f}.hdf5'
        ),
        save_best_only=False,
        save_weights_only=True,
        period=25
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )
Beispiel #4
0
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping

import sys
# make the current working directory and the sibling uhcs-segment checkout
# importable (NOTE(review): `os` is used here but not imported in this
# visible chunk -- presumably imported earlier in the file; confirm)
sys.path.append(os.getcwd())
sys.path.append('../uhcs-segment')

from pixelnet.pixelnet import pixelnet_model
from pixelnet.utils import random_pixel_samples
from uhcsseg import data

# suppress some of the noisier tensorflow log messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

if __name__ == '__main__':
    # Load the UHCS micrographs (cropping the 38-pixel scale bar).
    datafile = '../uhcs-segment/data/uhcs.h5'
    images, labels, names = data.load_dataset(datafile, cropbar=38)
    print(images.shape)
    images = data.preprocess_images(images)

    # add channel axis (grayscale -> single-channel 4D batch)
    images = images[:, :, :, np.newaxis]
    N, h, w, _ = images.shape

    batchsize = 4
    ntrain = 20
    npix = 2048
    nclasses = 4

    # training pixels per epoch / pixels per batch
    # NOTE(review): true division yields a float; downstream use likely
    # wants an int -- confirm before passing to a fit loop.
    steps_per_epoch = ntrain * h * w / (batchsize * npix)
    # steps_per_epoch = 100
    # bug fix: the original print omitted the computed value entirely
    print('steps_per_epoch: {}'.format(steps_per_epoch))
Beispiel #5
0
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps,
                   run_id, bottleneck):
    """Train a PixelNet segmentation model for one cross-validation run.

    Loads images/labels from data/<dataset>.h5, trains with augmented random
    pixel sampling and early stopping, then reloads the latest checkpoint and
    reports accuracy and IU metrics on both training and validation folds.
    Logs and checkpoints go to models/crossval/<dataset>/run<run_id>/.

    Raises:
        ValueError: if `dataset` is not a recognized dataset name.
    """
    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))

    validation_set_path = os.path.join(
        datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    # Dataset-specific settings: class count and micrograph scale-bar crop.
    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None
    else:
        # Previously an unknown dataset fell through to a NameError on
        # `nclasses`; fail explicitly instead.
        raise ValueError('unknown dataset: {}'.format(dataset))

    model_dir = os.path.join('models', 'crossval', dataset,
                             'run{:02d}'.format(run_id))
    os.makedirs(model_dir, exist_ok=True)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)
    images = data.preprocess_images(images)

    # add a channel axis (of size 1 since these are grayscale inputs)
    images = images[:, :, :, np.newaxis]

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)

    X_train, y_train, names_train = images[train_idx], labels[
        train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    # inverse-frequency class weights for the keras class_weight argument
    class_weight = np.array([
        y_train.size / np.sum(y_train == label) for label in np.unique(y_train)
    ])
    print(class_weight)

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape

    # NOTE(review): hard-coded short epoch -- the commented alternatives
    # suggest this was left from debugging; the `npix` argument is unused.
    steps_per_epoch = 5
    # steps_per_epoch = 100
    # steps_per_epoch = ntrain * h * w / (batchsize*npix)
    print('steps_per_epoch: {}'.format(steps_per_epoch))

    opt = optimizers.Adam(lr=1e-3)
    model = pixelnet_model(nclasses=nclasses, bottleneck=bottleneck)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['acc'])

    csv_logger = CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = ModelCheckpoint(os.path.join(
        model_dir, 'weights.{epoch:03d}-{val_loss:.4f}.hdf5'),
                                 save_best_only=False,
                                 save_weights_only=True,
                                 period=10)
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)

    # Augmented stream of random pixel samples from the training images.
    training_data = random_pixel_samples(X_train,
                                         y_train,
                                         nclasses=nclasses,
                                         replace_samples=False,
                                         rotation_range=360,
                                         zoom_range=0.5,
                                         horizontal_flip=True,
                                         vertical_flip=True,
                                         intensity_shift=0.05)

    # note: keras/engine/training.py:L132 --> is not None
    f = model.fit_generator(training_data,
                            steps_per_epoch,
                            epochs=max_epochs,
                            callbacks=[csv_logger, checkpoint],
                            validation_data=random_pixel_samples(
                                X_val,
                                y_val,
                                nclasses=nclasses,
                                replace_samples=False),
                            validation_steps=validation_steps,
                            class_weight=class_weight)

    # load best model and evaluate
    # sort by epoch -- use with ModelCheckpoint(..., save_best_only=True)
    # file path format should be 'weights.{epoch}...'
    weights_files = glob.glob(os.path.join(model_dir, 'weights*.hdf5'))
    best_weights = sorted(weights_files)[-1]

    # re-instantiate model because of keras requirement that tensors
    # have the same shape at train and test time
    model = pixelnet_model(nclasses=nclasses,
                           inference=True,
                           bottleneck=bottleneck)
    model.load_weights(best_weights)

    # Evaluate on both folds; inference runs one image at a time because of
    # the dense feature upsampling memory footprint.
    for X, y in [(X_train, y_train), (X_val, y_val)]:
        p_validate = model.predict(X, batch_size=1)
        pred = np.argmax(p_validate, axis=-1)

        # measure accuracy over the whole fold
        print('accuracy: {}'.format(perf.accuracy(pred, y)))
        print('IU_avg: {}'.format(perf.IU_avg(pred, y)))

        print('IU')
        for c in range(nclasses):
            iu = perf.IU(pred, y, c)
            print('IU({}): {}'.format(c, iu))