Beispiel #1
0
def load_fc_weights_from_vgg16bn(model):
    """Copy the weights of Vgg16BN's post-convolution layers into ``model``.

    A fresh ``Vgg16BN`` is built, its model is split with
    ``utils.split_at(..., Convolution2D)``, and the second part of that
    split is handed to ``utils.copy_weights`` together with ``model.layers``.

    NOTE(review): presumably ``model.layers`` has to line up layer-for-layer
    with the dense part of Vgg16BN -- confirm against ``utils.copy_weights``.
    """
    # imagenet_batchnorm.ipynb describes how the Vgg16BN weights can be
    # generated from the standard Vgg16 weights.
    from vgg16bn import Vgg16BN

    pretrained_model = Vgg16BN().model
    _conv_part, dense_part = utils.split_at(pretrained_model, Convolution2D)
    utils.copy_weights(dense_part, model.layers)
def hybrid_ensemble(number=1):
    """Train ``number`` dense heads on shared Vgg16BN convolutional features.

    Workflow, as implemented below:

    1. Call ``reset_valid()`` / ``set_valid(number=3)`` (presumably these
       rebuild the validation split on disk -- confirm against their
       definitions).
    2. Fit a fresh ``Vgg16BN`` for one epoch on augmented training batches.
    3. Keep every layer up to and including the last ``Convolution2D`` as a
       separate ``Sequential`` model and precompute its outputs for the
       training and validation batches.
    4. For each ensemble member, train a new head built by
       ``create_FCbn_layers(p=0.3)`` on those features for one epoch and
       predict on the test set.

    Args:
        number: How many heads to train.

    Returns:
        A list with one test-set prediction array per trained head.
    """
    batch_size = 32
    target_size = (224, 224)

    reset_valid()
    set_valid(number=3)

    # NOTE(review): `width_zoom_range` is not among the documented
    # ImageDataGenerator arguments -- confirm the installed Keras accepts it.
    gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
            height_shift_range=0.05, width_zoom_range=0.1, zoom_range=0.1,
            shear_range=0.1, channel_shift_range=10)
    trn_batches = gen.flow_from_directory(train_path, target_size=target_size,
                batch_size=batch_size, shuffle=True, class_mode='categorical')
    val_batches = gen.flow_from_directory(valid_path, target_size=target_size,
                batch_size=batch_size, shuffle=False, class_mode='categorical')

    VGGbn = Vgg16BN()
    VGGbn.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    VGGbn.fit_generator(trn_batches, trn_batches.n, nb_epoch=1,
                    validation_data=val_batches, nb_val_samples=val_batches.n)

    # Everything up to and including the last Convolution2D layer becomes
    # the shared feature extractor.
    conv_indices = [index for index, layer in enumerate(VGGbn.model.layers)
                    if type(layer) is Convolution2D]
    last_conv_idx = conv_indices[-1]
    Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]
    Conv_model = Sequential(Conv_layers)

    # Unshuffled pass over the training data so that the precomputed
    # features stay in the same order as the generator's samples.
    trn_batches = gen.flow_from_directory(train_path, target_size=target_size,
                batch_size=batch_size, shuffle=False, class_mode='categorical')

    conv_features = Conv_model.predict_generator(trn_batches,
                                                        trn_batches.nb_sample)
    conv_val_feat = Conv_model.predict_generator(val_batches,
                                                        val_batches.nb_sample)
    predarray = []
    # range() instead of xrange() keeps the loop valid on Python 2 and 3.
    for _ in range(number):
        reset_valid()
        set_valid(number=3)

        FC_model = Sequential(create_FCbn_layers(p=0.3))
        FC_model.compile(Adam(), loss='categorical_crossentropy',
                                                        metrics=['accuracy'])
        FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size,
                nb_epoch=1, validation_data=(conv_val_feat, val_batches.labels))

        gen_t = image.ImageDataGenerator()
        # flow_from_directory resizes to (256, 256) unless told otherwise;
        # pass target_size so test images match the size used for the
        # training and validation batches above.
        tst_batches = gen_t.flow_from_directory(test_path,
                        target_size=target_size, batch_size=batch_size,
                        shuffle=False, class_mode=None)
        conv_tst_feat = Conv_model.predict_generator(tst_batches,
                                                        tst_batches.nb_sample)
        preds = FC_model.predict(conv_tst_feat, batch_size=batch_size*2)
        predarray.append(preds)

    # Hand the collected predictions back; they were previously discarded.
    return predarray
Beispiel #3
0
def main():
    """Fine-tune Vgg16BN on the stored image arrays and save its weights.

    The function builds a ``Vgg16BN``, adapts it to ``settings.NUM_CLASSES``
    outputs through ``ft()``, compiles it with ``Adam(1e-3)`` and categorical
    cross-entropy, fits it on the training array while validating on the
    validation array, and finally writes the weights to
    ``settings.CHECKPOINTFILE_2`` inside ``path``.
    """
    path, _datadir = path_setup()

    # Per the original author's note, ft() swaps the final layer for one
    # with the requested number of outputs and freezes the remaining layers.
    from vgg16bn import Vgg16BN
    network = Vgg16BN()
    network.ft(settings.NUM_CLASSES)
    model = network.model

    # Image arrays; the test array is loaded as before although nothing
    # below consumes it.
    train_images = ut.load_array(path, settings.TRAIN_FOLDER_NAME)
    valid_images = ut.load_array(path, settings.VALIDATE_FOLDER_NAME)
    _test_images = ut.load_array(path, settings.TEST_FOLDER_NAME)

    from keras import optimizers
    adam = optimizers.Adam(1e-3)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Only the one-hot label arrays are needed from get_classes().
    (_, _, valid_labels, train_labels, _, _, _) = ut.get_classes(path)

    # Move axis 3 to position 1 (presumably channels-last -> channels-first,
    # which is what the model's input expects); see
    # https://stackoverflow.com/questions/41771965/error-when-checking-model-input-expected-convolution2d-input-1-to-have-shape-n
    axis_order = (0, 3, 1, 2)
    train_images = train_images.transpose(*axis_order)
    valid_images = valid_images.transpose(*axis_order)

    model.fit(train_images,
              train_labels,
              batch_size=settings.BATCH_SIZE,
              nb_epoch=settings.NUMBER_EPOCHS,
              validation_data=(valid_images, valid_labels))

    weights_file = os.path.join(path, settings.CHECKPOINTFILE_2)
    print("saving bestweights to " + weights_file)
    model.save_weights(weights_file)
Beispiel #4
0
def load_dense_weights_from_vgg16bn(model):
    """Copy the dense-layer weights of a fresh Vgg16BN into ``model``.

    The Vgg16BN model is split with ``split_model``; the second part of the
    split is passed to ``copy_weights`` together with ``model.layers``.
    """
    from vgg16bn import Vgg16BN

    pretrained_model = Vgg16BN().model
    _conv_part, dense_part = split_model(pretrained_model)
    copy_weights(dense_part, model.layers)
Beispiel #5
0
# Persist the test array (`test` is created earlier, outside this excerpt).
save_array(path+'results/test_640.dat', test)


# In[6]:


# Reload the training and validation arrays saved under results/.
trn = load_array(path+'results/trn_640.dat')
val = load_array(path+'results/val_640.dat')


# We can now create our VGG model - we'll need to tell it we're not using the
# normal 224x224 images, which also means it won't include the fully
# connected layers (since they don't make sense for non-default sizes). We
# will also remove the last max pooling layer, since we don't want to throw
# away information yet.

# In[74]:


vgg640 = Vgg16BN((360, 640)).model
vgg640.pop()
# Notebook leftover: when run as a script this expression only evaluates the
# two attributes and discards the result.
vgg640.input_shape, vgg640.output_shape
vgg640.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])


# We can now pre-compute the output of the convolutional part of VGG.

# In[75]:


# One forward pass per dataset; the outputs are kept as module-level arrays.
conv_val_feat = vgg640.predict(val, batch_size=32, verbose=1)
conv_trn_feat = vgg640.predict(trn, batch_size=32, verbose=1)


# In[76]:
Beispiel #6
0
from vgg16bn import Vgg16BN

# NOTE(review): this instance is not used anywhere in the visible script;
# a second Vgg16BN is built below as `vgg16`. Confirm before removing.
vgg = Vgg16BN()
from utils import *

#Define path
path = "data/"
#path = "data/sample/"
results_path = path + 'results/'

input_shape = (224, 224)

#Define conv model
# ft(8) adapts the network to 8 outputs; split_at() then separates it at the
# Convolution2D layers, and only the first part is kept as `conv_model`.
from vgg16bn import Vgg16BN
vgg16 = Vgg16BN()
vgg16.ft(8)
conv_layers, fc_layers = split_at(vgg16.model, Convolution2D)
conv_model = Sequential(conv_layers)

# Directory iterators without augmentation; the test iterator is unshuffled.
batch_size = 64
gen = image.ImageDataGenerator()
train_datagen = gen.flow_from_directory(path + 'train/',
                                        target_size=input_shape,
                                        batch_size=batch_size)
test_datagen = gen.flow_from_directory(path + 'test/',
                                       target_size=input_shape,
                                       batch_size=batch_size,
                                       shuffle=False)

s = FeatureSaver(train_datagen=train_datagen, test_datagen=test_datagen)
# Open the HDF5 file in a `with` block so it is flushed and closed even if
# one of the save calls raises; previously the handle was never closed.
with h5py.File(results_path + 'vgg_conv.h5', 'w') as f:
    s.save_train(conv_model, f, num_epochs=1)
    s.save_test(conv_model, f)
            
            img = crop(img, img_row)
            img = rotate(img, img_row)
            img = img.resize((IMG_WIDTH, IMG_HEIGHT))
            yield np.expand_dims(np.moveaxis(np.asarray(img), 2, 0), 0)

def crop(img, img_row):
    """Crop ``img`` to the box described by ``img_row``.

    The box handed to ``img.crop`` is ``(xl, yu, xr, yd)``, read from
    ``img_row`` in that order; whatever ``img.crop`` returns is returned.
    """
    box = tuple(img_row[key] for key in ('xl', 'yu', 'xr', 'yd'))
    return img.crop(box)

def rotate(img, img_row):
    """Return ``img`` oriented according to the box in ``img_row``.

    When the box is strictly wider (``xr - xl``) than it is tall
    (``yd - yu``) the image is returned untouched; otherwise the result of
    ``img.transpose(Image.ROTATE_90)`` is returned.
    """
    box_width = img_row['xr'] - img_row['xl']
    box_height = img_row['yd'] - img_row['yu']
    if box_width > box_height:
        return img
    return img.transpose(Image.ROTATE_90)


# ## Neural net

# In[4]:

# Vgg16BN sized for IMG_HEIGHT x IMG_WIDTH inputs, with its last layer
# popped off, used below purely as a feature extractor.
vgg640 = Vgg16BN((IMG_HEIGHT, IMG_WIDTH)).model
vgg640.pop()
vgg640.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])


#compute features
# One sample is requested per entry in DATA_PATH. The closing parenthesis
# of this call was missing, which made the script a syntax error.
vgg_features = vgg640.predict_generator(
    crop_rotate_scale_gen(), val_samples=len(os.listdir(DATA_PATH)))
# Move axis 1 to position 3 (presumably channels-first -> channels-last;
# confirm against the consumer of the saved file).
vgg_features = np.moveaxis(vgg_features, 1, 3)

#serialize vgg features
np.save('{}/vgg16_cropped_block5.npy'.format(FEATURE_PATH), vgg_features)

target_size = (224, 224)

# Train/validation batch generators sharing one augmentation setup.
# NOTE(review): `width_zoom_range` is not among the documented
# ImageDataGenerator arguments -- confirm the installed Keras accepts it.
gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
        height_shift_range=0.05, width_zoom_range=0.1, zoom_range=0.1,
        shear_range=0.1, channel_shift_range=10)
# Open question from the original author: does it matter that
# dim_ordering='tf' is not set here?

trn_batches = gen.flow_from_directory(train_path, target_size=target_size,
                batch_size=batch_size, shuffle=True, class_mode='categorical')
val_batches = gen.flow_from_directory(valid_path, target_size=target_size,
                batch_size=batch_size, shuffle=False, class_mode='categorical')

# Instantiate Vgg16BN (the original comment says this also loads its
# weights) and compile it.
VGGbn = Vgg16BN()
VGGbn.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# (Maybe) train the whole model at a low learning rate to adapt the conv
# layers a bit.
VGGbn.fit_generator(trn_batches, trn_batches.n, nb_epoch=1,
                    validation_data=val_batches, nb_val_samples=val_batches.n)
# TODO: find out how many epochs at what learning rate are needed until
# this is roughly optimal.

# Index of the last Convolution2D layer; everything up to and including it
# forms the convolutional part. (The comprehension previously contained a
# stray comma after `index`, which was a syntax error.)
last_conv_idx = [index for index, layer in enumerate(VGGbn.model.layers)
                 if type(layer) is Convolution2D][-1]
Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]

# Create a new ConvNet from the Vgg16BN conv layers.
Conv_model = Sequential(Conv_layers)
def main():
    """Load the fine-tuned Vgg16BN weights and run the conv-feature experiments.

    Sequence, as visible in this function:

    1. Build ``Vgg16BN``, call ``ft(settings.NUM_CLASSES)`` and load the
       weights stored at ``settings.CHECKPOINTFILE_2``; print a hint and
       return early if loading raises ``OSError``.
    2. Load the train/validation/test arrays and transpose each with axes
       ``(0, 3, 1, 2)``.
    3. Split the model at ``Conv2D`` and plot the convolutional part.
    4. Obtain convolutional features for all three arrays through
       ``load_CNN_Codes``.
    5. Hand over to the helpers that train a fully connected top on those
       features, re-attach it to the conv base, and build a model that also
       takes the image size as input.
    """
    path, _datadir = path_setup()

    # Per the original author's note, ft() swaps the final layer for one
    # with the requested number of outputs and freezes the remaining layers.
    from vgg16bn import Vgg16BN
    network = Vgg16BN()
    network.ft(settings.NUM_CLASSES)
    model = network.model

    # Restore the weights produced by train_2_VGG_BN.py.
    weights_file = os.path.join(path, settings.CHECKPOINTFILE_2)
    print("loading bestweights from " + weights_file)
    try:
        model.load_weights(weights_file)
    except OSError:
        print('WHOAH!!!! Run train_2_VGG_BN.py in order to create ' +
              settings.CHECKPOINTFILE_2)
        return

    # Image arrays for the three datasets.
    train_images = ut.load_array(path, settings.TRAIN_FOLDER_NAME)
    valid_images = ut.load_array(path, settings.VALIDATE_FOLDER_NAME)
    test_images = ut.load_array(path, settings.TEST_FOLDER_NAME)

    # Move axis 3 to position 1 (presumably channels-last -> channels-first);
    # see https://stackoverflow.com/questions/41771965/error-when-checking-model-input-expected-convolution2d-input-1-to-have-shape-n
    axis_order = (0, 3, 1, 2)
    train_images, valid_images, test_images = (
        images.transpose(*axis_order)
        for images in (train_images, valid_images, test_images))

    # Only labels and train/validation file names are used further down.
    (_, _, valid_labels, train_labels, valid_filenames,
     train_filenames, _) = ut.get_classes(path)

    # Keep the convolutional part on its own so its outputs can be computed
    # once and reused; according to the original notes, most of the
    # back-propagation cost sits in the conv layers, so training only a
    # dense top on these outputs is much faster.
    conv_layers, _fc_layers = ut.split_at(model, Conv2D)
    conv_model = Sequential(conv_layers)

    # Write a diagram of the conv model.
    plot_model(conv_model,
               to_file=settings.CONV_LAYERS_MODEL_PNG,
               show_shapes=True)

    # Convolutional features ("CNN codes") for each dataset.
    conv_trn_feat = load_CNN_Codes(conv_model, train_images, path,
                                   settings.CONV_TRN_FEAT)
    conv_val_feat = load_CNN_Codes(conv_model, valid_images, path,
                                   settings.CONV_VAL_FEAT)
    conv_tst_feat = load_CNN_Codes(conv_model, test_images, path,
                                   settings.CONV_TST_FEAT)

    print("Feature shape:" + str(conv_val_feat.shape))

    # Create and train a fully connected top layer from the CNN codes.
    fc_top_layer_model = create_and_train_FC_top_layer_using_CNN_Codes(
        conv_layers, conv_trn_feat, conv_val_feat, path, train_labels,
        valid_labels)

    # Attach that top to the conv base and train the combined network once
    # more (the original note says the conv layers are not trainable).
    tape_FC_Top_layer_onto_conv_base_and_train(conv_model, fc_top_layer_model,
                                               train_images, train_labels,
                                               valid_images, valid_labels)

    # Author's hypothesis: there are thousands of images but fewer than ten
    # image sizes. If each size corresponds to one boat, and each boat
    # targets one kind of fish, the image size might carry class
    # information. (Probably true, but the network has likely picked this
    # up already, so no accuracy boost is expected.)
    create_new_model_that_includes_size_of_image_as_input(
        conv_trn_feat, conv_val_feat, model, path, train_filenames,
        train_labels, valid_filenames, valid_labels)
# Script: precompute Vgg16BN convolutional features for 540x960 images and
# store them under results/.
import os
# Relative path: this only works when the script is launched from the
# directory that contains Desktop/.
os.chdir('Desktop/input')
from utils import *
from vgg16bn import Vgg16BN

path = ""
batch_size = 64
(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames,
 test_filenames) = get_classes(path)

# Load the three image sets at 540x960.
trn = get_data(path + 'train', (540, 960))
val = get_data(path + 'valid', (540, 960))
test = get_data(path + 'test', (540, 960))
# Vgg16BN sized for 540x960 inputs, with its last layer popped off.
vgg640 = Vgg16BN((540, 960)).model
vgg640.pop()
# Notebook leftover: when run as a script this expression only evaluates the
# two attributes and discards the result.
vgg640.input_shape, vgg640.output_shape
vgg640.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])
## Run each image set through the truncated network.
conv_val_feat = vgg640.predict(val, batch_size=32, verbose=1)
conv_trn_feat = vgg640.predict(trn, batch_size=32, verbose=1)
conv_test_feat = vgg640.predict(test, batch_size=32, verbose=1)
## Persist the features.
save_array(path + 'results/conv_val_960.dat', conv_val_feat)
save_array(path + 'results/conv_trn_960.dat', conv_trn_feat)
save_array(path + 'results/conv_test_960.dat', conv_test_feat)
## Read the features straight back (presumably a notebook-style checkpoint
## so that later runs can start from here -- confirm).
conv_val_feat = load_array(path + 'results/conv_val_960.dat')
conv_trn_feat = load_array(path + 'results/conv_trn_960.dat')
conv_test_feat = load_array(path + 'results/conv_test_960.dat')

# Keep only the first part of the split at Convolution2D.
conv_layers, _ = split_at(vgg640, Convolution2D)
Beispiel #12
0
def vgg(output_size, dropout=0.5):
    """Build a 224x224 Vgg16BN model ending in a fresh softmax layer.

    Args:
        output_size: Number of units in the new final ``Dense`` layer.
        dropout: Forwarded to ``Vgg16BN`` as its ``dropout`` argument.

    Returns:
        The Vgg16BN model with its last layer popped and replaced by
        ``Dense(output_size, activation='softmax')``.
    """
    # Original author's note: this variant has batch normalization added.
    network = Vgg16BN((224, 224), True, dropout=dropout).model
    network.pop()
    classifier = Dense(output_size, activation='softmax')
    network.add(classifier)
    return network
Beispiel #13
0
# val = get_batches(path+'valid', shuffle=False, batch_size=batch_size, class_mode=None, target_size=(360,640))

# Make sure a 'results' directory exists; its location is the part of
# `path` that precedes the first 'stage1', followed by 'results'.
if not os.path.exists(path.split('stage1')[0] + 'results'):
    os.makedirs(path.split('stage1')[0] + 'results')
import glob
import pandas as pd

# save_array(path+'results/trn_640.dat', trn)
# save_array(path+'results/val_640.dat', val)
# save_array(path+'results/test_640.dat', test)
# Number of .dcm files exactly one directory level below `path`.
nb_train= len(glob.glob(path+'*/*.dcm'))
# nb_train_samples = 500
# nb_val= len(glob.glob(path+'valid/*/*.jpg'))
# nb_test= len(glob.glob('input/test/*/*.jpg'))
import numpy as np
# Vgg16BN sized for 512x512 inputs, with its last layer popped off.
vgg512 = Vgg16BN((512, 512), use_preprocess=True).model
vgg512.pop()
print(vgg512.input_shape, vgg512.output_shape)
vgg512.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])
import time

train_csv_table = pd.read_csv('../input/stage1_labels.csv')
t0=time.time()

# os.walk() yields `path` itself first, so [1:] drops it and the count
# covers every directory found beneath `path`.
print(len([x[0] for x in os.walk(path)][1:]))
print(len(glob.glob(path+'*/*.dcm')))
# Last '/'-separated component of each directory below `path`
# (assumes '/' as the path separator; presumably one directory per patient).
patient_ids = [x[0].split('/')[-1] for x in os.walk(path)][1:]
for an_index,an_id in enumerate(patient_ids):
    # Every 100 patients: print the seconds elapsed since the previous
    # report and restart the timer.
    if an_index %100==0:
        print(time.time()-t0)
        t0 = time.time()