Esempio n. 1
0
    def apply(inputs):
        """Build the fused MBConv block on top of `inputs`.

        All configuration (`input_filters`, `output_filters`, `expand_ratio`,
        `kernel_size`, `strides`, `se_ratio`, `survival_probability`, `name`,
        `bn_axis`, ...) is read from the enclosing scope.

        Args:
            inputs: tensor the block is applied to.

        Returns:
            The block's output tensor. `inputs` is added back onto it when
            `strides == 1` and the input and output filter counts are equal.
        """
        has_expansion = expand_ratio != 1
        filters = input_filters * expand_ratio

        # Expansion phase: one full convolution that carries the block's
        # kernel size and stride. Skipped entirely when expand_ratio is 1.
        x = inputs
        if has_expansion:
            expand_conv = layers.Conv2D(
                filters,
                kernel_size=kernel_size,
                strides=strides,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                data_format="channels_last",
                padding="same",
                use_bias=False,
                name=name + "expand_conv",
            )
            x = expand_conv(x)
            x = layers.BatchNormalization(
                axis=bn_axis,
                momentum=bn_momentum,
                name=name + "expand_bn",
            )(x)
            x = layers.Activation(
                activation=activation,
                name=name + "expand_activation",
            )(x)

        # Squeeze-and-excite: only built for a ratio in (0, 1].
        if 0 < se_ratio <= 1:
            # The bottleneck width is derived from the block's *input* filters.
            squeezed_filters = max(1, int(input_filters * se_ratio))
            # Put the channel dimension where bn_axis says channels live.
            se_shape = (filters, 1, 1) if bn_axis == 1 else (1, 1, filters)

            gate = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            gate = layers.Reshape(se_shape, name=name + "se_reshape")(gate)
            gate = layers.Conv2D(
                squeezed_filters,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(gate)
            gate = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(gate)

            x = layers.multiply([x, gate], name=name + "se_excite")

        # Output phase: when there was no expansion convolution, the
        # projection convolution takes over the block's kernel size and stride.
        project_kernel = 1 if has_expansion else kernel_size
        project_strides = 1 if has_expansion else strides
        x = layers.Conv2D(
            output_filters,
            kernel_size=project_kernel,
            strides=project_strides,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=bn_axis,
            momentum=bn_momentum,
            name=name + "project_bn",
        )(x)
        if not has_expansion:
            x = layers.Activation(
                activation=activation,
                name=name + "project_activation",
            )(x)

        # Residual connection, only possible when the shapes line up.
        can_add_residual = strides == 1 and input_filters == output_filters
        if can_add_residual:
            if survival_probability:
                # The value is handed to Dropout as its first argument; the
                # noise shape (None, 1, 1, 1) makes the drop per sample.
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")
        return x
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 24 15:13:56 2017

@author: ihong

ch5 - DL for computer vision
"""

# %% Convnet

from keras import layers
from keras import models

# Convolutional feature extractor for 28x28 single-channel images,
# followed by a densely connected classifier with a 10-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Densely connected layers
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])
model.summary()

## Training convnet -- MNIST
from keras.datasets import mnist
from keras.utils import to_categorical
Esempio n. 3
0
import keras
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import tensorflowjs as tfjs

# CNN classifier for 100x100 RGB images: four conv/max-pool stages, then a
# dense head ending in a 10-way softmax.
model = models.Sequential()
model.add(
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

# Class names. 'six' was missing, which left only 9 names for the 10-unit
# softmax layer above and shifted every label after 'five' by one position.
# NOTE(review): confirm the dataset really contains a 'six' class.
classes = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
    'nine'
]

train_dir = './static/img/dataset/train'
import keras

# reznet = keras.applications.resnet.ResNet50(include_top=False, weights=None, input_tensor=None, input_shape=(51, 51, 1), pooling=None, classes=1000)
# out = reznet.get_layer('conv5_block3_add').output
# out = keras.layers.Flatten()(out)
# out = keras.layers.Dense(1024, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001), bias_regularizer=keras.regularizers.l2(0.001))(out)
# out = keras.layers.Dense(128, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001), bias_regularizer=keras.regularizers.l2(0.001))(out)
# out = keras.layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001), bias_regularizer=keras.regularizers.l2(0.001))(out)
# out = keras.layers.Dense(9)(out)
# model = keras.Model(reznet.input, out)
#
# import numpy as np
# a = np.zeros((51,51,1))
# model.predict(a.reshape(1,51,51,1))
# print(model.summary())
from  keras import layers, regularizers
# Convolutional trunk on 51x51 single-channel inputs.
in1 = layers.Input((51, 51, 1,))
m1 = layers.Conv2D(32, (4, 4), strides=(2, 2), activation='relu', input_shape=(51, 51, 1))(in1)
# Remaining conv layers as (filters, kernel size, strides), applied in order.
for _filters, _kernel, _strides in (
        (64, (4, 4), (2, 2)),
        (64, (3, 3), (2, 2)),
        (64, (2, 2), (1, 1)),
):
    m1 = layers.Conv2D(_filters, _kernel, strides=_strides, activation='relu')(m1)
m1 = layers.Flatten()(m1)
# Stand-alone model exposing only the flattened convolutional features.
conv_model = keras.Model(in1, m1)

# Dense head: three L2-regularised ReLU layers, then 9 linear outputs
# (the last layer has no activation).
out = m1
for _units in (512, 32, 16):
    out = layers.Dense(
        _units,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
        bias_regularizer=regularizers.l2(0.001),
    )(out)
out = layers.Dense(9)(out)

model = keras.Model(in1, out)
model.summary()
Esempio n. 5
0
def Compute_parameters(n, m, k, l):
	"""Print and return the number of parameters of one convolutional layer.

	Every output feature map owns one ``n x m`` filter per input feature map
	plus a single bias, hence ``(n * m * k + 1) * l`` parameters in total.

	Args:
		n: width of the filter.
		m: height of the filter.
		k: number of input feature maps.
		l: number of output feature maps.

	Returns:
		The parameter count that was printed.
	"""
	count = (n * m * k + 1) * l
	# One pre-formatted string prints identically under the Python 2 print
	# statement and the Python 3 print function; the old
	# `print "...", value` form is a SyntaxError on Python 3.
	print("# of paramteres in this layer= {}".format(count))
	return count



if __name__ == '__main__':
	# Small convnet for 28x28x1 images. After each conv layer the expected
	# parameter count is printed so it can be compared with the summary.
	model = models.Sequential()
	# 32 is the depth (number of filters), (3, 3) is the filter/kernel shape.
	model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
	Compute_parameters(3, 3, 1, 32)
	model.add(layers.MaxPooling2D(2, 2))
	model.add(layers.Conv2D(64, (3, 3), activation='relu'))
	Compute_parameters(3, 3, 32, 64)
	model.add(layers.MaxPooling2D(2, 2))
	model.add(layers.Conv2D(64, (3, 3), activation='relu'))
	Compute_parameters(3, 3, 64, 64)
	model.add(layers.Flatten())
	model.add(layers.Dense(64, activation='relu'))
	model.add(layers.Dense(10, activation='softmax'))
	# summary() prints the table itself and returns None, so wrapping it in
	# print only added a stray "None" line (and was Python 2-only syntax).
	model.summary()

	# Load MNIST, add the channel axis and scale training pixels to [0, 1].
	(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
	train_images = train_images.reshape((60000, 28, 28, 1))
	train_images = train_images.astype('float32') / 255
	test_images = test_images.reshape((10000, 28, 28, 1))
Esempio n. 6
0
# model.add(layers.Dropout(0.5))
# model.add(layers.Dense(512, activation='relu'))
# model.add(layers.Dense(5, activation='softmax'))

# Conv2D since learning about images
# Maxpooling to halve the input in both dim
# AvgPool - Sum all of the values and dividing it by the total number of values
# MaxPool - Selecting the maximum value
# Dropout is used to prevent overfitting
# last layer is softmax since it is good for multi-class - gives prob
#

# Conv stack on (img_size, img_size, 3) inputs; the filter count doubles at
# every stage (20 -> 40 -> 80 -> 160).
model = models.Sequential([
    layers.Conv2D(filters=20,
                  kernel_size=(3, 3),
                  activation='relu',
                  input_shape=(img_size, img_size, 3)),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=40, kernel_size=(3, 3), activation='relu'),
    layers.Dropout(0.25),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=80, kernel_size=(3, 3), activation='relu'),
    layers.Dropout(0.25),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Conv2D(filters=160, kernel_size=(3, 3), activation='relu'),
])
                                                     batch_size=20,
                                                     class_mode='binary')
validation_generator = validation_data_gen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)
# Notes on the ImageDataGenerator() arguments:
# rotation_range: an angle in degrees (0-180), the range within which images
#   are randomly rotated
# width_shift, height_shift: range of the horizontal / vertical translation
#   (as a fraction of the total width / height)
# shear_range: angle of the random shearing transformation
# zoom_range: range of the random zoom
# horizontal_flip: randomly flips half of the images horizontally (because
#   real-world images are rarely horizontally symmetric)
# fill_mode: method used to fill in newly created pixels, i.e. the pixels
#   left missing after a rotation or a shift

# Binary classifier for 150x150 RGB images: four conv/max-pool stages, then
# dropout and a dense head with a single sigmoid output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3),
                  activation=activations.relu,
                  input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation=activations.relu),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation=activations.relu),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation=activations.relu),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation=activations.relu),
    layers.Dense(1, activation=activations.sigmoid),
])

model.compile(loss=losses.binary_crossentropy,
              optimizer=optimizers.rmsprop(lr=1e-4),
    test_images = test_images.reshape(10000, 28, 28, 1)
    input_shape = (28, 28, 1)
# Convert both image sets to float32 and divide by 255.
train_images, test_images = (
    images.astype('float32') / 255.0
    for images in (train_images, test_images)
)

# Prepare the labels (one-hot, 10 classes)
train_labels = to_categorical(train_labels, 10)
print("train_labels[0] : ", train_labels[0])
test_labels = to_categorical(test_labels, 10)
print("test_labels[0]  : ", test_labels[0])

# Build and compile the neural network
network = models.Sequential([
    layers.Conv2D(32,
                  kernel_size=(3, 3),
                  activation='relu',
                  input_shape=input_shape),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])
network.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
network.summary()

# Training
network.fit(train_images, train_labels, epochs=20, batch_size=128)
Esempio n. 9
0
            images.append(img)
        elif file_name == label_file_name:
            for l in open('{}\\{}'.format(data_dir, file_name), 'r'):
                lbl = [0] * 3
                lbl[int(l)] = 1
                labels.append(lbl)
    # plt.imshow(images[0])
    # plt.show()
    # print(labels)
    # print(images[0])

    # Build the CNN model: convolution layer - pooling layer - convolution layer - fully connected layer
    model = models.Sequential()
    model.add(
        layers.Conv2D(16, (3, 3),
                      padding='same',
                      activation='relu',
                      input_shape=(INPUT_SIZE, INPUT_SIZE, 1)))
    model.add(layers.MaxPooling2D((4, 4)))
    model.add(layers.Conv2D(16, (3, 3), padding='same', activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    model.summary()

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    train_len = int(len(images) * TRAIN_PERCENTAGE)
    train_images = np.array(images[:train_len]).reshape(
Esempio n. 10
0
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    """Residual block whose shortcut branch is itself a convolution.

    The main path is 1x1 conv -> `kernel_size` conv -> 1x1 conv, each followed
    by batch normalization. The shortcut is a strided 1x1 conv plus batch
    normalization. Both branches are summed and passed through a ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: sequence of three integers, the filter counts of the three
            conv layers on the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: strides of the first conv layer on the main path and of the
            shortcut conv layer.

    # Returns
        Output tensor for the block.
    """
    n_first, n_middle, n_last = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Layer names follow the pattern '<res|bn><stage><block>_branch<id>'.
    branch_tag = str(stage) + block + '_branch'
    conv_name_base = 'res' + branch_tag
    bn_name_base = 'bn' + branch_tag

    # Main path, step a: strided 1x1 convolution.
    main = layers.Conv2D(
        n_first, (1, 1),
        strides=strides,
        kernel_initializer='he_normal',
        name=conv_name_base + '2a',
    )(input_tensor)
    main = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '2a')(main)
    main = layers.Activation('relu')(main)

    # Main path, step b: `kernel_size` convolution with 'same' padding.
    main = layers.Conv2D(
        n_middle,
        kernel_size,
        padding='same',
        kernel_initializer='he_normal',
        name=conv_name_base + '2b',
    )(main)
    main = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '2b')(main)
    main = layers.Activation('relu')(main)

    # Main path, step c: 1x1 convolution, no activation before the merge.
    main = layers.Conv2D(
        n_last, (1, 1),
        kernel_initializer='he_normal',
        name=conv_name_base + '2c',
    )(main)
    main = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '2c')(main)

    # Shortcut: uses the same strides and the same final filter count as the
    # main path so both branches can be added.
    shortcut = layers.Conv2D(
        n_last, (1, 1),
        strides=strides,
        kernel_initializer='he_normal',
        name=conv_name_base + '1',
    )(input_tensor)
    shortcut = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '1')(shortcut)

    merged = layers.add([main, shortcut])
    return layers.Activation('relu')(merged)
# Scale image data from range 0:255 to range 0:1
X_train, X_test = X_train / 255.0, X_test / 255.0

# One-hot encode the labels
Y_train, Y_test = utils.to_categorical(Y_train), utils.to_categorical(Y_test)


##############################################
# Simple custom CNN Keras functional model
##############################################
inputs = layers.Input(shape=(32, 32, 3))

# Two identical stages that differ only in width:
# conv -> ReLU -> batch norm -> 2x2 max pool.
net = inputs
for _width in (32, 64):
    net = layers.Conv2D(_width, kernel_size=(3, 3), padding='same')(net)
    net = layers.Activation('relu')(net)
    net = layers.BatchNormalization()(net)
    net = layers.MaxPooling2D(pool_size=(2, 2))(net)

# Classifier head: dropout is applied before and after the 512-unit layer.
net = layers.Flatten()(net)
net = layers.Dropout(0.4)(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)

net = layers.Dropout(0.4)(net)
Esempio n. 12
0
                      data_format='channels_first'),
], ids=['GRU', 'LSTM', 'ConvLSTM2D'])
def test_preprocess_weights_for_loading(layer):
    """Converted weights must survive `preprocess_weights_for_loading`.

    The layer's weights are passed through `convert_weights`, then through
    `saving.preprocess_weights_for_loading` declared as Keras version '1',
    and the result is compared with the untouched weights.
    """
    # A model is needed to initialize weights.
    Sequential([layer])
    original = layer.get_weights()
    converted = convert_weights(layer, original)
    restored = saving.preprocess_weights_for_loading(
        layer, converted, original_keras_version='1')
    matches = [np.allclose(before, after, 1e-5)
               for before, after in zip(original, restored)]
    assert all(matches)


@keras_test
@pytest.mark.parametrize(
    "layer",
    [
        layers.Conv2D(2, (3, 3), input_shape=[5, 5, 3]),
        layers.Conv2DTranspose(2, (5, 5),
                               input_shape=[7, 7, 3],
                               data_format='channels_first'),
    ],
    ids=['Conv2D', 'Conv2DTranspose'],
)
def test_preprocess_weights_for_loading_for_model(layer):
    """Same round trip as the per-layer test, but on a whole model.

    The weights are read from a one-layer `Sequential` model and the model,
    not the layer, is handed to `saving.preprocess_weights_for_loading`.
    """
    model = Sequential([layer])
    original = model.get_weights()
    converted = convert_weights(layer, original)
    restored = saving.preprocess_weights_for_loading(
        model, converted, original_keras_version='1')
    matches = [np.allclose(before, after, 1e-5)
               for before, after in zip(original, restored)]
    assert all(matches)


@keras_test
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

# Load the CSV files. In the training file the first column holds the label
# and the remaining columns are reshaped into 28x28x1 images.
data = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

labels = to_categorical(data.iloc[:, 0].values)
inputs = data.iloc[:, 1:].values.reshape(-1, 28, 28, 1)

x_train, x_test, y_train, y_test = train_test_split(inputs, labels)

# Every convolution has 128 filters and 'same' padding and is followed by
# 25% dropout. Kernels are 7x7 before the pooling layer and 5x5 after it.
model = models.Sequential([
    layers.Conv2D(128, (7, 7),
                  activation='relu',
                  input_shape=(28, 28, 1),
                  padding='same'),
    layers.Dropout(0.25),
    layers.BatchNormalization(),
    layers.Conv2D(128, (7, 7), activation='relu', padding='same'),
    layers.Dropout(0.25),

    layers.MaxPooling2D((2, 2)),

    layers.BatchNormalization(),
    layers.Conv2D(128, (5, 5), activation='relu', padding='same'),
    layers.Dropout(0.25),
    layers.BatchNormalization(),
    layers.Conv2D(128, (5, 5), activation='relu', padding='same'),
    layers.Dropout(0.25),
])
Esempio n. 14
0
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling coefficients.

  Args:
    width_coefficient: float, scaling coefficient for network width.
    depth_coefficient: float, scaling coefficient for network depth.
    default_size: integer, default input image size.
    dropout_rate: float, dropout rate before final classifier layer.
    drop_connect_rate: float, dropout rate at skip connections. The value
      handed to each block is `drop_connect_rate * b / blocks`, where `b` is
      the running index of the block, so it is 0 for the first block and
      grows linearly with depth.
    depth_divisor: integer, a unit of network width.
    min_depth: integer, minimum number of filters.
    bn_momentum: float. Momentum parameter for Batch Normalization layers.
    activation: activation function.
    blocks_args: list of dicts, parameters to construct block modules.
    model_name: string, model name.
    include_top: whether to include the fully-connected layer at the top of the
      network.
    weights: one of `None` (random initialization), `"imagenet"` (pre-training
      on ImageNet), or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
      numpy array to use as image input for the model.
    input_shape: optional shape tuple, only to be specified if `include_top` is
      False. It should have exactly 3 inputs channels.
    pooling: optional pooling mode for feature extraction when `include_top` is
      `False`. - `None` means that the output of the model will be the 4D tensor
      output of the last convolutional layer. - "avg" means that global average
      pooling will be applied to the output of the last convolutional layer, and
      thus the output of the model will be a 2D tensor. - `"max"` means that
      global max pooling will be applied.
    classes: optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified.
    classifier_activation: A string or callable. The activation function to use
      on the `"top"` layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the `"top"` layer.
    include_preprocessing: Boolean, whether to include the preprocessing layer
      (`Rescaling`) at the bottom of the network. Defaults to `True`.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `"softmax"` or `None` when
      using a pretrained top layer.
  """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError("The `weights` argument should be either "
                         "`None` (random initialization), `imagenet` "
                         "(pre-training on ImageNet), "
                         "or the path to the weights file to be loaded. "
                         f"Received: weights={weights}")

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000. "
            f"Received: classes={classes}")

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1. / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    # Work on a copy: the loop below pops keys from and rewrites the args.
    blocks_args = copy.deepcopy(blocks_args)
    # `b` is the running index of the block being built and `blocks` the
    # total of the configured `num_repeat` values; their ratio scales the
    # drop-connect rate with depth.
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for (i, args) in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(repeats=args.pop("num_repeat"),
                                depth_coefficient=depth_coefficient)
        for j in range(repeats):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name="block{}{}_".format(i + 1, chr(j + 97)),
                **args,
            )(x)
            # Advance the block index. Without this the rate above stayed 0
            # for every block and drop-connect was never applied.
            b += 1

    # Build top
    top_filters = round_filters(filters=1280,
                                width_coefficient=width_coefficient,
                                min_depth=min_depth,
                                depth_divisor=depth_divisor)
    # NOTE(review): data_format is pinned to "channels_last" here while
    # bn_axis follows backend.image_data_format() - confirm this is intended.
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         bias_initializer=tf.constant_initializer(0),
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        # The hash table is keyed by the last two characters of the model
        # name; index 0 is the full model, index 1 the "notop" variant.
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHTS_PATH + file_name,
                                           cache_subdir="models",
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Esempio n. 15
0
print('# of validation images:', validation['features'].shape[0])

# Pad images with 0s: two entries on each side of axes 1 and 2, none on
# axes 0 and 3. The same padding is applied to every split.
for _split in (train, validation, test):
    _split['features'] = np.pad(_split['features'],
                                ((0, 0), (2, 2), (2, 2), (0, 0)),
                                'constant')

print("Updated Image Shape: {}".format(train['features'][0].shape))

# Two conv/average-pool stages, then three dense layers ending in a 10-way
# softmax.
model = keras.Sequential([
    layers.Conv2D(filters=6,
                  kernel_size=(3, 3),
                  activation='relu',
                  input_shape=(32, 32, 1)),
    layers.AveragePooling2D(),
    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    layers.AveragePooling2D(),
    layers.Flatten(),
    layers.Dense(units=120, activation='relu'),
    layers.Dense(units=84, activation='relu'),
    layers.Dense(units=10, activation='softmax'),
])
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Features are used as-is; integer labels are one-hot encoded.
X_train = train['features']
y_train = to_categorical(train['labels'])
X_validation = validation['features']
y_validation = to_categorical(validation['labels'])
Esempio n. 16
0
    def construct_graph(self, input_tensor, stage5=True):
        """Build the ResNet stages and the FPN pyramid on `input_tensor`.

        The five pyramid outputs `[P2, P3, P4, P5, P6]` are stored in
        `self.output_layers`; nothing is returned.

        Args:
            input_tensor: tensor the first layer is applied to.
            stage5: must be True. The pyramid is built top-down starting from
                the stage-5 output, so it cannot be built without that stage.

        Raises:
            ValueError: if `stage5` is False. Previously this path fed `None`
                into the `fpn_c5p5` layer and failed there.
            KeyError: if `self.architecture` is neither "resnet50" nor
                "resnet101".
        """
        if not stage5:
            raise ValueError(
                "stage5=False is not supported: the FPN top-down pathway "
                "starts from the stage 5 output (C5).")
        # NOTE(review): this checks the attribute, not the `input_tensor`
        # argument that is actually used below - confirm which is intended.
        assert self.input_tensor is not None, "input_tensor can not be none!"
        # Stage 1
        x = KL.ZeroPadding2D((3, 3))(input_tensor)
        x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1',
                      use_bias=True)(x)
        x = BatchNorm(axis=3, name='bn_conv1')(x)
        x = KL.Activation('relu')(x)
        x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)
        # Stage 2
        x = self.conv_block(x,
                            3, [64, 64, 256],
                            stage=2,
                            block='a',
                            strides=(1, 1))
        x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='b')
        C2 = x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='c')
        # Stage 3
        x = self.conv_block(x, 3, [128, 128, 512], stage=3, block='a')
        x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='b')
        x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='c')
        C3 = x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='d')
        # Stage 4: the number of identity blocks depends on the architecture.
        x = self.conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
        block_count = {"resnet50": 5, "resnet101": 22}[self.architecture]
        for i in range(block_count):
            # Blocks are labelled 'b', 'c', ... (chr(98) == 'b').
            x = self.identity_block(x,
                                    3, [256, 256, 1024],
                                    stage=4,
                                    block=chr(98 + i))
        C4 = x
        # Stage 5
        x = self.conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
        x = self.identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
        C5 = x = self.identity_block(x,
                                     3, [512, 512, 2048],
                                     stage=5,
                                     block='c')

        # Top-down pathway: each level is the 2x-upsampled level above it
        # added to a 1x1 convolution of the matching stage output.
        P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
        P4 = KL.Add(name="fpn_p4add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
            KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)
        ])
        P3 = KL.Add(name="fpn_p3add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
            KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)
        ])
        P2 = KL.Add(name="fpn_p2add")([
            KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
            KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)
        ])

        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
        P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
        P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
        P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)

        self.output_layers = [P2, P3, P4, P5, P6]
Esempio n. 17
0
def vgg16(input_shape=(512, 512, 3),
          input_tensor=None,
          pretrained_weights_path=None,
          output_stride=16):
    """Assemble a VGG16-style encoder and expose its per-block feature maps.

    The image input is first passed through an unnamed BatchNormalization
    layer, then through five blocks of 3x3 ReLU convolutions. Blocks 1-3
    always end with a 2x2 max-pool; the block-4 pool is added only when
    ``output_stride > 16`` and the block-5 pool only when
    ``output_stride > 8``.

    # Arguments
        input_shape: shape used to create the input layer when
            `input_tensor` is None.
        input_tensor: optional tensor used as the network input instead of
            a freshly created `layers.Input`.
        pretrained_weights_path: optional path; when it is not None and
            exists on disk, weights are loaded into a temporary model that
            spans from the input to the last encoder tensor.
        output_stride: selects which pooling layers are built and which
            tensors are placed in the returned feature list.

    # Returns
        A tuple `(img_input, features)` where `features` is a list of five
        tensors. For `output_stride == 8` it is `[f1, f2, f5, f5, f5]`,
        for `output_stride == 16` it is `[f1, f2, f3, f5, f5]`, otherwise
        `[f1, f2, f3, f4, f5]`.
    """

    def conv_stack(tensor, block, filters, depth):
        # `depth` consecutive 3x3 ReLU convolutions, named block<b>_conv<k>.
        for k in range(1, depth + 1):
            tensor = layers.Conv2D(filters, (3, 3),
                                   activation='relu',
                                   padding='same',
                                   name='block%d_conv%d' % (block, k))(tensor)
        return tensor

    def pool(tensor, block):
        # 2x2 max-pool with stride 2, named block<b>_pool.
        return layers.MaxPooling2D((2, 2),
                                   strides=(2, 2),
                                   name='block%d_pool' % block)(tensor)

    img_input = (layers.Input(shape=input_shape, name='img_input')
                 if input_tensor is None else input_tensor)
    x = layers.BatchNormalization()(img_input)

    # Blocks 1-3: convolutions followed by an unconditional pool.
    f1 = x = pool(conv_stack(x, 1, 64, 2), 1)
    f2 = x = pool(conv_stack(x, 2, 128, 2), 2)
    f3 = x = pool(conv_stack(x, 3, 256, 3), 3)

    # Block 4: the pool is skipped for output strides of 16 or less.
    x = conv_stack(x, 4, 512, 3)
    if output_stride > 16:
        x = pool(x, 4)
    f4 = x

    # Block 5: the pool is skipped for output strides of 8 or less.
    x = conv_stack(x, 5, 512, 3)
    if output_stride > 8:
        x = pool(x, 5)
    f5 = x

    weights_available = (pretrained_weights_path is not None
                         and os.path.exists(pretrained_weights_path))
    if weights_available:
        log('load pretrained encoder weights from `{}`'.format(
            pretrained_weights_path))
        encoder = Model(img_input, x, name='vgg16_encoder')
        encoder.load_weights(pretrained_weights_path)

    # Start from the full pyramid and substitute f5 for the levels that the
    # requested stride does not distinguish.
    features = [f1, f2, f3, f4, f5]
    if output_stride == 8:
        features[2:4] = [f5, f5]
    elif output_stride == 16:
        features[3] = f5

    return img_input, features
Esempio n. 18
0
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """Adds a depthwise convolution block.
    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.
    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            (channels, rows, cols) (with `channels_first` data format).
        pointwise_conv_filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the pointwise convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution
            along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
            The value is normalised to a 2-tuple internally, so `1`,
            `[1, 1]` and `(1, 1)` all select the unpadded `same` path.
        block_id: Integer, a unique identification designating
            the block number.
    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.
    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)`
        if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.
    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    # The docstring allows an int or a list, but a bare `== (1, 1)` test only
    # matches a tuple; normalise first so unit strides are always detected.
    if isinstance(strides, int):
        strides = (strides, strides)
    else:
        strides = tuple(strides)
    unit_stride = strides == (1, 1)

    if unit_stride:
        x = inputs
    else:
        # Strided case: pad one row/column at the bottom/right and use a
        # `valid` depthwise convolution instead of `same`.
        x = layers.ZeroPadding2D(((0, 1), (0, 1)),
                                 name='conv_pad_%d' % block_id)(inputs)
    x = layers.DepthwiseConv2D((3, 3),
                               padding='same' if unit_stride else 'valid',
                               depth_multiplier=depth_multiplier,
                               strides=strides,
                               use_bias=False,
                               name='conv_dw_%d' % block_id)(x)
    x = layers.BatchNormalization(
        axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
    x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

    # Pointwise (1x1) convolution that mixes channels.
    x = layers.Conv2D(pointwise_conv_filters, (1, 1),
                      padding='same',
                      use_bias=False,
                      strides=(1, 1),
                      name='conv_pw_%d' % block_id)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='conv_pw_%d_bn' % block_id)(x)
    return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
Esempio n. 19
0
                          is_sobel=args.sobel,
                          channel=args.channel,
                          is_gray=args.is_gray,
                          orig=True)

    # Build the network pieces: two inputs of identical shape, a shared
    # backbone, and the softmax heads.
    input_x = kl.Input(shape=args.input_sz, name='input1')
    input_gx = kl.Input(shape=args.input_sz, name='input2')

    base_model = BaseModel(args.input_sz)

    # `args.head_a_num` independent 1x1 softmax convolutions, each with
    # `args.cluster_a` output channels, named 'a0', 'a1', ...
    head_a = [
        kl.Conv2D(
            args.cluster_a,
            kernel_size=1,
            strides=1,
            activation='softmax',
            kernel_initializer=ki.he_normal(),
            # bias_initializer = ki.Zeros(),
            name='a' + str(i)) for i in range(args.head_a_num)
    ]

    # A single 1x1 softmax convolution with `args.cluster_b` output channels.
    head_b = kl.Conv2D(
        args.cluster_b,
        kernel_size=1,
        strides=1,
        activation='softmax',
        kernel_initializer=ki.he_normal(),
        # bias_initializer = ki.Zeros(),
        name='b')

    # Backbone features for the first input.
    x = base_model(input_x)
Esempio n. 20
0
    def get_fcn8s_model(input_shape=(224, 224, 3), class_no=21):
        """
        Build the FCN-8s model on top of an ImageNet-initialised VGG16 encoder.

        :param input_shape: (image height, image width, number of colour channels);
            height and width should preferably be multiples of 32
        :param class_no: number of classes
        :return: Keras model whose final layer is a softmax named 'output'
        """
        input_tensor = layers.Input(shape=input_shape)
        # Pad 100 in total: 99 here + 1 in the first layer of VGG.
        padded = layers.ZeroPadding2D(padding=(99, 99))(input_tensor)
        with tf.variable_scope("vgg_encoder"):
            encoder = VGG16(input_tensor=padded, include_top=False, weights='imagenet')

        with tf.variable_scope("vgg_decoder"):
            with tf.variable_scope("fcn_32s"):
                # Start from the pool5 output.
                score = encoder.get_layer('block5_pool').output
                # Convolutions standing in for the fc6/fc7 layers (the 7x7
                # 'valid' convolution also reduces the spatial size).
                score = layers.Conv2D(filters=4096,
                                      kernel_size=(7, 7),
                                      activation='relu',
                                      padding='valid',
                                      name='fc6')(score)
                score = layers.Dropout(0.5)(score)
                score = layers.Conv2D(filters=4096,
                                      kernel_size=(1, 1),
                                      activation='relu',
                                      padding='valid',
                                      name='fc7')(score)
                score = layers.Dropout(0.5)(score)

                # 1x1 convolution emulating a fully connected classifier.
                score = layers.Conv2D(filters=class_no,
                                      kernel_size=(1, 1),
                                      padding='valid')(score)

            # Transposed convolution: upsample by a factor of 2.
            score = layers.Conv2DTranspose(filters=class_no,
                                           kernel_size=(4, 4),
                                           strides=(2, 2),
                                           padding='same',
                                           use_bias=False,
                                           name='upsampling1')(score)

            with tf.variable_scope("fcn_16s"):
                # Per-class scores from the pool4 output.
                pool4_score = encoder.get_layer('block4_pool').output
                pool4_score = layers.Conv2D(filters=class_no,
                                            kernel_size=(1, 1),
                                            padding='valid')(pool4_score)

            # Crop the pool4 scores before the skip connection (presumably to
            # the spatial size of the upsampled tensor -- see FCN.center_crop).
            pool4_cropped = FCN.center_crop(pool4_score, score)
            score = layers.add([score, pool4_cropped])

            with tf.variable_scope("fcn_8s"):
                # Per-class scores from the pool3 output.
                pool3_score = encoder.get_layer('block3_pool').output
                pool3_score = layers.Conv2D(filters=class_no,
                                            kernel_size=(1, 1),
                                            padding='valid')(pool3_score)

            # Transposed convolution: upsample by another factor of 2 (4x total).
            score = layers.Conv2DTranspose(filters=class_no,
                                           kernel_size=(4, 4),
                                           strides=(2, 2),
                                           padding='same',
                                           use_bias=False,
                                           name='upsampling2')(score)
            # Centre crop, then add the second skip connection.
            pool3_cropped = FCN.center_crop(pool3_score, score)
            score = layers.add([score, pool3_cropped])

            # Final transposed convolution: upsample by a factor of 8.
            score = layers.Conv2DTranspose(filters=class_no,
                                           kernel_size=(16, 16),
                                           strides=(8, 8),
                                           padding='same',
                                           use_bias=False,
                                           name='upsampling3')(score)

        # If the spatial size still differs from the input, add a bilinear
        # resize (usually needed when the image size is not a multiple of 32).
        if K.int_shape(score)[1:3] != K.int_shape(input_tensor)[1:3]:
            print('Size different, do Bilinear Upsampling')
            score = layers.Lambda(
                lambda t: tf.image.resize_bilinear(
                    t, size=K.int_shape(input_tensor)[1:3]))(score)

        # Softmax over the classes (i.e. the channels) of every output pixel.
        score = layers.Activation('softmax', name='output')(score)

        return models.Model(inputs=input_tensor, outputs=score)
y_val.shape


# In[37]:


# Experiment notes for this cell:
#   - add in a Conv2D (64) and MaxPooling2D ((2, 2))
#   - now, use dropout (0.05, 0.15, 0.25, 0.50)
#   - change dense 256 -> 512 ...
#   - only use low batch_size=64

# Single conv/pool stage on 64x64 single-channel inputs, followed by a light
# dropout, one hidden dense layer and a sigmoid unit for binary output.
model30 = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.05),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy with RMSprop at a learning rate of 1e-4; track accuracy.
model30.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=1e-4),
    metrics=['acc'],
)

# Starts empty in this cell.
processing_time = []
Esempio n. 22
0
def EfficientNet(input_shape,
                 block_args_list,
                 global_params,
                 include_top=True,
                 pooling=None):
    """Build an EfficientNet Keras model from block and global parameters.

    The network is a strided 3x3 stem convolution, a sequence of
    `MBConvBlock`s described by `block_args_list`, and a 1x1 head
    convolution, optionally followed by a classifier or global pooling.

    # Arguments
        input_shape: shape passed to `KL.Input`.
        block_args_list: iterable of block descriptors. Each one must
            provide `num_repeat`, `input_filters`, `output_filters` and a
            `_replace` method (and accept a `strides` field).
        global_params: object providing `batch_norm_momentum`,
            `batch_norm_epsilon`, `data_format`, `drop_connect_rate`,
            `dropout_rate` and `num_classes`; it is also forwarded to
            `round_filters`, `round_repeats` and `MBConvBlock`.
        include_top: when True, append global average pooling, optional
            dropout, a dense layer of `num_classes` units and a softmax.
        pooling: only used when `include_top` is False; `"avg"` or `"max"`
            appends the matching global pooling layer, anything else leaves
            the head feature map as the output.

    # Returns
        A `KM.Model` mapping the input layer to the selected output.

    # Raises
        AssertionError: if a block descriptor has `num_repeat <= 0`.
    """
    batch_norm_momentum = global_params.batch_norm_momentum
    batch_norm_epsilon = global_params.batch_norm_epsilon
    channel_axis = 1 if global_params.data_format == "channels_first" else -1

    # Stem part
    inputs = KL.Input(shape=input_shape)
    x = inputs
    x = KL.Conv2D(
        filters=round_filters(32, global_params),
        kernel_size=[3, 3],
        strides=[2, 2],
        kernel_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
    )(x)
    x = KL.BatchNormalization(axis=channel_axis,
                              momentum=batch_norm_momentum,
                              epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    # Blocks part
    # The drop-connect rate grows linearly with the running block index.
    # NOTE(review): n_blocks is summed from the raw `num_repeat` values,
    # before `round_repeats` is applied below -- confirm this is intended.
    block_idx = 1
    n_blocks = sum(block_args.num_repeat for block_args in block_args_list)
    drop_rate = global_params.drop_connect_rate or 0
    drop_rate_dx = drop_rate / n_blocks

    for block_args in block_args_list:
        assert block_args.num_repeat > 0
        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(block_args.input_filters,
                                        global_params),
            output_filters=round_filters(block_args.output_filters,
                                         global_params),
            num_repeat=round_repeats(block_args.num_repeat, global_params),
        )

        # The first block needs to take care of stride and filter size increase.
        x = MBConvBlock(block_args,
                        global_params,
                        drop_connect_rate=drop_rate_dx * block_idx)(x)
        block_idx += 1

        # Remaining repeats keep the channel count and use unit strides.
        if block_args.num_repeat > 1:
            block_args = block_args._replace(
                input_filters=block_args.output_filters, strides=[1, 1])

        # `range` instead of the Python-2-only `xrange`.
        for _ in range(block_args.num_repeat - 1):
            x = MBConvBlock(block_args,
                            global_params,
                            drop_connect_rate=drop_rate_dx * block_idx)(x)
            block_idx += 1

    # Head part
    x = KL.Conv2D(
        filters=round_filters(1280, global_params),
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
    )(x)
    x = KL.BatchNormalization(axis=channel_axis,
                              momentum=batch_norm_momentum,
                              epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    if include_top:
        x = KL.GlobalAveragePooling2D(data_format=global_params.data_format)(x)
        if global_params.dropout_rate > 0:
            x = KL.Dropout(global_params.dropout_rate)(x)
        x = KL.Dense(global_params.num_classes,
                     kernel_initializer=dense_kernel_initializer)(x)
        x = KL.Activation("softmax")(x)
    else:
        if pooling == "avg":
            x = KL.GlobalAveragePooling2D(
                data_format=global_params.data_format)(x)
        elif pooling == "max":
            x = KL.GlobalMaxPooling2D(data_format=global_params.data_format)(x)

    outputs = x
    model = KM.Model(inputs, outputs)

    return model
Esempio n. 23
0
def get_model():
    """Build and compile a two-input convolutional regressor.

    Two 360x640x3 images each pass through their own stem of two
    conv-BN-ReLU units, are concatenated, and then run through a shared
    trunk of conv-BN-ReLU units and max-pools that ends in a flattened
    2-unit sigmoid layer. The model is compiled with the Adam optimizer
    and mean absolute error loss.

    # Returns
        The compiled Keras model taking `[left, right]` inputs.
    """

    def conv_bn_relu(tensor, filters, **conv_kwargs):
        # 3x3 convolution -> batch normalization -> ReLU.
        tensor = layers.Conv2D(filters, 3, **conv_kwargs)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation('relu')(tensor)

    def stem(image):
        # Strided unit followed by an unstrided one, 32 filters each.
        image = conv_bn_relu(image, 32, strides=(2, 2))
        return conv_bn_relu(image, 32)

    left_in = layers.Input((360, 640, 3))
    right_in = layers.Input((360, 640, 3))
    # A third input is still created, but it is not wired into the model.
    center_in = layers.Input((360, 640, 3))

    left = stem(left_in)
    right = stem(right_in)

    trunk = layers.concatenate([left, right])
    trunk = layers.MaxPooling2D()(trunk)
    trunk = conv_bn_relu(trunk, 96, strides=(2, 2))
    trunk = conv_bn_relu(trunk, 128)
    trunk = conv_bn_relu(trunk, 128)
    trunk = layers.MaxPooling2D()(trunk)
    trunk = conv_bn_relu(trunk, 256)
    trunk = conv_bn_relu(trunk, 256)
    trunk = layers.MaxPooling2D()(trunk)
    trunk = conv_bn_relu(trunk, 512)
    trunk = conv_bn_relu(trunk, 512)

    trunk = layers.Flatten()(trunk)
    prediction = layers.Dense(2, activation='sigmoid')(trunk)

    model = models.Model([left_in, right_in], prediction)
    model.compile(optimizer='adam', loss='mae')
    return model
Esempio n. 24
0
    def __init__(self, **kwargs):
        """Create the sub-layers held by this layer.

        All keyword arguments are forwarded unchanged to the parent
        constructor.
        """
        super(WeightsNormalize, self).__init__(**kwargs)

        # 1x1 convolution producing 3 output channels.
        self.conv = layers.Conv2D(3, kernel_size=1)
        # Concatenation along the last axis.
        self.concat = layers.Concatenate(axis=-1)
        # Softmax with the layer's default settings.
        self.softmax = layers.Softmax()
def Conv2DClassifierIn1(x_train, y_train, x_test, y_test):
    """Build, train and score a 2-D CNN classifier for hyper-parameter search.

    The double-brace placeholders below are search-space templates that a
    hyper-parameter tool substitutes before the function runs (they look
    like hyperas templates -- confirm against the caller). Their assignment
    lines are kept verbatim for that reason.

    Architecture: two convolutions and a max-pool, then one, two or three
    further (conv, conv, batch-norm, max-pool) stages depending on
    `conv_block`, then dropout, a regularised dense layer, batch-norm,
    dropout, a second regularised dense layer and a softmax output with one
    unit per distinct value in `y_train`.

    # Arguments
        x_train: training inputs; `x_train.shape[1:]` is the model input shape.
        y_train: training targets; its unique values define the classes and
            the balanced class weights.
        x_test: validation inputs passed to `fit`.
        y_test: validation targets passed to `fit`.

    # Returns
        A dict with keys `'loss'` (negated best validation accuracy),
        `'status'` (`STATUS_OK`) and `'model'` (the trained model).
    """
    summary = True
    verbose = 1

    # setHyperParams------------------------------------------------------------------------------------------------
    batch_size = {{choice([32, 64, 128, 256, 512])}}
    epoch = {{choice([25, 50, 75, 100, 125, 150, 175, 200])}}

    conv_block = {{choice(['two', 'three', 'four'])}}

    conv1_num = {{choice([8, 16, 32, 64])}}
    conv2_num = {{choice([16, 32, 64, 128])}}
    conv3_num = {{choice([32, 64, 128])}}
    conv4_num = {{choice([32, 64, 128, 256])}}

    dense1_num = {{choice([128, 256, 512])}}
    dense2_num = {{choice([64, 128, 256])}}

    l1_regular_rate = {{uniform(0.00001, 1)}}
    l2_regular_rate = {{uniform(0.000001, 1)}}
    drop1_num = {{uniform(0.1, 1)}}
    drop2_num = {{uniform(0.0001, 1)}}

    activator = {{choice(['elu', 'relu', 'tanh'])}}
    optimizer = {{choice(['adam', 'rmsprop', 'SGD'])}}

    #---------------------------------------------------------------------------------------------------------------
    kernel_size = (3, 3)
    pool_size = (2, 2)
    initializer = 'random_uniform'
    padding_style = 'same'
    loss_type = 'binary_crossentropy'
    metrics = ['accuracy']
    my_callback = None
    # early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    # checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
    #                                verbose=1,
    #                                save_best_only=True)
    # my_callback = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
    #                                           patience=5, min_lr=0.0001)

    # build --------------------------------------------------------------------------------------------------------
    input_layer = Input(shape=x_train.shape[1:])

    # First stage: two convolutions and a pool, without batch normalization.
    conv = layers.Conv2D(conv1_num,
                         kernel_size,
                         padding=padding_style,
                         kernel_initializer=initializer,
                         activation=activator)(input_layer)
    conv = layers.Conv2D(conv1_num,
                         kernel_size,
                         padding=padding_style,
                         kernel_initializer=initializer,
                         activation=activator)(conv)
    pool = layers.MaxPooling2D(pool_size, padding=padding_style)(conv)

    # Filter counts of the additional stages selected by `conv_block`; any
    # other value adds no further stage, as before.
    extra_stage_filters = {
        'two': [conv2_num],
        'three': [conv2_num, conv3_num],
        'four': [conv2_num, conv3_num, conv4_num],
    }.get(conv_block, [])
    for stage_filters in extra_stage_filters:
        conv = layers.Conv2D(stage_filters,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(stage_filters,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)

    flat = layers.Flatten()(pool)
    drop = layers.Dropout(drop1_num)(flat)

    dense = layers.Dense(dense1_num,
                         activation=activator,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=l1_regular_rate, l2=l2_regular_rate))(drop)
    BatchNorm = layers.BatchNormalization(axis=-1)(dense)
    drop = layers.Dropout(drop2_num)(BatchNorm)

    dense = layers.Dense(dense2_num,
                         activation=activator,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=l1_regular_rate, l2=l2_regular_rate))(drop)

    classes = np.unique(y_train)
    output_layer = layers.Dense(len(classes),
                                activation='softmax')(dense)

    model = models.Model(inputs=input_layer, outputs=output_layer)

    if summary:
        model.summary()

    # train --------------------------------------------------------------------------------------------------------
    # Keyword arguments: newer scikit-learn releases reject the positional
    # form, while the keyword form is accepted by old and new releases.
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=classes,
        y=y_train.reshape(-1))
    class_weights_dict = dict(enumerate(class_weights))
    model.compile(
        optimizer=optimizer,
        loss=loss_type,
        metrics=metrics  # accuracy
    )

    result = model.fit(x=x_train,
                       y=y_train,
                       batch_size=batch_size,
                       epochs=epoch,
                       verbose=verbose,
                       callbacks=my_callback,
                       validation_data=(x_test, y_test),
                       shuffle=True,
                       class_weight=class_weights_dict)

    # The validation-accuracy key is 'val_acc' in older Keras releases and
    # 'val_accuracy' in newer ones; use whichever this run recorded.
    history = result.history
    val_acc_key = 'val_acc' if 'val_acc' in history else 'val_accuracy'
    validation_acc = np.amax(history[val_acc_key])
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}
def test_TimeDistributed():
    """Exercise `wrappers.TimeDistributed` around several inner layers.

    Scenarios, in order: a Dense layer (including equivalence between
    `input_shape` and `batch_input_shape`), an Embedding layer (same
    equivalence the other way round), a Conv2D layer with a JSON round
    trip, stacked wrappers, a wrapped Sequential model (Sequential and
    functional API), and a BatchNormalization layer whose moving
    statistics must change after one training batch.
    """

    def fit_on_noise(net, x_shape, y_shape):
        # One epoch on uniformly random inputs/targets, batch size 10.
        net.fit(np.random.random(x_shape),
                np.random.random(y_shape),
                epochs=1,
                batch_size=10)

    # --- Dense inner layer --------------------------------------------------
    dense_model = Sequential()
    dense_model.add(
        wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4)))
    dense_model.add(layers.Activation('relu'))
    dense_model.compile(optimizer='rmsprop', loss='mse')
    fit_on_noise(dense_model, (10, 3, 4), (10, 3, 2))

    # The config must be retrievable.
    dense_model.get_config()

    # A model declared with batch_input_shape and given the same weights
    # must produce the same predictions.
    sample = np.random.random((1, 3, 4))
    dense_output = dense_model.predict(sample)
    dense_weights = dense_model.layers[0].get_weights()

    dense_reference = Sequential()
    dense_reference.add(
        wrappers.TimeDistributed(layers.Dense(2), batch_input_shape=(1, 3, 4)))
    dense_reference.add(layers.Activation('relu'))
    dense_reference.compile(optimizer='rmsprop', loss='mse')
    dense_reference.layers[0].set_weights(dense_weights)

    assert_allclose(dense_output, dense_reference.predict(sample), atol=1e-05)

    # --- Embedding inner layer ----------------------------------------------
    embedding_model = Sequential()
    embedding_model.add(
        wrappers.TimeDistributed(layers.Embedding(5, 6),
                                 batch_input_shape=(10, 3, 4),
                                 dtype='int32'))
    embedding_model.compile(optimizer='rmsprop', loss='mse')
    embedding_model.fit(np.random.randint(5, size=(10, 3, 4), dtype='int32'),
                        np.random.random((10, 3, 4, 6)),
                        epochs=1,
                        batch_size=10)

    # Compare against a model declared without batch_input_shape.
    token_ids = np.random.randint(5, size=(10, 3, 4), dtype='int32')
    embedding_output = embedding_model.predict(token_ids)
    embedding_weights = embedding_model.layers[0].get_weights()

    embedding_reference = Sequential()
    embedding_reference.add(
        wrappers.TimeDistributed(layers.Embedding(5, 6),
                                 input_shape=(3, 4),
                                 dtype='int32'))
    embedding_reference.compile(optimizer='rmsprop', loss='mse')
    embedding_reference.layers[0].set_weights(embedding_weights)

    assert_allclose(embedding_output,
                    embedding_reference.predict(token_ids),
                    atol=1e-05)

    # --- Conv2D inner layer -------------------------------------------------
    conv_model = Sequential()
    conv_model.add(
        wrappers.TimeDistributed(layers.Conv2D(5, (2, 2), padding='same'),
                                 input_shape=(2, 4, 4, 3)))
    conv_model.add(layers.Activation('relu'))
    conv_model.compile(optimizer='rmsprop', loss='mse')
    conv_model.train_on_batch(np.random.random((1, 2, 4, 4, 3)),
                              np.random.random((1, 2, 4, 4, 5)))

    # JSON round trip must yield a model that can be summarised.
    restored = model_from_json(conv_model.to_json())
    restored.summary()

    # --- Stacked wrappers ---------------------------------------------------
    stacked = Sequential()
    stacked.add(wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4)))
    stacked.add(wrappers.TimeDistributed(layers.Dense(3)))
    stacked.add(layers.Activation('relu'))
    stacked.compile(optimizer='rmsprop', loss='mse')
    fit_on_noise(stacked, (10, 3, 4), (10, 3, 3))

    # --- Wrapping a Sequential model ----------------------------------------
    inner = Sequential()
    inner.add(layers.Dense(3, input_dim=2))
    outer_sequential = Sequential()
    outer_sequential.add(wrappers.TimeDistributed(inner, input_shape=(3, 2)))
    outer_sequential.compile(optimizer='rmsprop', loss='mse')
    fit_on_noise(outer_sequential, (10, 3, 2), (10, 3, 3))

    # Same inner model, wired through the functional API.
    functional_in = Input(shape=(3, 2))
    functional_out = wrappers.TimeDistributed(inner)(functional_in)
    outer_functional = Model(functional_in, functional_out)
    outer_functional.compile(optimizer='rmsprop', loss='mse')
    fit_on_noise(outer_functional, (10, 3, 2), (10, 3, 3))

    # --- BatchNormalization inner layer -------------------------------------
    bn_model = Sequential()
    bn_model.add(
        wrappers.TimeDistributed(layers.BatchNormalization(center=True,
                                                           scale=True),
                                 name='bn',
                                 input_shape=(10, 2)))
    bn_model.compile(optimizer='rmsprop', loss='mse')
    td = bn_model.layers[0]
    zeros, ones = np.array([0, 0]), np.array([1, 1])
    # Before training the mean and variance weights are 0 and 1.
    assert np.array_equal(td.get_weights()[2], zeros)
    assert np.array_equal(td.get_weights()[3], ones)
    # Train on a single batch.
    bn_model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)),
                            np.broadcast_to(np.array([0, 1]), (1, 10, 2)))
    # After training the mean and variance weights must have changed.
    assert not np.array_equal(td.get_weights()[2], zeros)
    assert not np.array_equal(td.get_weights()[3], ones)
    # The wrapper's input map must hold exactly one entry, keyed by the uid
    # of the model inputs, whose static shape is (None, 2).
    uid = object_list_uid(bn_model.inputs)
    assert len(td._input_map.keys()) == 1
    assert uid in td._input_map
    assert K.int_shape(td._input_map[uid]) == (None, 2)
Esempio n. 27
0
def build_model():
    """Build the U-Net segmentation model, print its summary and return it.

    Configuration is read from module-level names: ``t_x`` (the input shape
    is ``t_x.shape[1:]``), ``UPSAMPLE_MODE``, ``NET_SCALING`` and
    ``GAUSSIAN_NOISE``.

    Returns:
        The Keras ``Model`` mapping the ``RGB_Input`` tensor to the output of
        a single-filter 1x1 sigmoid convolution (upsampled by ``NET_SCALING``
        when that is set).
    """
    from keras import models, layers

    def upsample_conv(filters, kernel_size, strides, padding):
        # Learned upsampling via transposed convolution.
        return layers.Conv2DTranspose(filters, kernel_size, strides=strides, padding=padding)

    def upsample_simple(filters, kernel_size, strides, padding):
        # Parameter-free upsampling: only `strides` is used, the remaining
        # arguments exist so both factories share one call signature.
        return layers.UpSampling2D(strides)

    def conv_pair(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    upsample = upsample_conv if UPSAMPLE_MODE == 'DECONV' else upsample_simple

    input_img = layers.Input(t_x.shape[1:], name='RGB_Input')
    pp_in_layer = input_img

    # Optional downscaling of the input; undone on the output further below.
    if NET_SCALING is not None:
        pp_in_layer = layers.AvgPool2D(NET_SCALING)(pp_in_layer)

    pp_in_layer = layers.GaussianNoise(GAUSSIAN_NOISE)(pp_in_layer)
    pp_in_layer = layers.BatchNormalization()(pp_in_layer)

    # Contracting path: conv pair followed by 2x2 max pooling.
    c1 = conv_pair(pp_in_layer, 8)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = conv_pair(p1, 16)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = conv_pair(p2, 32)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    c4 = conv_pair(p3, 64)
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)

    # Bottleneck.
    c5 = conv_pair(p4, 128)

    # Expanding path: upsample, concatenate with the matching encoder
    # output, then another conv pair.
    u6 = upsample(64, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = layers.concatenate([u6, c4])
    c6 = conv_pair(u6, 64)

    u7 = upsample(32, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = layers.concatenate([u7, c3])
    c7 = conv_pair(u7, 32)

    u8 = upsample(16, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = layers.concatenate([u8, c2])
    c8 = conv_pair(u8, 16)

    u9 = upsample(8, (2, 2), strides=(2, 2), padding='same')(c8)
    # The explicit axis is kept exactly as the original wrote it.
    u9 = layers.concatenate([u9, c1], axis=3)
    c9 = conv_pair(u9, 8)

    d = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
    if NET_SCALING is not None:
        d = layers.UpSampling2D(NET_SCALING)(d)

    seg_model = models.Model(inputs=[input_img], outputs=[d])
    seg_model.summary()
    # Previously the model was built and then discarded (implicit None).
    return seg_model
Esempio n. 28
0
def model_U_VGG_Centerline_Localheight():
    """Build the U-shaped VGG network with three per-pixel output heads.

    The encoder is five VGG-style blocks (``block1`` .. ``block5``); the
    decoder merges encoder outputs back in through upsampling and
    concatenation. Three heads branch off the last shared decoder tensor:

    * ``up5_2``       - 3-filter softmax (text / non-text head),
    * ``up51_2``      - 2-filter softmax (centerline / other head),
    * ``regress-4-1`` - 1-filter ReLU (local height regression head).

    Returns:
        A ``Model`` named ``'U-VGG-model-Localheight'`` with outputs in the
        order listed above. Input shape is ``(None, None, 3)``.
    """

    def conv(tensor, filters, size, name, activation='relu'):
        # Square-kernel convolution with 'same' padding.
        return layers.Conv2D(filters, (size, size),
                             activation=activation,
                             padding='same',
                             name=name)(tensor)

    def merge_upsampled(skip, deeper):
        # Concatenate `skip` with a 2x upsampled `deeper` tensor.
        return Concatenate()([skip, UpSampling2D((2, 2))(deeper)])

    image = Input(shape=(None, None, 3), name='input')

    # ---- Encoder: (block number, filters, number of convolutions) ----
    encoder_spec = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3), (5, 512, 3))
    before_pool = []
    after_pool = []
    current = image
    for block, filters, depth in encoder_spec:
        for index in range(1, depth + 1):
            current = conv(current, filters, 3, 'block%d_conv%d' % (block, index))
        before_pool.append(current)
        current = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                      name='block%d_pool' % block)(current)
        after_pool.append(current)

    pooled1, pooled2, pooled3, pooled4 = after_pool[:4]
    # The decoder starts from block 5 *before* pooling; the pooled block-5
    # output (after_pool[4]) is created but not consumed.
    deepest = before_pool[4]

    # ---- Decoder ----
    dec = Concatenate()([pooled4, deepest])
    dec = conv(dec, 128, 1, 'up1_1')
    dec = conv(dec, 128, 3, 'up1_2')

    dec = merge_upsampled(pooled3, dec)
    dec = conv(dec, 64, 1, 'up2_1')
    dec = conv(dec, 64, 3, 'up2_2')

    dec = merge_upsampled(pooled2, dec)
    dec = conv(dec, 32, 1, 'up3_1')
    dec = conv(dec, 32, 3, 'up3_2')

    shared = merge_upsampled(pooled1, dec)

    def head(tag, out_filters, out_activation, out_name):
        # One output branch: two convs on the shared tensor, merge with the
        # raw input at full resolution, a 1x1 conv, then the output conv.
        branch = conv(shared, 32, 1, 'up4%s_1' % tag)
        branch = conv(branch, 32, 3, 'up4%s_2' % tag)
        branch = merge_upsampled(image, branch)
        branch = conv(branch, 16, 1, 'up5%s_1' % tag)
        return conv(branch, out_filters, 3, out_name, activation=out_activation)

    # Heads are built in this order so auto-generated layer names match.
    text_out = head('', 3, 'softmax', 'up5_2')
    centerline_out = head('1', 2, 'softmax', 'up51_2')
    height_out = head('2', 1, 'relu', 'regress-4-1')

    return Model(image, [text_out, centerline_out, height_out],
                 name='U-VGG-model-Localheight')
Esempio n. 29
0
    def create_instance_head(self, num_classes, rois, train_bn=True):
        """Build the instance segmentation mask head.

        Args:
            num_classes: Number of filters of the final 1x1 sigmoid
                convolution (layer ``instance_mask``).
            rois: ROI tensor, passed to ``PyramidROIExtract`` ahead of the
                feature maps. The original author's note describes ROIs as
                ``[batch, num_rois, H, W, C]`` where C of the initial ROIs
                may differ, so they need preprocessing first.
            train_bn: Forwarded as ``training`` to every ``BatchNorm`` layer.

        Returns:
            The output tensor of the ``instance_mask`` layer.
        """
        # The original ran the feature-map gathering and ROI extraction
        # twice (once outside the variable scope) and discarded the first
        # result; only the scoped extraction is kept.
        with tf.variable_scope(DEFAULT_SSD_SCOPE):
            feature_maps = [self.outputs[layer] for layer in self.layers]
            x = PyramidROIExtract([args.det_kernel, args.det_kernel],
                                  self.config,
                                  name="roi_align_mask")([rois] + feature_maps)

            def deconv(tensor, name):
                # 2x2 stride-2 transposed convolution applied per ROI.
                return KL.TimeDistributed(
                    KL.Conv2DTranspose(256, (2, 2),
                                       strides=2,
                                       activation="relu"),
                    name=name)(tensor)

            def conv_bn_relu(tensor, index):
                # 3x3 convolution -> batch norm -> ReLU, applied per ROI.
                tensor = KL.TimeDistributed(
                    KL.Conv2D(256, (3, 3), padding="same"),
                    name="instance_mask_conv%d" % index)(tensor)
                tensor = KL.TimeDistributed(
                    BatchNorm(),
                    name="instance_mask_bn%d" % index)(tensor,
                                                       training=train_bn)
                return KL.Activation('relu')(tensor)

            # The full slice selects every element; kept as in the original.
            x = deconv(x[:, :, :, :, :], "instance_mask_deconv1")
            x = conv_bn_relu(x, 1)
            x = conv_bn_relu(x, 2)

            x = deconv(x, "instance_mask_deconv2")
            x = conv_bn_relu(x, 3)
            x = conv_bn_relu(x, 4)

            # NOTE(review): this layer reuses the name "instance_mask_deconv2"
            # from above. Keras rejects duplicate layer names when building a
            # Model, but renaming would change variable names in existing
            # checkpoints - confirm before changing.
            x = deconv(x, "instance_mask_deconv2")
            x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1),
                                             strides=1,
                                             activation="sigmoid"),
                                   name="instance_mask")(x)
            return x
def train():
    """Generate captchas, train a CNN classifier on them and save the results.

    Works entirely from module-level configuration (``img_x``, ``img_y``,
    ``depth``, ``nr_of_chars``, ``color``, ``numberOfCaptchas``, ``numbers``,
    ``testSize``, ``output_dir``). The trained model, its JSON description, a
    copy of ``./captcha_learner_1.py`` and a metrics plot are written into a
    newly created directory below ``output_dir``.
    """
    ############### 1. Generating captchas

    images, labels = get_data_set(width=img_x, height=img_y, nr_of_chars=nr_of_chars, color=color,
                                  nr_of_captchas=numberOfCaptchas, numbers=numbers)

    ############### 2. Preprocessing the data
    #
    # Data needs to be reshaped into a 4D tensor - (sample_number, x_img_size, y_img_size, num_channels)
    # The number of channels = number of colors: greyscale = 1, color = 3
    # NOTE: despite its name, `testSize` is the number of leading samples used
    # for training; everything after it is used for validation.
    images_train = images[:testSize]
    print('Before reshaping, 3D :', images_train.shape)
    images_train = images_train.reshape(images_train.shape[0], img_x, img_y, depth)
    images_train = images_train.astype('float32') / 255  # Scale pixel values from 0-255 to 0-1
    print('After reshaping, 3D :', images_train[0].shape)

    images_test = images[testSize:]
    images_test = images_test.reshape(images_test.shape[0], img_x, img_y, depth)
    images_test = images_test.astype('float32') / 255

    labels_train = labels[:testSize]
    labels_test = labels[testSize:]

    # The categories are characters [aa, ab, ac, ... ]
    categories = get_all_possible_label_categories(nr_of_chars)
    lb = LabelBinarizer().fit(categories)
    labels_encoded_train = lb.transform(labels_train)
    labels_encoded_test = lb.transform(labels_test)

    ############### 3. Building the neural network
    # Sequential model
    model = Sequential()
    # First conv: 32 filters of 5x5. The channel count follows `depth` so the
    # declared input shape always matches the reshaped data above (it was
    # hard-coded to 1, which only fits greyscale input).
    model.add(Conv2D(32, (5, 5), padding='same', input_shape=(img_x, img_y, depth), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Second conv
    model.add(layers.Conv2D(64, (5, 5), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Third conv
    model.add(layers.Conv2D(96, (5, 5), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Fourth conv
    model.add(layers.Conv2D(128, (5, 5), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Fully connected layer
    model.add(Flatten())
    model.add(Dense(1024, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(len(categories), activation="softmax"))

    # summary() prints by itself; wrapping it in print() only added "None".
    model.summary()
    model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(), metrics=["accuracy"])

    ############### 4. Training the neural network
    history = model.fit(
        images_train,
        labels_encoded_train,
        batch_size=128,
        validation_data=(images_test, labels_encoded_test),
        epochs=80
    )

    ############### 5. Visualizing and saving the result
    # Retrieving the acc and loss. Depending on the Keras version the
    # accuracy metric is reported as 'acc' or as 'accuracy'.
    recorded = history.history
    acc_key = 'acc' if 'acc' in recorded else 'accuracy'
    acc = recorded[acc_key]
    val_acc = recorded['val_' + acc_key]
    loss = recorded['loss']
    val_loss = recorded['val_loss']

    # Creating the output dir for this model
    model_dir = os.path.join(output_dir, 'model_chars' + str(nr_of_chars) + '_acc_' + str(max(val_acc)).replace('.', '_')[:4])

    # If that directory already exists, nest a timestamped sub-directory in it.
    if os.path.exists(model_dir):
        model_dir = os.path.join(model_dir, '_' + str(time.time()))

    os.mkdir(model_dir)

    model.save(os.path.join(model_dir, 'model.h5'))
    model_json = model.to_json()
    copyfile('./captcha_learner_1.py', os.path.join(model_dir, 'captcha_learner_1.py'))
    with open(os.path.join(model_dir, 'model.json'), "w") as json_file:
        json_file.write(model_json)

    epochs = range(1, len(acc) + 1)
    plt.figure()
    plt.subplot(211)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()

    plt.subplot(212)

    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    plt.savefig(os.path.join(model_dir, 'metrics.png'))
    plt.show()