Example #1
def CapsNetBasic(input_shape, n_class=2):
    x = layers.Input(shape=input_shape)

    # Layer 1: Just a conventional Conv2D layer
    conv1 = layers.Conv2D(filters=256,
                          kernel_size=5,
                          strides=1,
                          padding='same',
                          activation='relu',
                          name='conv1')(x)

    # Reshape layer to be 1 capsule x [filters] atoms
    _, H, W, C = conv1.get_shape()
    conv1_reshaped = layers.Reshape((H.value, W.value, 1, C.value))(conv1)

    # Layer 2: Primary Capsule: convolutional capsule layer with routings=1
    primary_caps = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=8,
                                    num_atoms=32,
                                    strides=1,
                                    padding='same',
                                    routings=1,
                                    name='primarycaps')(conv1_reshaped)

    # Layer 3: Convolutional Capsule with a 1x1 kernel
    seg_caps = ConvCapsuleLayer(kernel_size=1,
                                num_capsule=1,
                                num_atoms=16,
                                strides=1,
                                padding='same',
                                routings=3,
                                name='seg_caps')(primary_caps)

    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    out_seg = Length(num_classes=n_class, seg=True, name='out_seg')(seg_caps)

    # Decoder network.
    _, H, W, C, A = seg_caps.get_shape()
    y = layers.Input(shape=input_shape[:-1] + (1, ))
    masked_by_y = Mask()(
        [seg_caps, y]
    )  # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(
        seg_caps)  # Mask using the capsule with maximal length. For prediction

    def shared_decoder(mask_layer):
        recon_remove_dim = layers.Reshape(
            (H.value, W.value, A.value))(mask_layer)

        recon_1 = layers.Conv2D(filters=64,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_1')(recon_remove_dim)

        recon_2 = layers.Conv2D(filters=128,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_2')(recon_1)

        out_recon = layers.Conv2D(filters=1,
                                  kernel_size=1,
                                  padding='same',
                                  kernel_initializer='he_normal',
                                  activation='sigmoid',
                                  name='out_recon')(recon_2)

        return out_recon

    # Models for training and evaluation (prediction)
    train_model = models.Model(inputs=[x, y],
                               outputs=[out_seg,
                                        shared_decoder(masked_by_y)])
    eval_model = models.Model(inputs=x,
                              outputs=[out_seg,
                                       shared_decoder(masked)])

    # manipulate model
    noise = layers.Input(shape=(H.value, W.value, C.value, A.value))
    noised_seg_caps = layers.Add()([seg_caps, noise])
    masked_noised_y = Mask()([noised_seg_caps, y])
    manipulate_model = models.Model(inputs=[x, y, noise],
                                    outputs=shared_decoder(masked_noised_y))

    return train_model, eval_model, manipulate_model
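
A hedged usage sketch for the segmentation model above (input shape, losses, and loss weights are illustrative assumptions; `out_seg` predicts a per-pixel mask while the decoder reconstructs the input):

train_model, eval_model, manipulate_model = CapsNetBasic(input_shape=(512, 512, 1))
train_model.compile(optimizer='adam',
                    loss=['binary_crossentropy', 'mse'],  # segmentation + reconstruction
                    loss_weights=[1.0, 0.1])              # illustrative weighting
# train_model.fit([images, masks], [masks, images], batch_size=1, epochs=10)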
Example #2
def CapsNet_nogradientstop(input_shape, n_class, routings):
    # Best test results so far: val loss ~0.13xx with conv1 200 filters / kernel 1,
    # conv2 150 filters / kernel 9, dropout1 0.68, dropout2 0.68,
    # n_channels 50, kernel_size 20.
    x = layers.Input(shape=input_shape)
    conv1 = layers.Conv1D(filters=200,
                          kernel_size=1,
                          strides=1,
                          padding='valid',
                          kernel_initializer='he_normal',
                          activation='relu',
                          name='conv1')(x)
    #conv1=BatchNormalization()(conv1)
    conv1 = Dropout(0.7)(conv1)
    conv2 = layers.Conv1D(filters=200,
                          kernel_size=9,
                          strides=1,
                          padding='valid',
                          kernel_initializer='he_normal',
                          activation='relu',
                          name='conv2')(conv1)
    #conv1=BatchNormalization()(conv1)
    conv2 = Dropout(0.75)(conv2)  # dropout 0.75 gave val loss 0.1278
    primarycaps = PrimaryCap(conv2,
                             dim_capsule=8,
                             n_channels=60,
                             kernel_size=20,
                             kernel_initializer='he_normal',
                             strides=1,
                             padding='valid',
                             dropout=0.2)
    dim_capsule_dim2 = 10
    #Capsule layer. Routing algorithm works here.
    digitcaps_c = CapsuleLayer_nogradient_stop(num_capsule=n_class,
                                               dim_capsule=dim_capsule_dim2,
                                               num_routing=routings,
                                               name='digitcaps',
                                               kernel_initializer='he_normal',
                                               dropout=0.1)(primarycaps)
    #digitcaps_c = CapsuleLayer(num_capsule=n_class, dim_capsule=dim_capsule_dim2, num_routing=routings,name='digitcaps',kernel_initializer='he_normal')(primarycaps)
    digitcaps = Extract_outputs(dim_capsule_dim2)(digitcaps_c)
    weight_c = Extract_weight_c(dim_capsule_dim2)(digitcaps_c)
    out_caps = Length(name='capsnet')(digitcaps)
    # Decoder network.
    y = layers.Input(shape=(n_class, ))
    masked_by_y = Mask()(
        [digitcaps, y]
    )  # The true label is used to mask the output of capsule layer. For training
    masked = Mask(
    )(digitcaps)  # Mask using the capsule with maximal length. For prediction

    # Shared Decoder model in training and prediction
    decoder = Sequential(name='decoder')
    decoder.add(
        layers.Dense(512,
                     activation='relu',
                     input_dim=dim_capsule_dim2 * n_class))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    train_model = Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = Model(x, [out_caps, decoder(masked)])
    weight_c_model = Model(x, weight_c)
    # manipulate model
    noise = layers.Input(shape=(n_class, dim_capsule_dim2))
    noised_digitcaps = layers.Add()([digitcaps, noise])
    masked_noised_y = Mask()([noised_digitcaps, y])
    manipulate_model = Model([x, y, noise], decoder(masked_noised_y))
    return train_model, eval_model, manipulate_model, weight_c_model
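
A hedged instantiation sketch (the sequence length and channel count are assumptions; the length only has to survive the two `valid` convolutions and the kernel-size-20 primary capsule):

train_model, eval_model, manipulate_model, weight_c_model = \
    CapsNet_nogradientstop(input_shape=(1000, 4), n_class=2, routings=3)
train_model.summary()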
Example #3
import keras
from keras import layers
import numpy as np

latent_dim = 32
height = 32
width = 32
channels = 3

generator_input = keras.Input(shape=(latent_dim, ))

# First, transform the input into a 16x16, 128-channel feature map
x = layers.Dense(128 * 16 * 16)(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((16, 16, 128))(x)

# Then add a convolution layer
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)

# Upsample to 32 x 32
x = layers.Conv2DTranspose(256, 4, strides=2, padding='same')(x)
x = layers.LeakyReLU()(x)

# Add more convolution layers
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
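
The excerpt stops mid-generator; the well-known Keras DCGAN generator this matches is usually finished by projecting back to `channels` feature maps and wrapping the graph in a Model (a hedged sketch of that ending, not part of the excerpt above):

x = layers.Conv2D(channels, 7, activation='tanh', padding='same')(x)
generator = keras.models.Model(generator_input, x)
generator.summary()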
Example #4
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
    """Instantiates the MobileNet architecture.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` with `channels_last` data format
            or `(3, 224, 224)` with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution
            (also called the resolution multiplier)
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of '
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            if rows is None:
                rows = 224
                warnings.warn('MobileNet shape is undefined.'
                              ' Weights for input shape '
                              '(224, 224) will be loaded.')
            else:
                raise ValueError('If imagenet weights are being loaded, '
                                 'input must have a static square shape '
                                 '(one of (128, 128), (160, 160), '
                                 '(192, 192), or (224, 224)). '
                                 'Input shape provided = %s' % (input_shape, ))

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The MobileNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

    x = _depthwise_conv_block(x,
                              128,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(x,
                              256,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(x,
                              512,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(x,
                              1024,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

    if include_top:
        if backend.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Reshape(shape, name='reshape_1')(x)
        x = layers.Dropout(dropout, name='dropout')(x)
        x = layers.Conv2D(classes, (1, 1), padding='same',
                          name='conv_preds')(x)
        x = layers.Activation('softmax', name='act_softmax')(x)
        x = layers.Reshape((classes, ), name='reshape_2')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        if include_top:
            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
        else:
            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)
    return model
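
A hedged usage sketch (assumes this module's imports are in place and that the pretrained weights can be downloaded):

if __name__ == '__main__':
    model = MobileNet(input_shape=(224, 224, 3), alpha=0.75, weights='imagenet')
    model.summary()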
Example #5
def model_ContextSum(p, embedding_matrix, max_sent_len, n_out):
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len, ),
                                  dtype='int32',
                                  name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embedding_matrix.shape[1],
                         input_dim=embedding_matrix.shape[0],
                         input_length=max_sent_len,
                         weights=[embedding_matrix],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(
        MAX_EDGES_PER_GRAPH,
        max_sent_len,
    ),
                                  dtype='int8',
                                  name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=POSITION_VOCAB_SIZE,
                         input_length=max_sent_len,
                         mask_zero=True,
                         embeddings_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified number of RNN layers
    x = layers.concatenate([word_embeddings, pos_embeddings])
    for i in range(p["rnn1_layers"] - 1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)
    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    # Take the vector of the sentences with the target entity pair
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    for i in range(MAX_EDGES_PER_GRAPH):
        # Bind i as a default argument so each Lambda keeps its own index
        # (a plain closure would see the final value of i on reload)
        sentence_vector = layers.Lambda(
            lambda l, i=i: l[:, i], output_shape=(num_units, ))(sentence_matrix)
        if i == 0:
            context_vectors = layers.Lambda(
                lambda l, i=i: l[:, i + 1:],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(
                lambda l, i=i: l[:, :i],
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        else:
            context_vectors = layers.Lambda(
                lambda l, i=i: K.concatenate([l[:, :i], l[:, i + 1:]], axis=1),
                output_shape=(MAX_EDGES_PER_GRAPH - 1,
                              num_units))(sentence_matrix)
        context_vector = GlobalSumPooling1D()(context_vectors)
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)
    edge_vectors = layers.Concatenate(1)(layers_to_concat)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(
        layers.Dense(n_out, activation="softmax",
                     name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers],
                         outputs=[main_output])
    model.compile(optimizer=p['optimizer'],
                  loss=masked_categorical_crossentropy,
                  metrics=['accuracy'])

    return model
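
GlobalSumPooling1D is not a built-in Keras layer; a minimal sketch consistent with its use above (summing over the timestep axis) might look like this:

from keras import backend as K
from keras.layers import Layer

class GlobalSumPooling1D(Layer):
    """Sums a (batch, timesteps, features) tensor over the timestep axis."""

    def call(self, inputs, mask=None):
        return K.sum(inputs, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2])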
Example #6
def CapsNet(input_shape, n_class, routings):
    """
    A Capsule Network on MNIST.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
            `eval_model` can also be used for training.
    """
    x = layers.Input(shape=input_shape)

    # Layer 1: Just a conventional Conv2D layer
    conv1 = layers.Conv2D(filters=256,
                          kernel_size=9,
                          strides=1,
                          padding='valid',
                          activation='relu',
                          name='conv1')(x)

    # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_capsule]
    primarycaps = PrimaryCap(conv1,
                             dim_capsule=8,
                             n_channels=32,
                             kernel_size=9,
                             strides=2,
                             padding='valid')

    # Layer 3: Capsule layer. Routing algorithm works here.
    digitcaps = CapsuleLayer(num_capsule=n_class,
                             dim_capsule=16,
                             routings=routings,
                             name='digitcaps')(primarycaps)

    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    # If using tensorflow, this will not be necessary. :)
    out_caps = Length(name='capsnet')(digitcaps)

    # Decoder network.
    y = layers.Input(shape=(n_class, ))
    masked_by_y = Mask()(
        [digitcaps, y]
    )  # The true label is used to mask the output of capsule layer. For training
    masked = Mask(
    )(digitcaps)  # Mask using the capsule with maximal length. For prediction

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder.add(layers.Dense(512, activation='relu', input_dim=16 * n_class))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = models.Model(x, [out_caps, decoder(masked)])

    # manipulate model
    noise = layers.Input(shape=(n_class, 16))
    noised_digitcaps = layers.Add()([digitcaps, noise])  # digitcaps: (None, n_class, 16)
    masked_noised_y = Mask()([noised_digitcaps, y])
    manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y))
    return train_model, eval_model, manipulate_model
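
Capsule networks of this kind are usually trained with the margin loss from the CapsNet paper rather than MSE or cross-entropy; a sketch of the standard Keras implementation (assumes `from keras import backend as K`):

def margin_loss(y_true, y_pred):
    # L_k = T_k * max(0, 0.9 - ||v_k||)^2 + 0.5 * (1 - T_k) * max(0, ||v_k|| - 0.1)^2
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    return K.mean(K.sum(L, axis=1))

# train_model.compile(optimizer='adam', loss=[margin_loss, 'mse'],
#                     loss_weights=[1., 0.392])  # 0.392 as in the reference code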
Example #7
import tensorflow as tf
from keras import layers
from keras import backend as K
from keras.layers import Input, Conv2D

#x = conv2d_bn(x, 32, 3, 3, strides=(1, 1), padding='same')
#x = conv2d_bn(x, 32, 3, 3, strides=(1, 1), padding='same')
def multiply(x, n):
    x_prime = tf.reshape(x, (-1, n, 1))  # column vector per sample
    x_transpose = tf.transpose(x_prime, perm=[0, 2, 1])
    return tf.matmul(x_prime, x_transpose)  # outer product -> (batch, n, n)


#Lambda(lambda x: multiply(x, n), output_shape =(n, n))

# Input is 100 * 5 matrix
seq_input = Input(shape=(100, 5))

# convert to tensor and get 10 layers
x = layers.Reshape((100, 5, 1))(seq_input)
x = Conv2D(filters=10, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)

# get outer product to get 100*100 matrix for each layer
final = {}


def matmul(mat_x):
    y = tf.matmul(mat_x, mat_x, transpose_b=True)  # K.tf was removed from Keras
    return y


def multiply(x, n=100):  # note: shadows the multiply() defined above
    x_prime = tf.reshape(x, (-1, n, 5))
    x_transpose = tf.transpose(x_prime, perm=[0, 2, 1])
    return tf.matmul(x_prime, x_transpose)  # Gram matrix -> (batch, n, n)
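
A quick standalone check of the helper under eager TensorFlow 2 (the test data is made up):

import numpy as np

mat = np.random.rand(2, 100, 5).astype('float32')  # batch of two 100x5 matrices
gram = multiply(mat, n=100)                        # per-sample outer product
print(gram.shape)                                  # -> (2, 100, 100)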
Example #8
masked = Mask()(
    digitcaps)  # Mask using the capsule with maximal length. For prediction
out_caps = Length(num_classes=3, name='capsnet')(digitcaps)

#====================================

n_class = 3  # must match num_classes in the Length layer above
input_shape = (28, 28, 1)  # placeholder value; set to your data's shape
#Decoder
#=============
# Btara's comment: similarly here, the decoder is only used for image reconstruction
decoder = Sequential(name='decoder')
decoder.add(layers.Dense(512, activation='relu', input_dim=16 * n_class))
decoder.add(layers.Dense(1024, activation='relu'))
decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))
#==============

# Btara's comment: Seems right for this part, just note that the eval model is not needed (I think)
# if we don't have the decoder section
train_model = models.Model(
    inputs=[x, y],
    outputs=[out_caps, decoder(masked_by_y)],
)
eval_model = models.Model(inputs=x, outputs=[out_caps, decoder(masked)])


# Btara's comment: Don't have to make our own function for train_generator if we use image data generator
# see https://keras.io/preprocessing/image/
def train_generator(x, y, batch_size, shift_fraction=0.):
    # Completed minimally: shift images by at most shift_fraction of their size,
    # as in the reference CapsNet training code
    train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                       height_shift_range=shift_fraction)
    return train_datagen.flow(x, y, batch_size=batch_size)
Example #9
def yolo_vgg3_model(regularizer=None):
    CELL_DIM = NUM_BOX * (5 + NUM_CLASS)
    initializer = "glorot_normal"

    # Input Layer
    X_input = L.Input((MODEL_DIM, MODEL_DIM, 3))
    X = X_input

    # 448 x 448 x 3
    vgg_model = VGG16(include_top=False, weights='imagenet', input_tensor=X)
    for vgg_layer in vgg_model.layers:
        vgg_layer.trainable = False

    X = vgg_model.output
    X = L.BatchNormalization(axis=3)(X)

    # 14 x 14 x 512 (for a 448x448 input)
    X = L.Conv2D(512,
                 kernel_size=(1, 1),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)
    X = L.Conv2D(512,
                 kernel_size=(3, 3),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)
    X = L.Conv2D(1024,
                 kernel_size=(1, 1),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)
    X = L.Conv2D(1024,
                 kernel_size=(3, 3),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)
    X = L.MaxPooling2D((2, 2), strides=(2, 2))(X)

    # 7 x 7 x 1024
    X = L.Conv2D(CELL_DIM,
                 kernel_size=(3, 3),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)

    X = L.Conv2D(CELL_DIM // 2,
                 kernel_size=(1, 1),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)

    X = L.Conv2D(CELL_DIM,
                 kernel_size=(3, 3),
                 padding="same",
                 kernel_initializer=initializer,
                 kernel_regularizer=regularizer)(X)
    X = L.BatchNormalization(axis=3)(X)
    X = L.LeakyReLU()(X)

    # 7 x 7 x 100
    X_BBox = L.Conv2D(NUM_BOX * 5,
                      kernel_size=(1, 1),
                      kernel_initializer=initializer,
                      kernel_regularizer=regularizer)(X)
    X_BBox = L.Reshape((GRID_SIZE, GRID_SIZE, NUM_BOX, -1))(X_BBox)
    X_BBox = L.Activation('sigmoid', name="ActBBox")(X_BBox)

    X_Class = L.Conv2D(NUM_BOX * NUM_CLASS,
                       kernel_size=(1, 1),
                       kernel_initializer=initializer,
                       kernel_regularizer=regularizer)(X)
    X_Class = L.Reshape((GRID_SIZE, GRID_SIZE, NUM_BOX, -1))(X_Class)
    X_Class = L.Activation('softmax', name="ActClass")(X_Class)

    X = L.Concatenate(axis=-1)([X_BBox, X_Class])

    model = Model(inputs=X_input, outputs=X, name="yolo_vgg3")

    return model
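
The module-level constants used above are not shown; one consistent assignment (an assumption, chosen to match the grid-size and "7 x 7 x 100" shape comments) would be:

MODEL_DIM = 448   # VGG16 on 448x448 yields 14x14 maps, halved to 7x7 by the MaxPooling2D
GRID_SIZE = 7
NUM_BOX = 4       # hypothetical; gives CELL_DIM = NUM_BOX * (5 + NUM_CLASS) = 100
NUM_CLASS = 20    # hypothetical; matches the "7 x 7 x 100" comment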
Example #10
    def build_model(self):
        chm_input = Input(shape=(20, 20, 1), name="chm")
        rgb_input = Input(shape=(200, 200, 3), name="rgb")
        hsi_input = Input(shape=(20, 20, 3), name="hsi")
        las_input = Input(shape=(40, 40, 70, 1), name="las")

        # RGB downsample network
        rgb_down = layers.Conv2D(3, 5, activation="relu")(rgb_input)
        rgb_down = layers.Conv2D(3, 5, activation="relu")(rgb_down)
        rgb_down = layers.MaxPool2D(2)(rgb_down)
        rgb_down = layers.Conv2D(8, 5, activation="relu")(rgb_down)
        rgb_down = layers.Conv2D(8, 5, activation="relu")(rgb_down)
        rgb_down = layers.MaxPool2D(2)(rgb_down)
        rgb_down = layers.Conv2D(16, 4, activation="relu")(rgb_down)
        rgb_down = layers.Conv2D(16, 4, activation="relu")(rgb_down)
        rgb_down = layers.MaxPool2D(2, name="rgb_down")(rgb_down)
        rgb_down = layers.Conv2D(32, 4, activation="relu")(rgb_down)
        rgb_down = layers.Conv2D(32, 4, activation="relu")(rgb_down)
        rgb_down = layers.Flatten()(rgb_down)

        """
        # HSI upsample network
        hsi_up = layers.Conv2D(3, 2, activation="relu", padding="same")(hsi_input)
        hsi_up = layers.UpSampling2D(3)(hsi_up)
        #hsi_up = layers.Dropout(0.4)(hsi_up)
        hsi_up = layers.Conv2D(3, 4, activation="relu")(hsi_up)
        hsi_up = layers.Conv2D(3, 4, activation="relu")(hsi_up)
        hsi_up = layers.UpSampling2D(2)(hsi_up)
        #hsi_up = layers.Dropout(0.4)(hsi_up)
        hsi_up = layers.Conv2D(3, 5, activation="relu")(hsi_up)
        hsi_up = layers.Conv2D(3, 5, activation="relu")(hsi_up)
        hsi_up = layers.UpSampling2D(2, name="hsi_up")(hsi_up)
        #hsi_up = layers.Dropout(0.4)(hsi_up)

        # CHM upsample network
        chm_up = layers.Conv2D(1, 2, activation="relu", padding="same")(chm_input)
        chm_up = layers.UpSampling2D(3)(chm_up)
        #chm_up = layers.Dropout(0.4)(chm_up)
        chm_up = layers.Conv2D(1, 4, activation="relu")(chm_up)
        chm_up = layers.Conv2D(1, 4, activation="relu")(chm_up)
        chm_up = layers.UpSampling2D(2)(chm_up)
        #chm_up = layers.Dropout(0.4)(chm_up)
        chm_up = layers.Conv2D(1, 5, activation="relu")(chm_up)
        chm_up = layers.Conv2D(1, 5, activation="relu")(chm_up)
        chm_up = layers.UpSampling2D(2, name="chm_up")(chm_up)
        #chm_up = layers.Dropout(0.4)(chm_up)

        # High-res network
        high_res = layers.Concatenate(axis=3)([rgb_input, hsi_up, chm_up])
        high_res = layers.Conv2D(10, 5, activation="relu")(high_res)
        high_res = layers.Conv2D(10, 5, activation="relu")(high_res)
        high_res = layers.Conv2D(5, 5, activation="relu", name="high_res")(high_res)
        high_res = layers.Flatten()(high_res)

        """
        # Low-res network
        low_res = layers.Concatenate(axis=3)([hsi_input, chm_input])
        low_res = layers.Conv2D(4, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Conv2D(8, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Conv2D(8, 2, activation="relu", padding="same")(low_res)
        low_res = layers.MaxPool2D(2)(low_res)
        low_res = layers.Conv2D(16, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Conv2D(16, 2, activation="relu", padding="same")(low_res)
        low_res = layers.MaxPool2D(2)(low_res)
        low_res = layers.Conv2D(32, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Conv2D(32, 2, activation="relu", padding="same")(low_res)
        low_res = layers.MaxPool2D(2)(low_res)
        low_res = layers.Conv2D(64, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Conv2D(64, 2, activation="relu", padding="same")(low_res)
        low_res = layers.Flatten()(low_res)

        # Las 3D network
        las_net = layers.Conv3D(2, 4, activation="relu", padding="same")(las_input)
        las_net = layers.Conv3D(2, 4, activation="relu", padding="same")(las_net)
        las_net = layers.MaxPool3D(2)(las_net)
        las_net = layers.Conv3D(8, 4, activation="relu", padding="same")(las_net)
        las_net = layers.Conv3D(8, 4, activation="relu", padding="same")(las_net)
        las_net = layers.MaxPool3D(2)(las_net)
        las_net = layers.Conv3D(16, 4, activation="relu", padding="same")(las_net)
        las_net = layers.Conv3D(16, 4, activation="relu", padding="same")(las_net)
        las_net = layers.MaxPool3D(2)(las_net)
        las_net = layers.Conv3D(32, 4, activation="relu", padding="same")(las_net)
        las_net = layers.Conv3D(32, 4, activation="relu", padding="same", name="las_net")(las_net)
        las_net = layers.Flatten()(las_net)

        # Combine networks with fully connected layers
        fully_con = layers.concatenate([low_res, las_net, rgb_down])
        fully_con = layers.Dropout(0.1)(fully_con)
        fully_con = layers.Dense(256)(fully_con)
        fully_con = layers.Dropout(0.4)(fully_con)
        fully_con = layers.Dense(256)(fully_con)
        fully_con = layers.Dropout(0.4)(fully_con)
        fully_con = layers.Dense(256)(fully_con)
        fully_con = layers.Dropout(0.4)(fully_con)
        fully_con = layers.Dense(256)(fully_con)
        fully_con = layers.Dropout(0.0)(fully_con)
        output_bounding = layers.Dense(120, kernel_regularizer=keras.regularizers.l2(0.0001))(fully_con)
        output_bounding = layers.Reshape((30, 4), name="bounds")(output_bounding)
        output_class = layers.Dense(30, activation="sigmoid", kernel_regularizer=keras.regularizers.l2(0.0001), name="labels")(fully_con)

        self.model = Model(
            inputs=[rgb_input, chm_input, hsi_input, las_input],
            outputs=[output_bounding, output_class],
        )

        self.model.summary()
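
The source never shows how this two-headed model is compiled; a hedged sketch that could follow the summary() call above (optimizer and losses are assumptions keyed to the output names):

        self.model.compile(
            optimizer='adam',
            loss={'bounds': 'mse', 'labels': 'binary_crossentropy'},  # hypothetical losses
            loss_weights={'bounds': 1.0, 'labels': 1.0},
        )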
Example #11
def main():
    import sys
    np.set_printoptions(threshold=sys.maxsize)  # print arrays in full; newer NumPy rejects np.nan here

    number_of_classes = 3
    input_shape = (64, 64, 1)

    x = layers.Input(shape=input_shape)
    '''
    Inputs to the model are MRI images which are down-sampled
    to 64 × 64 from 512 × 512, in order to reduce the number of
    parameters in the model and decrease the training time.
    Second (First?) layer is a convolutional layer with 64 × 9 × 9 filters
    and stride of 1 which leads to 64 feature maps of size 56×56.
    '''
    conv1 = layers.Conv2D(64, (9, 9), activation='relu', name="FirstLayer")(x)
    '''
    The second layer is a Primary Capsule layer resulting from
    256×9×9 convolutions with strides of 2.
    '''
    primaryCaps = PrimaryCap(inputs=conv1,
                             dim_capsule=8,
                             n_channels=32,
                             kernel_size=9,
                             strides=2,
                             padding='valid')
    '''
    This layer consists of 32 “Component Capsules” with dimension of 8 each of
    which has feature maps of size 24×24 (i.e., each Component
    Capsule contains 24 × 24 localized individual Capsules).
    '''
    #capLayer1 = CapsuleLayer(
    #    num_capsule=32, dim_capsule=8, routings=3, name="SecondLayer")(primaryCaps)
    # num_capsule=4, dim_capsule=8, routings=3, name="SecondLayer")(primaryCaps)
    '''
    Final capsule layer includes 3 capsules, referred to as “Class
    Capsules,’ ’one for each type of candidate brain tumor. The
    dimension of these capsules is 16.
    '''
    capLayer2 = CapsuleLayer(num_capsule=3,
                             dim_capsule=16,
                             routings=2,
                             name="ThirdLayer")(primaryCaps)

    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    # If using tensorflow, this will not be necessary. :)
    out_caps = Length(name='capsnet')(capLayer2)

    # Decoder network.
    y = layers.Input(shape=(number_of_classes, ))
    # The true label is used to mask the output of capsule layer. For training
    masked_by_y = Mask()([capLayer2, y])
    # Mask using the capsule with maximal length. For prediction
    masked = Mask()(capLayer2)

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder.add(
        layers.Dense(512, activation='relu', input_dim=16 * number_of_classes))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = models.Model(x, [out_caps, decoder(masked)])

    # Probably don't need the below chunk of code ?
    noise = layers.Input(shape=(number_of_classes, 16))
    noised_capLayer2 = layers.Add()([capLayer2, noise])
    masked_noised_y = Mask()([noised_capLayer2, y])
    manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y))

    train_data_directory = 'train/'
    validation_data_directory = 'test/'
    bsize = 32

    image_datagen = ImageDataGenerator()
    # train_generator = image_datagen.flow_from_directory(
    #     train_data_directory,
    #     color_mode='grayscale',
    #     target_size=(image_resize_height, image_resize_weight),
    #     batch_size=20,
    #     class_mode='categorical')
    train_generator = create_generator(train_data_directory, batch_size=bsize)

    # validation_generator = image_datagen.flow_from_directory(
    #     validation_data_directory,
    #     color_mode='grayscale',
    #     target_size=(image_resize_height, image_resize_weight),
    #     batch_size=20,
    #     class_mode='categorical')

    validation_generator = create_generator(validation_data_directory,
                                            batch_size=bsize)

    # for x, y in train_generator:
    #     print("x shape: ", x.shape)
    #     print("y shape: ", y.shape)
    #     break

    # for x, y in validation_generator:
    #     print("val x shape: ", x.shape)
    #     print("val y shape: ", y.shape)
    #     break

    print(train_model.summary())

    train_model.compile(
        optimizer="rmsprop",  # adaptive-gradient optimizer
        loss='mse',  # "misprediction" measure
        # loss='sparse_categorical_crossentropy',  # alternative loss
        metrics=['accuracy']  # report accuracy as we train
    )

    hst = train_model.fit_generator(train_generator,
                                    steps_per_epoch=72,
                                    epochs=8,
                                    validation_data=validation_generator,
                                    validation_steps=24,
                                    verbose=1).history

    train_model.save('Test.h5')
Example #12
inner = MaxPooling2D(pool_size=(2, 2), name='s2-maxpool')(inner)

inner = Conv2D(256, (3, 3), padding='same', name='s3-conv1')(inner)
#inner = Dropout(0.3,name='s3-dropout1')(inner)
inner = layers.BatchNormalization(name='s3-batchnorm')(inner)
inner = layers.advanced_activations.LeakyReLU(0.1,
                                              name='s3-conv2-leakyrelu')(inner)
inner = MaxPooling2D(pool_size=(2, 1), name='s3-maxpool')(inner)

inner = Conv2D(256, (4, 1), name='s4-conv1')(inner)
inner = Dropout(0.3, name='s3-dropout2')(inner)
inner = layers.advanced_activations.LeakyReLU(0.1,
                                              name='s4-conv1-leakyrelu')(inner)
inner = Conv2D(labelsn, (1, 1), name='s4-conv2')(inner)

inner = layers.Reshape((TIMESTEP, labelsn),
                       name='y_pred_nosoftmax')(inner)  # may be fragile: assumes the conv output flattens to (TIMESTEP, labelsn)
y_pred = layers.Activation('softmax', name='y_pred')(inner)

labels = Input(name='the_labels', shape=[labelmaxn], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int32')
label_length = Input(name='label_length', shape=[1], dtype='int32')

loss_out = layers.Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
    [y_pred, labels, input_length, label_length])

model = Model(inputs=[input_data, labels, input_length, label_length],
              outputs=loss_out)

# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={
    'ctc': lambda y_true, y_pred: y_pred
}, optimizer='adam')  # truncated in the source; the optimizer choice here is illustrative
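
The ctc_lambda_func referenced above is not shown; the canonical Keras OCR example defines it as below, and the argument order matches the Lambda call in this snippet (assumes `from keras import backend as K`):

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # K.ctc_batch_cost returns one CTC loss value per batch element
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)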
Example #13
def LvqCapsNet(input_shape):
    input_img = Input(shape=input_shape)

    # Block 1
    caps0 = Capsule()
    caps0.add(Conv2D(32 + 1, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps0.add(BatchNormalization())
    caps0.add(Activation('relu'))
    caps0.add(Dropout(0.25))
    x = caps0(input_img)

    # Block 2
    caps1 = Capsule()
    caps1.add(Conv2D(64 + 1, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps1.add(BatchNormalization())
    caps1.add(Activation('relu'))
    caps1.add(Dropout(0.25))
    x = caps1(x)

    # Block 3
    caps2 = Capsule()
    caps2.add(Conv2D(64 + 1, (3, 3), padding='same', kernel_initializer=RandomNormal(stddev=0.01)))
    caps2.add(BatchNormalization())
    caps2.add(Activation('relu'))
    caps2.add(Dropout(0.25))
    x = caps2(x)

    # Block 4
    caps3 = Capsule(prototype_distribution=32)
    caps3.add(Conv2D(64 + 1, (5, 5), strides=2, padding='same', kernel_initializer=RandomNormal(stddev=0.01)))
    caps3.add(BatchNormalization())
    caps3.add(Activation('relu'))
    caps3.add(Dropout(0.25))
    x = caps3(x)

    # Block 5
    caps4 = Capsule()
    caps4.add(Conv2D(32 + 1, (3, 3), padding='same', kernel_initializer=RandomNormal(stddev=0.01)))
    caps4.add(Dropout(0.25))
    x = caps4(x)

    # Block 6
    caps5 = Capsule()
    caps5.add(Conv2D(64 + 1, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps5.add(Dropout(0.25))
    x = caps5(x)

    # Block 7
    caps6 = Capsule()
    caps6.add(Conv2D(64 + 1, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps6.add(SplitModule())
    caps6.add(Activation('relu'), scope_keys=1)
    caps6.add(Flatten(), scope_keys=1)
    x = caps6(x)

    # Caps1
    caps7 = Capsule(prototype_distribution=(1, 8 * 8))
    caps7.add(InputModule(signal_shape=(-1, 64), init_diss_initializer=None, trainable=False))
    diss7 = TangentDistance(squared_dissimilarity=False, epsilon=1.e-12, linear_factor=0.66, projected_atom_shape=16)
    caps7.add(diss7)
    caps7.add(GibbsRouting(norm_axis='channels', trainable=False))
    x = caps7(x)

    # Caps2
    caps8 = Capsule(prototype_distribution=(1, 4 * 4))
    caps8.add(Reshape((8, 8, 64)))
    caps8.add(Conv2D(64, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps8.add(Conv2D(64, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    caps8.add(InputModule(signal_shape=(8 * 8, 64), init_diss_initializer=None, trainable=False))
    diss8 = TangentDistance(projected_atom_shape=16, squared_dissimilarity=False,
                            epsilon=1.e-12, linear_factor=0.66, signal_output='signals')
    caps8.add(diss8)
    caps8.add(GibbsRouting(norm_axis='channels', trainable=False))
    x = caps8(x)

    # Caps3
    digit_caps = Capsule(prototype_distribution=(1, 10))
    digit_caps.add(Reshape((4, 4, 64)))
    digit_caps.add(Conv2D(128, (3, 3), padding='same', kernel_initializer=glorot_normal()))
    digit_caps.add(InputModule(signal_shape=128, init_diss_initializer=None, trainable=False))
    diss = RestrictedTangentDistance(projected_atom_shape=16, epsilon=1.e-12, squared_dissimilarity=False,
                                     linear_factor=0.66, signal_output='signals')
    digit_caps.add(diss)
    digit_caps.add(GibbsRouting(norm_axis='channels', trainable=False,
                                diss_regularizer=MaxValue(alpha=0.0001)))
    digit_caps.add(DissimilarityTransformation(probability_transformation='neg_softmax', name='lvq_caps'))

    digitcaps = digit_caps(x)

    # intermediate model for Caps2; used for visualizations
    input_diss8 = [Input((4, 4, 64)), Input((16,))]
    model_vis_caps2 = models.Model(input_diss8, digit_caps(list_to_dict(input_diss8)))

    # Decoder network.
    y = layers.Input(shape=(10,))
    masked_by_y = Mask()([digitcaps[0], y])

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder.add(layers.Dense(512, activation='relu', input_dim=128 * 10))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod((28, 28, 1)), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=(28, 28, 1), name='out_recon'))

    # Models for training and evaluation (prediction)
    model = models.Model([input_img, y], [digitcaps[2], decoder(masked_by_y)])

    return model, decoder, model_vis_caps2
Example #14
    def define_graph(self, mode, options):
        assert mode in ['training', 'inference']

        box_pred_method = options['box_pred_method']
        print(f'box_pred_method: {box_pred_method}')
        assert box_pred_method in [
            'lbf_guided', 'regress_landmark', 'regress_segbox', 'gt_segbox']

        batch_size = options['images_per_gpu']

        heads = options['heads']
        num_heads = len(heads)
        print(f'num_heads={num_heads}')
        num_masks = 0
        for class_ids in heads:
            num_masks += len(class_ids)
        assert num_masks == len(options['class_names'])
        print(f'num_masks={num_masks}')

        head_label_names = []
        for class_ids in heads:
            names_this_head = []
            for class_id in class_ids:
                names_this_head += options['class_names'][class_id]
            head_label_names.append(names_this_head)
        assert len(head_label_names) == len(heads)

        h = w = options['image_size']

        # assert h > 0 and w > 0 and h % 2**6 == 0 and w % 2**6 == 0
        if 'landmark_box_paddings448' in options:
            delta = options.get('landmark_box_padding_additional_ratio', 0.0)
            molded_padding_dict = {
                name:
                np.array(padding, np.float32) / 448.0 +
                    np.array([-delta, -delta, +delta, +delta], np.float32)
                for name, padding in options['landmark_box_paddings448'].items()
            }
        else:
            raise RuntimeError('padding information required')

        pprint(molded_padding_dict)

        # mean landmark68 pts
        mean_molded_landmark68_pts = tf.stack(
            [utils.MEAN_MOLDED_LANDMARK68_PTS],
            name='mean_molded_landmark68_pts')
        # mean head boxes
        mean_molded_head_boxes = utils.extract_landmark68_boxes_graph(
            mean_molded_landmark68_pts,
            head_label_names,
            molded_padding_dict)

        dropout_rate = options.get('dropout_rate', 0.0)
        print(f'dropout_rate={dropout_rate}')

        # Inputs
        input_molded_image = KL.Input(
            shape=[h, w, 3], name="input_molded_image")  # molded
        input_molded_image_exist = KL.Input(
            shape=[1], name='input_molded_image_exist', dtype=tf.uint8)
        print('input: %s' % input_molded_image.name)
        print('input_molded_image_exist.shape: {}, {}'.format(
            input_molded_image_exist.shape,
            input_molded_image_exist._keras_shape))

        if mode == 'training':
            input_gt_masks = KL.Input(
                shape=[num_masks, h, w], name="input_gt_masks")
            input_gt_masks_exist = KL.Input(
                shape=[1], name='input_gt_masks_exist', dtype=tf.uint8)
            print('input_gt_masks_exist.shape: {}, {}'.format(
                input_gt_masks_exist.shape, input_gt_masks_exist._keras_shape))
            molded_gt_masks = KL.Lambda(lambda xx: tf.cast(xx, tf.float32))(
                input_gt_masks)

        if box_pred_method == 'lbf_guided':
            input_molded_lbf_landmark68_pts = KL.Input(
                shape=[68, 2],
                dtype=tf.float32,
                name="input_molded_lbf_landmark68_pts")
            input_molded_lbf_landmark68_pts_exist = KL.Input(
                shape=[1],
                name='input_molded_lbf_landmark68_pts_exist',
                dtype=tf.uint8)
            print('input_molded_lbf_landmark68_pts_exist.shape: {}, {}'.format(
                input_molded_lbf_landmark68_pts_exist.shape,
                input_molded_lbf_landmark68_pts_exist._keras_shape))

        elif box_pred_method == 'regress_landmark':
            if mode == 'training':
                input_gt_molded_landmark68_pts = KL.Input(
                    shape=[68, 2],
                    dtype=tf.float32,
                    name='input_gt_molded_landmark68_pts')
                input_gt_molded_landmark68_pts_exist = KL.Input(
                    shape=[1],
                    name='input_gt_molded_landmark68_pts_exist', dtype=tf.uint8)

        elif box_pred_method == 'regress_segbox':
            def _box_to_std_deform(box):
                return utils.compute_box_deform(mean_molded_head_boxes, box)

            def _std_deform_to_box(deform):
                return utils.apply_box_deform(mean_molded_head_boxes, deform)

            if mode == 'training':
                input_gt_molded_head_boxes = KL.Input(
                    shape=[num_heads, 4],
                    dtype=tf.float32,
                    name='input_gt_molded_head_boxes')
                input_gt_molded_head_boxes_exist = KL.Input(
                    shape=[1],
                    name='input_gt_molded_head_boxes_exist', dtype=tf.uint8)

                # get box deforms
                input_gt_head_box_deforms = KL.Lambda(
                    _box_to_std_deform,
                    name='input_gt_head_box_deforms')(
                        input_gt_molded_head_boxes)

        elif box_pred_method == 'gt_segbox':
            input_gt_molded_head_boxes = KL.Input(
                shape=[num_heads, 4],
                dtype=tf.float32,
                name='input_gt_molded_head_boxes')
            input_gt_molded_head_boxes_exist = KL.Input(
                shape=[1],
                name='input_gt_molded_head_boxes_exist', dtype=tf.uint8)

        # Construct Backbone Network
        box_from = options.get('box_from', 'P2')

        def _expand_boxes_by_ratio(boxes, rel_ratio):
            y1, x1, y2, x2 = tf.split(boxes, 4, axis=-1)
            cy = (y1 + y2) / 2.0
            cx = (x1 + x2) / 2.0
            h2 = (y2 - y1) / 2.0
            w2 = (x2 - x1) / 2.0
            yy1 = cy - h2 * (1 + rel_ratio)
            xx1 = cx - w2 * (1 + rel_ratio)
            yy2 = cy + h2 * (1 + rel_ratio)
            xx2 = cx + w2 * (1 + rel_ratio)
            return tf.concat([yy1, xx1, yy2, xx2], axis=-1)

        if options['backbone'] == 'vgg16':
            print('making vgg16 backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)
            assert box_from == 'C5'
            mrcnn_feature_maps = [C5]
        elif options['backbone'] == 'vgg16fpn':
            print('making vgg16fpn backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2, P3, P4, P5]
        elif options['backbone'] == 'vgg16fpnP2':
            print('making vgg16fpnP2 backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2]
        elif options['backbone'] == 'resnet50':
            C1, C2, C3, C4, _ = resnet_graph(
                input_molded_image, 'resnet50', False)
            assert box_from == 'C4'
            box_feature = C4
            mrcnn_feature_maps = [C4]
        elif options['backbone'] == 'resnet50fpn':
            print('making resnet50fpn backbone')
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2, P3, P4, P5]
        elif options['backbone'] == 'resnet50fpnP2':
            print('making resnet50fpnP2 backbone')
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2]
        elif options['backbone'] == 'resnet50fpnC4':
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [C4]
        else:
            raise NotImplementedError()

        if box_pred_method in ['regress_landmark', 'regress_segbox']:
            # get box and optionally landmarks
            with tf.name_scope('box_neck'):
                x = box_feature
                box_neck_conv_num = options['box_neck_conv_num']
                for k in range(box_neck_conv_num):
                    x = KL.Conv2D(320, (3, 3), strides=(1, 1),
                                  padding='same', name=f'box_conv{k}')(x)
                    x = KL.BatchNormalization(name=f'box_convbn{k}')(x)
                    x = KL.Activation('relu')(x)

                x = KL.Conv2D(1280, (1, 1), name='box_conv_last')(x)
                x = KL.BatchNormalization(name='box_convbn_last')(x)

                x = KL.GlobalAveragePooling2D()(x)
                x = KL.Dropout(dropout_rate)(x)
            box_feature = x
            print(f'box_feature.shape={box_feature.shape}')

        if box_pred_method == 'lbf_guided':
            molded_head_boxes = KL.Lambda(
                lambda xx: utils.extract_landmark68_boxes_graph(
                    xx, head_label_names, molded_padding_dict),
                name='molded_head_boxes')(input_molded_lbf_landmark68_pts)

        elif box_pred_method == 'regress_landmark':
            x = box_feature
            x = KL.Dense(68 * 2, name='box_landmark_fc')(x)
            x = KL.Reshape((68, 2))(x)  # landmark68 offsets

            pred_molded_landmark68_pts = KL.Lambda(
                lambda xx: xx + mean_molded_landmark68_pts,
                name='pred_molded_landmark68_pts')(x)
            molded_head_boxes = KL.Lambda(
                lambda xx: utils.extract_landmark68_boxes_graph(
                    xx, head_label_names, molded_padding_dict),
                name='molded_head_boxes')(pred_molded_landmark68_pts)

            # compute landmark loss
            if mode == 'training':
                # Point loss
                def _l2_loss(pts1, pts2):
                    # (batch, 68, 2)
                    return tf.reduce_mean(
                        tf.norm(pts1 - pts2, axis=-1), axis=-1)
                landmark68_loss = KL.Lambda(lambda xx: _l2_loss(xx[0], xx[1]))(
                    [pred_molded_landmark68_pts, input_gt_molded_landmark68_pts])
                landmark68_loss = KL.Lambda(
                    lambda xx: tf.where(
                        tf.reshape(xx[0] > 0, tf.shape(xx[1])),
                        xx[1], tf.zeros_like(xx[1])),
                    name='landmark68_loss')([
                        input_gt_molded_landmark68_pts_exist, landmark68_loss])
                print('landmark68_loss.shape={}, {}'.format(
                    landmark68_loss.shape, landmark68_loss._keras_shape))

        elif box_pred_method == 'regress_segbox':
            x = box_feature
            x = KL.Dense(num_heads * 4, name='box_fc')(x)

            use_rpn_box_loss = options.get('use_rpn_box_loss', True)
            print(f'use_rpn_box_loss={use_rpn_box_loss}')

            if use_rpn_box_loss:
                pred_head_box_deforms = KL.Reshape(
                    (num_heads, 4))(x)  # box deforms

                pred_molded_head_boxes = KL.Lambda(
                    _std_deform_to_box, name='pred_molded_head_boxes')(
                    pred_head_box_deforms)
                head_box_padding_ratio = options['head_box_padding_ratio']
                molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                    xx + tf.constant([
                        - head_box_padding_ratio,
                        - head_box_padding_ratio,
                        head_box_padding_ratio,
                        head_box_padding_ratio
                    ], tf.float32)), name='molded_head_boxes')(pred_molded_head_boxes)

                # compute segbox loss
                if mode == 'training':
                    # Box loss
                    use_soft_l1_loss = options.get('use_soft_l1_loss', True)

                    def _l1_loss(box_deform1, box_deform2):
                        # (batch, num_heads, 4)
                        if use_soft_l1_loss:
                            return tf.reduce_mean(
                                tf.sqrt(tf.square(box_deform1 -
                                                  box_deform2) + K.epsilon()),
                                axis=[1, 2])
                        else:
                            return tf.reduce_mean(tf.abs(box_deform1 - box_deform2), axis=[1, 2])
                    box_loss = KL.Lambda(lambda xx: _l1_loss(xx[0], xx[1]))(
                        [input_gt_head_box_deforms, pred_head_box_deforms])
                    box_loss = KL.Lambda(
                        lambda xx: tf.where(tf.reshape(
                            xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                        name='box_loss')([
                            input_gt_molded_head_boxes_exist,
                            box_loss])

                    print('box_loss.shape={}, {}'.format(
                        box_loss.shape, box_loss._keras_shape))
            else:
                pred_molded_head_boxes = KL.Reshape((num_heads, 4))(x)

                head_box_padding_ratio = options['head_box_padding_ratio']
                molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                    xx + tf.constant([
                        - head_box_padding_ratio,
                        - head_box_padding_ratio,
                        head_box_padding_ratio,
                        head_box_padding_ratio
                    ], tf.float32)), name='molded_head_boxes')(pred_molded_head_boxes)

                # compute segbox loss
                if mode == 'training':
                    # Box loss
                    use_soft_l1_loss = options.get('use_soft_l1_loss', True)

                    def _l1_loss(box_deform1, box_deform2):
                        # (batch, num_heads, 4)
                        if use_soft_l1_loss:
                            return tf.reduce_mean(
                                tf.sqrt(tf.square(box_deform1 -
                                                  box_deform2) + K.epsilon()),
                                axis=[1, 2])
                        else:
                            return tf.reduce_mean(tf.abs(box_deform1 - box_deform2), axis=[1, 2])
                    box_loss = KL.Lambda(lambda xx: _l1_loss(xx[0], xx[1]))(
                        [input_gt_molded_head_boxes, pred_molded_head_boxes])
                    box_loss = KL.Lambda(
                        lambda xx: tf.where(tf.reshape(
                            xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                        name='box_loss')([
                            input_gt_molded_head_boxes_exist,
                            box_loss])

                    print('box_loss.shape={}, {}'.format(
                        box_loss.shape, box_loss._keras_shape))

        elif box_pred_method == 'gt_segbox':
            head_box_padding_ratio = options['head_box_padding_ratio']
            molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                xx + tf.constant([
                    - head_box_padding_ratio,
                    - head_box_padding_ratio,
                    head_box_padding_ratio,
                    head_box_padding_ratio
                ], tf.float32)), name='molded_head_boxes')(input_gt_molded_head_boxes)

        if 'fixed_head_box' in options:
            # replace certain molded_head_boxes with assigned ones
            fixed_head_box = options['fixed_head_box']
            # tf.where below requires a boolean condition tensor
            fixed_head_box_flags = np.zeros((num_heads,), np.bool_)
            fixed_head_box_values = np.zeros((num_heads, 4), np.float32)
            for head_id, box in fixed_head_box.items():
                fixed_head_box_flags[head_id] = True
                fixed_head_box_values[head_id, :] = np.array(box, np.float32)
            print(f'fixed_head_box_flags={fixed_head_box_flags}')
            print(f'fixed_head_box_values={fixed_head_box_values}')

            fixed_head_box_flags = tf.tile(
                tf.expand_dims(tf.expand_dims(
                    tf.constant(fixed_head_box_flags), 0), -1),
                [tf.shape(molded_head_boxes)[0], 1, 4])
            fixed_head_box_values = tf.tile(
                tf.expand_dims(tf.constant(fixed_head_box_values), 0),
                [tf.shape(molded_head_boxes)[0], 1, 1])
            molded_head_boxes = KL.Lambda(lambda xx: tf.where(
                fixed_head_box_flags, fixed_head_box_values, xx))(molded_head_boxes)

        # visualize pts and boxes
        # with tf.name_scope('boxes_pts'):

        #     def _show_boxes_pts(im, boxes, pts=None):
        #         return visualize.tf_display_boxes_pts(
        #             im, boxes, pts, utils.MEAN_PIXEL)

        #     show_num = min(batch_size, 3)
        #     if box_pred_method == 'regress_landmark':
        #         label_pts = [('pred_molded_landmark68_pts',
        #                       pred_molded_landmark68_pts)]
        #         if mode == 'training':
        #             label_pts.append(
        #                 ('input_gt_molded_landmark68_pts', input_gt_molded_landmark68_pts))
        #         for label, pts in label_pts:
        #             plot_ims = []
        #             for k in range(show_num):
        #                 im = tfplot.ops.plot(_show_boxes_pts, [
        #                     input_molded_image[k, :, :, :],
        #                     molded_head_boxes[k, :, :],
        #                     pts[k, :, :]])
        #                 plot_ims.append(im)
        #             plot_ims = tf.stack(plot_ims, axis=0)
        #             tf.summary.image(
        #                 name=label, tensor=plot_ims)
        #     else:
        #         plot_ims = []
        #         for k in range(show_num):
        #             im = tfplot.ops.plot(_show_boxes_pts, [
        #                 input_molded_image[k, :, :, :],
        #                 molded_head_boxes[k, :, :]])
        #             plot_ims.append(im)
        #         plot_ims = tf.stack(plot_ims, axis=0)
        #         tf.summary.image(
        #             name='molded_head_boxes', tensor=plot_ims)

        # Construct Head Networks
        head_class_nums = [len(class_ids) for class_ids in heads]

        # ROI Pooling
        pool_size = options.get('pool_size', 56)
        deconv_num = options.get('deconv_num', 2)
        conv_num = options.get('conv_num', 1)

        molded_head_boxes = KL.Lambda(tf.stop_gradient)(molded_head_boxes)

        aligned = PyramidROIAlignAll(
            [pool_size, pool_size], name="roi_align_mask")(
                [molded_head_boxes] + mrcnn_feature_maps)
        # print(aligned._keras_shape)

        fg_masks = [None] * num_heads
        bg_masks = [None] * num_heads

        def _slice_lambda(index):
            return lambda xx: xx[:, index, :, :, :]

        head_mask_features = [None] * num_heads
        for i in range(num_heads):
            x = KL.Lambda(_slice_lambda(i))(aligned)

            for k in range(conv_num):
                x = KL.Conv2D(
                    256, (3, 3),
                    padding="same",
                    name=f"mrcnn_mask_conv{k+1}_{i}")(x)
                x = BatchNorm(axis=-1, name=f'mrcnn_mask_bn{k+1}_{i}')(x)
                x = KL.Activation('relu')(x)
                if dropout_rate > 0:
                    x = KL.Dropout(dropout_rate)(x)

            if deconv_num == 1:  # to stay compatible with previously trained models
                x = KL.Conv2DTranspose(
                    256, (2, 2),
                    strides=2,
                    activation="relu",
                    name="mrcnn_mask_deconv_%d" % i)(x)
            else:
                for k in range(deconv_num):
                    x = KL.Conv2DTranspose(
                        256, (2, 2),
                        strides=2,
                        activation="relu",
                        name="mrcnn_mask_deconv%d_%d" % (k + 1, i))(x)
            # [batch, h, w, 256]
            head_mask_features[i] = x

        mask_feature_size = pool_size * 2**deconv_num

        for i in range(num_heads):
            x = head_mask_features[i]
            num_classes_this_head = head_class_nums[i]
            assert num_classes_this_head > 0

            x = KL.Conv2D(
                1 + num_classes_this_head, (1, 1), strides=1,
                name='mrcnn_mask_conv_last_%d' % i,
                activation='linear')(x)
            x = KL.Lambda(
                lambda xx: tf.nn.softmax(xx, axis=-1),
                name="mrcnn_fullmask_%d" % i)(x)

            # [batch, height, width, num_classes]
            # [batch, num_classes, height, width]
            fg_masks[i] = KL.Lambda(
                lambda xx: tf.transpose(xx[:, :, :, 1:], [0, 3, 1, 2]),
                name='mrcnn_fg_mask_%d' % i)(x)

            # [batch, height, width]
            bg_masks[i] = KL.Lambda(
                lambda xx: xx[:, :, :, 0], name='mrcnn_bg_mask_%d' % i)(x)

            print(fg_masks[i]._keras_shape, fg_masks[i].shape,
                  bg_masks[i]._keras_shape, bg_masks[i].shape)

        if len(fg_masks) > 1:
            mrcnn_fg_masks = KL.Lambda(
                lambda xx: tf.concat(xx, axis=1), name='mrcnn_fg_masks')(fg_masks)
        else:
            mrcnn_fg_masks = KL.Lambda(
                lambda xx: xx, name='mrcnn_fg_masks')(fg_masks[0])

        if len(bg_masks) > 1:
            mrcnn_bg_masks = KL.Lambda(
                lambda xx: tf.stack(xx, axis=1), name='mrcnn_bg_masks')(bg_masks)
        else:
            mrcnn_bg_masks = KL.Lambda(
                lambda xx: tf.expand_dims(xx, axis=1),
                name='mrcnn_bg_masks')(bg_masks[0])

        # [batch, num_masks+num_heads, height, width]
        mrcnn_masks = KL.Concatenate(
            axis=1, name='mrcnn_masks')([mrcnn_fg_masks, mrcnn_bg_masks])
        print('mrcnn_masks.shape={}, {}'.format(mrcnn_masks.shape,
                                                mrcnn_masks._keras_shape))

        def _tile_by_head_classes(data):
            tiled = [None] * num_masks
            for i, class_ids in enumerate(heads):
                for class_id in class_ids:
                    tiled[class_id] = data[:, i]
            assert None not in tiled
            return tf.stack(tiled, axis=1)

        # Unmold masks back to image view
        def _unmold_mask(masks, boxes):
            # masks: (batch, num_masks, h, w)
            # boxes: (batch, num_heads, 4)
            mask_h, mask_w = tf.shape(masks)[2], tf.shape(masks)[3]

            # (batch, num_masks, 4)
            boxes = _tile_by_head_classes(boxes)

            masks = tf.reshape(masks, (-1, mask_h, mask_w))
            boxes = tf.reshape(boxes, (-1, 4))

            unmolded_masks = inverse_box_crop(masks, boxes, [h, w])
            unmolded_masks = tf.reshape(unmolded_masks, (-1, num_masks, h, w))
            return unmolded_masks

        output_masks = KL.Lambda(
            lambda xx: _unmold_mask(xx[0], xx[1]),
            name='output_masks')([mrcnn_fg_masks, molded_head_boxes])
        print('output_masks.shape={}, {}'.format(
            output_masks.shape, output_masks._keras_shape))

        # if options.get('full_view_mask_loss', False):

        if mode == "training":
            head_mask_shape = [mask_feature_size, mask_feature_size]
            print('head_mask_shape={}'.format(head_mask_shape))

            # mask loss
            # extract target gt fg masks
            def _extract_gt_fg_batched(gt_masks, boxes):
                # gt_masks: [batch, num_masks, h, w]
                # boxes: [batch, num_heads, 4]

                # [batch * num_masks, h, w, 1]
                gt_masks = tf.reshape(gt_masks, [-1, h, w, 1])

                # [batch, num_masks, 4]
                boxes = _tile_by_head_classes(boxes)
                # [batch * num_masks, 4]
                boxes = tf.reshape(boxes, [-1, 4])

                # [batch * num_masks, mask_h, mask_w]
                target_masks = tf.image.crop_and_resize(
                    gt_masks, boxes, tf.range(tf.shape(gt_masks)[0]),
                    head_mask_shape)
                target_masks = tf.reshape(target_masks,
                                          [-1, num_masks] + head_mask_shape)
                return target_masks

            target_gt_fg_masks = KL.Lambda(
                lambda xx: _extract_gt_fg_batched(xx[0], xx[1]))(
                    [molded_gt_masks, molded_head_boxes])

            # extract target gt bg masks
            def _extract_gt_bg_batched(gt_masks, boxes):
                # gt_masks: [batch, num_masks, h, w]
                # boxes: [batch, num_heads, 4]

                gt_bg_masks = [None] * num_heads
                for i, class_ids in enumerate(heads):
                    gt_masks_this_head = [None] * len(class_ids)
                    for j, class_id in enumerate(class_ids):
                        # each of [batch, h, w]
                        gt_masks_this_head[j] = gt_masks[:, class_id, :, :]
                    # [batch, len(class_ids), h, w]
                    gt_masks_this_head = tf.stack(gt_masks_this_head, axis=1)
                    # [batch, h, w]
                    gt_bg_masks[i] = 1.0 - tf.reduce_max(
                        gt_masks_this_head, axis=1)

                # [batch, num_heads, h, w]
                gt_bg_masks = tf.stack(gt_bg_masks, axis=1)
                # [batch * num_heads, h, w, 1]
                gt_bg_masks = tf.reshape(gt_bg_masks, [-1, h, w, 1])

                # [batch * num_heads, 4]
                boxes = tf.reshape(boxes, [-1, 4])

                # [batch * num_heads, mask_h, mask_w]
                target_masks = tf.image.crop_and_resize(
                    gt_bg_masks, boxes, tf.range(tf.shape(gt_bg_masks)[0]),
                    head_mask_shape,
                    extrapolation_value=1)  # regions outside the crop count as background
                target_masks = tf.reshape(target_masks,
                                          [-1, num_heads] + head_mask_shape)
                return target_masks

            target_gt_bg_masks = KL.Lambda(
                lambda xx: _extract_gt_bg_batched(xx[0], xx[1]))(
                    [molded_gt_masks, molded_head_boxes])

            target_gt_masks = KL.Concatenate(
                axis=1, name='target_gt_masks')(
                    [target_gt_fg_masks, target_gt_bg_masks])
            print('target_gt_masks.shape={}, {}'.format(
                target_gt_masks.shape, target_gt_masks._keras_shape))

            mask_loss_im = KL.Lambda(
                lambda xx: K.binary_crossentropy(target=xx[0], output=xx[1]),
                name="mask_loss_im")([target_gt_masks, mrcnn_masks])
            print('mask_loss_im.shape: {} {}'.format(mask_loss_im._keras_shape,
                                                     mask_loss_im.shape))

            mask_loss_im_reduced = KL.Lambda(
                lambda xx: tf.reduce_mean(xx, axis=[2, 3]),
                name='mask_loss_im_reduced')(mask_loss_im)

            def _get_individual_losses(loss_im, name, index):
                return KL.Lambda(
                    lambda xx: tf.reduce_mean(xx[:, index], axis=[1, 2]),
                    name=name)(loss_im)

            # visualization
            with tf.name_scope('original_masks'):
                for i, class_ids in enumerate(heads):
                    for j, class_id in enumerate(class_ids):
                        name = head_label_names[i][j]
                        fg_target_pred_original_view = tf.expand_dims(tf.concat([
                            tf.cast(
                                input_gt_masks[:, class_id, :, :], tf.float32),
                            output_masks[:, class_id, :, :]], axis=-1), axis=-1)
                        tf.summary.image(
                            f'fg_target_pred_original_view_{i}_{name}',
                            fg_target_pred_original_view)

            with tf.name_scope('cropped_masks'):
                for i, class_ids in enumerate(heads):
                    for j, class_id in enumerate(class_ids):
                        name = head_label_names[i][j]
                        fg_target_pred_loss = tf.expand_dims(tf.concat([
                            target_gt_fg_masks[:, class_id, :, :],
                            mrcnn_fg_masks[:, class_id, :, :],
                            mask_loss_im[:, class_id]], axis=-1), axis=-1)
                        tf.summary.image(
                            f'fg_target_pred_loss_{name}', fg_target_pred_loss)
                    bg_target_pred_loss = tf.expand_dims(tf.concat([
                        target_gt_bg_masks[:, i, :, :],
                        mrcnn_bg_masks[:, i, :, :],
                        mask_loss_im[:, i + num_masks]], axis=-1), axis=-1)
                    tf.summary.image(
                        f'bg_target_pred_loss_{i}', bg_target_pred_loss)

            mask_loss = KL.Lambda(
                lambda xx: tf.reduce_mean(xx, axis=[1]))(mask_loss_im_reduced)
            mask_loss = KL.Lambda(
                lambda xx: tf.where(tf.reshape(
                    xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                name='mask_loss')([input_gt_masks_exist, mask_loss])
            print('mask_loss.shape={}, {}'.format(mask_loss.shape,
                                                  mask_loss._keras_shape))

            if box_pred_method == 'lbf_guided':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_molded_lbf_landmark68_pts_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_molded_lbf_landmark68_pts
                ]
                outputs = [mask_loss]
            elif box_pred_method == 'regress_landmark':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_landmark68_pts_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_landmark68_pts
                ]
                outputs = [mask_loss, landmark68_loss]
            elif box_pred_method == 'regress_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_head_boxes,
                ]
                outputs = [mask_loss, box_loss]
            elif box_pred_method == 'gt_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_head_boxes
                ]
                outputs = [mask_loss]
        else:
            if box_pred_method == 'lbf_guided':
                inputs = [
                    input_molded_image_exist,
                    input_molded_lbf_landmark68_pts_exist,
                    input_molded_image,
                    input_molded_lbf_landmark68_pts
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
            elif box_pred_method == 'regress_landmark':
                inputs = [
                    input_molded_image_exist,
                    input_molded_image
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes,
                    pred_molded_landmark68_pts
                ]
            elif box_pred_method == 'regress_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_molded_image
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
            elif box_pred_method == 'gt_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_molded_head_boxes
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
        return [inputs, outputs]
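
The mask targets in the training branch above come from tf.image.crop_and_resize; the following is a minimal standalone sketch of that call (TF 1.x signature, illustrative shapes only):

import tensorflow as tf

# Two 64x64 single-channel masks, cropped and resized to 28x28 targets.
masks = tf.ones([2, 64, 64, 1], tf.float32)
# Normalized (y1, x1, y2, x2) boxes, one per mask.
boxes = tf.constant([[0.1, 0.1, 0.6, 0.6],
                     [0.2, 0.0, 0.9, 0.5]], tf.float32)
targets = tf.image.crop_and_resize(
    masks, boxes, box_ind=tf.range(2), crop_size=[28, 28])  # (2, 28, 28, 1)
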
Beispiel #15
0
def pointnet2_cls_ssg(num_class, num_points, num_dim=3):
    '''
    input:  BxNx3
    output: Bxnum_class
    '''
    input = keras.Input((num_points, num_dim))  # (batch, num_points, num_dim)
    inp = input

    if num_dim > 3:
        l0_xyz = crop(2, 0, 3)(input)
        l0_points = crop(2, 3, num_dim)(input)
        use_feature = True
    else:
        l0_xyz = input
        l0_points = input  # useless
        # for the first stage, there is no high level feature, only coordinate
        use_feature = False

    l1_xyz, l1_points, _ = pointnet_sa_module(l0_xyz,
                                              l0_points,
                                              n_centroid=512,
                                              radius=0.2,
                                              n_samples=32,
                                              mlp=[64, 64, 128],
                                              bn=True,
                                              relu6=False,
                                              use_xyz=True,
                                              use_feature=use_feature,
                                              random_sample=False)

    l2_xyz, l2_points, _ = pointnet_sa_module(l1_xyz,
                                              l1_points,
                                              n_centroid=128,
                                              radius=0.4,
                                              n_samples=64,
                                              mlp=[128, 128, 256],
                                              bn=True,
                                              relu6=False,
                                              use_xyz=True,
                                              use_feature=True,
                                              random_sample=False)
    '''
    l3_xyz, l3_points, _ = pointnet_sa_module(l2_xyz, l2_points,
                                    n_centroid=32, radius=0.6,
                                    n_samples=32, mlp=[256,512,1024],
                                    bn=True, relu6=False, use_xyz=True,
                                    use_feature=True) 
    x = layers.GlobalMaxPooling1D()(l3_points)                                
    # at this stage, no sampling or grouping; use the PointNet layer directly.
    # As Keras doesn't support None as an input or output size,
    # the original implementation doesn't work here.
    '''
    # try this instead
    x = l2_points
    x = layers.Reshape((-1, 1, 256))(x)
    x = mlp_layers(x, [256, 512, 1024])
    x = layers.GlobalMaxPooling2D()(x)

    # fully connected layers
    # x = layers.Flatten()(x) # (Batch, :)
    x = fully_connected(x, 512, bn=True, relu6=False, activation=True)
    x = layers.Dropout(0.5)(x)
    x = fully_connected(x, 256, bn=True, relu6=False, activation=True)
    x = layers.Dropout(0.5)(x)
    x = fully_connected(x, num_class, bn=False,
                        activation=False)  # no BN nor ReLU here
    x = layers.Softmax()(x)
    return keras.models.Model(inputs=inp, outputs=x)
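
A minimal usage sketch for the classifier above (hypothetical shapes; assumes pointnet_sa_module, mlp_layers, fully_connected and crop are importable from the same module):

# Hypothetical usage: 40-class classification over 1024 xyz points.
model = pointnet2_cls_ssg(num_class=40, num_points=1024, num_dim=3)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
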
Beispiel #16
0
def trainModel(digitSizeID=0,
               toRuleID=0,
               layerCount=1,
               trainingSize=1,
               hiddenSize=128,
               epochSize=100,
               modelID=0):
    allData = datasets[digitSizeID][toRuleID]
    DIGITS = toDigitSize[digitSizeID]
    TARGETSIZE = toTargetSize[digitSizeID][toRuleID]
    QUERYLEN = DIGITS + 1 + DIGITS
    RNN = layers.LSTM
    HIDDEN_SIZE = hiddenSize
    BATCH_SIZE = 128
    DICT_SIZE = dictSizes[digitSizeID][toRuleID]

    print('Build model...')
    if modelID == 0:
        model = Sequential()
        model.add(RNN(HIDDEN_SIZE, input_shape=(QUERYLEN, DICT_SIZE)))
        model.add(layers.RepeatVector(TARGETSIZE))
        for i in range(0, layerCount):
            model.add(RNN(HIDDEN_SIZE, return_sequences=True))

        model.add(layers.TimeDistributed(layers.Dense(DICT_SIZE)))
        model.add(layers.Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        model.summary()
    elif modelID == 1:
        model = Sequential()
        model.add(
            RNN(HIDDEN_SIZE,
                input_shape=(QUERYLEN, DICT_SIZE),
                return_sequences=True))
        model.add(layers.Reshape((HIDDEN_SIZE, QUERYLEN)))
        model.add(layers.TimeDistributed(layers.Dense(TARGETSIZE)))
        model.add(layers.Reshape((TARGETSIZE, HIDDEN_SIZE)))
        for i in range(0, layerCount):
            model.add(RNN(HIDDEN_SIZE, return_sequences=True))

        model.add(layers.TimeDistributed(layers.Dense(DICT_SIZE)))
        model.add(layers.Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        model.summary()

    csvLog = []
    finalLoss = 0
    finalAccuracy = 0
    finalValAccuracy = 0
    configInfo = [
        toDigitSize[digitSizeID], ALLRULESID[toRuleID], layerCount,
        trainingSize, hiddenSize, epochSize, modelID
    ]

    trainingDataSubset = allData["train"][:math.floor(trainingSize *
                                                      trainingSize)]
    for i in range(0, epochSize):
        print('=' * 50)
        print('Iteration', i)
        history = model.fit(allData["train"][:, :QUERYLEN],
                            allData["train"][:, QUERYLEN:],
                            batch_size=BATCH_SIZE,
                            epochs=1,
                            validation_data=(allData["valid"][:, :QUERYLEN],
                                             allData["valid"][:, QUERYLEN:]))
        finalLoss, finalAccuracy, finalValAccuracy = history.history[
            "loss"], history.history["acc"], history.history["val_acc"]
        csvLog.append([
            *configInfo, history.history["loss"], history.history["acc"],
            history.history["val_acc"]
        ])

    with open("trainingLog.csv", 'a', newline='', encoding='utf-8') as csvfile:
        toWriter = csv.writer(csvfile)
        for r in csvLog:
            toWriter.writerow(r)

    testCorrect = 0
    finalTestAccuracy = 0
    testQuery = allData["test"][:, :QUERYLEN]
    preds = model.predict_classes(testQuery, verbose=0)
    testTargets = allData["test"][:, QUERYLEN:]

    def backToString(classes):
        return "".join([allData["oneHotMap"][c] for c in classes])

    for i in range(0, len(preds)):
        correct = backToString(
            [list(l).index(True) for l in list(testTargets[i])])
        guess = backToString(list(preds[i]))
        if correct == guess:
            testCorrect += 1
        if i < 5:
            query = backToString(
                [list(l).index(True) for l in list(testQuery[i])])
            print("Q: ", query, "; Prediction: ", guess, "; Answer: ", correct,
                  " (", correct == guess, ") ")
    finalTestAccuracy = testCorrect / len(preds)
    print("Final Test Accuracy is {}".format(finalTestAccuracy))
    resultAry = [
        *configInfo, finalLoss, finalAccuracy, finalValAccuracy,
        finalTestAccuracy
    ]
    with open("finalResults.csv", 'a', newline='',
              encoding='utf-8') as csvfile:
        toWriter = csv.writer(csvfile)
        toWriter.writerow(resultAry)
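
A hedged usage sketch; trainModel depends on module-level globals (datasets, toDigitSize, toTargetSize, dictSizes, ALLRULESID), so this only runs once those are populated:

# Hypothetical call: half the training data, 2 stacked LSTM layers, 10 epochs.
trainModel(digitSizeID=0, toRuleID=0, layerCount=2,
           trainingSize=0.5, hiddenSize=128, epochSize=10, modelID=0)
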
Beispiel #17
0
def reshapeEasy(inp, target_shape):
    from keras import layers
    inputR = layers.Reshape(target_shape=target_shape)(inp)
    return inputR
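
A quick sketch of calling the helper above on a Keras tensor (illustrative shapes):

from keras import layers, models

inp = layers.Input(shape=(28, 28))            # (batch, 28, 28)
flat = reshapeEasy(inp, target_shape=(784,))  # -> (batch, 784)
model = models.Model(inputs=inp, outputs=flat)
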
Beispiel #18
0
def single_ae(
        enc_size,
        input_shape,
        name='single_ae',
        prefix=None,
        ae_type='dense',  # 'dense', or 'conv'
        conv_size=None,
        input_model=None,
        enc_lambda_layers=None,
        batch_norm=True,
        padding='same',
        activation=None,
        include_mu_shift_layer=False,
        do_vae=False):
    """single-layer Autoencoder (i.e. input - encoding - output"""

    # naming
    model_name = name
    if prefix is None:
        prefix = model_name

    if enc_lambda_layers is None:
        enc_lambda_layers = []

    # prepare input
    input_name = '%s_input' % prefix
    if input_model is None:
        assert input_shape is not None, 'input_shape is required when input_model is None'
        input_tensor = KL.Input(shape=input_shape, name=input_name)
        last_tensor = input_tensor
    else:
        input_tensor = input_model.input
        last_tensor = input_model.output
        input_shape = last_tensor.shape.as_list()[1:]
    input_nb_feats = last_tensor.shape.as_list()[-1]

    # prepare conv type based on input
    if ae_type == 'conv':
        ndims = len(input_shape) - 1
        convL = getattr(KL, 'Conv%dD' % ndims)
        assert conv_size is not None, 'with conv ae, need conv_size'
    conv_kwargs = {'padding': padding, 'activation': activation}

    # if we want to go through a dense layer in the middle of the U, we need to:
    # - flatten the last layer if it is not flat
    # - do dense encoding and decoding
    # - unflatten (reshape spatially) at the end
    if ae_type == 'dense' and len(input_shape) > 1:
        name = '%s_ae_%s_down_flat' % (prefix, ae_type)
        last_tensor = KL.Flatten(name=name)(last_tensor)

    # recall this layer
    pre_enc_layer = last_tensor

    # encoding layer
    if ae_type == 'dense':
        assert len(
            enc_size) == 1, "enc_size should be of length 1 for dense layer"

        enc_size_str = ''.join(['%d_' % d for d in enc_size])[:-1]
        name = '%s_ae_mu_enc_dense_%s' % (prefix, enc_size_str)
        last_tensor = KL.Dense(enc_size[0], name=name)(pre_enc_layer)

    else:  # convolution
        # convolve then resize. enc_size should be [nb_dim1, nb_dim2, ..., nb_feats]
        assert len(enc_size) == len(input_shape), \
            "encoding size does not match input shape %d %d" % (len(enc_size), len(input_shape))

        if list(enc_size)[:-1] != list(input_shape)[:-1] and \
            all([f is not None for f in input_shape[:-1]]) and \
            all([f is not None for f in enc_size[:-1]]):

            assert len(
                enc_size
            ) - 1 == 2, "Sorry, I have not yet implemented non-2D resizing -- need to check out interpn!"
            name = '%s_ae_mu_enc_conv' % (prefix)
            last_tensor = convL(enc_size[-1],
                                conv_size,
                                name=name,
                                **conv_kwargs)(pre_enc_layer)

            name = '%s_ae_mu_enc' % (prefix)
            resize_fn = lambda x: tf.image.resize_bilinear(x, enc_size[:-1])
            last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

        elif enc_size[
                -1] is None:  # convolutional, but won't tell us bottleneck
            name = '%s_ae_mu_enc' % (prefix)
            last_tensor = KL.Lambda(lambda x: x, name=name)(pre_enc_layer)

        else:
            name = '%s_ae_mu_enc' % (prefix)
            last_tensor = convL(enc_size[-1],
                                conv_size,
                                name=name,
                                **conv_kwargs)(pre_enc_layer)

    if include_mu_shift_layer:
        # shift
        name = '%s_ae_mu_shift' % (prefix)
        last_tensor = layers.LocalBiasLayer(name=name)(last_tensor)

    # encoding clean-up layers
    for layer_fcn in enc_lambda_layers:
        lambda_name = layer_fcn.__name__
        name = '%s_ae_mu_%s' % (prefix, lambda_name)
        last_tensor = KL.Lambda(layer_fcn, name=name)(last_tensor)

    if batch_norm is not None:
        name = '%s_ae_mu_bn' % (prefix)
        last_tensor = KL.BatchNormalization(axis=batch_norm,
                                            name=name)(last_tensor)

    # have a simple layer that does nothing to have a clear name before sampling
    name = '%s_ae_mu' % (prefix)
    last_tensor = KL.Lambda(lambda x: x, name=name)(last_tensor)

    # if doing variational AE, will need the sigma layer as well.
    if do_vae:
        mu_tensor = last_tensor

        # encoding layer
        if ae_type == 'dense':
            name = '%s_ae_sigma_enc_dense_%s' % (prefix, enc_size_str)
            last_tensor = KL.Dense(enc_size[0], name=name)(pre_enc_layer)

        else:
            if list(enc_size)[:-1] != list(input_shape)[:-1] and \
                all([f is not None for f in input_shape[:-1]]) and \
                all([f is not None for f in enc_size[:-1]]):

                assert len(
                    enc_size
                ) - 1 == 2, "Sorry, I have not yet implemented non-2D resizing..."
                name = '%s_ae_sigma_enc_conv' % (prefix)
                last_tensor = convL(enc_size[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)

                name = '%s_ae_sigma_enc' % (prefix)
                resize_fn = lambda x: tf.image.resize_bilinear(
                    x, enc_size[:-1])
                last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

            elif enc_size[
                    -1] is None:  # convolutional, but won't tell us bottleneck
                name = '%s_ae_sigma_enc' % (prefix)
                last_tensor = convL(pre_enc_layer.shape.as_list()[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)
                # cannot use lambda, then mu and sigma will be same layer.
                # last_tensor = KL.Lambda(lambda x: x, name=name)(pre_enc_layer)

            else:
                name = '%s_ae_sigma_enc' % (prefix)
                last_tensor = convL(enc_size[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)

        # encoding clean-up layers
        for layer_fcn in enc_lambda_layers:
            lambda_name = layer_fcn.__name__
            name = '%s_ae_sigma_%s' % (prefix, lambda_name)
            last_tensor = KL.Lambda(layer_fcn, name=name)(last_tensor)

        if batch_norm is not None:
            name = '%s_ae_sigma_bn' % (prefix)
            last_tensor = KL.BatchNormalization(axis=batch_norm,
                                                name=name)(last_tensor)

        # have a simple layer that does nothing to have a clear name before sampling
        name = '%s_ae_sigma' % (prefix)
        last_tensor = KL.Lambda(lambda x: x, name=name)(last_tensor)

        logvar_tensor = last_tensor

        # VAE sampling
        sampler = _VAESample().sample_z

        name = '%s_ae_sample' % (prefix)
        last_tensor = KL.Lambda(sampler, name=name)([mu_tensor, logvar_tensor])

    if include_mu_shift_layer:
        # shift
        name = '%s_ae_sample_shift' % (prefix)
        last_tensor = layers.LocalBiasLayer(name=name)(last_tensor)

    # decoding layer
    if ae_type == 'dense':
        name = '%s_ae_%s_dec_flat_%s' % (prefix, ae_type, enc_size_str)
        last_tensor = KL.Dense(np.prod(input_shape), name=name)(last_tensor)

        # unflatten if dense method
        if len(input_shape) > 1:
            name = '%s_ae_%s_dec' % (prefix, ae_type)
            last_tensor = KL.Reshape(input_shape, name=name)(last_tensor)

    else:

        if list(enc_size)[:-1] != list(input_shape)[:-1] and \
            all([f is not None for f in input_shape[:-1]]) and \
            all([f is not None for f in enc_size[:-1]]):

            name = '%s_ae_mu_dec' % (prefix)
            resize_fn = lambda x: tf.image.resize_bilinear(x, input_shape[:-1])
            last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

        name = '%s_ae_%s_dec' % (prefix, ae_type)
        last_tensor = convL(input_nb_feats,
                            conv_size,
                            name=name,
                            **conv_kwargs)(last_tensor)

    if batch_norm is not None:
        name = '%s_bn_ae_%s_dec' % (prefix, ae_type)
        last_tensor = KL.BatchNormalization(axis=batch_norm,
                                            name=name)(last_tensor)

    # create the model and return it
    model = Model(inputs=input_tensor, outputs=[last_tensor], name=model_name)
    return model
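
A minimal sketch of the dense variant (assumes the module-level imports used above, i.e. KL, Model and np): flatten 28x28x1 inputs, encode to 64 features, and decode back.

# Hypothetical usage; enc_size must have length 1 for ae_type='dense'.
ae = single_ae(enc_size=[64],
               input_shape=(28, 28, 1),
               ae_type='dense',
               batch_norm=-1)  # batch_norm is passed through as the BN axis
ae.compile(optimizer='adam', loss='mse')
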
Beispiel #19
0
def u_net_model(init_shape, final_size, lr_rate, req_result):
    x_input = layers.Input(init_shape)
    # Currently you have a 720p set of images. Let's rescale
    x_rescale = tf.keras.layers.experimental.preprocessing.Rescaling(
        scale=1. / 255)(x_input)
    skip_5 = layers.Conv2D(filters=32, kernel_size=(1, 1),
                           strides=1)(x_rescale)

    # ENCODING
    # For the Main Path, we have to go from 720p to 360.
    x = residual_block(x_rescale, filters=[32, 32, 32], f=3, s=2)
    skip_4 = x
    # From 360 we need to go again to 180
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_3 = x
    # From 180 to 90
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_2 = x
    # From 90 to 45
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_1 = x
    # From 45 to 9
    x = residual_block(x, filters=[16, 16, 16], f=3, s=5)
    skip_0 = x

    # FLATTEN AND DENSE LAYERS
    x = layers.Flatten()(x)
    x = layers.Dense(64)(x)
    x = layers.Dense(36)(x)

    # DECODING
    # Currently the shape is a flat 36
    x = layers.Reshape(target_shape=(9, 4, 1))(x)
    x = residual_block(x, filters=[64, 64, 64], f=1, s=1)
    x = layers.Reshape(target_shape=(9, 16, 16))(x)
    x = layers.Add()([x, skip_0])
    x = layers.Activation('relu')(x)
    # Now from 9x16x16 we need to keep up-scaling using transposed convolution
    x = trans_conv_block(x, skip_1, s=5)
    # From 45x80 to 90x160
    x = trans_conv_block(x, skip_2, s=2)
    # From 90x160 to 180x320
    x = trans_conv_block(x, skip_3, s=2)
    # From 180x320 to 360x640
    x = trans_conv_block(x, skip_4, s=2)
    # From 360x640 to 720x1280
    x = trans_conv_block(x, skip_5, s=2)

    # GOING BEYOND RECONSTRUCTION
    x_out = get_output(x, req_result)
    if req_result > 3:
        x_out = tf.image.resize(x_out,
                                size=final_size,
                                preserve_aspect_ratio=True)

    x_ups = tf.image.resize(x_rescale,
                            size=final_size,
                            method=tf.image.ResizeMethod.BICUBIC,
                            preserve_aspect_ratio=True)
    x_out = layers.Add(dtype='float32')([x_out, x_ups])
    x_out = layers.Activation('relu', dtype='float32')(x_out)

    # Compile and view summary
    model = tf.keras.Model(inputs=x_input, outputs=x_out)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),
                  loss=tf.keras.losses.MeanSquaredError())
    model.summary()
    return model
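
A hedged usage sketch; residual_block, trans_conv_block and get_output are defined elsewhere in this module, and the shapes follow the 720p comments above:

# Hypothetical call for 720x1280 RGB inputs.
model = u_net_model(init_shape=(720, 1280, 3),
                    final_size=(720, 1280),
                    lr_rate=1e-4,
                    req_result=1)
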
Beispiel #20
0
def design_dnn(nb_features,
               input_shape,
               nb_levels,
               conv_size,
               nb_labels,
               feat_mult=1,
               pool_size=2,
               padding='same',
               activation='elu',
               final_layer='dense-sigmoid',
               conv_dropout=0,
               conv_maxnorm=0,
               nb_input_features=1,
               batch_norm=False,
               name=None,
               prefix=None,
               use_strided_convolution_maxpool=True,
               nb_conv_per_level=2):
    """
    "deep" cnn with dense or global max pooling layer @ end...

    Could use sequential...
    """

    model_name = name
    if model_name is None:
        model_name = 'model_1'
    if prefix is None:
        prefix = model_name

    ndims = len(input_shape)
    input_shape = tuple(input_shape)

    convL = getattr(KL, 'Conv%dD' % ndims)
    maxpool = KL.MaxPooling3D if len(input_shape) == 3 else KL.MaxPooling2D
    if isinstance(pool_size, int):
        pool_size = (pool_size, ) * ndims

    # kwargs for the convolution layer
    conv_kwargs = {'padding': padding, 'activation': activation}
    if conv_maxnorm > 0:
        conv_kwargs['kernel_constraint'] = maxnorm(conv_maxnorm)

    # initialize a dictionary
    enc_tensors = {}

    # first layer: input
    name = '%s_input' % prefix
    enc_tensors[name] = KL.Input(shape=input_shape + (nb_input_features, ),
                                 name=name)
    last_tensor = enc_tensors[name]

    # down arm:
    # add nb_levels of conv + ReLu + conv + ReLu. Pool after each of first nb_levels - 1 layers
    for level in range(nb_levels):
        for conv in range(nb_conv_per_level):
            if conv_dropout > 0:
                name = '%s_dropout_%d_%d' % (prefix, level, conv)
                enc_tensors[name] = KL.Dropout(conv_dropout)(last_tensor)
                last_tensor = enc_tensors[name]

            name = '%s_conv_%d_%d' % (prefix, level, conv)
            nb_lvl_feats = np.round(nb_features * feat_mult**level).astype(int)
            enc_tensors[name] = convL(nb_lvl_feats,
                                      conv_size,
                                      **conv_kwargs,
                                      name=name)(last_tensor)
            last_tensor = enc_tensors[name]

        # max pool
        if use_strided_convolution_maxpool:
            name = '%s_strided_conv_%d' % (prefix, level)
            enc_tensors[name] = convL(nb_lvl_feats,
                                      pool_size,
                                      **conv_kwargs,
                                      name=name)(last_tensor)
            last_tensor = enc_tensors[name]
        else:
            name = '%s_maxpool_%d' % (prefix, level)
            enc_tensors[name] = maxpool(pool_size=pool_size,
                                        name=name,
                                        padding=padding)(last_tensor)
            last_tensor = enc_tensors[name]

    # dense layer
    if final_layer == 'dense-sigmoid':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(1, name=name,
                                     activation="sigmoid")(last_tensor)

    elif final_layer == 'dense-tanh':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(1, name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # Omitting BatchNorm for now, it seems to have a cpu vs gpu problem
        # https://github.com/tensorflow/tensorflow/pull/8906
        # https://github.com/fchollet/keras/issues/5802
        # name = '%s_bn' % prefix
        # enc_tensors[name] = KL.BatchNormalization(axis=batch_norm, name=name)(last_tensor)
        # last_tensor = enc_tensors[name]

        name = '%s_tanh' % prefix
        enc_tensors[name] = KL.Activation(activation="tanh",
                                          name=name)(last_tensor)

    elif final_layer == 'dense-softmax':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(nb_labels,
                                     name=name,
                                     activation="softmax")(last_tensor)

    # global max pooling layer
    elif final_layer == 'myglobalmaxpooling':

        name = '%s_batch_norm' % prefix
        enc_tensors[name] = KL.BatchNormalization(axis=batch_norm,
                                                  name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool' % prefix
        enc_tensors[name] = KL.Lambda(_global_max_nd, name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool_reshape' % prefix
        enc_tensors[name] = KL.Reshape((1, 1), name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # cannot do activation in a lambda layer. Could code it inside, but will add an extra layer
        name = '%s_global_max_pool_sigmoid' % prefix
        enc_tensors[name] = KL.Conv1D(1,
                                      1,
                                      name=name,
                                      activation="sigmoid",
                                      use_bias=True)(last_tensor)

    elif final_layer == 'globalmaxpooling':

        name = '%s_conv_to_featmaps' % prefix
        enc_tensors[name] = KL.Conv3D(2, 1, name=name,
                                      activation="relu")(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool' % prefix
        enc_tensors[name] = KL.GlobalMaxPooling3D(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # cannot do activation in a lambda layer. Could code it inside, but will add an extra layer
        name = '%s_global_max_pool_softmax' % prefix
        enc_tensors[name] = KL.Activation('softmax', name=name)(last_tensor)

    last_tensor = enc_tensors[name]

    # create the model
    model = Model(inputs=[enc_tensors['%s_input' % prefix]],
                  outputs=[last_tensor],
                  name=model_name)
    return model
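
A minimal usage sketch (assuming the module-level KL, Model and np imports): a 3-level 3D CNN with a dense-sigmoid head for binary classification.

# Hypothetical usage; input_shape is spatial only, channels are added inside.
model = design_dnn(nb_features=16,
                   input_shape=(32, 32, 32),
                   nb_levels=3,
                   conv_size=3,
                   nb_labels=1,
                   final_layer='dense-sigmoid')
model.compile(optimizer='adam', loss='binary_crossentropy')
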
Beispiel #21
0
    # naming the model
    model_name = name
    if prefix is None:
        prefix = model_name

    # first layer: input
    name = '%s_input' % prefix
    if input_model is None:
        input_tensor = KL.Input(shape=input_shape, name=name)
        last_tensor = input_tensor
    else:
        input_tensor = input_model.inputs
        last_tensor = input_model.outputs
        if isinstance(last_tensor, list):
            last_tensor = last_tensor[0]
        last_tensor = KL.Reshape(input_shape, name='predicted_output')(last_tensor)

    # get deformed labels
    n_labels = input_shape[-1]
    if validation_on_real_images:
        labels_gt = KL.Input(shape=input_shape[:-1]+[1], name='labels_input')
        input_tensor = [input_tensor[0], labels_gt]
    else:
        labels_gt = input_model.get_layer('labels_out').output

    # convert gt labels to 0...N-1 values
    n_labels = segmentation_label_list.shape[0]
    _, lut = utils.rearrange_label_list(segmentation_label_list)
    labels_gt = KL.Lambda(lambda x: tf.gather(tf.convert_to_tensor(lut, dtype='int32'),
                                              tf.cast(x, dtype='int32')), name='metric_convert_labels')(labels_gt)
Beispiel #22
0
                                  activation='relu'))(input_OD)

shape_before_Maxpool = K.int_shape(x)

x = TimeDistributed(
    layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'))(x)
x = TimeDistributed(
    layers.Conv2D(32, 6, padding='same', activation='relu', strides=(2, 2)))(x)
x = TimeDistributed(
    layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'))(x)
x = TimeDistributed(layers.Conv2D(4, 6, padding='same', activation='relu'))(x)

x = TimeDistributed(layers.Flatten())(x)
encoder_bef_reshape = layers.Dense(encoding_dim, activation='relu')(x)
encoder_output = layers.Reshape(
    (encoding_dim, num_cell, 1),
    input_shape=(num_cell, encoding_dim))(encoder_bef_reshape)

# input time information
time_input = layers.Input(shape=(1, num_cell, 1), name='time_input')
concat = layers.concatenate([encoder_output, time_input], axis=1)

output_size = encoding_dim

# predicting part
X = layers.Conv2D(64, 2, padding='same', activation='relu')(concat)

X = layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')(X)

X = layers.Conv2D(32, 2, padding='same', activation='relu')(X)
X = layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')(X)
Beispiel #23
0
x = layers.Dense(32, activation='relu')(x)

z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var) * epsilon

z = layers.Lambda(sampling)([z_mean, z_log_var])

decoder_input = layers.Input(K.int_shape(z)[1:])

x = layers.Dense(np.prod(shape_before_flattening[1:]), activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Conv2DTranspose(32, 3, padding='same', activation='relu', strides=(2, 2))(x)
x = layers.Conv2D(1, 3, padding='same', activation='sigmoid')(x)

decoder = Model(decoder_input, x)
z_decoded = decoder(z)

class CustomVariationalLayer(keras.layers.Layer):
    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        xent_loss = keras.metrics.binary_crossentropy(x, z_decoded)
        kl_loss = -5e-4 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)
    
    def call(self, inputs):
        # standard completion for this Keras VAE custom-loss layer
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        # the output is not used, but call() must return a tensor
        return x
Beispiel #24
0
def CapsNet(input_shape, n_class, routings):
    """
    Defining the CapsNet
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
    """
    x = layers.Input(shape=input_shape)
    conv1 = layers.Conv2D(filters=64,
                          kernel_size=3,
                          strides=1,
                          padding='valid',
                          activation='relu',
                          name='conv1')(x)
    conv2 = layers.Conv2D(filters=128,
                          kernel_size=3,
                          strides=1,
                          padding='valid',
                          activation='relu',
                          name='conv2')(conv1)
    conv3 = layers.Conv2D(filters=256,
                          kernel_size=3,
                          strides=2,
                          padding='valid',
                          activation='relu',
                          name='conv3')(conv2)
    primarycaps = PrimaryCap(conv3,
                             dim_capsule=8,
                             n_channels=32,
                             kernel_size=9,
                             strides=2,
                             padding='valid')
    digitcaps = CapsuleLayer(num_capsule=n_class,
                             dim_capsule=16,
                             routings=routings,
                             channels=32,
                             name='digitcaps')(primarycaps)
    out_caps = Length(name='capsnet')(digitcaps)
    """
    Decoder Network
    """
    y = layers.Input(shape=(n_class, ))
    masked_by_y = Mask()([digitcaps, y])
    masked = Mask()(digitcaps)

    decoder = models.Sequential(name='decoder')
    decoder.add(
        Dense(input_dim=16 * n_class, activation="relu",
              output_dim=7 * 7 * 32))
    decoder.add(Reshape((7, 7, 32)))
    decoder.add(BatchNormalization(momentum=0.8))
    decoder.add(
        layers.Deconvolution2D(32,
                               3,
                               3,
                               subsample=(1, 1),
                               border_mode='same',
                               activation="relu"))
    decoder.add(
        layers.Deconvolution2D(16,
                               3,
                               3,
                               subsample=(2, 2),
                               border_mode='same',
                               activation="relu"))
    decoder.add(
        layers.Deconvolution2D(8,
                               3,
                               3,
                               subsample=(2, 2),
                               border_mode='same',
                               activation="relu"))
    decoder.add(
        layers.Deconvolution2D(4,
                               3,
                               3,
                               subsample=(1, 1),
                               border_mode='same',
                               activation="relu"))
    decoder.add(
        layers.Deconvolution2D(1,
                               3,
                               3,
                               subsample=(1, 1),
                               border_mode='same',
                               activation="sigmoid"))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))
    """
    Models for training and evaluation (prediction)
    """
    train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = models.Model(x, [out_caps, decoder(masked)])

    return train_model, eval_model
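# A minimal usage sketch (an assumption, not part of the original example;
# the enclosing function is assumed to be named CapsNet, and the 1.0/0.392
# loss weighting follows the common CapsNet recipe):
train_model, eval_model = CapsNet(input_shape=(28, 28, 1), n_class=10,
                                  routings=3)
train_model.compile(optimizer='adam',
                    loss=['categorical_crossentropy', 'mse'],
                    loss_weights=[1.0, 0.392],
                    metrics={'capsnet': 'accuracy'})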
Example #25
import keras
from keras import layers

# latentDim, width and height are defined earlier in the original script.
generatorInput = keras.Input(shape=(latentDim, ))

x = generatorInput

x = layers.Dropout(0.2)(x)

x = layers.Dense(width * height)(x)
x = layers.LeakyReLU()(x)

x = layers.Dropout(0.2)(x)

x = layers.Dense(width * height)(x)
x = layers.LeakyReLU()(x)

x = layers.Reshape((height, width))(x)

x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)

x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)

x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)

x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)

x = layers.Reshape((200, 1))(x)
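# A minimal usage sketch (an assumption, not part of the original example):
# wrap the layer stack above into a Model. latentDim, width and height come
# from earlier in the original script, with width * height == 200 so the
# final Reshape is valid.
generator = keras.models.Model(generatorInput, x)
generator.summary()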
import os
import keras
from keras import layers
from keras.datasets import mnist
from keras.preprocessing import image

# region GAN generator network
latent_dim = 32
height = 28
width = 28
channels = 1

channel_feature_map = 18
generator_input = keras.Input(shape=(latent_dim, ))
x = layers.Dense(channel_feature_map * (width // 2) *
                 (height // 2))(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((width // 2, height // 2, channel_feature_map))(x)

x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)

# upsample to width * height
x = layers.Conv2DTranspose(channel_feature_map * 2,
                           4,
                           strides=2,
                           padding='same')(x)
x = layers.LeakyReLU()(x)

x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
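# The original example continues beyond this point; a plausible completion
# (an assumption, mapping the feature maps back to a single-channel image
# and wrapping the stack into a Model) would be:
x = layers.Conv2D(channels, 7, activation='tanh', padding='same')(x)
generator = keras.models.Model(generator_input, x)
generator.summary()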
from keras import layers, models, optimizers, regularizers
from keras import backend as K


# MAX_EDGES_PER_GRAPH, POSITION_VOCAB_SIZE and masked_categorical_crossentropy
# are defined elsewhere in the original module.
def model_ContextWeighted(p, embedding_matrix, max_sent_len, n_out):
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len,), dtype='int32', name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(layers.Embedding(output_dim=embedding_matrix.shape[1], input_dim=embedding_matrix.shape[0],
                                                                       input_length=max_sent_len, weights=[embedding_matrix],
                                                                       mask_zero=True, trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(MAX_EDGES_PER_GRAPH, max_sent_len,), dtype='int8', name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(layers.Embedding(output_dim=p['position_emb'],
                                                                      input_dim=POSITION_VOCAB_SIZE, input_length=max_sent_len,
                                                                      mask_zero=True, embeddings_regularizer=regularizers.l2(),
                                                                      trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    x = layers.concatenate([word_embeddings, pos_embeddings])
    for i in range(p["rnn1_layers"]-1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)
    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    ### Attention over ghosts ###
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    for i in range(MAX_EDGES_PER_GRAPH):
        # Compute a memory vector for the target entity pair
        sentence_vector = layers.Lambda(lambda l: l[:, i], output_shape=(num_units,))(sentence_matrix)
        target_sentence_memory = layers.Dense(num_units,
                                              activation="linear", use_bias=False)(sentence_vector)
        if i == 0:
            context_vectors = layers.Lambda(lambda l: l[:, i+1:],
                                            output_shape=(MAX_EDGES_PER_GRAPH-1, num_units))(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(lambda l: l[:, :i],
                                            output_shape=(MAX_EDGES_PER_GRAPH-1, num_units))(sentence_matrix)
        else:
            context_vectors = layers.Lambda(lambda l: K.concatenate([l[:, :i], l[:, i+1:]], axis=1),
                                            output_shape=(MAX_EDGES_PER_GRAPH-1, num_units))(sentence_matrix)
        # Compute the score between each memory and the memory of the target entity pair
        sentence_scores = layers.Lambda(lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 2)),
                                        output_shape=(MAX_EDGES_PER_GRAPH - 1,))([target_sentence_memory, context_vectors])
        sentence_scores = layers.Activation('softmax')(sentence_scores)

        # Compute the final vector by taking the weighted sum of context vectors and the target entity vector
        context_vector = layers.Lambda(lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 1)),
                                       output_shape=(num_units,))([context_vectors, sentence_scores])
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)

    edge_vectors = layers.concatenate(layers_to_concat, axis=1)

    # Apply dropout and a per-edge softmax classifier
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(layers.Dense(n_out, activation="softmax", name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers], outputs=[main_output])
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer, loss=masked_categorical_crossentropy, metrics=['accuracy'])

    return model
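# A minimal usage sketch (an assumption, not part of the original example;
# the hyperparameter names follow the dictionary keys read above):
import numpy as np
p = {'dropout1': 0.5, 'position_emb': 3, 'units1': 256,
     'bidirectional': True, 'rnn1_layers': 2}
embedding_matrix = np.random.rand(10000, 50)  # placeholder embeddings
model = model_ContextWeighted(p, embedding_matrix, max_sent_len=36, n_out=5)
model.summary()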
Example #28
import numpy as np
from keras import layers
from keras import backend as K
from keras.layers import Input, Dense, Dropout, LSTM, Bidirectional, Lambda
from keras.models import Model
from keras.callbacks import ModelCheckpoint


def validate_rnn_self_text_self_cross(rnn_speech, rnn_text, train_y,
                                      hidden_lstm_speech, hidden_con,
                                      hidden_lstm_text, hidden_dim, cw, val_sp,
                                      bat_size, filename):
    ##### Speech BiLSTM-SA
    speech_input = Input(shape=(len(rnn_speech[0]), len(rnn_speech[0][0])),
                         dtype='float32')
    speech_layer = Bidirectional(
        LSTM(hidden_lstm_speech, return_sequences=True))(speech_input)
    speech_att = Dense(hidden_con, activation='tanh')(speech_layer)
    speech_att_source = np.zeros((len(rnn_speech), hidden_con))
    speech_att_input = Input(shape=(hidden_con, ), dtype='float32')
    speech_att_vec = Dense(hidden_con, activation='relu')(speech_att_input)
    speech_att_vec = Lambda(lambda x: K.batch_dot(*x, axes=(1, 2)))(
        [speech_att_vec, speech_att])
    ##### Text BiLSTM-SA
    text_input = Input(shape=(len(rnn_text[0]), len(rnn_text[0][0])),
                       dtype='float32')
    text_layer = Bidirectional(LSTM(hidden_lstm_text,
                                    return_sequences=True))(text_input)
    text_att = Dense(hidden_con, activation='tanh')(text_layer)
    text_att_source = np.zeros((len(rnn_text), hidden_con))
    text_att_input = Input(shape=(hidden_con, ), dtype='float32')
    text_att_vec = Dense(hidden_con, activation='relu')(text_att_input)
    text_att_vec = Lambda(lambda x: K.batch_dot(*x, axes=(1, 2)))(
        [text_att_vec, text_att])
    #####
    speech_att_vec = Dense(hidden_con, activation='softmax')(speech_att_vec)
    text_att_vec = Dense(hidden_con, activation='softmax')(text_att_vec)
    #att_vec          = layers.concatenate([speech_att_vec, text_att_vec])
    cross_speech_att_vec = Dense(len(rnn_speech[0]),
                                 activation='softmax')(text_att_vec)
    cross_text_att_vec = Dense(len(rnn_text[0]),
                               activation='softmax')(speech_att_vec)
    #####
    cross_speech_att_vec = layers.Reshape(
        (len(rnn_speech[0]), 1))(cross_speech_att_vec)
    speech_output = layers.multiply([cross_speech_att_vec, speech_layer])
    speech_output = Lambda(lambda x: K.sum(x, axis=1))(speech_output)
    speech_output = Dense(hidden_dim, activation='relu')(speech_output)
    #####
    cross_text_att_vec = layers.Reshape(
        (len(rnn_text[0]), 1))(cross_text_att_vec)
    text_output = layers.multiply([cross_text_att_vec, text_layer])
    text_output = Lambda(lambda x: K.sum(x, axis=1))(text_output)
    text_output = Dense(hidden_dim, activation='relu')(text_output)
    ##### Total output
    output = layers.concatenate([speech_output, text_output])
    output = Dense(hidden_dim, activation='relu')(output)
    output = Dropout(0.3)(output)
    output = Dense(hidden_dim, activation='relu')(output)
    output = Dropout(0.3)(output)
    main_output = Dense(int(max(train_y) + 1), activation='softmax')(output)
    #####
    model = Model(
        inputs=[speech_input, speech_att_input, text_input, text_att_input],
        outputs=[main_output])
    model.compile(optimizer=adam_half,
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    filepath = filename + "-{epoch:02d}-{val_acc:.4f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 mode='max')
    #####
    callbacks_list = [metricsf1macro_4input, checkpoint]
    model.summary()
    #####
    model.fit([rnn_speech, speech_att_source, rnn_text, text_att_source],
              train_y,
              validation_split=val_sp,
              epochs=50,
              batch_size=bat_size,
              callbacks=callbacks_list,
              class_weight=cw)
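# A minimal usage sketch (an assumption; adam_half and
# metricsf1macro_4input come from the surrounding script):
# rnn_speech and rnn_text are 3-D float arrays of shape
# (n_samples, timesteps, features); train_y holds integer class labels.
# validate_rnn_self_text_self_cross(rnn_speech, rnn_text, train_y,
#                                   hidden_lstm_speech=64, hidden_con=64,
#                                   hidden_lstm_text=64, hidden_dim=128,
#                                   cw=None, val_sp=0.1, bat_size=32,
#                                   filename='speech_text_model')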
    def __init__(self, input_tensor, encoder, is_training, reuse):
        net = input_tensor
        with tf.variable_scope('Decoder'):
            # Layer 1 Up: Deconvolutional capsules, skip connection, convolutional capsules
            net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                    num_capsule=8,
                                                    num_atoms=16,
                                                    upsamp_type='deconv',
                                                    scaling=2,
                                                    padding='same',
                                                    routings=3,
                                                    name='deconv_cap_1')(net)
            self.upcap_1 = net

            net = layers.Concatenate(axis=-2,
                                     name='skip_1')([net, encoder.conv_cap_3])

            # Layer 2 Up: Deconvolutional capsules, skip connection, convolutional capsules
            net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                    num_capsule=4,
                                                    num_atoms=8,
                                                    upsamp_type='deconv',
                                                    scaling=2,
                                                    padding='same',
                                                    routings=3,
                                                    name='deconv_cap_2')(net)
            self.upcap_2 = net

            net = layers.Concatenate(axis=-2,
                                     name='skip_2')([net, encoder.conv_cap_2])

            # Layer 3 Up: Deconvolutional capsules, skip connection
            net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                    num_capsule=2,
                                                    num_atoms=8,
                                                    upsamp_type='deconv',
                                                    scaling=2,
                                                    padding='same',
                                                    routings=3,
                                                    name='deconv_cap_3')(net)
            self.upcap_3 = net

            net = layers.Concatenate(
                axis=-2, name='skip_3')([net, encoder.primary_caps])

            # Layer 4 Up: Deconvolutional capsules, skip connection
            net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                    num_capsule=1,
                                                    num_atoms=16,
                                                    upsamp_type='deconv',
                                                    scaling=2,
                                                    padding='same',
                                                    routings=3,
                                                    name='deconv_cap_4')(net)
            self.upcap_4 = net

            # Reconstruction - Reshape, skip connection + 3x conventional Conv2D layers
            _, H, W, C, D = net.get_shape()

            net = layers.Reshape((H.value, W.value, D.value))(net)
            net = layers.Concatenate(axis=-1,
                                     name='skip_4')([net, encoder.conv1])

            net = layers.Conv2D(filters=64,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_1')(net)

            net = layers.Conv2D(filters=128,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_2')(net)

            # tf.rank() returns a tensor, so use the static shape to decide
            # the output depth.
            if encoder.input_tensor.get_shape().ndims == 3:
                self.out_depth = 1
            else:
                self.out_depth = encoder.input_tensor.shape[3].value

            net = layers.Conv2D(filters=self.out_depth,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='sigmoid',
                                name='out_recon')(net)

            self.output = net
Example #30
def CapsNetR3(input_shape, n_class=2):
    x = layers.Input(shape=input_shape)

    # Layer 1: Just a conventional Conv2D layer
    conv1 = layers.Conv2D(filters=16,
                          kernel_size=5,
                          strides=1,
                          padding='same',
                          activation='relu',
                          name='conv1')(x)

    # Reshape layer to be 1 capsule x [filters] atoms
    _, H, W, C = conv1.get_shape()
    conv1_reshaped = layers.Reshape((H.value, W.value, 1, C.value))(conv1)
    # conv1_reshaped = layers.Reshape((H, W, 1, C))(conv1)

    # Layer 1: Primary Capsule: Conv cap with routing 1
    primary_caps = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=2,
                                    num_atoms=16,
                                    strides=2,
                                    padding='same',
                                    routings=1,
                                    name='primarycaps')(conv1_reshaped)

    # Layer 2: Convolutional Capsule
    conv_cap_2_1 = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=4,
                                    num_atoms=16,
                                    strides=1,
                                    padding='same',
                                    routings=3,
                                    name='conv_cap_2_1')(primary_caps)

    # Layer 2: Convolutional Capsule
    conv_cap_2_2 = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=4,
                                    num_atoms=32,
                                    strides=2,
                                    padding='same',
                                    routings=3,
                                    name='conv_cap_2_2')(conv_cap_2_1)

    # Layer 3: Convolutional Capsule
    conv_cap_3_1 = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=8,
                                    num_atoms=32,
                                    strides=1,
                                    padding='same',
                                    routings=3,
                                    name='conv_cap_3_1')(conv_cap_2_2)

    # Layer 3: Convolutional Capsule
    conv_cap_3_2 = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=8,
                                    num_atoms=64,
                                    strides=2,
                                    padding='same',
                                    routings=3,
                                    name='conv_cap_3_2')(conv_cap_3_1)

    # Layer 4: Convolutional Capsule
    conv_cap_4_1 = ConvCapsuleLayer(kernel_size=5,
                                    num_capsule=8,
                                    num_atoms=32,
                                    strides=1,
                                    padding='same',
                                    routings=3,
                                    name='conv_cap_4_1')(conv_cap_3_2)

    # Layer 1 Up: Deconvolutional Capsule
    deconv_cap_1_1 = DeconvCapsuleLayer(kernel_size=4,
                                        num_capsule=8,
                                        num_atoms=32,
                                        upsamp_type='deconv',
                                        scaling=2,
                                        padding='same',
                                        routings=3,
                                        name='deconv_cap_1_1')(conv_cap_4_1)

    # Skip connection
    up_1 = layers.Concatenate(axis=-2,
                              name='up_1')([deconv_cap_1_1, conv_cap_3_1])
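    # (axis=-2 concatenates along the capsule-type dimension, so the number
    # of atoms on the last axis must match between the two inputs: 32 here)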

    # Layer 1 Up: Deconvolutional Capsule
    deconv_cap_1_2 = ConvCapsuleLayer(kernel_size=5,
                                      num_capsule=4,
                                      num_atoms=32,
                                      strides=1,
                                      padding='same',
                                      routings=3,
                                      name='deconv_cap_1_2')(up_1)

    # Layer 2 Up: Deconvolutional Capsule
    deconv_cap_2_1 = DeconvCapsuleLayer(kernel_size=4,
                                        num_capsule=4,
                                        num_atoms=16,
                                        upsamp_type='deconv',
                                        scaling=2,
                                        padding='same',
                                        routings=3,
                                        name='deconv_cap_2_1')(deconv_cap_1_2)

    # Skip connection
    up_2 = layers.Concatenate(axis=-2,
                              name='up_2')([deconv_cap_2_1, conv_cap_2_1])

    # Layer 2 Up: Deconvolutional Capsule
    deconv_cap_2_2 = ConvCapsuleLayer(kernel_size=5,
                                      num_capsule=4,
                                      num_atoms=16,
                                      strides=1,
                                      padding='same',
                                      routings=3,
                                      name='deconv_cap_2_2')(up_2)

    # Layer 3 Up: Deconvolutional Capsule
    deconv_cap_3_1 = DeconvCapsuleLayer(kernel_size=4,
                                        num_capsule=2,
                                        num_atoms=16,
                                        upsamp_type='deconv',
                                        scaling=2,
                                        padding='same',
                                        routings=3,
                                        name='deconv_cap_3_1')(deconv_cap_2_2)

    # Skip connection
    up_3 = layers.Concatenate(axis=-2,
                              name='up_3')([deconv_cap_3_1, conv1_reshaped])

    # Layer 4: Convolutional Capsule: 1x1
    reshape = ConvCapsuleLayer(kernel_size=1,
                               num_capsule=n_class,
                               num_atoms=16,
                               strides=1,
                               padding='same',
                               routings=3,
                               name='seg_caps')(up_3)

    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    # out_seg16 = Length(num_classes=n_class, seg=True, name='out_seg')(reshape)
    # out_seg = K.permute_dimensions(seg_caps, (0, 1, 2, 4, 3))
    # out_seg = K.squeeze(out_seg, axis=4)
    # out_seg = layers.Conv2D(filters=n_class, kernel_size=1, padding='same', activation='softmax')(out_seg16)
    out_seg = caps_length(reshape, axis=2)

    # Decoder network.
    _, H, W, C = out_seg.get_shape()
    y = layers.Input(shape=input_shape[:-1] + (6, ))
    masked_by_y = Mask()(
        [out_seg, y]
    )  # The true label is used to mask the output of capsule layer. For training
    masked = Mask()(
        out_seg)  # Mask using the capsule with maximal length. For prediction

    def shared_decoder(mask_layer):
        recon_remove_dim = layers.Reshape(
            (H.value, W.value, C.value))(mask_layer)

        recon_1 = layers.Conv2D(filters=64,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_1')(recon_remove_dim)

        recon_2 = layers.Conv2D(filters=128,
                                kernel_size=1,
                                padding='same',
                                kernel_initializer='he_normal',
                                activation='relu',
                                name='recon_2')(recon_1)

        out_recon = layers.Conv2D(filters=1,
                                  kernel_size=1,
                                  padding='same',
                                  kernel_initializer='he_normal',
                                  activation='sigmoid',
                                  name='out_recon')(recon_2)

        return out_recon

    # Models for training and evaluation (prediction)
    train_model = models.Model(inputs=[x, y],
                               outputs=[out_seg,
                                        shared_decoder(masked_by_y)])
    # train_model = models.Model(inputs=x, outputs=out_seg)
    eval_model = models.Model(inputs=x,
                              outputs=[out_seg,
                                       shared_decoder(masked)])

    # manipulate model
    noise = layers.Input(shape=(H.value, W.value, C.value))
    noised_seg_caps = layers.Add()([out_seg, noise])
    masked_noised_y = Mask()([noised_seg_caps, y])
    manipulate_model = models.Model(inputs=[x, y, noise],
                                    outputs=shared_decoder(masked_noised_y))

    return train_model, eval_model, manipulate_model
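# A minimal usage sketch (an assumption, not part of the original example):
train_model, eval_model, manipulate_model = CapsNetR3(input_shape=(512, 512, 1))
train_model.summary()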