def f(x):
    main = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, dropout, dropout_mode,
                               kernel_reg_l2, bias_reg_l2, batch_norm_first)(x)
    x = add([main, x])
    main = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, dropout, dropout_mode,
                               kernel_reg_l2, bias_reg_l2, batch_norm_first)(x)
    x = add([main, x])
    if not last_block:
        x = MaxPooling1D(pool_size=3, strides=2)(x)
    return x
Example No. 2
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"
    """
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height)
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    
    # if shape is different. 
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        if SHORTCUT_OPTION == 'B':
            # 1x1 convolution to match dimension
            shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              padding="valid",
                              kernel_initializer="he_normal",
                              kernel_regularizer=l2(0.0001))(input)
        elif SHORTCUT_OPTION == 'A':
            # spatial pooling with padded identity mapping
            x = AveragePooling2D(pool_size=(1, 1),
                                 strides=(stride_width, stride_height))(input)
            # multiply every element of x by 0 to get zero matrix
            mul_zero = Lambda(lambda val: val * 0.0,
                              output_shape=K.int_shape(x)[1:])(x)

            shortcut = concatenate([x, mul_zero], axis=CHANNEL_AXIS)

    return add([shortcut, residual])
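A minimal usage sketch for the shortcut above. It assumes a channels_last TensorFlow backend and redefines the module-level constants that _shortcut reads; the shapes are illustrative only.

from keras import backend as K
from keras.layers import Input, Conv2D

ROW_AXIS, COL_AXIS, CHANNEL_AXIS = 1, 2, 3  # channels_last layout
SHORTCUT_OPTION = 'B'                        # 1x1-conv projection shortcut

inp = Input(shape=(32, 32, 16))
res = Conv2D(32, (3, 3), strides=(2, 2), padding='same')(inp)
out = _shortcut(inp, res)  # projects inp to (16, 16, 32) before the add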
Example No. 3
def _shortcut(input, residual, weight_decay=.0001, dropout=.0, identity=True, 
              strides=(1, 1), with_bn=False, org=False):
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height)
    # Should be int if network architecture is correctly configured.
    # NOTE: the dropout argument is just a placeholder;
    # it is not applied to the identity mapping.
    # stride_width = input._keras_shape[ROW_AXIS] // residual._keras_shape[ROW_AXIS]
    # stride_height = input._keras_shape[COL_AXIS] // residual._keras_shape[COL_AXIS]
    # equal_channels = residual._keras_shape[CHANNEL_AXIS] == input._keras_shape[CHANNEL_AXIS]

    shortcut = input
    # 1 X 1 conv if shape is different. Else identity.
    # if stride_width > 1 or stride_height > 1 or not equal_channels:
    if not identity:
        shortcut = Conv2D(filters=residual._keras_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1), strides=strides,
                          kernel_initializer="he_normal", padding="valid", 
                          kernel_regularizer=l2(weight_decay))(input)
        if with_bn:
            shortcut = BatchNormalization(axis=CHANNEL_AXIS)(shortcut)

    addition = add([shortcut, residual])
    if not org:
        return addition
    else:
        relu = Activation("relu")(addition)
        return Dropout(dropout)(relu)
def ___conv4_block(input, k=1, dropout=0.0):
    init = input

    channel_axis = 1 if K.image_dim_ordering() == "th" else -1

    # If the input filter count differs from 64 * k, project the input
    # with a 1x1 convolution so the residual add is shape-compatible.
    if K.image_dim_ordering() == "th":
        if init._keras_shape[1] != 64 * k:
            init = Convolution2D(64 * k, (1, 1), activation='linear',
                                 padding='same')(init)
    else:
        if init._keras_shape[-1] != 64 * k:
            init = Convolution2D(64 * k, (1, 1), activation='linear',
                                 padding='same')(init)

    x = Convolution2D(64 * k, (3, 3), padding='same')(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    if dropout > 0.0:
        x = Dropout(dropout)(x)

    x = Convolution2D(64 * k, (3, 3), padding='same')(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    m = add([init, x])
    return m
Example No. 5
    def identity_block(self, input_tensor, filters, stage, block):
        '''The identity_block is the block that has no conv layer at its shortcut.

        # Arguments
            input_tensor: input tensor
            filters: list of 2 integers, the nb_filters of the 2 conv layers on the main path
            stage: integer, current stage label, used for generating layer names
            block: 'a', 'b'..., current block label, used for generating layer names
        '''
        nb_filter1, nb_filter2 = filters
        bn_axis = 3
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'

        x = input_tensor
        x = Conv2D(nb_filter1, (self.kernel_width, self.kernel_height),
                          padding='same', name=conv_name_base + 'a')(x)
        x = BatchNormalization(axis=bn_axis, name=bn_name_base + 'a')(x)
        x = Activation('relu')(x)

        x = Conv2D(nb_filter2, (self.kernel_width, self.kernel_height),
                          padding='same', name=conv_name_base + 'b')(x)
        x = BatchNormalization(axis=bn_axis, name=bn_name_base + 'b')(x)
        x = Activation('relu')(x)

        x = add([x, input_tensor])
        x = Activation('relu')(x)
        return x
Example No. 6
def f(x, y):
    def scaling(xx, ss=1):
        return xx * ss
    scaled = Lambda(scaling, arguments={'ss': scale},
                    name='scale_{}'.format(block_name))(x)
    score = Conv2D(filters=classes, kernel_size=(1, 1),
                   activation='linear',
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   name='score_{}'.format(block_name))(scaled)
    if y is None:
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(score)
    else:
        crop = CroppingLike2D(target_shape=K.int_shape(y),
                              offset=crop_offset,
                              name='crop_{}'.format(block_name))(score)
        merge = add([y, crop])
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(merge)
    return upscore
Example No. 7
def _conv_block(inp, convs, skip=False, train=False):
    x = inp
    count = 0
    
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        
        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding: Darknet prefers left and top
        if 'train' in conv:
            trainflag = conv['train']  # per-layer override of the trainable flag
        else:
            trainflag = train
        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding: Darknet prefers left and top
                   name='conv2d_' + str(conv['layer_idx']),
                   use_bias=not conv['bnorm'], trainable=trainflag)(x)
        #if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='batch_normalization' + str(conv['layer_idx']), trainable=trainflag)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, trainable=trainflag)(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']), trainable=trainflag)(x)

    return add([skip_connection, x]) if skip else x
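A hypothetical call illustrating the conv-descriptor dicts the loop above expects; the keys are inferred from the function body, and the filter sizes are illustrative.

from keras.layers import Input

inp = Input(shape=(416, 416, 3))
x = _conv_block(inp, [
    {'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
    {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
    {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
], skip=False)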
def dpcnn(embedding_matrix, embedding_size, trainable_embedding, maxlen, max_features,
          filter_nr, kernel_size, repeat_block, dense_size, repeat_dense, output_size, output_activation,
          max_pooling, mean_pooling, weighted_average_attention, concat_mode,
          dropout_embedding, conv_dropout, dense_dropout, dropout_mode,
          conv_kernel_reg_l2, conv_bias_reg_l2,
          dense_kernel_reg_l2, dense_bias_reg_l2,
          use_prelu, use_batch_norm, batch_norm_first):
    """
    Note:
        Implementation of http://ai.tencent.com/ailab/media/publications/ACL3-Brady.pdf
        post activation is used instead of pre-activation, could be worth exploring
    """

    input_text = Input(shape=(maxlen,))
    if embedding_matrix is not None:
        embedding = Embedding(max_features, embedding_size,
                              weights=[embedding_matrix], trainable=trainable_embedding)(input_text)
    else:
        embedding = Embedding(max_features, embedding_size)(input_text)

    embedding = dropout_block(dropout_embedding, dropout_mode)(embedding)

    x = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, conv_dropout, dropout_mode,
                            conv_kernel_reg_l2, conv_bias_reg_l2, batch_norm_first)(embedding)
    x = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, conv_dropout, dropout_mode,
                            conv_kernel_reg_l2, conv_bias_reg_l2, batch_norm_first)(x)
    if embedding_size == filter_nr:
        x = add([embedding, x])
    else:
        embedding_resized = shape_matching_layer(filter_nr, use_prelu, conv_kernel_reg_l2, conv_bias_reg_l2)(embedding)
        x = add([embedding_resized, x])
    for _ in range(repeat_block):
        x = dpcnn_block(filter_nr, kernel_size, use_batch_norm, use_prelu, conv_dropout, dropout_mode,
                        conv_kernel_reg_l2, conv_bias_reg_l2, batch_norm_first)(x)

    predictions = classification_block(dense_size=dense_size, repeat_dense=repeat_dense,
                                       output_size=output_size, output_activation=output_activation,
                                       max_pooling=max_pooling,
                                       mean_pooling=mean_pooling,
                                       weighted_average_attention=weighted_average_attention,
                                       concat_mode=concat_mode,
                                       dropout=dense_dropout,
                                       kernel_reg_l2=dense_kernel_reg_l2, bias_reg_l2=dense_bias_reg_l2,
                                       use_prelu=use_prelu, use_batch_norm=use_batch_norm,
                                       batch_norm_first=batch_norm_first)(x)
    model = Model(inputs=input_text, outputs=predictions)
    return model
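A hypothetical call showing one way to fill in the long signature; all values are illustrative, and it assumes the helper blocks used above (convolutional_block, dpcnn_block, dropout_block, shape_matching_layer, classification_block) are importable.

model = dpcnn(embedding_matrix=None, embedding_size=300, trainable_embedding=True,
              maxlen=200, max_features=50000,
              filter_nr=300, kernel_size=3, repeat_block=4,
              dense_size=256, repeat_dense=1,
              output_size=6, output_activation='sigmoid',
              max_pooling=True, mean_pooling=False,
              weighted_average_attention=False, concat_mode='concat',
              dropout_embedding=0.2, conv_dropout=0.2, dense_dropout=0.5,
              dropout_mode='spatial',
              conv_kernel_reg_l2=1e-5, conv_bias_reg_l2=1e-5,
              dense_kernel_reg_l2=1e-5, dense_bias_reg_l2=1e-5,
              use_prelu=True, use_batch_norm=False, batch_norm_first=False)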
def residual_block(input_tensor, nb_filters, filter_sz, stage,
                   kernel_initializer='he_uniform', l2reg=0.0,
                   use_shortcuts=True):
    """Create a ResNet pre-activation bottleneck layer."""
    nb_in_filters, nb_bottleneck_filters = nb_filters

    bn_name = 'bn' + str(stage)
    conv_name = 'conv' + str(stage)
    relu_name = 'relu' + str(stage)
    merge_name = 'add' + str(stage)

    # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1
    # conv
    if stage > 1:  # first activation is just after conv1
        x = BatchNormalization(axis=1, name=bn_name + 'a')(input_tensor)
        x = Activation('relu', name=relu_name + 'a')(x)
    else:
        x = input_tensor

    x = Convolution2D(nb_bottleneck_filters, (1, 1),
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      use_bias=False,
                      name=conv_name + 'a')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters
    # via FxF conv
    x = BatchNormalization(axis=1, name=bn_name + 'b')(x)
    x = Activation('relu', name=relu_name + 'b')(x)
    x = Convolution2D(nb_bottleneck_filters, (filter_sz, filter_sz),
                      padding='same',
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      use_bias=False,
                      name=conv_name + 'b')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via
    # 1x1 conv
    x = BatchNormalization(axis=1, name=bn_name + 'c')(x)
    x = Activation('relu', name=relu_name + 'c')(x)
    x = Convolution2D(nb_in_filters, (1, 1),
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      name=conv_name + 'c')(x)

    # merge
    if use_shortcuts:
        x = add([x, input_tensor], name=merge_name)

    return x
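A minimal usage sketch. The axis=1 batch norm above implies K.image_data_format() == 'channels_first'; the shapes are illustrative.

from keras.layers import Input

inp = Input(shape=(64, 32, 32))  # (channels, rows, cols)
out = residual_block(inp, nb_filters=(64, 16), filter_sz=3, stage=2)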
Example No. 10
def f(x, y):
    score = Conv2D(filters=classes, kernel_size=(1, 1),
                   activation='linear',
                   padding='valid',
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   name='score_{}'.format(block_name))(x)
    if y is not None:
        def scaling(xx, ss=1):
            return xx * ss
        scaled = Lambda(scaling, arguments={'ss': scale},
                        name='scale_{}'.format(block_name))(score)
        score = add([y, scaled])
    upscore = BilinearUpSampling2D(
        target_shape=target_shape,
        name='upscore_{}'.format(block_name))(score)
    return upscore
Example No. 11
def _conv_block(inp, convs, do_skip=True):
    x = inp
    count = 0
    
    for conv in convs:
        if count == (len(convs) - 2) and do_skip:
            skip_connection = x
        count += 1
        
        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike TensorFlow, Darknet prefers left and top padding
        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same', # unlike TensorFlow, Darknet prefers left and top padding
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=not conv['bnorm'])(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if do_skip else x
Example No. 12
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"
    """
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1 X 1 conv if shape is different. Else identity.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001))(input)

    return add([shortcut, residual])
Example No. 13
def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        
        if conv['kernel'] > 1: x = ZeroPadding2D(1)(x)
        x = Conv2D(conv['filter'], 
                   conv['kernel'], 
                   strides=conv['stride'], 
                   padding='valid', 
                   name='conv_' + str(conv['layer_idx']), 
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x
Example No. 14
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"
    """
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height)
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1 X 1 conv if shape is different. Else identity.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001))(input)

    return add([shortcut, residual])
Example No. 15
def word_model():
    img_w = word_cfg['img_w']
    img_h = word_cfg['img_h']
    max_text_len = word_cfg['max_text_len']
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    # Build the network
    input_data = Input(name='the_input', shape=input_shape,
                       dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3),
                   padding='same',
                   name='conv1',
                   kernel_initializer='he_normal')(
                       input_data)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max1')(inner)  # (None,64, 32, 64)

    inner = Conv2D(128, (3, 3),
                   padding='same',
                   name='conv2',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3),
                   padding='same',
                   name='conv3',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3),
                   padding='same',
                   name='conv4',
                   kernel_initializer='he_normal')(
                       inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3),
                   padding='same',
                   name='conv5',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same',
                   name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2),
                   padding='same',
                   kernel_initializer='he_normal',
                   name='conv7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=(32, 2048),
                    name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64,
                  activation='relu',
                  kernel_initializer='he_normal',
                  name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    gru_1 = GRU(256,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)  # (None, 32, 512)
    gru_1b = GRU(256,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    reversed_gru_1b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(gru_1b)

    gru1_merged = add([gru_1, reversed_gru_1b])  # (None, 32, 512)
    gru1_merged = BatchNormalization()(gru1_merged)

    gru_2 = GRU(256,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(256,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)
    reversed_gru_2b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(gru_2b)

    gru2_merged = concatenate([gru_2, reversed_gru_2b])  # (None, 32, 1024)
    gru2_merged = BatchNormalization()(gru2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)  #(None, 32, 80)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # loss function
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    model_predict = Model(inputs=input_data, outputs=y_pred)
    model_predict.summary()

    return model, model_predict
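The model above relies on a ctc_lambda_func that is not shown. A typical definition, as in the well-known Keras image-OCR example (an assumption here, since the original is not included):

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, 2:, :]  # drop the first two timesteps, whose RNN outputs tend to be noisy
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)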
Example No. 16
    def RCL_block(self,
                  l,
                  activation_function=LeakyReLU(),
                  features=32,
                  kernel_size=3,
                  name="RCL"):
        """Build recurrent ConvLayer. See https://doi.org/10.1109/CVPR.2015.7298958 (i.e. Figure 3)

        Parameters
        ----------
        l: Keras Layer (Tensor?)
            Previous layer of the neural network.
        activation_function: Keras Activation Function
            Activation function (standard: LeakyReLU()).
        features: int
            Number of extracted features.
        kernel_size: int
            Size of Convolution Kernel.
        name: string
            Name of the recurrent ConvLayer (standard: 'RCL').

        :param l: Keras Layer (Tensor?)
            Previous layer of the neural network.
        :param activation_function: Keras Activation Function
            Activation function (standard: LeakyReLU()).
        :param features: int
            Number of extracted features.
        :param kernel_size: int
            Size of Convolution Kernel.
        :param name: string
            Name of the recurrent ConvLayer (standard: 'RCL').

        Returns
        -------
        stack15: keras layer stack
            Recurrent ConvLayer as Keras Layer Stack

        :return: stack15: keras layer stack
            Recurrent ConvLayer as Keras Layer Stack
        """
        conv1 = Conv1D(features, kernel_size, padding='same', name=name)
        stack1 = conv1(l)
        stack2 = activation_function(stack1)
        stack3 = BatchNormalization()(stack2)

        # UNROLLED RECURRENT BLOCK(s)
        conv2 = Conv1D(features,
                       kernel_size,
                       padding='same',
                       kernel_initializer='he_normal')
        stack4 = conv2(stack3)
        stack5 = add([stack1, stack4])
        stack6 = activation_function(stack5)
        stack7 = BatchNormalization()(stack6)

        conv3 = Convolution1D_tied(features,
                                   kernel_size,
                                   border_mode='same',
                                   tied_to=conv2)
        stack8 = conv3(stack7)
        stack9 = add([stack1, stack8])
        stack10 = activation_function(stack9)
        stack11 = BatchNormalization()(stack10)

        conv4 = Convolution1D_tied(features,
                                   kernel_size,
                                   border_mode='same',
                                   tied_to=conv2)
        stack12 = conv4(stack11)
        stack13 = add([stack1, stack12])
        stack14 = activation_function(stack13)
        stack15 = BatchNormalization()(stack14)

        return stack15
Example No. 17
def __bottleneck_block(input,
                       filters=64,
                       cardinality=8,
                       strides=1,
                       weight_decay=5e-4):
    ''' Adds a bottleneck block
    Args:
        input: input tensor
        filters: number of output filters
        cardinality: cardinality factor described number of
            grouped convolutions
        strides: performs strided convolution for downsampling if > 1
        weight_decay: weight decay factor
    Returns: a keras tensor
    '''
    init = input

    grouped_channels = int(filters / cardinality)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # If the input channel count differs from 2 * filters, project the input with a 1x1 convolution
    if K.image_data_format() == 'channels_first':
        if init._keras_shape[1] != 2 * filters:

            init = Conv1D(filters * 2,
                          1,
                          padding='same',
                          strides=strides,
                          use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(init)

            init = BatchNormalization(axis=channel_axis)(init)
    else:
        if init._keras_shape[-1] != 2 * filters:

            init = Conv1D(filters * 2,
                          1,
                          padding='same',
                          strides=strides,
                          use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(init)

            init = BatchNormalization(axis=channel_axis)(init)


    # The original 2D version used:
    # x = Conv2D(filters, (1, 1), padding='same', use_bias=False,
    #            kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = Conv1D(filters,
               1,
               padding='same',
               use_bias=False,
               kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(input)

    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)

    x = __grouped_convolution_block(x, grouped_channels, cardinality, strides,
                                    weight_decay)

    x = Conv1D(filters * 2,
               1,
               padding='same',
               use_bias=False,
               kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)

    x = BatchNormalization(axis=channel_axis)(x)

    x = add([init, x])
    x = LeakyReLU()(x)

    return x
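A hypothetical call for the 1D bottleneck above. It assumes the __grouped_convolution_block helper is defined and an older Keras where tensors expose ._keras_shape; the sequence shape is illustrative.

from keras.layers import Input

seq = Input(shape=(128, 64))  # (timesteps, channels) for Conv1D
out = __bottleneck_block(seq, filters=64, cardinality=8, strides=1)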
def resnet8(img_width, img_height, img_channels, output_dim):
    """
    Define model architecture.

    # Arguments
       img_width: Target image width.
       img_height: Target image height.
       img_channels: Target image channels.
       output_dim: Dimension of model output.

    # Returns
       model: A Model instance.
    """

    # Input
    img_input = Input(shape=(img_height, img_width, img_channels))

    x1 = Conv2D(32, (5, 5), strides=[2, 2], padding='same')(img_input)
    x1 = MaxPooling2D(pool_size=(3, 3), strides=[2, 2])(x1)

    # First residual block
    x2 = keras.layers.normalization.BatchNormalization()(x1)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x2 = keras.layers.normalization.BatchNormalization()(x2)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x1 = Conv2D(32, (1, 1), strides=[2, 2], padding='same')(x1)
    x3 = add([x1, x2])

    # Second residual block
    x4 = keras.layers.normalization.BatchNormalization()(x3)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x4 = keras.layers.normalization.BatchNormalization()(x4)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x3 = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3)
    x5 = add([x3, x4])

    # Third residual block
    x6 = keras.layers.normalization.BatchNormalization()(x5)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(128, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x6 = keras.layers.normalization.BatchNormalization()(x6)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(128, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x5 = Conv2D(128, (1, 1), strides=[2, 2], padding='same')(x5)
    x7 = add([x5, x6])

    x = Flatten()(x7)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    # Steering channel
    steer = Dense(output_dim)(x)

    # Collision channel
    coll = Dense(output_dim)(x)
    coll = Activation('sigmoid')(coll)

    # Define steering-collision model
    model = Model(inputs=[img_input], outputs=[steer, coll])
    print(model.summary())

    return model
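A hypothetical instantiation mirroring the DroNet-style setup above; the values are illustrative.

model = resnet8(img_width=200, img_height=200, img_channels=1, output_dim=1)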
Example No. 19
########### Layer 5 ############
# NOTE: the opening of this example was lost in extraction; the call below is
# reconstructed by analogy with Layer 6 -- the filter count is an assumption.
Conv5 = Conv2D(filters=32,
               kernel_size=[3, 3],
               strides=[1, 1],
               padding='same',
               kernel_initializer='he_normal',
               activation='sigmoid')(Conv4)

########### Layer 6 ############
Conv6 = Conv2D(filters=32,
               kernel_size=[3, 3],
               strides=[1, 1],
               padding='same',
               kernel_initializer='he_normal',
               activation='sigmoid')(Conv5)

########### Layer 7 ############
Conv7 = add([
    Conv2DTranspose(filters=16,
                    kernel_size=[2, 2],
                    strides=[2, 2],
                    padding='same',
                    kernel_initializer='he_normal',
                    activation='sigmoid')(Conv6), Conv3
])

########### Layer 8 ############
Conv8 = Conv2D(filters=8,
               kernel_size=[3, 3],
               strides=[1, 1],
               padding='same',
               kernel_initializer='he_normal',
               activation='sigmoid')(Conv7)

########### Layer 9 ############
# (truncated: the Conv9 = add([Conv2DTranspose(filters=8, ...), ...]) call is
#  cut off here, and an unrelated snippet begins mid-stream below; its opening
#  lines -- a word_encoder and a Bidirectional LSTM text_encoder created with
#  return_sequences=True, name='text_BiLSTM' -- were lost in extraction.)

# Text inputs
text_input = Input(shape=(None, max_char_seq), dtype='float32', 
                   name='all_tokens') # [n_samp, n_word_seq, n_char_seq]
encoded_words = TimeDistributed(word_encoder, 
                                name='encoded_joint_text')(text_input) # [n_samp, n_word_seq, n_hidden]

# Field indicator embedding
field_input = Input(shape=(None, len(text_fields)), dtype='float32', 
                    name='field_indicators')
encoded_fields = TimeDistributed(Dense(encoded_words.shape[-1].value), 
                                 name='field_indicator_embedding')(field_input)

encoded_word_fields = add([encoded_words, encoded_fields])

# mask the blank inputs to the LSTM
mask = mask_from_embedded_seq(text_input)
masked_combined_words = masked_seq(encoded_word_fields, mask)
field_embedding = text_encoder(masked_combined_words) # [n_samp, n_seq, n_hidden]
# mask the LSTM outputs corresponding to the blank inputs
masked_field_embedding = masked_seq(field_embedding, mask)

# NAICS input
naics_input = Input(shape=(60,), dtype='float32', name='naics')
naics_embedding = naics_res_encoder(input_layer=naics_input, n_layers=3)

# Job category
job_category_input = Input(shape=(12,), dtype='float32', name='job_category')
Example No. 21
def regression_net(input_tensor=None, trainable=False):
    img_input = input_tensor
    #conv_1
    conv1_1 = Convolution2D(32, (5, 5),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv1_1')(img_input)
    pool1 = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(conv1_1)

    #conv_2
    conv2_1 = Convolution2D(64, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv2_1')(pool1)
    conv2_2 = Convolution2D(64, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(conv2_2)

    #conv_3
    conv3_1 = Convolution2D(128, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv3_1')(pool2)
    conv3_2 = Convolution2D(128, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv3_2')(conv3_1)
    pool3 = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(conv3_2)
    pool3_for_fuse = Convolution2D(128, (1, 1),
                                   strides=(1, 1),
                                   padding='same',
                                   activation='relu',
                                   name='pool3_for_fuse')(pool3)

    #conv_4
    conv4_1 = Convolution2D(256, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv4_1')(pool3)
    conv4_2 = Convolution2D(256, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv4_2')(conv4_1)
    pool4 = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(conv4_2)
    pool4_for_fuse = Convolution2D(128, (1, 1),
                                   strides=(1, 1),
                                   padding='same',
                                   activation='relu',
                                   name='pool4_for_fuse')(pool4)

    #conv_5
    conv5_1 = Convolution2D(512, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv5_1')(pool4)
    conv5_2 = Convolution2D(512, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv5_2')(conv5_1)
    pool5 = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(conv5_2)
    pool5_for_fuse = Convolution2D(128, (1, 1),
                                   strides=(1, 1),
                                   padding='same',
                                   activation='relu',
                                   name='pool5_for_fuse')(pool5)

    #conv_6
    conv6_1 = Convolution2D(512, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv6_1')(pool5)
    conv6_2 = Convolution2D(512, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv6_2')(conv6_1)
    pool6 = MaxPooling2D((2, 2), strides=(2, 2), name='pool6')(conv6_2)

    #
    conv7_1 = Convolution2D(128, (1, 1),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv7_1')(pool6)

    upscore2 = Conv2DTranspose(filters=128,
                               kernel_size=(2, 2),
                               strides=(2, 2),
                               padding='valid',
                               use_bias=False,
                               name='upscore2')(conv7_1)

    fuse_pool5 = add([upscore2, pool5_for_fuse])
    upscore4 = Conv2DTranspose(filters=128,
                               kernel_size=(2, 2),
                               strides=(2, 2),
                               padding='valid',
                               use_bias=False,
                               name='upscore4')(fuse_pool5)
    fuse_pool4 = add([upscore4, pool4_for_fuse])

    upscore8 = Conv2DTranspose(filters=128,
                               kernel_size=(2, 2),
                               strides=(2, 2),
                               padding='valid',
                               use_bias=False,
                               name='upscore8')(fuse_pool4)
    fuse_pool3 = add([upscore8, pool3_for_fuse])

    upscore16 = Conv2DTranspose(filters=128,
                                kernel_size=(2, 2),
                                strides=(2, 2),
                                padding='valid',
                                use_bias=False,
                                name='upscore16')(fuse_pool3)

    x = Convolution2D(128, (1, 1),
                      strides=(1, 1),
                      padding='same',
                      activation='relu')(upscore16)
    x = Convolution2D(8, (1, 1),
                      strides=(1, 1),
                      padding='same',
                      activation='sigmoid')(x)
    x_regr = Lambda(lambda t: 800 * t - 400)(x)  # map sigmoid output [0, 1] to coordinates in [-400, 400]
    return x_regr
Example No. 22
def deepLoco(inputs):

    print("input shape:", inputs.shape)
    conv1 = Conv2D(16,
                   5,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(inputs)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(16,
                   5,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(64,
                   5,
                   activation='relu',
                   strides=2,
                   padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv1.shape)
    # pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    # print ("pool1 shape:",pool1.shape)

    conv2 = Conv2D(64,
                   5,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(64,
                   5,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(256,
                   3,
                   activation='relu',
                   strides=2,
                   padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv3 shape:", conv2.shape)
    # pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    # print ("pool2 shape:",pool2.shape)

    conv3 = Conv2D(256,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv3)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256,
                   3,
                   activation='relu',
                   strides=4,
                   padding='same',
                   kernel_initializer='he_normal')(conv3)
    print("conv3 shape:", conv3.shape)
    # pool3 = MaxPooling2D(pool_size=(4, 4))(conv3)
    # print ("pool3 shape:",pool3.shape)

    flat1 = Flatten()(conv3)
    print("flat ", flat1.shape)
    dense1 = Dense(2048)(flat1)
    print("dense1 ", dense1.shape)

    reshape1 = Reshape([2048, 1])(dense1)
    print("reshape1", reshape1.shape)

    shortcut = reshape1
    # res1 = build_resnet(reshape1, basic_block, [2])
    res1 = Conv1D(1, kernel_size=3, strides=1, padding='same')(reshape1)
    print("res1 ", res1.shape)
    res1 = LeakyReLU()(res1)
    res1 = BatchNormalization()(res1)
    # res1 = add_common_layers(res1)
    add1 = add([shortcut, res1])
    print("add1 ", add1.shape)
    add1 = LeakyReLU()(add1)
    add1 = BatchNormalization()(add1)

    shortcut = add1
    res2 = Conv1D(1, kernel_size=3, strides=1, padding='same')(add1)
    res2 = LeakyReLU()(res2)
    res2 = BatchNormalization()(res2)
    # res2 = add_common_layers(res2)
    print("res2 ", res2.shape)
    add2 = add([shortcut, res2])
    print("add2 ", add2.shape)
    add2 = LeakyReLU()(add2)
    add2 = BatchNormalization()(add2)

    weights = Conv1D(1, kernel_size=3, strides=8, padding='same')(add2)
    # weights = MaxPooling1D(pool_size=8)(weights)
    weights = Activation("relu")(weights)
    print("weights ", weights.shape)
    # positions = Dense(kernel_initializer="he_normal",
    #                   activation="softmax")(add2)
    # positions = Dense(units = 16)(add2)
    # print(positions.shape)
    # positions = Reshape([2048,16,1])(positions)

    positions = Conv1D(2, kernel_size=3, strides=8, padding='same')(add2)
    # positions = MaxPooling1D(pool_size=8)(positions)
    positions = Activation("sigmoid")(positions)
    print("positions ", positions.shape)

    # return [weights, positions]
    return concatenate([weights, positions], axis=2)
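A minimal sketch wiring the function into a Model. The input size is an assumption; with a 64x64 single-channel input, the strided convolutions (2, 2, 4) reduce the image to 4x4x256, the Dense/Reshape gives a length-2048 sequence, and the stride-8 Conv1D heads yield an output of shape (batch, 256, 3): one weight plus a 2D position per row.

from keras.layers import Input
from keras.models import Model

inputs = Input(shape=(64, 64, 1))   # hypothetical input resolution
model = Model(inputs, deepLoco(inputs))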
def hullwhite_fnn_model(data, method, loss, exponent=6, nb_epochs=0, 
                        batch_size=16, activation='tanh', layers=4, 
                        init='he_normal', dropout=0.5, dropout_first=None, 
                        dropout_middle=None, dropout_last=None,
                        early_stop=125, lr_patience=40,
                        reduce_lr=0.5, reduce_lr_min=0.000009,
                        residual_cells=1, **kwargs):
    assert(isinstance(activation, string_types))
    if activation == "elu":
        if 'alpha' in kwargs:
            alpha = kwargs['alpha']
        else:
            alpha = 1.0
        activation = ELU(alpha)
    elif activation == "rbf":
        activation = Activation(rbf)
    else:
        activation = Activation(activation)
    
    x_train = data['x_train']
    x_valid = data['x_valid']
    x_test = data['x_test']
    y_train = data['y_train']
    y_valid = data['y_valid']
    
    if dropout_first is None:
        dropout_first = dropout
    if dropout_middle is None:
        dropout_middle = dropout_first
    if dropout_last is None:
        dropout_last = dropout_middle
        
    assert residual_cells >= 0
    
    if residual_cells == 0:
        print('Simple with no BN or residual')
    else:
        print('Residual with BN (ex Out) - Activation before Dense - with %s residual cells' % residual_cells)
    print(' - Early Stop: Patience %s; Reduce LR Patience %s, Factor: %s, Min: %s' % \
            (early_stop, lr_patience, reduce_lr, reduce_lr_min))
    print(' - Exp:%s, Layer:%s, df:%s, dm:%s, dl:%s' % \
            (exponent, layers, dropout_first, dropout_middle, dropout_last))
    print(' - Loss:%s' % loss)
    # A copy of the activation layer must be used instead of the layer itself;
    # otherwise Keras cannot reload a configuration saved to JSON.
    act_idx = 1
    inp = Input(shape=(x_train.shape[1],))
    ly = BatchNormalization()(inp)
    ly = Dense(2**exponent, kernel_initializer=init)(ly)
    act = copy(activation)
    act.name = act.name + "_" + str(act_idx)
    act_idx = act_idx + 1
    ly = act(ly)
    ly = Dropout(dropout_first)(ly)
    if residual_cells > 0:
        for i in range(layers-1):
            middle = BatchNormalization()(ly)
            act = copy(activation)
            act.name = act.name + "_" + str(act_idx)
            act_idx = act_idx + 1
            middle = act(middle)
            middle = Dense(2**exponent, kernel_initializer=init)(middle)
            middle = Dropout(dropout_middle)(middle)
            for j in range(residual_cells-1):
                act = copy(activation)
                act.name = act.name + "_" + str(act_idx)
                act_idx = act_idx + 1
                middle = act(middle)
                middle = Dense(2**exponent, kernel_initializer=init)(middle)
                middle = Dropout(dropout_middle)(middle)
            ly = add([ly, middle])
        ly = Dropout(dropout_last)(ly)
    else:
        for i in range(layers-1):
            ly = Dense(2**exponent, kernel_initializer=init)(ly)
            act = copy(activation)
            act.name = act.name + "_" + str(act_idx)
            act_idx = act_idx + 1
            ly = act(ly)
            ly = Dropout(dropout_middle)(ly)
    ly = Dense(y_train.shape[1], kernel_initializer=init)(ly)
    nn = Model(inputs=inp, outputs=ly)
    nn.compile(method, loss=loss)
    
    if nb_epochs > 0:
        callbacks = []
        if early_stop is not None:
            earlyStopping = EarlyStopping(monitor='val_loss', patience=early_stop)
            callbacks.append(earlyStopping)
        if reduce_lr is not None:
            reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=reduce_lr,
                                         patience=lr_patience, min_lr=reduce_lr_min, 
                                         verbose=1)
            callbacks.append(reduceLR)
        history2 = nn.fit(x_train, y_train, batch_size=batch_size, 
                          epochs=nb_epochs, verbose=2, callbacks=callbacks,
                          validation_data=(x_valid, y_valid))
        history = {'history': history2.history,
                   'params': history2.params}
    else:
        history = {'history': [],
                   'params': []}
    return (x_train, x_valid, x_test, nn, history)
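A hypothetical smoke test with random data. nb_epochs defaults to 0, so the model is only built and compiled; it assumes the module-level imports the function relies on (copy, six.string_types) and an older Keras where layer names can be reassigned.

import numpy as np

data = {'x_train': np.random.rand(256, 10), 'y_train': np.random.rand(256, 1),
        'x_valid': np.random.rand(64, 10),  'y_valid': np.random.rand(64, 1),
        'x_test':  np.random.rand(64, 10)}
x_tr, x_va, x_te, nn, hist = hullwhite_fnn_model(data, method='adam', loss='mse')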
Example No. 24
def add_top_layers(model, image_size, patch_net='resnet50', block_type='resnet', 
                   depths=[512,512], repetitions=[1,1], 
                   block_fn=bottleneck_org, nb_class=2, 
                   shortcut_with_bn=True, bottleneck_enlarge_factor=4,
                   dropout=.0, weight_decay=.0001,
                   add_heatmap=False, avg_pool_size=(7,7), return_heatmap=False,
                   add_conv=True, add_shortcut=False,
                   hm_strides=(1,1), hm_pool_size=(5,5),
                   fc_init_units=64, fc_layers=2):

    def add_residual_blocks(block):
        for depth,repetition in zip(depths, repetitions):
            block = _residual_block(
                block_fn, depth, repetition,
                dropout=dropout, weight_decay=weight_decay,
                shortcut_with_bn=shortcut_with_bn,
                bottleneck_enlarge_factor=bottleneck_enlarge_factor)(block)
        pool = GlobalAveragePooling2D()(block)
        dropped = Dropout(dropout)(pool)
        return dropped

    def add_vgg_blocks(block):
        for depth,repetition in zip(depths, repetitions):
            block = _vgg_block(depth, repetition,
                               dropout=dropout, 
                               weight_decay=weight_decay)(block)
        pool = GlobalAveragePooling2D()(block)
        dropped = Dropout(dropout)(pool)
        return dropped
    
    def add_fc_layers(block):
        flattened = Flatten()(block)
        dropped = Dropout(dropout)(flattened)
        units = fc_init_units
        for i in range(fc_layers):
            fc = Dense(units, kernel_initializer="he_normal", 
                       kernel_regularizer=l2(weight_decay))(dropped)
            norm = BatchNormalization()(fc)
            relu = Activation('relu')(norm)
            dropped = Dropout(dropout)(relu)
            units //= 2  # halve the width of each successive FC layer
        return dropped, flattened

    if patch_net == 'resnet50':
        last_kept_layer = model.layers[-5]
    elif patch_net == 'yaroslav':
        last_kept_layer = model.layers[-3]
    else:
        last_kept_layer = model.layers[-4]
    block = last_kept_layer.output
    channels = 1 if patch_net == 'yaroslav' else 3
    image_input = Input(shape=(image_size[0], image_size[1], channels))
    model0 = Model(inputs=model.inputs, outputs=block)
    block = model0(image_input)
    if add_heatmap or return_heatmap:  # add softmax heatmap.
        pool1 = AveragePooling2D(pool_size=avg_pool_size, 
                                 strides=hm_strides)(block)
        if return_heatmap:
            dropped = pool1
        else:
            dropped = Dropout(dropout)(pool1)
        clf_layer = model.layers[-1]
        clf_weights = clf_layer.get_weights()
        clf_classes = clf_layer.output_shape[1]
        if return_heatmap:
            activation = lambda x: activations.softmax(x, axis=CHANNEL_AXIS)  # Dense needs a callable
        else:
            activation = 'relu'
        heatmap_layer = Dense(clf_classes, activation=activation, 
                              kernel_regularizer=l2(weight_decay))
        heatmap = heatmap_layer(dropped)
        heatmap_layer.set_weights(clf_weights)
        if return_heatmap:
            model_heatmap = Model(inputs=image_input, outputs=heatmap)
            return model_heatmap
        block = MaxPooling2D(pool_size=hm_pool_size)(heatmap)
        top_layer_nb = 8
    else:
        top_layer_nb = 2
    if add_conv:
        if block_type == 'resnet':
            block = add_residual_blocks(block)
        elif block_type == 'vgg':
            block = add_vgg_blocks(block)
        else:
            raise Exception('Unsupported block type: ' + block_type)
    else:
        block, flattened = add_fc_layers(block)
    if add_shortcut and not add_conv:
        dense = Dense(nb_class, kernel_initializer="he_normal", 
                      kernel_regularizer=l2(weight_decay))(block)
        shortcut = Dense(nb_class, kernel_initializer="he_normal", 
                         kernel_regularizer=l2(weight_decay))(flattened)
        addition = add([dense, shortcut])
        dense = Activation('softmax')(addition)
    else:
        dense = Dense(nb_class, kernel_initializer="he_normal", 
                      activation='softmax', 
                      kernel_regularizer=l2(weight_decay))(block)
    model_addtop = Model(inputs=image_input, outputs=dense)

    return model_addtop, top_layer_nb 
def create_model(nb_classes, input_shape, config=None):
    """Create a VGG-16 like model."""
    if len(input_shape) != 3:
        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, "
                        "nb_cols) or (nb_rows, nb_cols, nb_channels), "
                        "depending on your backend.")
    if config is None:
        config = {'model': {}}

    min_feature_map_dimension = min(input_shape[:2])
    if min_feature_map_dimension < 32:
        print("ERROR: Please upsample the feature maps to have at least "
              "a size of 32 x 32. Currently, it has {}".format(input_shape))
    nb_filter = 32

    # Network definition
    # input_shape = (None, None, 3)  # for fcn
    input_ = Input(shape=input_shape)
    x = input_

    # Scale feature maps down until the smaller side lies in [32, 63]
    tmp = min_feature_map_dimension / 32.
    if tmp >= 2:
        while tmp >= 2.:
            for _ in range(2):
                x = Convolution2D(nb_filter, (3, 3), padding='same',
                                  kernel_initializer='he_uniform',
                                  kernel_regularizer=l2(0.0001))(x)
                x = BatchNormalization()(x)
                x = Activation('elu')(x)
            x = MaxPooling2D(pool_size=(2, 2))(x)
            nb_filter *= 2
            tmp /= 2

    # 32x32
    x = Convolution2D(nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Convolution2D(nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 16x16
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_16 = MaxPooling2D(pool_size=(2, 2))(input_)
    res = Convolution2D(nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_16)
    x = add([x, res])
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 8x8
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_8 = MaxPooling2D(pool_size=(2, 2))(inp_16)
    res = Convolution2D(2 * nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_8)
    x = add([x, res])
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 4x4
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_4 = MaxPooling2D(pool_size=(2, 2))(inp_8)
    res = Convolution2D(2 * nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_4)
    x = add([x, res])
    x = Convolution2D(512, (4, 4),
                      padding='valid',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Dropout(0.5)(x)

    # 1x1
    x = Convolution2D(512, (1, 1), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Dropout(0.5)(x)
    x = Convolution2D(nb_classes, (1, 1), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = GlobalAveragePooling2D()(x)  # Adjust for FCN
    x = BatchNormalization()(x)
    x = Activation('softmax')(x)
    model = Model(inputs=input_, outputs=x)
    return model
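The '# for fcn' and '# Adjust for FCN' comments hint at why the head ends in GlobalAveragePooling2D: once the classifier is expressed as 1x1 convolutions plus global pooling, it no longer depends on the input's spatial size. A small self-contained sketch of that property (illustrative only, not this exact model):

from keras.layers import Activation, Conv2D, GlobalAveragePooling2D, Input
from keras.models import Model

inp = Input(shape=(None, None, 3))           # spatial size left unspecified
x = Conv2D(8, (3, 3), padding='same')(inp)   # conv layers are size-agnostic
x = Conv2D(10, (1, 1), padding='same')(x)    # per-location class scores
x = GlobalAveragePooling2D()(x)              # any HxW collapses to 10 values
out = Activation('softmax')(x)
fcn = Model(inputs=inp, outputs=out)         # accepts 32x32, 64x64, ...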
Ejemplo n.º 26
def create_model(input_shape, img_gen, pool_size, img_w, img_h):
    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    time_dense_size = 32
    rnn_size = 512
    act = 'relu'

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    l = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(l)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    return model, input_data, y_pred, test_func
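create_model routes the CTC loss through a Lambda layer, but ctc_lambda_func itself is defined elsewhere in the file. In the canonical Keras image-OCR example the function looks like the sketch below; treat it as an assumption about the surrounding code rather than part of this excerpt:

from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the first couple of RNN outputs tend to be garbage in this setup,
    # so the canonical example drops them before computing the loss
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)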
Ejemplo n.º 27
    def CreateModel(self):
        '''
        Define the CNN/LSTM/CTC model using the functional API.
        Input layer: a sequence of 200-dim feature vectors; the maximum
            length of one utterance is set to 1600 frames (about 16 s).
        Hidden layer 1: 3x3 convolution.
        Hidden layer 2: pooling layer with pool size 2.
        Hidden layer 3: Dropout at rate 0.2 to reduce overfitting.
        Hidden layer 4: recurrent LSTM/GRU layer.
        Hidden layer 5: Dropout at rate 0.2 to reduce overfitting.
        Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units
            and softmax activation.
        Output layer: a custom CTC layer whose CTC loss realizes the
            connectionist temporal multi-output.
        '''
        # Each frame is represented by 13-dim MFCCs plus their 13-dim first-
        # and second-order deltas; the maximum signal length is 1500 frames.
        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # conv layer
        layer_h1 = Dropout(0.1)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # conv layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # 池化层
        #layer_h3 = Dropout(0.2)(layer_h2)  # randomly drop connections to reduce overfitting
        layer_h3 = Dropout(0.2)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # conv layer
        layer_h4 = Dropout(0.2)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # conv layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # 池化层

        layer_h6 = Dropout(0.3)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # conv layer
        layer_h7 = Dropout(0.3)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # conv layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # 池化层

        layer_h9 = Dropout(0.3)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # conv layer
        layer_h10 = Dropout(0.4)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # conv layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # 池化层

        #test=Model(inputs = input_data, outputs = layer_h6)
        #test.summary()

        layer_h13 = Reshape((200, 3200))(layer_h12)  # reshape layer
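        # Shape check: a 1600-frame input passes pools of size 2, 2, 2 and 1,
        # so time becomes 1600 / 8 = 200 steps, and each step carries
        # (200 / 8 = 25 features) x 128 channels = 3200 values.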

        layer_h13 = Dropout(0.4)(layer_h13)
        layer_h14 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h13)  # fully connected layer
        layer_h14 = Dropout(0.4)(layer_h14)
        inner = layer_h14
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM layer

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])
        #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

        layer_h15 = Dropout(0.4)(gru2)
        layer_h16 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h15)  # fully connected layer

        layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop connections to reduce overfitting
        layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h17)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=ada_d)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        print('[*Info] Model created and compiled successfully.')
        return model, model_data
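Both CTC models above capture the softmax output through a K.function so that predictions can be decoded outside the loss graph. A minimal decoding sketch (x_batch and the index-to-character table char_map are hypothetical stand-ins):

import numpy as np
from keras import backend as K

out = test_func([x_batch])[0]                      # (batch, time, n_labels)
input_len = np.ones(out.shape[0]) * out.shape[1]   # use full-length sequences
decoded, _ = K.ctc_decode(out, input_length=input_len, greedy=True)
seqs = K.get_value(decoded[0])                     # -1 pads the ragged rows
texts = [''.join(char_map[i] for i in seq if i >= 0) for seq in seqs]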
Ejemplo n.º 28
conv_shape = x.get_shape()
x = Reshape(target_shape=(int(conv_shape[1]),
                          int(conv_shape[2] * conv_shape[3])))(x)

x = Dense(32, activation='relu')(x)
gru_1 = GRU(rnn_size,
            return_sequences=True,
            kernel_initializer='he_normal',
            name='gru1')(x)
gru_1b = GRU(rnn_size,
             return_sequences=True,
             go_backwards=True,
             kernel_initializer='he_normal',
             name='gru1_b')(x)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size,
            return_sequences=True,
            kernel_initializer='he_normal',
            name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size,
             return_sequences=True,
             go_backwards=True,
             kernel_initializer='he_normal',
             name='gru2_b')(gru1_merged)
x = concatenate([gru_2, gru_2b])
x = Dropout(0.25)(x)
x = Dense(len(characters) + 1,
          kernel_initializer='he_normal',
          activation='softmax')(x)
base_model = Model(inputs=input_data, outputs=x)
Ejemplo n.º 29
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(get_file('wordlists.tgz',
                                    origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))

    img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
                                 bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
                                 minibatch_size=32,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size ** 2),
                                 val_split=words_per_epoch - val_words
                                 )
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    # steps_per_epoch/validation_steps count minibatches (the generator is
    # built with minibatch_size=32), not individual words
    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) // 32,
                        epochs=stop_epoch, validation_data=img_gen.next_val(),
                        validation_steps=val_words // 32,
                        callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
Ejemplo n.º 30
        if len(conv1) == 1:
            encoder1 = conv1[0]
        else:
            encoder1 = keras.layers.concatenate(inputs=conv1)

        if len(conv2) == 1:
            encoder2 = conv2[0]
        else:
            encoder2 = keras.layers.concatenate(inputs=conv2)

        # compress the sentence vector down to sent2vec_dim
        encoder1 = sent_repr_layer(encoder1)
        encoder2 = sent_repr_layer(encoder2)

        addition = add([encoder1, encoder2])
        minus_y1 = Lambda(lambda x: -x,
                          output_shape=(sent2vec_dim, ))(encoder1)
        mul = add([encoder2, minus_y1])
        mul = multiply([mul, mul])
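        # The three lines above compute the element-wise squared difference
        # (encoder2 - encoder1) ** 2: a Lambda negates encoder1, add() sums,
        # and multiply() squares. On a Keras version that provides
        # keras.layers.subtract, subtract([encoder2, encoder1]) would express
        # the same difference directly.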

        #words_final = keras.layers.concatenate(inputs=[mul, addition, addfeatures_input])
        words_final = keras.layers.concatenate(
            inputs=[mul, addition, addfeatures_input, encoder1, encoder2])
        final_size = encoder_size + nb_addfeatures
        words_final = Dense(units=final_size // 2,
                            activation='sigmoid')(words_final)

    elif classifier_arch == 'merge':
        # this final classifier takes the two sentence representation vectors,
        # concatenates them into a vector of twice the length, and then runs
        # that doubled vector through
Ejemplo n.º 31
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(
        get_file('wordlists.tgz',
                 origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
                 untar=True))

    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=(pool_size ** 2),
        val_split=words_per_epoch - val_words)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True,
                 go_backwards=True, kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(
        ctc_lambda_func, output_shape=(1,),
        name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(
        generator=img_gen.next_train(),
        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
        epochs=stop_epoch,
        validation_data=img_gen.next_val(),
        validation_steps=val_words // minibatch_size,
        callbacks=[viz_cb, img_gen],
        initial_epoch=start_epoch)
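The pattern above builds bidirectional RNNs by hand: one GRU runs forward, a second runs with go_backwards=True, and the pair is merged with add (first layer) or concatenate (second layer). Keras's Bidirectional wrapper expresses the same idea, with one caveat: go_backwards=True emits its output sequence in reversed time order, and the wrapper re-reverses it while the manual merge above does not. A rough sketch reusing the names from the function above, not a weight-for-weight drop-in:

from keras.layers import GRU, Bidirectional

gru1_merged = Bidirectional(GRU(rnn_size, return_sequences=True,
                                kernel_initializer='he_normal'),
                            merge_mode='sum')(inner)        # like add([...])
gru2_merged = Bidirectional(GRU(rnn_size, return_sequences=True,
                                kernel_initializer='he_normal'),
                            merge_mode='concat')(gru1_merged)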
Ejemplo n.º 32
    def _build_network(self,
                       vocab_size,
                       maxlen,
                       emb_weights=[],
                       c_emb_weights=[],
                       hidden_units=256,
                       dimension_length=11,
                       trainable=True,
                       batch_size=1):

        print('Building model...')

        context_input = Input(name='context', batch_shape=(batch_size, maxlen))

        if (len(c_emb_weights) == 0):
            c_emb = Embedding(vocab_size,
                              256,
                              input_length=maxlen,
                              embeddings_initializer='glorot_normal',
                              trainable=trainable)(context_input)
        else:
            c_emb = Embedding(vocab_size,
                              c_emb_weights.shape[1],
                              input_length=maxlen,
                              weights=[c_emb_weights],
                              trainable=trainable)(context_input)

        c_cnn1 = Convolution1D(int(hidden_units / 2),
                               5,
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='sigmoid',
                               padding='valid',
                               use_bias=True,
                               input_shape=(1, maxlen))(c_emb)
        c_cnn2 = Convolution1D(hidden_units,
                               5,
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='sigmoid',
                               padding='valid',
                               use_bias=True,
                               input_shape=(1, maxlen - 2))(c_cnn1)

        c_lstm1 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='orthogonal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=.0,
                       unit_forget_bias=False,
                       return_sequences=True)(c_cnn2)

        c_lstm2 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='orthogonal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=.0,
                       unit_forget_bias=False,
                       return_sequences=True,
                       go_backwards=True)(c_cnn2)

        c_merged = add([c_lstm1, c_lstm2])
        c_merged = Dropout(0.25)(c_merged)

        c_merged = TimeDistributed(
            Dense(128, kernel_initializer="he_normal",
                  activation='sigmoid'))(c_merged)

        text_input = Input(name='text', batch_shape=(batch_size, maxlen))

        if (len(emb_weights) == 0):
            emb = Embedding(vocab_size,
                            256,
                            input_length=maxlen,
                            embeddings_initializer='glorot_normal',
                            trainable=trainable)(text_input)
        else:
            emb = Embedding(vocab_size,
                            emb_weights.shape[1],
                            input_length=maxlen,
                            weights=[emb_weights],
                            trainable=trainable)(text_input)

        t_cnn1 = Convolution1D(int(hidden_units / 2),
                               5,
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='sigmoid',
                               padding='valid',
                               use_bias=True,
                               input_shape=(1, maxlen))(emb)
        t_cnn2 = Convolution1D(hidden_units,
                               5,
                               kernel_initializer='he_normal',
                               bias_initializer='he_normal',
                               activation='sigmoid',
                               padding='valid',
                               use_bias=True,
                               input_shape=(1, maxlen - 2))(t_cnn1)

        t_lstm1 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='he_normal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=0.25,
                       unit_forget_bias=False,
                       return_sequences=True)(t_cnn2)

        t_lstm2 = LSTM(hidden_units,
                       kernel_initializer='he_normal',
                       recurrent_initializer='he_normal',
                       bias_initializer='he_normal',
                       activation='sigmoid',
                       recurrent_activation='sigmoid',
                       kernel_regularizer=regularizers.l2(0.01),
                       activity_regularizer=regularizers.l2(0.01),
                       recurrent_regularizer=regularizers.l2(0.01),
                       dropout=0.25,
                       recurrent_dropout=0.25,
                       unit_forget_bias=False,
                       return_sequences=True,
                       go_backwards=True)(t_cnn2)

        t_merged = add([t_lstm1, t_lstm2])
        t_merged = Dropout(0.25)(t_merged)

        t_merged = TimeDistributed(
            Dense(128, kernel_initializer="he_normal",
                  activation='sigmoid'))(t_merged)

        awc_input = Input(name='awc', batch_shape=(batch_size, 11))

        eaw = Embedding(101,
                        128,
                        input_length=dimension_length,
                        embeddings_initializer='glorot_normal',
                        trainable=True)(awc_input)

        merged = concatenate([c_merged, t_merged, eaw], axis=1)

        flat_model = Flatten()(merged)

        dnn_1 = Dense(hidden_units,
                      kernel_initializer="he_normal",
                      activation='sigmoid')(flat_model)
        dnn_1 = Dropout(0.25)(dnn_1)
        dnn_2 = Dense(2, activation='sigmoid')(dnn_1)

        softmax = Activation('softmax')(dnn_2)

        model = Model(inputs=[context_input, text_input, awc_input],
                      outputs=softmax)

        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        print('No of parameters:', model.count_params())

        print(model.summary())

        return model
Ejemplo n.º 33
               strides=1,
               padding='same',
               kernel_initializer=kernel_initializer,
               kernel_regularizer=kernel_regularizer)(x)
    if use_bn:
        x = BatchNormalization()(x)
    x = Activation(activation)(x)
if use_shortcut:
    x_prev = Conv2D(filters=32,
                    kernel_size=1,
                    strides=1,
                    padding='same',
                    kernel_initializer=kernel_initializer,
                    kernel_regularizer=kernel_regularizer)(x_prev)
    x_prev = Activation(activation)(x_prev)
    x = add([x_prev, x])

x_prev = x
for i in range(2):
    x = Conv2D(filters=64,
               kernel_size=3,
               strides=1,
               padding='same',
               kernel_initializer=kernel_initializer,
               kernel_regularizer=kernel_regularizer)(x)
    if use_bn:
        x = BatchNormalization()(x)
    x = Activation(activation)(x)
if use_shortcut:
    x_prev = Conv2D(filters=64,
                    kernel_size=1,
Ejemplo n.º 34
    def CreateModel(self):
        '''
        Define the CNN/LSTM/CTC model using the functional API.
        Input layer: a sequence of 200-dim feature vectors; the maximum
            length of one utterance is set to 1600 frames (about 16 s).
        Hidden layers: convolution + pooling blocks with 3x3 kernels and
            pool size 2, followed by fully connected layers.
        Output layer: fully connected layer with self.MS_OUTPUT_SIZE units
            and softmax activation.
        CTC layer: the CTC loss realizes the connectionist temporal
            multi-output.
        '''

        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h = Conv2D(32, (3, 3),
                         use_bias=False,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(input_data)  # conv layer
        #layer_h = Dropout(0.05)(layer_h)
        layer_h = Conv2D(32, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.05)(layer_h)  # randomly drop connections to reduce overfitting
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        #layer_h = Dropout(0.2)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # conv layer
        layer_h = MaxPooling2D(pool_size=1, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h)  # conv layer
        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h)  # conv layer
        #layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h)  # pooling layer

        #test=Model(inputs = input_data, outputs = layer_h)
        #test.summary()

        layer_h = Reshape((200, 3200))(layer_h)  # reshape layer

        #layer_h16 = Dropout(0.3)(layer_h16)  # randomly drop connections to reduce overfitting
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        inner = layer_h
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM layer

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])

        layer_h = gru2
        #layer_h20 = Dropout(0.4)(gru2)
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        #layer_h17 = Dropout(0.3)(layer_h17)
        layer_h = Dense(self.MS_OUTPUT_SIZE,
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06)
        opt = Adam(lr=0.001,
                   beta_1=0.9,
                   beta_2=0.999,
                   decay=0.0,
                   epsilon=10e-8)
        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=opt)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        #print('[*Hint] Model created and compiled successfully')
        print('[*Info] Model created and compiled successfully.')
        return model, model_data
Ejemplo n.º 35
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith('.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith('.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    weights_header = np.ndarray(
        shape=(5, ), dtype='int32', buffer=weights_file.read(20))
    print('Weights Header: ', weights_header)
    # TODO: Check transpose flag when implementing fully connected layers.
    # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('\nCreating Keras model.')
    if args.convolutional_only:
        image_height, image_width = None, None
    else:
        image_height = int(cfg_parser['net_0']['height'])
        image_width = int(cfg_parser['net_0']['width'])

    prev_layer = Input(shape=(image_height, image_width, 3))
    all_layers = [prev_layer]
    outputs = []

    weight_decay = float(cfg_parser['net_0']['decay']) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0

    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            # TODO: This assumes channel last dim_ordering.
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.prod(weights_shape)

            print('  conv2d', 'bn' if batch_normalize else activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                # TODO: Keras BatchNormalization mistakenly refers to var
                # as std.
                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            # TODO: Add check for Theano dim ordering.
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [conv_weights, conv_bias]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            padding = 'same' if pad == 1 and stride == 1 else 'valid'
            # Adjust padding model for darknet.
            if stride == 2:
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Create Conv2D layer
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(prev_layer)

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    padding='same',
                    pool_size=(size, size),
                    strides=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            if cfg_parser.items(section) != []:
                raise ValueError('{} with params unsupported.'.format(section))
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            if len(ids) == 2:
                for i, item in enumerate(ids):
                    if item != -1:
                        ids[i] = item + 1

            layers = [all_layers[i] for i in ids]

            if len(layers) > 1:
                print('  Concatenating route layers:')
                for layer in layers:
                    print('    '+str(layer))
                concatenate_layer = concatenate(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('shortcut'):
            ids = [int(i) for i in cfg_parser[section]['from'].split(',')][0]
            activation = cfg_parser[section]['activation']
            shortcut = add([all_layers[ids], prev_layer])
            if activation == 'linear':
                shortcut = Activation('linear')(shortcut)
            all_layers.append(shortcut)
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                UpSampling2D(
                    size=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            classes = int(cfg_parser[section]['classes'])
            # num = int(cfg_parser[section]['num'])
            # mask = int(cfg_parser[section]['mask'])
            n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2])
            n3 = 3
            n4 = (4 + 1 + classes)
            yolo = Reshape((n1, n2, n3, n4))(prev_layer)
            all_layers.append(yolo)
            prev_layer = all_layers[-1]
            outputs.append(len(all_layers) - 1)

        elif (section.startswith('net')):
            pass  # Configs not currently handled during model definition.
        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    model = Model(inputs=all_layers[0], outputs=[all_layers[i] for i in outputs])
    model.compile(optimizer=Adam(lr=1e-3), loss={
        # use custom yolo_loss Lambda layer.
        'yolo_loss': lambda y_true, y_pred: y_pred})
    #### !!! https://github.com/qqwweee/keras-yolo3/blob/master/yolo3/model.py#L345
    print('\n'+'='*98+'\n'+'{:^98}'.format('Model Summary'))
    print(model.summary())
    print('='*98+'\n')
    model.save('{}'.format(output_path))
    print('Saved Keras model to {}'.format(output_path))
    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
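_main depends on unique_config_sections, which is not shown in this excerpt. In YAD2K/keras-yolo3-style converters it rewrites repeated Darknet section names ([convolutional], [convolutional], ...) into unique ones ([convolutional_0], [convolutional_1], ...) so configparser can keep them all; the 'net_0' lookups above assume exactly that naming. A sketch of the helper, assumed rather than quoted from this file:

import io
from collections import defaultdict

def unique_config_sections(config_file):
    """Suffix each repeated section name with a counter for configparser."""
    section_counters = defaultdict(int)
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                section = line.strip().strip('[]')
                _section = section + '_' + str(section_counters[section])
                section_counters[section] += 1
                line = line.replace(section, _section)
            output_stream.write(line)
    output_stream.seek(0)
    return output_stream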
Ejemplo n.º 36
def get_model(training, img_h, nclass):
    input_shape = (None, img_h, 1)  # (128, 64, 1)
    #input_shape = (280, img_h, 1)
    # Make Network
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)
    #inner = resnet.ResNet50(include_top=False, weights = None, input_tensor = inputs)
    inner = shufflenet.ShuffleNet_V2(include_top=False,
                                     weights=None,
                                     input_tensor=inputs)
    # Convolution layer (VGG)
    # CNN to RNN
    #inner = Reshape(target_shape=((32, 2048)), name='reshape')(inner)  # (None, 32, 2048)
    inner = TimeDistributed(Flatten(), name='flatten')(inner)
    #inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    lstm_unit_num = 256

    # RNN layer
    lstm_1 = CuDNNLSTM(lstm_unit_num,
                       return_sequences=True,
                       kernel_initializer='he_normal',
                       name='lstm1')(inner)  # (None, 32, 512)
    lstm_1b = CuDNNLSTM(lstm_unit_num,
                        return_sequences=True,
                        go_backwards=True,
                        kernel_initializer='he_normal',
                        name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)

    #lstm1_merged = Dropout(0.1)(lstm1_merged)

    lstm_2 = CuDNNLSTM(lstm_unit_num,
                       return_sequences=True,
                       kernel_initializer='he_normal',
                       name='lstm2')(lstm1_merged)
    lstm_2b = CuDNNLSTM(lstm_unit_num,
                        return_sequences=True,
                        go_backwards=True,
                        kernel_initializer='he_normal',
                        name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 1024)
    lstm_merged = BatchNormalization()(lstm2_merged)

    #lstm_merged = Dropout(0.1)(lstm_merged)

    # transforms RNN output to character activations:
    inner = Dense(nclass, kernel_initializer='he_normal',
                  name='dense2')(lstm_merged)  # (None, 32, 63)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[None],
                   dtype='float32')  # (None ,8)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length,
                                   label_length])  #(None, 1)
    model = None
    if training:
        model = Model(inputs=[inputs, labels, input_length, label_length],
                      outputs=loss_out)
    else:
        model = Model(inputs=inputs, outputs=y_pred)
        return model, model
    model.summary()
    multi_model = multi_gpu_model(model, gpus=GPU_NUM)
    save_model = model
    ada = Adadelta()
    #multi_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam', metrics=['accuracy'])
    multi_model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                        optimizer=ada,
                        metrics=['accuracy'])
    return save_model, multi_model
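
ctc_lambda_func is used here (and in most of the CTC examples below) without being shown. In the Keras image-OCR lineage these snippets follow, it is conventionally a thin wrapper around K.ctc_batch_cost; a sketch assuming the argument order used above:

import keras.backend as K

def ctc_lambda_func(args):
    # order matches the Lambda call above: softmax output, labels,
    # input (time-step) lengths, label lengths
    y_pred, labels, input_length, label_length = args
    # the canonical OCR example drops the first two time steps, which the
    # RNN tends to emit as garbage; keep or remove depending on the model
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)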
def resnet8_MDN(img_width, img_height, img_channels, output_dim):
    """
    Define model architecture.
    
    # Arguments
       img_width: Target image width.
       img_height: Target image height.
       img_channels: Target image channels.
       output_dim: Dimension of model output.
       
    # Returns
       model: A Model instance.
    """

    # Input
    img_input = Input(shape=(img_height, img_width, img_channels))

    x1 = Conv2D(32, (5, 5), strides=[2, 2], padding='same')(img_input)
    x1 = MaxPooling2D(pool_size=(3, 3), strides=[2, 2])(x1)

    # First residual block
    x2 = keras.layers.normalization.BatchNormalization()(x1)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x2 = keras.layers.normalization.BatchNormalization()(x2)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x1 = Conv2D(32, (1, 1), strides=[2, 2], padding='same')(x1)
    x3 = add([x1, x2])

    # Second residual block
    x4 = keras.layers.normalization.BatchNormalization()(x3)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x4 = keras.layers.normalization.BatchNormalization()(x4)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x4_ = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3)
    x5 = add([x4_, x4])
    x4_out = Flatten()(x5)
    x4_out = Activation('relu')(x4_out)
    # Third residual block
    x6 = keras.layers.normalization.BatchNormalization()(x3)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(64, (3, 3),
                strides=[2, 2],
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x6 = keras.layers.normalization.BatchNormalization()(x6)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(64, (3, 3),
                padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x6_ = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3)
    x7 = add([x6_, x6])
    x6_out = Flatten()(x7)
    x6_out = Activation('relu')(x6_out)

    # Collision channel
    trans = Dense(500, activation='relu')(x6_out)
    trans = keras.layers.normalization.BatchNormalization()(trans)
    trans = Dropout(0.5)(trans)
    trans = Dense(output_dim, name='trans_output')(trans)
    # coll = Activation('sigmoid')(coll)

    dense1_1 = Dense(500, activation='relu')(x4_out)
    dense1_1 = keras.layers.normalization.BatchNormalization()(dense1_1)
    dense1_1 = Dropout(0.2)(dense1_1)

    dense2_1 = Dense(100, activation='relu')(dense1_1)
    dense2_1 = keras.layers.normalization.BatchNormalization()(dense2_1)
    dense2_1 = Dropout(0.2)(dense2_1)
    FC_mus = Dense(c * m, activation='tanh')(dense2_1)
    # FC_sigmas = Dense(m, activation=elu_plus_one_plus_epsilon)(dense1_1)  # Keras.exp, W_regularizer=l2(1e-3)
    FC_alphas = Dense(m)(dense2_1)
    outputs = concatenate([FC_mus, FC_alphas], name='direct_output')

    # outputs = Dense((c+1)*m)(dense2_1)

    model = Model(inputs=[img_input], outputs=[outputs, trans])
    # model = Model(inputs=[img_input], outputs=[outputs])

    # Define steering-collision model
    # model = Model(inputs=[img_input], outputs=[steer, coll])
    model.summary()

    return model
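
c and m are free globals in the MDN head above (apparently the per-component parameter count and the number of mixture components). A hypothetical smoke test showing how the two-headed model is built:

c, m = 1, 3  # hypothetical: 3 mixture components, 1 parameter (mu) each

model = resnet8_MDN(img_width=200, img_height=200, img_channels=1,
                    output_dim=1)
# two heads: 'direct_output' (c*m mus + m alphas) and 'trans_output'
print([t.name for t in model.outputs])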
Example No. 38
question_encoder = Embedding(input_dim=vocab_size,
                            output_dim=EMBEDDING_SIZE,
                            input_length=question_maxlen)(question_input)
question_encoder = Dropout(0.3)(question_encoder)

# match between story and question
match = dot([story_encoder, question_encoder], axes=[2, 2])

# encode story into vector space of question
story_encoder_c = Embedding(input_dim=vocab_size,
                           output_dim=question_maxlen,
                           input_length=story_maxlen)(story_input)
story_encoder_c = Dropout(0.3)(story_encoder_c)

# combine match and story vectors
response = add([match, story_encoder_c])
response = Permute((2, 1))(response)

# combine response and question vectors
answer = concatenate([response, question_encoder], axis=-1)
answer = LSTM(LATENT_SIZE)(answer)
answer = Dropout(0.3)(answer)
answer = Dense(vocab_size)(answer)
output = Activation("softmax")(answer)

model = Model(inputs=[story_input, question_input], outputs=output)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
              metrics=["accuracy"])

# train model
# (the call was truncated here; the remaining arguments follow the standard
#  pattern for this example, with NUM_EPOCHS and the test arrays assumed
#  to be defined above)
history = model.fit([Xstrain, Xqtrain], [Ytrain], batch_size=BATCH_SIZE,
                    epochs=NUM_EPOCHS,
                    validation_data=([Xstest, Xqtest], [Ytest]))
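
Prediction afterwards uses the same two-input layout; a sketch with the assumed test arrays and an assumed index-to-word lookup:

import numpy as np

probs = model.predict([Xstest, Xqtest])   # (num_samples, vocab_size)
preds = np.argmax(probs, axis=-1)
print([idx2word[i] for i in preds[:10]])  # idx2word assumed built with the vocab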
Example No. 39
def FCN(basenet='vgg16', trainable_base=False,
        num_output=21, input_shape=(None, None, 3),
        weights='imagenet'):
    """Instantiate the FCN8s architecture with keras.

    # Arguments
        basenet: type of basenet {'vgg16'}
        trainable_base: Bool whether the basenet weights are trainable
        num_output: number of classes
        input_shape: input image shape
        weights: pre-trained weights to load (None for training from scratch)
    # Returns
        A Keras model instance
    """
    _handle_data_format()
    basenet = _get_basenet(basenet)
    # input
    input = Input(shape=input_shape)
    # Get skip_layers=[drop7, pool4, pool3] from the base net: VGG16
    skip_layers = basenet(skip_architecture=True)(input)

    drop7 = skip_layers[0]
    score_fr = Conv2D(filters=num_output, kernel_size=(1, 1),
                      padding='valid',
                      name='score_fr')(drop7)
    upscore2 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4),
                               strides=(2, 2), padding='valid', use_bias=False,
                               data_format=K.image_data_format(),
                               name='upscore2')(score_fr)
    # scale pool4 skip for compatibility
    pool4 = skip_layers[1]
    scale_pool4 = Lambda(lambda x: x * 0.01, name='scale_pool4')(pool4)
    score_pool4 = Conv2D(filters=num_output, kernel_size=(1, 1),
                         padding='valid', name='score_pool4')(scale_pool4)
    score_pool4c = _crop(upscore2, offset=(5, 5),
                         name='score_pool4c')(score_pool4)
    fuse_pool4 = add([upscore2, score_pool4c])
    upscore_pool4 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4),
                                    strides=(2, 2), padding='valid',
                                    use_bias=False,
                                    data_format=K.image_data_format(),
                                    name='upscore_pool4')(fuse_pool4)
    # scale pool3 skip for compatibility
    pool3 = skip_layers[2]
    scale_pool3 = Lambda(lambda x: x * 0.0001, name='scale_pool3')(pool3)
    score_pool3 = Conv2D(filters=num_output, kernel_size=(1, 1),
                         padding='valid', name='score_pool3')(scale_pool3)
    score_pool3c = _crop(upscore_pool4, offset=(9, 9),
                         name='score_pool3c')(score_pool3)
    fuse_pool3 = add([upscore_pool4, score_pool3c])
    # score
    upscore8 = Conv2DTranspose(filters=num_output, kernel_size=(16, 16),
                               strides=(8, 8), padding='valid',
                               use_bias=False,
                               data_format=K.image_data_format(),
                               name='upscore8')(fuse_pool3)
    score = _crop(input, offset=(31, 31), name='score')(upscore8)

    # model
    model = Model(input, score, name='fcn_vgg16')

    # load weights
    if weights == 'imagenet':
        weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                                basenet.WEIGHTS_PATH,
                                cache_subdir='models')
        layer_names = load_weights(model, weights_path)
        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)
        # Freezing basenet weights
        if not trainable_base:
            for layer in model.layers:
                if layer.name in layer_names:
                    layer.trainable = False

    return model
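
_crop is referenced but not defined in this excerpt. In Keras FCN ports it typically wraps Cropping2D, trimming the larger skip tensor to match the layer it will be fused with. A sketch, assuming channels_last and statically known spatial shapes (the default input_shape=(None, None, 3) would need a dynamic variant):

import keras.backend as K
from keras.layers import Cropping2D

def _crop(target_layer, offset=(0, 0), name=None):
    """Crop a tensor's H/W down to target_layer's H/W (sketch)."""
    def f(x):
        target_h, target_w = K.int_shape(target_layer)[1:3]
        h, w = K.int_shape(x)[1:3]
        top, left = offset
        return Cropping2D(cropping=((top, h - target_h - top),
                                    (left, w - target_w - left)),
                          name=name)(x)
    return f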
Example No. 40
def encoder_model():
    inputs = Input(shape=(int(VIDEO_LENGTH / 2), 128, 208, 3))

    # 10x128x128
    conv_1 = Conv3D(filters=128,
                    strides=(1, 4, 4),
                    dilation_rate=(1, 1, 1),
                    kernel_size=(3, 11, 11),
                    padding='same')(inputs)
    x = TimeDistributed(BatchNormalization())(conv_1)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_1 = TimeDistributed(Dropout(0.5))(x)

    conv_2a = Conv3D(filters=64,
                     strides=(1, 1, 1),
                     dilation_rate=(2, 1, 1),
                     kernel_size=(2, 5, 5),
                     padding='same')(out_1)
    x = TimeDistributed(BatchNormalization())(conv_2a)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_2a = TimeDistributed(Dropout(0.5))(x)

    conv_2b = Conv3D(filters=64,
                     strides=(1, 1, 1),
                     dilation_rate=(2, 1, 1),
                     kernel_size=(2, 5, 5),
                     padding='same')(out_2a)
    x = TimeDistributed(BatchNormalization())(conv_2b)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_2b = TimeDistributed(Dropout(0.5))(x)

    res_1 = add([out_2a, out_2b])
    # res_1 = LeakyReLU(alpha=0.2)(res_1)

    conv_3 = Conv3D(filters=64,
                    strides=(1, 2, 2),
                    dilation_rate=(1, 1, 1),
                    kernel_size=(3, 5, 5),
                    padding='same')(res_1)
    x = TimeDistributed(BatchNormalization())(conv_3)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_3 = TimeDistributed(Dropout(0.5))(x)

    # 10x16x16
    conv_4a = Conv3D(filters=64,
                     strides=(1, 1, 1),
                     dilation_rate=(2, 1, 1),
                     kernel_size=(2, 3, 3),
                     padding='same')(out_3)
    x = TimeDistributed(BatchNormalization())(conv_4a)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_4a = TimeDistributed(Dropout(0.5))(x)

    conv_4b = Conv3D(filters=64,
                     strides=(1, 1, 1),
                     dilation_rate=(2, 1, 1),
                     kernel_size=(2, 3, 3),
                     padding='same')(out_4a)
    x = TimeDistributed(BatchNormalization())(conv_4b)
    x = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    out_4b = TimeDistributed(Dropout(0.5))(x)

    z = add([out_4a, out_4b])
    # res_1 = LeakyReLU(alpha=0.2)(res_1)

    model = Model(inputs=inputs, outputs=z)

    return model
Example No. 41
def train(run_name, start_epoch, stop_epoch, img_w, type_t):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(
        get_file('wordlists.tgz',
                 origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
                 untar=True))

    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=(pool_size**2),
        val_split=words_per_epoch - val_words,
        type_t=type_t)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner2 = Conv2D(conv_filters,
                    kernel_size,
                    padding='same',
                    activation=act,
                    kernel_initializer='he_normal',
                    name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner2)

    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(FindOutPutShape(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    model.summary()

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=sgd,
                  metrics=['accuracy'])
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        # model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
    # model.load_weights('weightswithresize.h5')

    model.load_weights('weights56.h5')
    # history = model.fit_generator(generator=img_gen.next_train(),
    #                     steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
    #                     epochs=stop_epoch,
    #                     validation_data=img_gen.next_val(),
    #                     validation_steps=val_words // minibatch_size,
    #                     callbacks=[viz_cb, img_gen],
    #                     initial_epoch=start_epoch)
    imgwide = 564

    strn = "new.png"
    # img = Image.open('test2.png')
    # img = img.resize((imgwide, 64), Image.ANTIALIAS)

    img = cv2.imread(strn)
    img = cv2.resize(img, (imgwide, 64))
    # #
    kernel = np.ones((3, 3), np.float32) / 50
    img = cv2.filter2D(img, -1, kernel)

    print(img.shape)

    for i in range(50):
        img = np.insert(img, 0, 255, axis=1)
    for i in range(50):
        img = np.insert(img, img.shape[1], 255, axis=1)
    for i in range(25):
        img = np.insert(img, 0, [255], axis=0)
    for i in range(25):
        img = np.insert(img, img.shape[0], 255, axis=0)

    img = cv2.resize(img, (imgwide, 64))

    print(img.shape)

    img = np.asarray(img)
    img = img[:, :, 0]  # grab single channel

    im = img

    # plt.imshow(img,cmap='gray')
    # plt.show()
    # im = speckle(img)

    # img = img.astype(np.float32) / 255
    img = cv2.adaptiveThreshold(img, 1, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY, 11, 2)

    plt.imshow(img, cmap='gray')
    plt.show()

    # a = a.astype(np.float32) / 255
    img = np.expand_dims(img, 0)

    data = np.reshape(img, (1, 64, imgwide))

    X_data = np.ones([1, imgwide, 64, 1])
    X_data[0, 0:imgwide, :, 0] = data[0, :, :].T
    #
    decode_batch(test_func, X_data)
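
decode_batch is called but not defined here; in the Keras OCR example this code follows, it is a greedy best-path decode over the softmax output. A sketch, assuming an alphabet string mapping class indices to characters, with the CTC blank as the last class:

import itertools
import numpy as np

alphabet = 'abcdefghijklmnopqrstuvwxyz '  # hypothetical label set

def decode_batch(test_func, word_batch):
    out = test_func([word_batch])[0]      # (batch, time_steps, n_classes)
    results = []
    for j in range(out.shape[0]):
        best_path = np.argmax(out[j], axis=1)
        collapsed = [k for k, _ in itertools.groupby(best_path)]  # merge repeats
        chars = [alphabet[c] for c in collapsed if c < len(alphabet)]  # drop blank
        results.append(''.join(chars))
        print(results[-1])
    return results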
Example No. 42
def train(img_w, train_data, val_data):
    # Input Parameters
    img_h = 64

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    batch_size = 32
    downsample_factor = pool_size**2
    tiger_train = ImageGenerator(train_data, img_w, img_h, batch_size,
                                 downsample_factor)
    tiger_train.build_data()
    tiger_val = ImageGenerator(val_data, img_w, img_h, batch_size,
                               downsample_factor)
    tiger_val.build_data()

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters,
                   kernel_size,
                   padding='same',
                   activation=act,
                   kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(tiger_train.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[tiger_train.max_text_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    model.fit_generator(generator=tiger_train.next_batch(),
                        steps_per_epoch=tiger_train.n,
                        epochs=1,
                        validation_data=tiger_val.next_batch(),
                        validation_steps=tiger_val.n)
    return model
Example No. 43
    def build_model(self):

        print('building model...')

        if K.image_data_format() == 'channels_first':
            self.input_shape = (1, self.img_w, self.img_h)
        else:
            self.input_shape = (self.img_w, self.img_h, 1)

        self.ds.build_data()

        self.valid.build_data()

        act = 'relu'

        self.input_data = Input(name='the_input',
                                shape=self.input_shape,
                                dtype='float32')
        inner = Conv2D(self.conv_filters,
                       self.kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv1')(self.input_data)
        inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size),
                             name='max1')(inner)
        inner = Dropout(0.2, name='drop1')(inner)
        inner = Conv2D(self.conv_filters,
                       self.kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv2')(inner)
        inner = Dropout(0.2, name='drop2')(inner)
        inner = Conv2D(self.conv_filters,
                       self.kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv3')(inner)
        inner = BatchNormalization()(inner)

        inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size),
                             name='max2')(inner)

        conv_to_rnn_dims = (self.img_w // (self.pool_size**2),
                            (self.img_h //
                             (self.pool_size**2)) * self.conv_filters)
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

        # cuts down input size going into RNN:
        inner = Dense(self.time_dense_size, activation=act,
                      name='dense1')(inner)

        # Two layers of bidirectional GRUs
        # GRU seems to work as well, if not better than LSTM:
        gru_1 = GRU(self.rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(self.rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        # transforms RNN output to character activations:
        inner = Dense(get_output_size(),
                      kernel_initializer='he_normal',
                      name='dense2')(concatenate([gru_2, gru_2b]))

        self.y_pred = Activation('softmax', name='softmax')(inner)

        # Model(inputs=self.input_data, outputs=self.y_pred).summary()

        labels = Input(name='the_labels',
                       shape=[self.max_text_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
            [self.y_pred, labels, input_length, label_length])

        # clipnorm seems to speed up convergence
        sgd = SGD(lr=0.002,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=5)

        self.model = Model(
            inputs=[self.input_data, labels, input_length, label_length],
            outputs=loss_out)

        self.model.compile(loss={
            # the Lambda layer already outputs the CTC loss, so the loss
            # function just passes through y_pred (the 'ctc' layer output);
            # returning self.y_pred here was a bug
            'ctc': lambda y_true, y_pred: y_pred
        },
                           optimizer=sgd)
Example No. 44
def joint_TCN_resnet(n_classes,
                     max_len,
                     art,
                     img_size=112,
                     gap=1,
                     dropout=0.0,
                     activation="relu"):
    """Reviced TK'S TCN model. num_block = 2. initial_conv_num=64.
	Args:
		n_classes: number of classes for this kind of label.
		feat_dim: the dumention of the feature.
		max_len: the number of frames for each video.
	Returns:
		model: uncompiled model."""

    if K.image_data_format() == 'channels_last':
        ROW_AXIS = 1
        CHANNEL_AXIS = 2
    else:
        ROW_AXIS = 2
        CHANNEL_AXIS = 1

    if art == 'V1':
        initial_stride = 1
        initial_filter_dim = 4
        initial_num = 64
        config = [[(1, 4, 64)], [(1, 4, 64)], [(1, 4, 64)], [(2, 4, 128)],
                  [(1, 4, 128)], [(1, 4, 128)], [(2, 4, 256)], [(1, 4, 256)],
                  [(1, 4, 256)]]
    elif art == 'V2':
        initial_stride = 1
        initial_filter_dim = 2
        initial_num = 256
        config = [
            [(1, 2, initial_num)],
            [(1, 2, initial_num)],
            [(2, 2, initial_num * 2)],
            [(1, 2, initial_num * 2)],
        ]
    elif art == 'V3':
        initial_stride = 1
        initial_filter_dim = 2
        initial_num = 128
        config = [
            [(1, 2, initial_num)],
            [(1, 2, initial_num)],
            [(2, 2, initial_num * 2)],
            [(1, 2, initial_num * 2)],
        ]
    elif art == 'V4':
        initial_stride = 1
        initial_filter_dim = 4
        initial_num = 64
        config = [
            [(1, 4, initial_num)],
            [(1, 4, initial_num)],
            [(2, 4, initial_num * 2)],
            [(1, 4, initial_num * 2)],
            [(2, 4, initial_num * 4)],
            [(1, 4, initial_num * 4)],
        ]
    elif art == 'V5':
        initial_stride = 1
        initial_filter_dim = 4
        initial_num = 64
        config = [
            [(1, 4, initial_num)],
            [(1, 4, initial_num)],
            [(1, 4, initial_num)],
            [(1, 4, initial_num)],
            [(2, 4, initial_num * 2)],
            [(1, 4, initial_num * 2)],
            [(1, 4, initial_num * 2)],
            [(1, 4, initial_num * 2)],
            [(2, 4, initial_num * 4)],
            [(1, 4, initial_num * 4)],
            [(1, 4, initial_num * 4)],
            [(1, 4, initial_num * 4)],
        ]
    elif art == 'V6':
        initial_stride = 1
        initial_filter_dim = 6
        initial_num = 64
        config = [
            [(1, 6, initial_num)],
            [(1, 6, initial_num)],
            [(1, 6, initial_num)],
            [(2, 6, initial_num * 2)],
            [(1, 6, initial_num * 2)],
            [(1, 6, initial_num * 2)],
            [(2, 6, initial_num * 4)],
            [(1, 6, initial_num * 4)],
            [(1, 6, initial_num * 4)],
        ]
    elif art == 'V7':
        initial_stride = 1
        initial_filter_dim = 3
        initial_num = 64
        config = [
            [(1, 3, initial_num)],
            [(1, 3, initial_num)],
            [(1, 3, initial_num)],
            [(2, 3, initial_num * 2)],
            [(1, 3, initial_num * 2)],
            [(1, 3, initial_num * 2)],
            [(2, 3, initial_num * 4)],
            [(1, 3, initial_num * 4)],
            [(1, 3, initial_num * 4)],
        ]

    def slice(x, index):
        return x[:, index, :, :, :]

    input = Input(shape=(max_len, img_size, img_size, 3))
    video = input
    # feature = K.placeholder((None,1,487))
    feature = []

    # video_batch = K.permute_dimensions(video, (1,0,2,3,4))
    # video_batch = Permute()
    # video_batch = Reshape((max_len,img_size,img_size,3))(video)
    # print 'video_batch', video_batch.shape
    # frame = video[0]

    print('video shape', video.shape)
    for i in range(max_len):
        frame = Lambda(slice,
                       output_shape=(112, 112, 3),
                       arguments={'index': i})(video)
        print('frame.shape:', frame.shape)
        frame = Conv2D(64,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv1' + str(i),
                       input_shape=(img_size, img_size, 3))(frame)
        frame = MaxPooling2D(pool_size=(2, 2),
                             strides=(2, 2),
                             padding='valid',
                             name='pool1' + str(i))(frame)
        # 2nd layer group
        frame = Conv2D(128,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv2' + str(i))(frame)
        frame = MaxPooling2D(pool_size=(2, 2),
                             strides=(2, 2),
                             padding='valid',
                             name='pool2' + str(i))(frame)
        # 3rd layer group
        frame = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv3a' + str(i))(frame)
        frame = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv3b' + str(i))(frame)
        frame = MaxPooling2D(pool_size=(2, 2),
                             strides=(2, 2),
                             padding='valid',
                             name='pool3' + str(i))(frame)
        # 4th layer group
        frame = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv4a' + str(i))(frame)
        frame = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv4b' + str(i))(frame)
        frame = MaxPooling2D(pool_size=(2, 2),
                             strides=(2, 2),
                             padding='valid',
                             name='pool4' + str(i))(frame)
        # 5th layer group
        frame = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv5a' + str(i))(frame)
        frame = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       name='conv5b' + str(i))(frame)
        frame = ZeroPadding2D(padding=((0, 1), (0, 1)),
                              name='zeropad5' + str(i))(frame)
        frame = MaxPooling2D(pool_size=(2, 2),
                             strides=(2, 2),
                             padding='valid',
                             name='pool5' + str(i))(frame)
        frame = Flatten()(frame)
        # FC layers group
        frame = Dense(4096, activation='relu', name='fc6' + str(i))(frame)
        frame = Dropout(.5)(frame)
        frame = Dense(4096, activation='relu', name='fc7' + str(i))(frame)
        frame = Dropout(.5)(frame)
        frame = Dense(487, activation='softmax', name='fc8' + str(i))(frame)
        print('Dense', frame.shape)
        # frame = K.expand_dims(frame, axis=1)
        frame = Reshape((-1, 487))(frame)
        print('expand_dims', frame.shape)
        # print 'frame shape after expend dim ', frame.shape
        # feature = K.concatenate(frame, axis=1)
        feature.append(frame)
    # feature = K.concatenate(feature, axis=1)
    # feature = Concatenate(axis=1)(feature)
    feature = concatenate(feature, axis=1)
    # feature = frame
    # print 'feature.shape ', feature.output_shape

    model = feature
    # model = K.expand_dims(feature, axis=0)
    print('model', model.shape)
    model = Conv1D(initial_num,
                   initial_filter_dim,
                   strides=initial_stride,
                   padding="same",
                   kernel_initializer="he_normal")(model)

    for depth in range(0, len(config)):
        for stride, filter_dim, num in config[depth]:
            bn = BatchNormalization(axis=CHANNEL_AXIS)(model)
            relu = Activation(activation)(bn)
            dr = Dropout(dropout)(relu)
            res = Conv1D(num,
                         filter_dim,
                         strides=stride,
                         padding="same",
                         kernel_initializer="he_normal")(dr)

            res_shape = K.int_shape(res)
            model_shape = K.int_shape(model)
            if res_shape[CHANNEL_AXIS] != model_shape[CHANNEL_AXIS]:
                model = Conv1D(num,
                               1,
                               strides=stride,
                               padding="same",
                               kernel_initializer="he_normal")(model)

            model = add([model, res])

    bn = BatchNormalization(axis=CHANNEL_AXIS)(model)
    model = Activation(activation)(bn)

    if gap:
        pool_window_shape = K.int_shape(model)
        gap = AveragePooling1D(pool_window_shape[ROW_AXIS], strides=1)(model)
        flatten = Flatten()(gap)
    else:
        flatten = Flatten()(model)
    dense = Dense(units=n_classes,
                  activation="softmax",
                  kernel_initializer="he_normal")(flatten)
    print('dense', dense.shape)

    model = Model(inputs=video, outputs=dense)
    return model
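
Note that this builds a full VGG/C3D-style tower per frame (max_len of them), so the graph is enormous even for short clips; a minimal smoke test under that caveat:

# keep max_len tiny when experimenting: each frame gets its own conv tower
model = joint_TCN_resnet(n_classes=10, max_len=4, art='V2')
model.summary()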
Example No. 45
    def __init__(self, img_w=512, labeltype_hinting=True, verbose=1):
        # Input Parameters
        self.img_h = 64
        self.words_per_epoch = 10
        self.val_split = 0.2
        self.val_words = int(self.words_per_epoch * (self.val_split))

        # Network parameters
        self.conv_filters = 16
        self.kernel_size = (3, 3)
        self.pool_size = 2
        self.time_dense_size = 32
        self.rnn_size = 512

        self.minibatch_size = 32

        if K.image_data_format() == 'channels_first':
            input_shape = (1, img_w, self.img_h)
        else:
            input_shape = (img_w, self.img_h, 1)

        self.img_gen = TextImageGenerator(
            monogram_file=os.path.join(os.getcwd(), 'wordlist.txt'),
            bigram_file=os.path.join(os.getcwd(), 'bigram_wordlist.txt'),
            minibatch_size=32,
            img_w=img_w,
            img_h=self.img_h,
            downsample_factor=(self.pool_size**2),
            val_split=self.words_per_epoch - self.val_words)
        act = 'relu'
        self.input_data = Input(name='the_input',
                                shape=input_shape,
                                dtype='float32')
        inner = Conv2D(self.conv_filters,
                       self.kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv1')(self.input_data)
        inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size),
                             name='max1')(inner)
        inner = Conv2D(self.conv_filters,
                       self.kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv2')(inner)
        inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size),
                             name='max2')(inner)

        conv_to_rnn_dims = (img_w // (self.pool_size**2),
                            (self.img_h //
                             (self.pool_size**2)) * self.conv_filters)
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

        # cuts down input size going into RNN:
        inner = Dense(self.time_dense_size, activation=act,
                      name='dense1')(inner)

        # Two layers of bidirectional GRUs
        # GRU seems to work as well, if not better than LSTM:
        gru_1 = GRU(self.rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(self.rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        # transforms RNN output to character activations:
        self.inner = Dense(self.img_gen.get_output_size(),
                           kernel_initializer='he_normal',
                           name='dense2')(concatenate([gru_2, gru_2b]))
        y_pred = Activation('softmax', name='softmax')(self.inner)

        Model(inputs=self.input_data, outputs=y_pred).summary()

        labels = Input(name='the_labels',
                       shape=[self.img_gen.absolute_max_string_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
            [y_pred, labels, input_length, label_length])

        # clipnorm seems to speed up convergence
        #use one of these two
        sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        #adam= Adam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6, clipnorm=5)

        self.model = Model(
            inputs=[self.input_data, labels, input_length, label_length],
            outputs=loss_out)

        self.test_func = K.function([self.input_data], [y_pred])
Example No. 46
def build_model( baseline_cnn = False ):
    #Based on kernel https://www.kaggle.com/devm2024/keras-model-for-beginners-0-210-on-lb-eda-r-d
    image_input = Input( shape = (75, 75, 3), name = 'images' )
    angle_input = Input( shape = [1], name = 'angle' )
    activation = 'elu'
    bn_momentum = 0.99
    
    # Simple CNN as baseline model
    if baseline_cnn:
        model = Sequential()

        model.add( Conv2D(16, kernel_size = (3, 3), activation = 'relu', input_shape = (75, 75, 3)) )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( MaxPooling2D(pool_size = (3, 3), strides = (2, 2)) )
        model.add( Dropout(0.2) )

        model.add( Conv2D(32, kernel_size = (3, 3), activation = 'relu') )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) )
        model.add( Dropout(0.2) )

        model.add( Conv2D(64, kernel_size = (3, 3), activation = 'relu') )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) )
        model.add( Dropout(0.2) )

        model.add( Conv2D(128, kernel_size = (3, 3), activation = 'relu') )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) )
        model.add( Dropout(0.2) )

        model.add( Flatten() )

        model.add( Dense(256, activation = 'relu') )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( Dropout(0.3) )

        model.add( Dense(128, activation = 'relu') )
        model.add( BatchNormalization(momentum = bn_momentum) )
        model.add( Dropout(0.3) )

        model.add( Dense(1, activation = 'sigmoid') )

        opt = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 )

        model.compile( loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'] )

        model.summary()

    else:
        img_1 = Conv2D( 32, kernel_size = (3, 3), activation = activation, padding = 'same' ) ((BatchNormalization(momentum=bn_momentum) ) ( image_input) )
        img_1 = MaxPooling2D( (2,2)) (img_1 )
        img_1 = Dropout( 0.2 )( img_1 )

        img_1 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) )
        img_1 = MaxPooling2D( (2,2) ) ( img_1 )
        img_1 = Dropout( 0.2 )( img_1 )
  
         # Residual block
        img_2 = Conv2D( 128, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) )
        img_2 = Dropout(0.2) ( img_2 )
        img_2 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_2) )
        img_2 = Dropout(0.2) ( img_2 )
        
        img_res = add( [img_1, img_2] )

        # Filter residual output
        img_res = Conv2D( 128, kernel_size = (3, 3), activation = activation ) ( (BatchNormalization(momentum=bn_momentum)) (img_res) )
        img_res = MaxPooling2D( (2,2) ) ( img_res )
        img_res = Dropout( 0.2 )( img_res )
        img_res = GlobalMaxPooling2D() ( img_res )
        
        cnn_out = ( Concatenate()( [img_res, BatchNormalization(momentum=bn_momentum)(angle_input)]) )

        dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(256, activation = activation) (cnn_out)) )
        dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(64, activation = activation) (dense_layer)) )
        output = Dense( 1, activation = 'sigmoid' ) ( dense_layer )
        
        model = Model( [image_input, angle_input], output )

        opt = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 )

        model.compile( loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'] )

        model.summary()

    return model
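
The residual branch takes two inputs (image plus incidence angle); a sketch of feeding it, with hypothetical random data:

import numpy as np

model = build_model(baseline_cnn=False)
X_img = np.random.rand(8, 75, 75, 3).astype('float32')  # hypothetical images
X_angle = np.random.rand(8, 1).astype('float32')         # hypothetical angles
y = np.random.randint(0, 2, size=(8, 1))
model.fit([X_img, X_angle], y, epochs=1, batch_size=4)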
Example No. 47
    def RCL_block(l_settings,
                  l,
                  pool=True,
                  increase_dim=False,
                  layer_num=None):

        ## if layer_num==1:
        ## print "\nCreating Recurrent blocks ...",

        input_num_filters = l_settings.output_shape[1]

        if increase_dim:
            out_num_filters = input_num_filters * 2

        else:
            out_num_filters = input_num_filters

        conv1 = Conv2D(out_num_filters,
                       3,
                       strides=3,
                       padding='same',
                       data_format='channels_last')
        stack1 = conv1(l)
        stack2 = BatchNormalization()(stack1)
        stack3 = PReLU()(stack2)

        conv2 = Conv2D(out_num_filters,
                       filtersize,
                       strides=1,
                       padding='same',
                       kernel_initializer='he_normal',
                       data_format='channels_last')
        stack4 = conv2(stack3)
        stack5 = add([stack1, stack4])
        stack6 = BatchNormalization()(stack5)
        stack7 = PReLU()(stack6)

        conv3 = Conv2D(out_num_filters,
                       filtersize,
                       strides=1,
                       padding='same',
                       weights=conv2.get_weights(),
                       data_format='channels_last')
        stack8 = conv3(stack7)
        stack9 = add([stack1, stack8])
        stack10 = BatchNormalization()(stack9)
        stack11 = PReLU()(stack10)

        conv4 = Conv2D(out_num_filters,
                       filtersize,
                       strides=1,
                       padding='same',
                       weights=conv2.get_weights(),
                       data_format='channels_last')
        stack12 = conv4(stack11)
        stack13 = add([stack1, stack12])
        stack14 = BatchNormalization()(stack13)
        stack15 = PReLU()(stack14)

        # will pool layers if recurrent layer number multiple of 2
        if pool:
            stack16 = MaxPooling2D((2, 2), padding='same')(stack15)
            stack17 = Dropout(0.1)(stack16)
        else:
            stack17 = Dropout(0.1)(stack15)

        return stack17
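
Note that weights=conv2.get_weights() only initializes conv3 and conv4 from conv2; the three layers then train independently. True recurrent weight tying reuses one layer object, as in this sketch with a hypothetical feature map:

from keras.layers import Input, Conv2D, BatchNormalization, PReLU, add

inp = Input(shape=(32, 32, 64))              # hypothetical feature map
shared_conv = Conv2D(64, (3, 3), padding='same',
                     kernel_initializer='he_normal')

state = inp
for _ in range(3):                           # three unrolled recurrent steps
    state = add([inp, shared_conv(state)])   # same kernel every step
    state = PReLU()(BatchNormalization()(state))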
Example No. 48
                            "sequence_len": tf.constant(batch_size*[max_len])
                      },
                      signature="tokens",
                      as_dict=True)["elmo"]

from keras.models import Model, Input
from keras.layers.merge import add
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda

input_text = Input(shape=(max_len,), dtype=tf.string)
embedding = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input_text)
x = Bidirectional(LSTM(units=512, return_sequences=True,
                       recurrent_dropout=0.2, dropout=0.2))(embedding)
x_rnn = Bidirectional(LSTM(units=512, return_sequences=True,
                           recurrent_dropout=0.2, dropout=0.2))(x)
x = add([x, x_rnn])  # residual connection to the first biLSTM
out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)

model = Model(input_text, out)
model.load_weights('my_model_weights.h5', by_name=True)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

X_tr, X_val = X_tr[:1213*batch_size], X_tr[-135*batch_size:]
y_tr, y_val = y_tr[:1213*batch_size], y_tr[-135*batch_size:]
y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)
y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)

history = model.fit(np.array(X_tr), y_tr, validation_data=(np.array(X_val), y_val),
                    batch_size=batch_size, epochs=3, verbose=1)

from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
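
The seqeval import sets up the usual closing step: convert the softmax outputs back to tag strings before scoring. A sketch with an assumed idx2tag inverse vocabulary:

import numpy as np

pred = model.predict(np.array(X_val), batch_size=batch_size)
pred_tags = [[idx2tag[i] for i in row] for row in np.argmax(pred, axis=-1)]
true_tags = [[idx2tag[i] for i in row]
             for row in y_val.reshape(len(y_val), max_len)]
print(classification_report(true_tags, pred_tags))
print('F1-score: {:.3f}'.format(f1_score(true_tags, pred_tags)))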
Example No. 49
def train(x_train, y_train, x_val, y_val, img_w, epochs=10, batch_size=64):
    # Input Parameters
    img_h = 64
    words_per_epoch = 50000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network Parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 200

    # x_train = x
    # y_train = y
    # x_train, y_train, x_val, y_val = get_train_test_data(X, Y, .25)
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

        # fdir = os.path.dirname(get_file('wordlists.tgz',
        #                                 origin='wordlists.tgz', untar=True))
    fdir = 'wordlists'

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True,
                 go_backwards=True, kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(
        ctc_lambda_func, output_shape=(1,),
        name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    test_func = K.function([input_data], [y_pred])

    filepath = "weight.best.hdf5"

    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    csv_logger = CSVLogger('training.csv')
    callback_list = [checkpoint, csv_logger]

    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_val, y_val),
                        callbacks=callback_list)

    return model, test_func
def get_Model(training):
    input_shape = (img_w, img_h, 1)     # (128, 64, 1)

    # Make network
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(inputs)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # (None,64, 32, 64)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4', kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5', kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal', name='conv7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=((32, 2048)), name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)  # (None, 32, 512)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)
    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 1024)
    lstm_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal', name='dense2')(lstm_merged)  # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32') # (None ,8)
    input_length = Input(name='input_length', shape=[1], dtype='int64')     # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')     # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) #(None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)
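
# ctc_lambda_func, wrapped by the Lambda layer above, is defined elsewhere in
# the original example. A minimal sketch, assuming the standard Keras backend
# CTC helper (K.ctc_batch_cost), would be:
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # Drop the first two time steps: early RNN outputs tend to be garbage
    # (the same trick the canonical Keras OCR example uses).
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)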
Example #51
def decoder_model():
    inputs = Input(shape=(int(VIDEO_LENGTH / 2), 16, 26, 64))

    # (VIDEO_LENGTH/2) x 16 x 26
    convlstm_1 = ConvLSTM2D(filters=128,
                            kernel_size=(3, 3),
                            strides=(1, 1),
                            padding='same',
                            return_sequences=True,
                            recurrent_dropout=0.2)(inputs)
    x = TimeDistributed(BatchNormalization())(convlstm_1)
    out_1 = TimeDistributed(Activation('tanh'))(x)
    # x = TimeDistributed(LeakyReLU(alpha=0.2))(x)

    convlstm_2 = ConvLSTM2D(filters=128,
                            kernel_size=(3, 3),
                            strides=(1, 1),
                            padding='same',
                            return_sequences=True,
                            recurrent_dropout=0.2)(out_1)
    x = TimeDistributed(BatchNormalization())(convlstm_2)
    out_2 = TimeDistributed(Activation('tanh'))(x)
    # h_2 = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    # out_2 = UpSampling3D(size=(1, 2, 2))(h_2)

    res_1 = add([out_1, out_2])
    # res_1 = LeakyReLU(alpha=0.2)(res_1)
    res_1 = UpSampling3D(size=(1, 2, 2))(res_1)

    # (VIDEO_LENGTH/2) x 32 x 52
    convlstm_3a = ConvLSTM2D(filters=64,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             return_sequences=True,
                             recurrent_dropout=0.2)(res_1)
    x = TimeDistributed(BatchNormalization())(convlstm_3a)
    out_3a = TimeDistributed(Activation('tanh'))(x)
    # h_3 = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    # out_3a = UpSampling3D(size=(1, 2, 2))(h_3)

    convlstm_3b = ConvLSTM2D(filters=64,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             return_sequences=True,
                             recurrent_dropout=0.2)(out_3a)
    x = TimeDistributed(BatchNormalization())(convlstm_3b)
    out_3b = TimeDistributed(Activation('tanh'))(x)
    # h_3 = TimeDistributed(LeakyReLU(alpha=0.2))(x)
    # out_3 = UpSampling3D(size=(1, 2, 2))(h_3)

    res_2 = add([out_3a, out_3b])
    # res_2 = LeakyReLU(alpha=0.2)(res_2)
    res_2 = UpSampling3D(size=(1, 2, 2))(res_2)

    # (VIDEO_LENGTH/2) x 64 x 104
    convlstm_4a = ConvLSTM2D(filters=16,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             return_sequences=True,
                             recurrent_dropout=0.2)(res_2)
    x = TimeDistributed(BatchNormalization())(convlstm_4a)
    out_4a = TimeDistributed(Activation('tanh'))(x)
    # h_4 = TimeDistributed(LeakyReLU(alpha=0.2))(x)

    convlstm_4b = ConvLSTM2D(filters=16,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             return_sequences=True,
                             recurrent_dropout=0.2)(out_4a)
    x = TimeDistributed(BatchNormalization())(convlstm_4b)
    out_4b = TimeDistributed(Activation('tanh'))(x)
    # h_4 = TimeDistributed(LeakyReLU(alpha=0.2))(x)

    res_3 = add([out_4a, out_4b])
    # res_3 = LeakyReLU(alpha=0.2)(res_3)
    res_3 = UpSampling3D(size=(1, 2, 2))(res_3)

    # (VIDEO_LENGTH/2) x 128 x 208
    convlstm_5 = ConvLSTM2D(filters=3,
                            kernel_size=(3, 3),
                            strides=(1, 1),
                            padding='same',
                            return_sequences=True,
                            recurrent_dropout=0.2)(res_3)
    predictions = TimeDistributed(Activation('tanh'))(convlstm_5)

    model = Model(inputs=inputs, outputs=predictions)

    return model
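
# Quick shape check for decoder_model; VIDEO_LENGTH = 20 is an assumed value
# for illustration (the real constant is defined elsewhere in the example):
if __name__ == '__main__':
    import numpy as np
    VIDEO_LENGTH = 20
    decoder = decoder_model()
    frames = np.random.rand(1, VIDEO_LENGTH // 2, 16, 26, 64).astype('float32')
    print(decoder.predict(frames).shape)  # (1, 10, 128, 208, 3) after three 2x upsamplings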
Example #52
def get_model(img_w, img_h, minibatch_size, pool_size):
    conv_filters = 20
    kernel_size = (2, 2)
    time_dense_size = 32
    rnn_size = 256

    input_shape = (img_w, img_h, 1)
    
    img_gen = util.TextImageGenerator(minibatch_size=minibatch_size,
                                      img_w=img_w,
                                      img_h=img_h,
                                      downsample_factor=(pool_size ** 2),
                                      absolute_max_string_len=12
    )
        
    act = 'relu'
    
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
    
    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2))
                        * conv_filters)

    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
    
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    
    gru_1b = GRU(rnn_size, return_sequences=True,
                 go_backwards=True, kernel_initializer='he_normal',
                 name='gru1_b')(inner)

    gru1_merged = add([gru_1, gru_1b])

    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    
    gru_2b = GRU(rnn_size, return_sequences=True,
                 go_backwards=True, kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    inner = Dense(img_gen.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    
    y_pred = Activation('softmax', name='softmax')(inner)
    
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')

    input_length = Input(name='input_length', shape=[1], dtype='int64')

    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred,
                                                                       labels,
                                                                       input_length,
                                                                       label_length])

    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data,
                          labels,
                          input_length,
                          label_length], outputs=loss_out)

    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    test_func = K.function([input_data], [y_pred])

    return model, test_func
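
# Hedged usage sketch: turning test_func activations into label sequences with
# the Keras backend's best-path CTC decoder (decode_batch is a name introduced
# here for illustration, not part of the original example):
import numpy as np

def decode_batch(test_func, word_batch):
    out = test_func([word_batch])[0]                    # (batch, time, num_classes)
    input_len = np.ones(out.shape[0]) * out.shape[1]    # full-length sequences
    decoded, _ = K.ctc_decode(out, input_length=input_len, greedy=True)
    return K.get_value(decoded[0])                      # int labels, padded with -1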