def f(x):
    main = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, dropout,
                               dropout_mode, kernel_reg_l2, bias_reg_l2, batch_norm_first)(x)
    x = add([main, x])
    main = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu, dropout,
                               dropout_mode, kernel_reg_l2, bias_reg_l2, batch_norm_first)(x)
    x = add([main, x])
    if not last_block:
        x = MaxPooling1D(pool_size=3, strides=2)(x)
    return x
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height).
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # project the shortcut if the shapes differ
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        if SHORTCUT_OPTION == 'B':
            # 1x1 convolution to match dimensions
            shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              padding="valid",
                              kernel_initializer="he_normal",
                              kernel_regularizer=l2(0.0001))(input)
        elif SHORTCUT_OPTION == 'A':
            # spatial pooling with padded identity mapping
            x = AveragePooling2D(pool_size=(1, 1),
                                 strides=(stride_width, stride_height))(input)
            # multiply every element of x by 0 to get a zero matrix
            mul_zero = Lambda(lambda val: val * 0.0,
                              output_shape=K.int_shape(x)[1:])(x)
            shortcut = concatenate([x, mul_zero], axis=CHANNEL_AXIS)

    return add([shortcut, residual])
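# Hedged usage sketch (not from the source): a minimal basic residual unit built
# around the `_shortcut` helper above. Assumes the usual Keras 2 functional-API
# imports and that ROW_AXIS/COL_AXIS/CHANNEL_AXIS match the backend data format.
def basic_block_example(filters, strides=(1, 1)):
    def f(input):
        x = Conv2D(filters=filters, kernel_size=(3, 3), strides=strides,
                   padding="same", kernel_initializer="he_normal",
                   kernel_regularizer=l2(0.0001))(input)
        x = BatchNormalization(axis=CHANNEL_AXIS)(x)
        x = Activation("relu")(x)
        residual = Conv2D(filters=filters, kernel_size=(3, 3), padding="same",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001))(x)
        # _shortcut infers strides/channel projection from the two tensor shapes
        return _shortcut(input, residual)
    return f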
def _shortcut(input, residual, weight_decay=.0001, dropout=.0, identity=True,
              strides=(1, 1), with_bn=False, org=False):
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height).
    # Should be int if network architecture is correctly configured.
    # !!! The dropout argument is just a placeholder.
    # !!! It shall not be applied to identity mapping.
    # stride_width = input._keras_shape[ROW_AXIS] // residual._keras_shape[ROW_AXIS]
    # stride_height = input._keras_shape[COL_AXIS] // residual._keras_shape[COL_AXIS]
    # equal_channels = residual._keras_shape[CHANNEL_AXIS] == input._keras_shape[CHANNEL_AXIS]
    shortcut = input
    # 1x1 conv if shape is different, else identity.
    # if stride_width > 1 or stride_height > 1 or not equal_channels:
    if not identity:
        shortcut = Conv2D(filters=residual._keras_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1), strides=strides,
                          kernel_initializer="he_normal", padding="valid",
                          kernel_regularizer=l2(weight_decay))(input)
        if with_bn:
            shortcut = BatchNormalization(axis=CHANNEL_AXIS)(shortcut)

    addition = add([shortcut, residual])
    if not org:
        return addition
    relu = Activation("relu")(addition)
    return Dropout(dropout)(relu)
def ___conv4_block(input, k=1, dropout=0.0):
    init = input
    channel_axis = 1 if K.image_dim_ordering() == "th" else -1

    # Check if the input number of filters is the same as 64 * k, else create a
    # 1x1 convolution for this input
    if K.image_dim_ordering() == "th":
        if init._keras_shape[1] != 64 * k:
            init = Convolution2D(64 * k, (1, 1), activation='linear', padding='same')(init)
    else:
        if init._keras_shape[-1] != 64 * k:
            init = Convolution2D(64 * k, (1, 1), activation='linear', padding='same')(init)

    x = Convolution2D(64 * k, (3, 3), padding='same')(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    if dropout > 0.0:
        x = Dropout(dropout)(x)

    x = Convolution2D(64 * k, (3, 3), padding='same')(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    m = add([init, x])
    return m
def identity_block(self, input_tensor, filters, stage, block):
    '''The identity_block is the block that has no conv layer at shortcut

    # Arguments
        input_tensor: input tensor
        filters: list of integers, the nb_filters of the 2 conv layers at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a', 'b'..., current block label, used for generating layer names
    '''
    nb_filter1, nb_filter2 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = input_tensor
    x = Conv2D(nb_filter1, (self.kernel_width, self.kernel_height),
               padding='same', name=conv_name_base + 'a')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + 'a')(x)
    x = Activation('relu')(x)

    x = Conv2D(nb_filter2, (self.kernel_width, self.kernel_height),
               padding='same', name=conv_name_base + 'b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + 'b')(x)
    x = Activation('relu')(x)

    x = add([x, input_tensor])
    x = Activation('relu')(x)
    return x
def f(x, y):
    def scaling(xx, ss=1):
        return xx * ss

    scaled = Lambda(scaling, arguments={'ss': scale},
                    name='scale_{}'.format(block_name))(x)
    score = Conv2D(filters=classes, kernel_size=(1, 1), activation='linear',
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   name='score_{}'.format(block_name))(scaled)
    if y is None:
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(score)
    else:
        crop = CroppingLike2D(target_shape=K.int_shape(y), offset=crop_offset,
                              name='crop_{}'.format(block_name))(score)
        merge = add([y, crop])
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(merge)
    return upscore
def _conv_block(inp, convs, skip=False, train=False):
    x = inp
    count = 0
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        if conv['stride'] > 1:
            # peculiar padding as darknet prefers left and top
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        if 'train' in conv:
            trainflag = conv['train']  # update the value for the key
        else:
            trainflag = train
        x = Conv2D(conv['filter'], conv['kernel'], strides=conv['stride'],
                   # peculiar padding as darknet prefers left and top
                   padding='valid' if conv['stride'] > 1 else 'same',
                   name='conv2d_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True,
                   trainable=trainflag)(x)
        # if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='batch_normalization' + str(conv['layer_idx']), trainable=trainflag)(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, trainable=trainflag)(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']),
                          trainable=trainflag)(x)
    return add([skip_connection, x]) if skip else x
def dpcnn(embedding_matrix, embedding_size, trainable_embedding, maxlen, max_features,
          filter_nr, kernel_size, repeat_block, dense_size, repeat_dense, output_size,
          output_activation, max_pooling, mean_pooling, weighted_average_attention,
          concat_mode, dropout_embedding, conv_dropout, dense_dropout, dropout_mode,
          conv_kernel_reg_l2, conv_bias_reg_l2, dense_kernel_reg_l2, dense_bias_reg_l2,
          use_prelu, use_batch_norm, batch_norm_first):
    """
    Note:
        Implementation of http://ai.tencent.com/ailab/media/publications/ACL3-Brady.pdf
        Post-activation is used instead of pre-activation; could be worth exploring.
    """
    input_text = Input(shape=(maxlen,))
    if embedding_matrix is not None:
        embedding = Embedding(max_features, embedding_size,
                              weights=[embedding_matrix],
                              trainable=trainable_embedding)(input_text)
    else:
        embedding = Embedding(max_features, embedding_size)(input_text)
    embedding = dropout_block(dropout_embedding, dropout_mode)(embedding)

    x = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu,
                            conv_dropout, dropout_mode, conv_kernel_reg_l2,
                            conv_bias_reg_l2, batch_norm_first)(embedding)
    # Note: the original passed conv_bias_reg_l2 where use_batch_norm belongs; fixed here.
    x = convolutional_block(filter_nr, kernel_size, use_batch_norm, use_prelu,
                            conv_dropout, dropout_mode, conv_kernel_reg_l2,
                            conv_bias_reg_l2, batch_norm_first)(x)
    if embedding_size == filter_nr:
        x = add([embedding, x])
    else:
        embedding_resized = shape_matching_layer(filter_nr, use_prelu,
                                                 conv_kernel_reg_l2,
                                                 conv_bias_reg_l2)(embedding)
        x = add([embedding_resized, x])

    for _ in range(repeat_block):
        x = dpcnn_block(filter_nr, kernel_size, use_batch_norm, use_prelu,
                        conv_dropout, dropout_mode, conv_kernel_reg_l2,
                        conv_bias_reg_l2, batch_norm_first)(x)

    predictions = classification_block(dense_size=dense_size, repeat_dense=repeat_dense,
                                       output_size=output_size,
                                       output_activation=output_activation,
                                       max_pooling=max_pooling,
                                       mean_pooling=mean_pooling,
                                       weighted_average_attention=weighted_average_attention,
                                       concat_mode=concat_mode,
                                       dropout=dense_dropout,
                                       kernel_reg_l2=dense_kernel_reg_l2,
                                       bias_reg_l2=dense_bias_reg_l2,
                                       use_prelu=use_prelu,
                                       use_batch_norm=use_batch_norm,
                                       batch_norm_first=batch_norm_first)(x)
    model = Model(inputs=input_text, outputs=predictions)
    return model
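# Hedged usage sketch (illustrative hyperparameter values, not from the source):
# building a DPCNN for a 6-label problem with 300-dim randomly initialized embeddings.
# filter_nr == embedding_size here, so the embedding shortcut needs no reshaping.
model = dpcnn(embedding_matrix=None, embedding_size=300, trainable_embedding=True,
              maxlen=200, max_features=100000, filter_nr=300, kernel_size=3,
              repeat_block=4, dense_size=256, repeat_dense=1, output_size=6,
              output_activation='sigmoid', max_pooling=True, mean_pooling=False,
              weighted_average_attention=False, concat_mode='concat',
              dropout_embedding=0.2, conv_dropout=0.2, dense_dropout=0.5,
              dropout_mode='spatial', conv_kernel_reg_l2=1e-5, conv_bias_reg_l2=1e-5,
              dense_kernel_reg_l2=1e-5, dense_bias_reg_l2=1e-5,
              use_prelu=True, use_batch_norm=True, batch_norm_first=False)
model.compile(optimizer='adam', loss='binary_crossentropy')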
def residual_block(input_tensor, nb_filters, filter_sz, stage,
                   kernel_initializer='he_uniform', l2reg=0.0, use_shortcuts=True):
    """Create a ResNet pre-activation bottleneck layer."""
    nb_in_filters, nb_bottleneck_filters = nb_filters

    bn_name = 'bn' + str(stage)
    conv_name = 'conv' + str(stage)
    relu_name = 'relu' + str(stage)
    merge_name = 'add' + str(stage)

    # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1 conv
    if stage > 1:  # first activation is just after conv1
        x = BatchNormalization(axis=1, name=bn_name + 'a')(input_tensor)
        x = Activation('relu', name=relu_name + 'a')(x)
    else:
        x = input_tensor

    x = Convolution2D(nb_bottleneck_filters, (1, 1),
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      use_bias=False,
                      name=conv_name + 'a')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters via FxF conv
    x = BatchNormalization(axis=1, name=bn_name + 'b')(x)
    x = Activation('relu', name=relu_name + 'b')(x)
    x = Convolution2D(nb_bottleneck_filters, (filter_sz, filter_sz),
                      padding='same',
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      use_bias=False,
                      name=conv_name + 'b')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via 1x1 conv
    x = BatchNormalization(axis=1, name=bn_name + 'c')(x)
    x = Activation('relu', name=relu_name + 'c')(x)
    x = Convolution2D(nb_in_filters, (1, 1),
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=l2(l2reg),
                      name=conv_name + 'c')(x)

    # merge
    if use_shortcuts:
        x = add([x, input_tensor], name=merge_name)

    return x
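# Hedged usage sketch (not from the source): stacking the pre-activation bottleneck
# blocks above at a fixed spatial resolution. Assumes the usual keras.layers/
# keras.models imports and a channels-first backend to match axis=1 above.
img = Input(shape=(3, 32, 32))
x = Convolution2D(64, (3, 3), padding='same', name='conv0')(img)  # 64 == nb_in_filters
for stage in range(1, 4):
    x = residual_block(x, nb_filters=(64, 16), filter_sz=3, stage=stage)
example_model = Model(inputs=img, outputs=x)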
def f(x, y):
    score = Conv2D(filters=classes, kernel_size=(1, 1), activation='linear',
                   padding='valid', kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   name='score_{}'.format(block_name))(x)
    if y is not None:
        def scaling(xx, ss=1):
            return xx * ss

        scaled = Lambda(scaling, arguments={'ss': scale},
                        name='scale_{}'.format(block_name))(score)
        score = add([y, scaled])
    upscore = BilinearUpSampling2D(target_shape=target_shape,
                                   name='upscore_{}'.format(block_name))(score)
    return upscore
def _conv_block(inp, convs, do_skip=True):
    x = inp
    count = 0
    for conv in convs:
        if count == (len(convs) - 2) and do_skip:
            skip_connection = x
        count += 1
        if conv['stride'] > 1:
            # unlike tensorflow, darknet prefers left and top paddings
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        x = Conv2D(conv['filter'], conv['kernel'], strides=conv['stride'],
                   # unlike tensorflow, darknet prefers left and top paddings
                   padding='valid' if conv['stride'] > 1 else 'same',
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
    return add([skip_connection, x]) if do_skip else x
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1x1 conv if shape is different, else identity.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001))(input)

    return add([shortcut, residual])
def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        if conv['kernel'] > 1:
            x = ZeroPadding2D(1)(x)
        x = Conv2D(conv['filter'], conv['kernel'], strides=conv['stride'],
                   padding='valid',
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
    return add([skip_connection, x]) if skip else x
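# Hedged usage sketch (not from the source): `convs` is a list of layer-spec dicts;
# this mirrors the YOLOv3-style residual pair that darknet loaders feed to _conv_block.
# Assumes x already has 64 channels so the skip addition matches the second conv.
x = _conv_block(x, [{'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                    {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2}],
                skip=True)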
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height).
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1x1 conv if shape is different, else identity.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001))(input)

    return add([shortcut, residual])
def word_model():
    img_w = word_cfg['img_w']
    img_h = word_cfg['img_h']
    max_text_len = word_cfg['max_text_len']

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    # Make Network
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3), padding='same', name='conv1',
                   kernel_initializer='he_normal')(input_data)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # (None, 64, 32, 64)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2',
                   kernel_initializer='he_normal')(inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2), padding='same',
                   kernel_initializer='he_normal', name='conv7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=(32, 2048), name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal',
                  name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    gru_1 = GRU(256, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)  # (None, 32, 256)
    gru_1b = GRU(256, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    reversed_gru_1b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(gru_1b)

    gru1_merged = add([gru_1, reversed_gru_1b])  # (None, 32, 256)
    gru1_merged = BatchNormalization()(gru1_merged)

    gru_2 = GRU(256, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(256, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    reversed_gru_2b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(gru_2b)

    gru2_merged = concatenate([gru_2, reversed_gru_2b])  # (None, 32, 512)
    gru2_merged = BatchNormalization()(gru2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)  # (None, 32, 80)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # loss function
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model_predict = Model(inputs=input_data, outputs=y_pred)
    model_predict.summary()
    return model, model_predict
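# Hedged usage sketch (not from the source): the training model wraps the CTC loss in
# its 'ctc' Lambda output, so it is typically compiled with a pass-through dummy loss.
# Assumes word_cfg, num_classes and ctc_lambda_func are defined in this scope.
model, model_predict = word_model()
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam')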
def RCL_block(self, l, activation_function=LeakyReLU(), features=32,
              kernel_size=3, name="RCL"):
    """Build recurrent ConvLayer.

    See https://doi.org/10.1109/CVPR.2015.7298958 (i.e. Figure 3)

    Parameters
    ----------
    l: Keras Layer (Tensor?)
        Previous layer of the neural network.
    activation_function: Keras Activation Function
        Activation function (standard: LeakyReLU()).
    features: int
        Number of extracted features.
    kernel_size: int
        Size of Convolution Kernel.
    name: string
        Name of the recurrent ConvLayer (standard: 'RCL').

    Returns
    -------
    stack15: keras layer stack
        Recurrent ConvLayer as Keras Layer Stack
    """
    # Keras-1 kwargs (border_mode, init) updated to the Keras-2 API on the
    # built-in Conv1D; the custom Convolution1D_tied is assumed to still take
    # border_mode, so it is left unchanged.
    conv1 = Conv1D(features, kernel_size, padding='same', name=name)
    stack1 = conv1(l)
    stack2 = activation_function(stack1)
    stack3 = BatchNormalization()(stack2)

    # UNROLLED RECURRENT BLOCK(s)
    conv2 = Conv1D(features, kernel_size, padding='same',
                   kernel_initializer='he_normal')
    stack4 = conv2(stack3)
    stack5 = add([stack1, stack4])
    stack6 = activation_function(stack5)
    stack7 = BatchNormalization()(stack6)

    conv3 = Convolution1D_tied(features, kernel_size, border_mode='same', tied_to=conv2)
    stack8 = conv3(stack7)
    stack9 = add([stack1, stack8])
    stack10 = activation_function(stack9)
    stack11 = BatchNormalization()(stack10)

    conv4 = Convolution1D_tied(features, kernel_size, border_mode='same', tied_to=conv2)
    stack12 = conv4(stack11)
    stack13 = add([stack1, stack12])
    stack14 = activation_function(stack13)
    stack15 = BatchNormalization()(stack14)

    return stack15
def __bottleneck_block(input, filters=64, cardinality=8, strides=1, weight_decay=5e-4):
    '''Adds a bottleneck block

    Args:
        input: input tensor
        filters: number of output filters
        cardinality: cardinality factor describing the number of grouped convolutions
        strides: performs strided convolution for downsampling if > 1
        weight_decay: weight decay factor

    Returns:
        a keras tensor
    '''
    init = input
    grouped_channels = int(filters / cardinality)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Check if the input number of filters is the same as 2 * filters, else create
    # a 1x1 convolution for this input
    if K.image_data_format() == 'channels_first':
        if init._keras_shape[1] != 2 * filters:
            init = Conv1D(filters * 2, 1, padding='same', strides=strides,
                          use_bias=False, kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(init)
            init = BatchNormalization(axis=channel_axis)(init)
    else:
        if init._keras_shape[-1] != 2 * filters:
            init = Conv1D(filters * 2, 1, padding='same', strides=strides,
                          use_bias=False, kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(init)
            init = BatchNormalization(axis=channel_axis)(init)

    x = Conv1D(filters, 1, padding='same', use_bias=False,
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)

    x = __grouped_convolution_block(x, grouped_channels, cardinality, strides, weight_decay)

    x = Conv1D(filters * 2, 1, padding='same', use_bias=False,
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(axis=channel_axis)(x)

    x = add([init, x])
    x = LeakyReLU()(x)
    return x
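# __grouped_convolution_block is referenced above but not shown; a plausible sketch
# (an assumption, modeled on common Keras ResNeXt implementations): slice the channels
# into `cardinality` groups, convolve each group, then concatenate and normalize.
def __grouped_convolution_block(input, grouped_channels, cardinality, strides,
                                weight_decay=5e-4):
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    group_list = []
    for c in range(cardinality):
        # bind c as a default argument to avoid Python's late-binding closure pitfall
        x = Lambda(lambda z, c=c:
                   z[:, :, c * grouped_channels:(c + 1) * grouped_channels]
                   if K.image_data_format() == 'channels_last'
                   else z[:, c * grouped_channels:(c + 1) * grouped_channels, :])(input)
        x = Conv1D(grouped_channels, 3, padding='same', strides=strides,
                   use_bias=False, kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay))(x)
        group_list.append(x)
    group_merge = concatenate(group_list, axis=channel_axis)
    x = BatchNormalization(axis=channel_axis)(group_merge)
    x = LeakyReLU()(x)
    return x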
def resnet8(img_width, img_height, img_channels, output_dim):
    """
    Define model architecture.

    # Arguments
       img_width: Target image width.
       img_height: Target image height.
       img_channels: Target image channels.
       output_dim: Dimension of model output.

    # Returns
       model: A Model instance.
    """
    # Input
    img_input = Input(shape=(img_height, img_width, img_channels))

    x1 = Conv2D(32, (5, 5), strides=[2, 2], padding='same')(img_input)
    x1 = MaxPooling2D(pool_size=(3, 3), strides=[2, 2])(x1)

    # First residual block
    x2 = keras.layers.normalization.BatchNormalization()(x1)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3), strides=[2, 2], padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x2 = keras.layers.normalization.BatchNormalization()(x2)
    x2 = Activation('relu')(x2)
    x2 = Conv2D(32, (3, 3), padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x2)

    x1 = Conv2D(32, (1, 1), strides=[2, 2], padding='same')(x1)
    x3 = add([x1, x2])

    # Second residual block
    x4 = keras.layers.normalization.BatchNormalization()(x3)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3), strides=[2, 2], padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x4 = keras.layers.normalization.BatchNormalization()(x4)
    x4 = Activation('relu')(x4)
    x4 = Conv2D(64, (3, 3), padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x4)

    x3 = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3)
    x5 = add([x3, x4])

    # Third residual block
    x6 = keras.layers.normalization.BatchNormalization()(x5)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(128, (3, 3), strides=[2, 2], padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x6 = keras.layers.normalization.BatchNormalization()(x6)
    x6 = Activation('relu')(x6)
    x6 = Conv2D(128, (3, 3), padding='same',
                kernel_initializer="he_normal",
                kernel_regularizer=regularizers.l2(1e-4))(x6)

    x5 = Conv2D(128, (1, 1), strides=[2, 2], padding='same')(x5)
    x7 = add([x5, x6])

    x = Flatten()(x7)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    # Steering channel
    steer = Dense(output_dim)(x)

    # Collision channel
    coll = Dense(output_dim)(x)
    coll = Activation('sigmoid')(coll)

    # Define steering-collision model
    model = Model(inputs=[img_input], outputs=[steer, coll])
    print(model.summary())

    return model
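# Hedged usage sketch (illustrative sizes, not from the source): resnet8 returns a
# two-headed model, so it is compiled with one loss per output (regression for
# steering, binary cross-entropy for collision).
model = resnet8(img_width=200, img_height=200, img_channels=1, output_dim=1)
model.compile(optimizer='adam', loss=['mse', 'binary_crossentropy'])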
                kernel_initializer='he_normal',
                activation='sigmoid')(Conv4)

########### Layer 6 ############
Conv6 = Conv2D(filters=32, kernel_size=[3, 3], strides=[1, 1], padding='same',
               kernel_initializer='he_normal', activation='sigmoid')(Conv5)

########### Layer 7 ############
Conv7 = add([
    Conv2DTranspose(filters=16, kernel_size=[2, 2], strides=[2, 2], padding='same',
                    kernel_initializer='he_normal', activation='sigmoid')(Conv6),
    Conv3
])

########### Layer 8 ############
Conv8 = Conv2D(filters=8, kernel_size=[3, 3], strides=[1, 1], padding='same',
               kernel_initializer='he_normal', activation='sigmoid')(Conv7)

########### Layer 9 ############
Conv9 = add([
    Conv2DTranspose(filters=8,
                                      return_sequences=True, name='text_BiLSTM'))

# Text inputs
text_input = Input(shape=(None, max_char_seq), dtype='float32',
                   name='all_tokens')  # [n_samp, n_word_seq, n_char_seq]
encoded_words = TimeDistributed(
    word_encoder, name='encoded_joint_text')(text_input)  # [n_samp, n_word_seq, n_hidden]

# Field indicator embedding
field_input = Input(shape=(None, len(text_fields)), dtype='float32',
                    name='field_indicators')
encoded_fields = TimeDistributed(Dense(encoded_words.shape[-1].value),
                                 name='field_indicator_embedding')(field_input)
encoded_word_fields = add([encoded_words, encoded_fields])

# mask the blank inputs to the LSTM
mask = mask_from_embedded_seq(text_input)
masked_combined_words = masked_seq(encoded_word_fields, mask)
field_embedding = text_encoder(masked_combined_words)  # [n_samp, n_seq, n_hidden]
# mask the LSTM outputs corresponding to the blank inputs
masked_field_embedding = masked_seq(field_embedding, mask)

# NAICS input
naics_input = Input(shape=(60,), dtype='float32', name='naics')
naics_embedding = naics_res_encoder(input_layer=naics_input, n_layers=3)

# Job category
job_category_input = Input(shape=(12,), dtype='float32', name='job_category')
def regression_net(input_tensor=None, trainable=False):
    img_input = input_tensor

    # conv_1
    conv1_1 = Convolution2D(32, (5, 5), strides=(1, 1), padding='same',
                            activation='relu', name='conv1_1')(img_input)
    pool1 = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(conv1_1)

    # conv_2
    conv2_1 = Convolution2D(64, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv2_1')(pool1)
    conv2_2 = Convolution2D(64, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(conv2_2)

    # conv_3
    conv3_1 = Convolution2D(128, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv3_1')(pool2)
    conv3_2 = Convolution2D(128, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv3_2')(conv3_1)
    pool3 = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(conv3_2)
    pool3_for_fuse = Convolution2D(128, (1, 1), strides=(1, 1), padding='same',
                                   activation='relu', name='pool3_for_fuse')(pool3)

    # conv_4
    conv4_1 = Convolution2D(256, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv4_1')(pool3)
    conv4_2 = Convolution2D(256, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv4_2')(conv4_1)
    pool4 = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(conv4_2)
    pool4_for_fuse = Convolution2D(128, (1, 1), strides=(1, 1), padding='same',
                                   activation='relu', name='pool4_for_fuse')(pool4)

    # conv_5
    conv5_1 = Convolution2D(512, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv5_1')(pool4)
    conv5_2 = Convolution2D(512, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv5_2')(conv5_1)
    pool5 = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(conv5_2)
    pool5_for_fuse = Convolution2D(128, (1, 1), strides=(1, 1), padding='same',
                                   activation='relu', name='pool5_for_fuse')(pool5)

    # conv_6
    conv6_1 = Convolution2D(512, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv6_1')(pool5)
    conv6_2 = Convolution2D(512, (3, 3), strides=(1, 1), padding='same',
                            activation='relu', name='conv6_2')(conv6_1)
    pool6 = MaxPooling2D((2, 2), strides=(2, 2), name='pool6')(conv6_2)

    # conv_7
    conv7_1 = Convolution2D(128, (1, 1), strides=(1, 1), padding='same',
                            activation='relu', name='conv7_1')(pool6)

    # progressively upsample and fuse with the 1x1-projected pooling outputs
    upscore2 = Conv2DTranspose(filters=128, kernel_size=(2, 2), strides=(2, 2),
                               padding='valid', use_bias=False,
                               name='upscore2')(conv7_1)
    fuse_pool5 = add([upscore2, pool5_for_fuse])
    upscore4 = Conv2DTranspose(filters=128, kernel_size=(2, 2), strides=(2, 2),
                               padding='valid', use_bias=False,
                               name='upscore4')(fuse_pool5)
    fuse_pool4 = add([upscore4, pool4_for_fuse])
    upscore8 = Conv2DTranspose(filters=128, kernel_size=(2, 2), strides=(2, 2),
                               padding='valid', use_bias=False,
                               name='upscore8')(fuse_pool4)
    fuse_pool3 = add([upscore8, pool3_for_fuse])
    upscore16 = Conv2DTranspose(filters=128, kernel_size=(2, 2), strides=(2, 2),
                                padding='valid', use_bias=False,
                                name='upscore16')(fuse_pool3)

    x = Convolution2D(128, (1, 1), strides=(1, 1), padding='same',
                      activation='relu')(upscore16)
    x = Convolution2D(8, (1, 1), strides=(1, 1), padding='same',
                      activation='sigmoid')(x)
    # map the sigmoid output from [0, 1] to coordinates in [-400, 400]
    x_regr = Lambda(lambda t: 800 * t - 400)(x)
    return x_regr
def deepLoco(inputs):
    print("input shape:", inputs.shape)

    conv1 = Conv2D(16, 5, activation='relu', padding='same',
                   kernel_initializer='he_normal')(inputs)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(16, 5, activation='relu', padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(64, 5, activation='relu', strides=2, padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv1.shape)
    # pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    # print("pool1 shape:", pool1.shape)

    conv2 = Conv2D(64, 5, activation='relu', padding='same',
                   kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(64, 5, activation='relu', padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(256, 3, activation='relu', strides=2, padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv3 shape:", conv2.shape)
    # pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    # print("pool2 shape:", pool2.shape)

    conv3 = Conv2D(256, 3, activation='relu', padding='same',
                   kernel_initializer='he_normal')(conv2)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256, 3, activation='relu', padding='same',
                   kernel_initializer='he_normal')(conv3)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256, 3, activation='relu', strides=4, padding='same',
                   kernel_initializer='he_normal')(conv3)
    print("conv3 shape:", conv3.shape)
    # pool3 = MaxPooling2D(pool_size=(4, 4))(conv3)
    # print("pool3 shape:", pool3.shape)

    flat1 = Flatten()(conv3)
    print("flat ", flat1.shape)
    dense1 = Dense(2048)(flat1)
    print("dense1 ", dense1.shape)
    reshape1 = Reshape([2048, 1])(dense1)
    print("reshape1", reshape1.shape)

    shortcut = reshape1
    # res1 = build_resnet(reshape1, basic_block, [2])
    res1 = Conv1D(1, kernel_size=3, strides=1, padding='same')(reshape1)
    print("res1 ", res1.shape)
    res1 = LeakyReLU()(res1)
    res1 = BatchNormalization()(res1)
    # res1 = add_common_layers(res1)

    add1 = add([shortcut, res1])
    print("add1 ", add1.shape)
    add1 = LeakyReLU()(add1)
    add1 = BatchNormalization()(add1)

    shortcut = add1
    res2 = Conv1D(1, kernel_size=3, strides=1, padding='same')(add1)
    res2 = LeakyReLU()(res2)
    res2 = BatchNormalization()(res2)
    # res2 = add_common_layers(res2)
    print("res2 ", res2.shape)

    add2 = add([shortcut, res2])
    print("add2 ", add2.shape)
    add2 = LeakyReLU()(add2)
    add2 = BatchNormalization()(add2)

    weights = Conv1D(1, kernel_size=3, strides=8, padding='same')(add2)
    # weights = MaxPooling1D(pool_size=8)(weights)
    weights = Activation("relu")(weights)
    print("weights ", weights.shape)

    # positions = Dense(kernel_initializer="he_normal", activation="softmax")(add2)
    # positions = Dense(units=16)(add2)
    # print(positions.shape)
    # positions = Reshape([2048, 16, 1])(positions)
    positions = Conv1D(2, kernel_size=3, strides=8, padding='same')(add2)
    # positions = MaxPooling1D(pool_size=8)(positions)
    positions = Activation("sigmoid")(positions)
    print("positions ", positions.shape)

    # return [weights, positions]
    return concatenate([weights, positions], axis=2)
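# Hedged usage sketch (hypothetical input size, not from the source): wiring deepLoco
# into a Model; the Flatten/Dense(2048) stage fixes the output length at 2048
# regardless of the exact input resolution.
inputs = Input(shape=(64, 64, 1))
model = Model(inputs=inputs, outputs=deepLoco(inputs))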
def hullwhite_fnn_model(data, method, loss, exponent=6, nb_epochs=0, batch_size=16,
                        activation='tanh', layers=4, init='he_normal',
                        dropout=0.5, dropout_first=None, dropout_middle=None,
                        dropout_last=None, early_stop=125, lr_patience=40,
                        reduce_lr=0.5, reduce_lr_min=0.000009, residual_cells=1,
                        **kwargs):
    assert isinstance(activation, string_types)
    if activation == "elu":
        if 'alpha' in kwargs:
            alpha = kwargs['alpha']
        else:
            alpha = 1.0
        activation = ELU(alpha)
    elif activation == "rbf":
        activation = Activation(rbf)
    else:
        activation = Activation(activation)

    x_train = data['x_train']
    x_valid = data['x_valid']
    x_test = data['x_test']
    y_train = data['y_train']
    y_valid = data['y_valid']

    if dropout_first is None:
        dropout_first = dropout
    if dropout_middle is None:
        dropout_middle = dropout_first
    if dropout_last is None:
        dropout_last = dropout_middle

    assert residual_cells >= 0
    if residual_cells == 0:
        print('Simple with no BN or residual')
    else:
        print('Residual with BN (ex Out) - Activation before Dense - with %s residual cells'
              % residual_cells)
        print(' - Early Stop: Patience %s; Reduce LR Patience %s, Factor: %s, Min: %s' %
              (early_stop, lr_patience, reduce_lr, reduce_lr_min))
        print(' - Exp:%s, Layer:%s, df:%s, dm:%s, dl:%s' %
              (exponent, layers, dropout_first, dropout_middle, dropout_last))
        print(' - Loss:%s' % loss)

    # A copy of the activation layer needs to be used, instead of the layer
    # directly, because otherwise Keras will not be able to load a saved
    # configuration from a json file.
    act_idx = 1
    inp = Input(shape=(x_train.shape[1],))
    ly = BatchNormalization()(inp)
    ly = Dense(2 ** exponent, kernel_initializer=init)(ly)
    act = copy(activation)
    act.name = act.name + "_" + str(act_idx)
    act_idx = act_idx + 1
    ly = act(ly)
    ly = Dropout(dropout_first)(ly)
    if residual_cells > 0:
        for i in range(layers - 1):
            middle = BatchNormalization()(ly)
            act = copy(activation)
            act.name = act.name + "_" + str(act_idx)
            act_idx = act_idx + 1
            middle = act(middle)
            middle = Dense(2 ** exponent, kernel_initializer=init)(middle)
            middle = Dropout(dropout_middle)(middle)
            for j in range(residual_cells - 1):
                act = copy(activation)
                act.name = act.name + "_" + str(act_idx)
                act_idx = act_idx + 1
                middle = act(middle)
                middle = Dense(2 ** exponent, kernel_initializer=init)(middle)
                middle = Dropout(dropout_middle)(middle)
            ly = add([ly, middle])
        ly = Dropout(dropout_last)(ly)
    else:
        for i in range(layers - 1):
            ly = Dense(2 ** exponent, kernel_initializer=init)(ly)
            act = copy(activation)
            act.name = act.name + "_" + str(act_idx)
            act_idx = act_idx + 1
            ly = act(ly)
            ly = Dropout(dropout_middle)(ly)
    ly = Dense(y_train.shape[1], kernel_initializer=init)(ly)

    nn = Model(inputs=inp, outputs=ly)
    nn.compile(method, loss=loss)

    if nb_epochs > 0:
        callbacks = []
        if early_stop is not None:
            earlyStopping = EarlyStopping(monitor='val_loss', patience=early_stop)
            callbacks.append(earlyStopping)
        if reduce_lr is not None:
            reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=reduce_lr,
                                         patience=lr_patience, min_lr=reduce_lr_min,
                                         verbose=1)
            callbacks.append(reduceLR)
        history2 = nn.fit(x_train, y_train, batch_size=batch_size, epochs=nb_epochs,
                          verbose=2, callbacks=callbacks,
                          validation_data=(x_valid, y_valid))
        history = {'history': history2.history, 'params': history2.params}
    else:
        history = {'history': [], 'params': []}
    return (x_train, x_valid, x_test, nn, history)
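# Hedged usage sketch (hypothetical shapes, not from the source): the `data` dict is
# expected to carry train/valid/test splits as 2-D arrays; nb_epochs=0 builds and
# compiles the network without fitting.
import numpy as np
data = {'x_train': np.random.rand(256, 10), 'y_train': np.random.rand(256, 1),
        'x_valid': np.random.rand(64, 10), 'y_valid': np.random.rand(64, 1),
        'x_test': np.random.rand(64, 10)}
x_tr, x_va, x_te, nn, history = hullwhite_fnn_model(data, 'adam', 'mse', nb_epochs=0)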
def add_top_layers(model, image_size, patch_net='resnet50', block_type='resnet',
                   depths=[512, 512], repetitions=[1, 1],
                   block_fn=bottleneck_org, nb_class=2,
                   shortcut_with_bn=True, bottleneck_enlarge_factor=4,
                   dropout=.0, weight_decay=.0001,
                   add_heatmap=False, avg_pool_size=(7, 7),
                   return_heatmap=False, add_conv=True, add_shortcut=False,
                   hm_strides=(1, 1), hm_pool_size=(5, 5),
                   fc_init_units=64, fc_layers=2):

    def add_residual_blocks(block):
        for depth, repetition in zip(depths, repetitions):
            block = _residual_block(
                block_fn, depth, repetition,
                dropout=dropout, weight_decay=weight_decay,
                shortcut_with_bn=shortcut_with_bn,
                bottleneck_enlarge_factor=bottleneck_enlarge_factor)(block)
        pool = GlobalAveragePooling2D()(block)
        dropped = Dropout(dropout)(pool)
        return dropped

    def add_vgg_blocks(block):
        for depth, repetition in zip(depths, repetitions):
            block = _vgg_block(depth, repetition,
                               dropout=dropout,
                               weight_decay=weight_decay)(block)
        pool = GlobalAveragePooling2D()(block)
        dropped = Dropout(dropout)(pool)
        return dropped

    def add_fc_layers(block):
        flattened = Flatten()(block)
        dropped = Dropout(dropout)(flattened)
        units = fc_init_units
        for i in range(fc_layers):  # was xrange; range works on Python 2 and 3
            fc = Dense(units, kernel_initializer="he_normal",
                       kernel_regularizer=l2(weight_decay))(dropped)
            norm = BatchNormalization()(fc)
            relu = Activation('relu')(norm)
            dropped = Dropout(dropout)(relu)
            units //= 2  # integer division keeps the Dense unit count an int
        return dropped, flattened

    if patch_net == 'resnet50':
        last_kept_layer = model.layers[-5]
    elif patch_net == 'yaroslav':
        last_kept_layer = model.layers[-3]
    else:
        last_kept_layer = model.layers[-4]
    block = last_kept_layer.output
    channels = 1 if patch_net == 'yaroslav' else 3
    image_input = Input(shape=(image_size[0], image_size[1], channels))
    model0 = Model(inputs=model.inputs, outputs=block)
    block = model0(image_input)

    if add_heatmap or return_heatmap:  # add softmax heatmap.
        pool1 = AveragePooling2D(pool_size=avg_pool_size,
                                 strides=hm_strides)(block)
        if return_heatmap:
            dropped = pool1
        else:
            dropped = Dropout(dropout)(pool1)
        clf_layer = model.layers[-1]
        clf_weights = clf_layer.get_weights()
        clf_classes = clf_layer.output_shape[1]
        if return_heatmap:
            # was `activations.softmax(x, axis=CHANNEL_AXIS)` with `x` undefined;
            # wrapped so the channel-wise softmax is applied to the layer's input.
            activation = lambda x: activations.softmax(x, axis=CHANNEL_AXIS)
        else:
            activation = 'relu'
        heatmap_layer = Dense(clf_classes, activation=activation,
                              kernel_regularizer=l2(weight_decay))
        heatmap = heatmap_layer(dropped)
        heatmap_layer.set_weights(clf_weights)
        if return_heatmap:
            model_heatmap = Model(inputs=image_input, outputs=heatmap)
            return model_heatmap
        block = MaxPooling2D(pool_size=hm_pool_size)(heatmap)
        top_layer_nb = 8
    else:
        top_layer_nb = 2

    if add_conv:
        if block_type == 'resnet':
            block = add_residual_blocks(block)
        elif block_type == 'vgg':
            block = add_vgg_blocks(block)
        else:
            raise Exception('Unsupported block type: ' + block_type)
    else:
        block, flattened = add_fc_layers(block)

    if add_shortcut and not add_conv:
        dense = Dense(nb_class, kernel_initializer="he_normal",
                      kernel_regularizer=l2(weight_decay))(block)
        shortcut = Dense(nb_class, kernel_initializer="he_normal",
                         kernel_regularizer=l2(weight_decay))(flattened)
        addition = add([dense, shortcut])
        dense = Activation('softmax')(addition)
    else:
        dense = Dense(nb_class, kernel_initializer="he_normal",
                      activation='softmax',
                      kernel_regularizer=l2(weight_decay))(block)
    model_addtop = Model(inputs=image_input, outputs=dense)
    return model_addtop, top_layer_nb
def create_model(nb_classes, input_shape, config=None):
    """Create a VGG-16 like model."""
    if len(input_shape) != 3:
        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, "
                        "nb_cols) or (nb_rows, nb_cols, nb_channels), "
                        "depending on your backend.")
    if config is None:
        config = {'model': {}}

    min_feature_map_dimension = min(input_shape[:2])
    if min_feature_map_dimension < 32:
        print("ERROR: Please upsample the feature maps to have at least "
              "a size of 32 x 32. Currently, it has {}".format(input_shape))
    nb_filter = 32

    # Network definition
    # input_shape = (None, None, 3)  # for fcn
    input_ = Input(shape=input_shape)
    x = input_

    # Scale feature maps down to [63, 32] x [63, 32]
    tmp = min_feature_map_dimension / 32.
    if tmp >= 2:
        while tmp >= 2.:
            for _ in range(2):
                x = Convolution2D(nb_filter, (3, 3), padding='same',
                                  kernel_initializer='he_uniform',
                                  kernel_regularizer=l2(0.0001))(x)
                x = BatchNormalization()(x)
                x = Activation('elu')(x)
            x = MaxPooling2D(pool_size=(2, 2))(x)
            nb_filter *= 2
            tmp /= 2

    # 32x32
    x = Convolution2D(nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Convolution2D(nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 16x16
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_16 = MaxPooling2D(pool_size=(2, 2))(input_)
    res = Convolution2D(nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_16)
    x = add([x, res])
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 8x8
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_8 = MaxPooling2D(pool_size=(2, 2))(inp_16)
    res = Convolution2D(2 * nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_8)
    x = add([x, res])
    x = Convolution2D(2 * nb_filter, (3, 3), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)

    # 4x4
    x = MaxPooling2D(pool_size=(2, 2))(x)
    inp_4 = MaxPooling2D(pool_size=(2, 2))(inp_8)
    res = Convolution2D(2 * nb_filter, (1, 1), padding='same',
                        kernel_initializer='he_uniform',
                        kernel_regularizer=l2(0.0001))(inp_4)
    x = add([x, res])
    x = Convolution2D(512, (4, 4), padding='valid',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Dropout(0.5)(x)

    # 1x1
    x = Convolution2D(512, (1, 1), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('elu')(x)
    x = Dropout(0.5)(x)
    x = Convolution2D(nb_classes, (1, 1), padding='same',
                      kernel_initializer='he_uniform',
                      kernel_regularizer=l2(0.0001))(x)
    x = GlobalAveragePooling2D()(x)  # Adjust for FCN
    x = BatchNormalization()(x)
    x = Activation('softmax')(x)

    model = Model(inputs=input_, outputs=x)
    return model
def create_model(input_shape, img_gen, pool_size, img_w, img_h):
    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    time_dense_size = 32
    rnn_size = 512
    act = 'relu'

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    l = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(l)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    return model, input_data, y_pred, test_func
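# ctc_lambda_func is referenced above but not defined in this snippet; in the Keras
# image_ocr example it is written as below (the first two timesteps are dropped
# because the early RNN outputs tend to be garbage).
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)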
def CreateModel(self):
    '''
    Define the CNN/LSTM/CTC model using the Keras functional API.

    Input layer: sequence of 200-dim feature vectors; the maximum length of one
        utterance is set to 1600 (about 16 s).
    Hidden layer 1: 3x3 convolution layer
    Hidden layer 2: pooling layer, pool window size 2
    Hidden layer 3: Dropout layer, dropping 20% of the connections to prevent overfitting
    Hidden layer 4: recurrent LSTM/GRU layer
    Hidden layer 5: Dropout layer, dropping 20% of the connections to prevent overfitting
    Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units and softmax activation
    Output layer: custom CTC layer that uses the CTC loss for connectionist
        temporal multi-output sequence prediction
    '''
    # Each frame is represented by 13-dim MFCCs plus their 13-dim first-order and
    # 13-dim second-order differences; the maximum signal sequence length is 1500.
    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(input_data)  # convolution layer
    layer_h1 = Dropout(0.1)(layer_h1)
    layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h1)  # convolution layer
    layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2)  # pooling layer
    # layer_h3 = Dropout(0.2)(layer_h2)  # randomly drop connections to prevent overfitting
    layer_h3 = Dropout(0.2)(layer_h3)
    layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h3)  # convolution layer
    layer_h4 = Dropout(0.2)(layer_h4)
    layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h4)  # convolution layer
    layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5)  # pooling layer
    layer_h6 = Dropout(0.3)(layer_h6)
    layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h6)  # convolution layer
    layer_h7 = Dropout(0.3)(layer_h7)
    layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h7)  # convolution layer
    layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8)  # pooling layer
    layer_h9 = Dropout(0.3)(layer_h9)
    layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                       kernel_initializer='he_normal')(layer_h9)  # convolution layer
    layer_h10 = Dropout(0.4)(layer_h10)
    layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                       kernel_initializer='he_normal')(layer_h10)  # convolution layer
    layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11)  # pooling layer

    # test = Model(inputs=input_data, outputs=layer_h6)
    # test.summary()

    layer_h13 = Reshape((200, 3200))(layer_h12)  # reshape layer
    layer_h13 = Dropout(0.4)(layer_h13)
    layer_h14 = Dense(128, activation="relu", use_bias=True,
                      kernel_initializer='he_normal')(layer_h13)  # fully connected layer
    layer_h14 = Dropout(0.4)(layer_h14)
    inner = layer_h14
    # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM layer

    rnn_size = 128
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2 = concatenate([gru_2, gru_2b])
    # layer_h12 = GRU(128, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

    layer_h15 = Dropout(0.4)(gru2)
    layer_h16 = Dense(128, activation="relu", use_bias=True,
                      kernel_initializer='he_normal')(layer_h15)  # fully connected layer
    layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop connections to prevent overfitting
    layer_h17 = Dense(self.MS_OUTPUT_SIZE, use_bias=True,
                      kernel_initializer='he_normal')(layer_h16)  # fully connected layer

    y_pred = Activation('softmax', name='Activation0')(layer_h17)
    model_data = Model(inputs=input_data, outputs=y_pred)
    # model_data.summary()

    labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    # layer_out = Lambda(ctc_lambda_func, output_shape=(self.MS_OUTPUT_SIZE,), name='ctc')([y_pred, labels, input_length, label_length])  # CTC
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    model.summary()

    # clipnorm seems to speed up convergence
    # sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)
    # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada_d)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    print('[*Info] Model created and compiled successfully.')
    return model, model_data
conv_shape = x.get_shape()
x = Reshape(target_shape=(int(conv_shape[1]),
                          int(conv_shape[2] * conv_shape[3])))(x)
x = Dense(32, activation='relu')(x)

gru_1 = GRU(rnn_size, return_sequences=True,
            kernel_initializer='he_normal', name='gru1')(x)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru1_b')(x)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True,
            kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
x = concatenate([gru_2, gru_2b])

x = Dropout(0.25)(x)
x = Dense(len(characters) + 1, kernel_initializer='he_normal',
          activation='softmax')(x)
base_model = Model(inputs=input_data, outputs=x)
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(get_file('wordlists.tgz',
                                    origin='http://www.isosemi.com/datasets/wordlists.tgz',
                                    untar=True))
    img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
                                 bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
                                 minibatch_size=32,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size ** 2),
                                 val_split=words_per_epoch - val_words)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR,
                                   os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words),
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch)
    if len(conv1) == 1:
        encoder1 = conv1[0]
    else:
        encoder1 = keras.layers.concatenate(inputs=conv1)

    if len(conv2) == 1:
        encoder2 = conv2[0]
    else:
        encoder2 = keras.layers.concatenate(inputs=conv2)

    # compress the sentence vector down to sent2vec_dim
    encoder1 = sent_repr_layer(encoder1)
    encoder2 = sent_repr_layer(encoder2)

    addition = add([encoder1, encoder2])
    minus_y1 = Lambda(lambda x: -x, output_shape=(sent2vec_dim,))(encoder1)
    mul = add([encoder2, minus_y1])
    mul = multiply([mul, mul])

    # words_final = keras.layers.concatenate(inputs=[mul, addition, addfeatures_input])
    words_final = keras.layers.concatenate(
        inputs=[mul, addition, addfeatures_input, encoder1, encoder2])
    final_size = encoder_size + nb_addfeatures
    words_final = Dense(units=final_size // 2, activation='sigmoid')(words_final)
elif classifier_arch == 'merge':
    # this final classifier takes the two sentence representation vectors,
    # concatenates them into a vector of twice the length, and then runs this
    # doubled vector through
def train(run_name, start_epoch, stop_epoch, img_w): # Input Parameters img_h = 64 words_per_epoch = 16000 val_split = 0.2 val_words = int(words_per_epoch * (val_split)) # Network parameters conv_filters = 16 kernel_size = (3, 3) pool_size = 2 time_dense_size = 32 rnn_size = 512 minibatch_size = 32 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, img_h) else: input_shape = (img_w, img_h, 1) fdir = os.path.dirname( get_file('wordlists.tgz', origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True)) img_gen = TextImageGenerator( monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'), bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'), minibatch_size=minibatch_size, img_w=img_w, img_h=img_h, downsample_factor=(pool_size ** 2), val_split=words_per_epoch - val_words) act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) Model(inputs=input_data, outputs=y_pred).summary() labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda( ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) if start_epoch > 0: weight_file = os.path.join( OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1))) model.load_weights(weight_file) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) viz_cb = VizCallback(run_name, test_func, 
img_gen.next_val()) model.fit_generator( generator=img_gen.next_train(), steps_per_epoch=(words_per_epoch - val_words) // minibatch_size, epochs=stop_epoch, validation_data=img_gen.next_val(), validation_steps=val_words // minibatch_size, callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
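This training function (and several CTC snippets below) assumes an externally defined ctc_lambda_func. A minimal sketch in the spirit of the Keras image_ocr example these snippets closely mirror; slicing off the first two timesteps is that example's convention for discarding noisy early RNN outputs:

from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the first couple of RNN outputs tend to be garbage, so skip them
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)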
def _build_network(self, vocab_size, maxlen, emb_weights=[], c_emb_weights=[], hidden_units=256, dimension_length=11, trainable=True, batch_size=1): print('Building model...') context_input = Input(name='context', batch_shape=(batch_size, maxlen)) if (len(c_emb_weights) == 0): c_emb = Embedding(vocab_size, 256, input_length=maxlen, embeddings_initializer='glorot_normal', trainable=trainable)(context_input) else: c_emb = Embedding(vocab_size, c_emb_weights.shape[1], input_length=maxlen, weights=[c_emb_weights], trainable=trainable)(context_input) c_cnn1 = Convolution1D(int(hidden_units / 2), 5, kernel_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen))(c_emb) c_cnn2 = Convolution1D(hidden_units, 5, kernel_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen - 2))(c_cnn1) c_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid', kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01), recurrent_regularizer=regularizers.l2(0.01), dropout=0.25, recurrent_dropout=.0, unit_forget_bias=False, return_sequences=True)(c_cnn2) c_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid', kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01), recurrent_regularizer=regularizers.l2(0.01), dropout=0.25, recurrent_dropout=.0, unit_forget_bias=False, return_sequences=True, go_backwards=True)(c_cnn2) c_merged = add([c_lstm1, c_lstm2]) c_merged = Dropout(0.25)(c_merged) c_merged = TimeDistributed( Dense(128, kernel_initializer="he_normal", activation='sigmoid'))(c_merged) text_input = Input(name='text', batch_shape=(batch_size, maxlen)) if (len(emb_weights) == 0): emb = Embedding(vocab_size, 256, input_length=maxlen, embeddings_initializer='glorot_normal', trainable=trainable)(text_input) else: emb = Embedding(vocab_size, emb_weights.shape[1], input_length=maxlen, weights=[emb_weights], trainable=trainable)(text_input) t_cnn1 = Convolution1D(int(hidden_units / 2), 5, kernel_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen))(emb) t_cnn2 = Convolution1D(hidden_units, 5, kernel_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', padding='valid', use_bias=True, input_shape=(1, maxlen - 2))(t_cnn1) t_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid', kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01), recurrent_regularizer=regularizers.l2(0.01), dropout=0.25, recurrent_dropout=0.25, unit_forget_bias=False, return_sequences=True)(t_cnn2) t_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal', recurrent_initializer='he_normal', bias_initializer='he_normal', activation='sigmoid', recurrent_activation='sigmoid', kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.01), recurrent_regularizer=regularizers.l2(0.01), dropout=0.25, recurrent_dropout=0.25, unit_forget_bias=False, return_sequences=True, go_backwards=True)(t_cnn2) t_merged = add([t_lstm1, t_lstm2])
t_merged = Dropout(0.25)(t_merged) t_merged = TimeDistributed( Dense(128, kernel_initializer="he_normal", activation='sigmoid'))(t_merged) awc_input = Input(name='awc', batch_shape=(batch_size, 11)) eaw = Embedding(101, 128, input_length=dimension_length, embeddings_initializer='glorot_normal', trainable=True)(awc_input) merged = concatenate([c_merged, t_merged, eaw], axis=1) flat_model = Flatten()(merged) dnn_1 = Dense(hidden_units, kernel_initializer="he_normal", activation='sigmoid')(flat_model) dnn_1 = Dropout(0.25)(dnn_1) dnn_2 = Dense(2, activation='sigmoid')(dnn_1) softmax = Activation('softmax')(dnn_2) model = Model(inputs=[context_input, text_input, awc_input], outputs=softmax) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) print('No of parameters:', model.count_params()) print(model.summary()) return model
strides=1, padding='same', kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(x) if use_bn: x = BatchNormalization()(x) x = Activation(activation)(x) if use_shortcut: x_prev = Conv2D(filters=32, kernel_size=1, strides=1, padding='same', kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(x_prev) x_prev = Activation(activation)(x_prev) x = add([x_prev, x]) x_prev = x for i in range(2): x = Conv2D(filters=64, kernel_size=3, strides=1, padding='same', kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer)(x) if use_bn: x = BatchNormalization()(x) x = Activation(activation)(x) if use_shortcut: x_prev = Conv2D(filters=64, kernel_size=1,
def CreateModel(self): ''' Define the CNN/LSTM/CTC model using the functional API. Input layer: sequence of 200-dimensional feature vectors; the maximum length of one audio sample is set to 1600 (about 16 s). Hidden layers: convolution + pooling, 3x3 kernels, pooling window of 2. Hidden layer: fully connected. Output layer: fully connected with self.MS_OUTPUT_SIZE neurons and softmax activation. CTC layer: uses the CTC loss as the loss function to realize connectionist temporal classification over multiple outputs. ''' input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) layer_h = Conv2D(32, (3, 3), use_bias=False, activation='relu', padding='same', kernel_initializer='he_normal')(input_data) # convolution layer #layer_h = Dropout(0.05)(layer_h) layer_h = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h) # pooling layer #layer_h = Dropout(0.05)(layer_h) # randomly drop some connections to prevent overfitting layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer #layer_h = Dropout(0.1)(layer_h) layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h) # pooling layer #layer_h = Dropout(0.1)(layer_h) layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer #layer_h = Dropout(0.15)(layer_h) layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h) # pooling layer #layer_h = Dropout(0.15)(layer_h) layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer #layer_h = Dropout(0.2)(layer_h) layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h) # pooling layer #layer_h = Dropout(0.2)(layer_h) #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer #layer_h = Dropout(0.2)(layer_h) #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer #layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h) # pooling layer #test=Model(inputs = input_data, outputs = layer_h) #test.summary() layer_h = Reshape((200, 3200))(layer_h) # reshape layer #layer_h16 = Dropout(0.3)(layer_h16) # randomly drop some connections to prevent overfitting layer_h = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h) # fully connected layer inner = layer_h #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer rnn_size = 128 gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) gru2 = concatenate([gru_2, gru_2b]) layer_h = gru2 #layer_h20 = Dropout(0.4)(gru2) layer_h = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h) # fully connected layer #layer_h17 = Dropout(0.3)(layer_h17) layer_h = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h) # fully connected layer y_pred =
Activation('softmax', name='Activation0')(layer_h) model_data = Model(inputs=input_data, outputs=y_pred) #model_data.summary() labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ), name='ctc')( [y_pred, labels, input_length, label_length]) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) model.summary() # clipnorm seems to speed up convergence #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8) #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=opt) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) #print('[*Hint] Model created successfully, model compiled successfully') print('[*Info] Create Model Successful, Compiles Model Successful. ') return model, model_data
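For inference, the second returned model (model_data, which ends at the softmax) can be decoded with K.ctc_decode. A hedged sketch, not taken from the original source; decode_greedy is a hypothetical helper:

import numpy as np
from keras import backend as K

def decode_greedy(model_data, x):
    y_pred = model_data.predict(x)  # (batch, time, MS_OUTPUT_SIZE)
    input_len = np.full((y_pred.shape[0],), y_pred.shape[1])
    decoded, _ = K.ctc_decode(y_pred, input_len, greedy=True)
    return K.eval(decoded[0])  # label index sequences, padded with -1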
def _main(args): config_path = os.path.expanduser(args.config_path) weights_path = os.path.expanduser(args.weights_path) assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( config_path) assert weights_path.endswith('.weights'), '{} is not a .weights file'.format(weights_path) output_path = os.path.expanduser(args.output_path) assert output_path.endswith('.h5'), 'output path {} is not a .h5 file'.format(output_path) output_root = os.path.splitext(output_path)[0] # Load weights and config. print('Loading weights.') weights_file = open(weights_path, 'rb') weights_header = np.ndarray( shape=(5, ), dtype='int32', buffer=weights_file.read(20)) print('Weights Header: ', weights_header) # TODO: Check transpose flag when implementing fully connected layers. # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000) print('Parsing Darknet config.') unique_config_file = unique_config_sections(config_path) cfg_parser = configparser.ConfigParser() cfg_parser.read_file(unique_config_file) print('\nCreating Keras model.') if args.convolutional_only: image_height, image_width = None, None else: image_height = int(cfg_parser['net_0']['height']) image_width = int(cfg_parser['net_0']['width']) prev_layer = Input(shape=(image_height, image_width, 3)) all_layers = [prev_layer] outputs = [] weight_decay = float(cfg_parser['net_0']['decay']) if 'net_0' in cfg_parser.sections() else 5e-4 count = 0 for section in cfg_parser.sections(): print('Parsing section {}'.format(section)) if section.startswith('convolutional'): filters = int(cfg_parser[section]['filters']) size = int(cfg_parser[section]['size']) stride = int(cfg_parser[section]['stride']) pad = int(cfg_parser[section]['pad']) activation = cfg_parser[section]['activation'] batch_normalize = 'batch_normalize' in cfg_parser[section] # Setting weights. # Darknet serializes convolutional weights as: # [bias/beta, [gamma, mean, variance], conv_weights] prev_layer_shape = K.int_shape(prev_layer) # TODO: This assumes channel last dim_ordering. weights_shape = (size, size, prev_layer_shape[-1], filters) darknet_w_shape = (filters, weights_shape[2], size, size) weights_size = np.product(weights_shape) print(' conv2d', 'bn' if batch_normalize else activation, weights_shape) conv_bias = np.ndarray( shape=(filters, ), dtype='float32', buffer=weights_file.read(filters * 4)) count += filters if batch_normalize: bn_weights = np.ndarray( shape=(3, filters), dtype='float32', buffer=weights_file.read(filters * 12)) count += 3 * filters # TODO: Keras BatchNormalization mistakenly refers to var # as std. bn_weight_list = [ bn_weights[0], # scale gamma conv_bias, # shift beta bn_weights[1], # running mean bn_weights[2] # running var ] conv_weights = np.ndarray( shape=darknet_w_shape, dtype='float32', buffer=weights_file.read(weights_size * 4)) count += weights_size # DarkNet conv_weights are serialized Caffe-style: # (out_dim, in_dim, height, width) # We would like to set these to Tensorflow order: # (height, width, in_dim, out_dim) # TODO: Add check for Theano dim ordering. conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) conv_weights = [conv_weights] if batch_normalize else [conv_weights, conv_bias] # Handle activation. act_fn = None if activation == 'leaky': pass # Add advanced activation later. elif activation != 'linear': raise ValueError( 'Unknown activation function `{}` in section {}'.format( activation, section)) padding = 'same' if pad == 1 and stride == 1 else 'valid' # Adjust padding model for darknet. 
if stride == 2: prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer) # Create Conv2D layer conv_layer = (Conv2D( filters, (size, size), strides=(stride, stride), kernel_regularizer=l2(weight_decay), use_bias=not batch_normalize, weights=conv_weights, activation=act_fn, padding=padding))(prev_layer) if batch_normalize: conv_layer = (BatchNormalization(weights=bn_weight_list))(conv_layer) prev_layer = conv_layer if activation == 'linear': all_layers.append(prev_layer) elif activation == 'leaky': act_layer = LeakyReLU(alpha=0.1)(prev_layer) prev_layer = act_layer all_layers.append(prev_layer) elif section.startswith('maxpool'): size = int(cfg_parser[section]['size']) stride = int(cfg_parser[section]['stride']) all_layers.append( MaxPooling2D( padding='same', pool_size=(size, size), strides=(stride, stride))(prev_layer)) prev_layer = all_layers[-1] elif section.startswith('avgpool'): if cfg_parser.items(section) != []: raise ValueError('{} with params unsupported.'.format(section)) all_layers.append(GlobalAveragePooling2D()(prev_layer)) prev_layer = all_layers[-1] elif section.startswith('route'): ids = [int(i) for i in cfg_parser[section]['layers'].split(',')] if len(ids) == 2: for i, item in enumerate(ids): if item != -1: ids[i] = item + 1 layers = [all_layers[i] for i in ids] if len(layers) > 1: print(' Concatenating route layers:') for layer in layers: print(' '+str(layer)) concatenate_layer = concatenate(layers) all_layers.append(concatenate_layer) prev_layer = concatenate_layer else: skip_layer = layers[0] # only one layer to route all_layers.append(skip_layer) prev_layer = skip_layer elif section.startswith('shortcut'): ids = [int(i) for i in cfg_parser[section]['from'].split(',')][0] activation = cfg_parser[section]['activation'] shortcut = add([all_layers[ids], prev_layer]) if activation == 'linear': shortcut = Activation('linear')(shortcut) all_layers.append(shortcut) prev_layer = all_layers[-1] elif section.startswith('upsample'): stride = int(cfg_parser[section]['stride']) all_layers.append( UpSampling2D( size=(stride, stride))(prev_layer)) prev_layer = all_layers[-1] elif section.startswith('yolo'): classes = int(cfg_parser[section]['classes']) # num = int(cfg_parser[section]['num']) # mask = int(cfg_parser[section]['mask']) n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2]) n3 = 3 n4 = (4 + 1 + classes) yolo = Reshape((n1, n2, n3, n4))(prev_layer) all_layers.append(yolo) prev_layer = all_layers[-1] outputs.append(len(all_layers) - 1) elif (section.startswith('net')): pass # Configs not currently handled during model definition. else: raise ValueError( 'Unsupported section header type: {}'.format(section)) # Create and save model. model = Model(inputs=all_layers[0], outputs=[all_layers[i] for i in outputs]) model.compile(optimizer=Adam(lr=1e-3), loss={ # use custom yolo_loss Lambda layer. 'yolo_loss': lambda y_true, y_pred: y_pred}) #### !!! https://github.com/qqwweee/keras-yolo3/blob/master/yolo3/model.py#L345 print('\n'+'='*98+'\n'+'{:^98}'.format('Model Summary')) print(model.summary()) print('='*98+'\n') model.save('{}'.format(output_path)) print('Saved Keras model to {}'.format(output_path)) # Check to see if all weights have been read. 
remaining_weights = len(weights_file.read()) / 4 weights_file.close() print('Read {} of {} from Darknet weights.'.format(count, count + remaining_weights)) if remaining_weights > 0: print('Warning: {} unused weights'.format(remaining_weights)) if args.plot_model: plot(model, to_file='{}.png'.format(output_root), show_shapes=True) print('Saved model plot to {}.png'.format(output_root))
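The converter assumes a unique_config_sections helper, since configparser rejects the duplicate [convolutional] headers in Darknet .cfg files; this is also why the code above indexes cfg_parser['net_0']. A sketch in the spirit of the YAD2K converter (details may differ from the original):

import io
from collections import defaultdict

def unique_config_sections(config_file):
    """Rename duplicate .cfg sections (e.g. [convolutional] -> [convolutional_0]) so configparser accepts them."""
    section_counters = defaultdict(int)
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                section = line.strip().strip('[]')
                _section = section + '_' + str(section_counters[section])
                section_counters[section] += 1
                line = line.replace(section, _section)
            output_stream.write(line)
    output_stream.seek(0)
    return output_stream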
def get_model(training, img_h, nclass): input_shape = (None, img_h, 1) # (128, 64, 1) #input_shape = (280, img_h, 1) # Make Network inputs = Input(name='the_input', shape=input_shape, dtype='float32') # (None, 128, 64, 1) #inner = resnet.ResNet50(include_top=False, weights = None, input_tensor = inputs) inner = shufflenet.ShuffleNet_V2(include_top=False, weights=None, input_tensor=inputs) # Convolution layer (VGG) # CNN to RNN #inner = Reshape(target_shape=((32, 2048)), name='reshape')(inner) # (None, 32, 2048) inner = TimeDistributed(Flatten(), name='flatten')(inner) #inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner) # (None, 32, 64) lstm_unit_num = 256 # RNN layer lstm_1 = CuDNNLSTM(lstm_unit_num, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner) # (None, 32, 512) lstm_1b = CuDNNLSTM(lstm_unit_num, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner) lstm1_merged = add([lstm_1, lstm_1b]) # (None, 32, 512) lstm1_merged = BatchNormalization()(lstm1_merged) #lstm1_merged = Dropout(0.1)(lstm1_merged) lstm_2 = CuDNNLSTM(lstm_unit_num, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged) lstm_2b = CuDNNLSTM(lstm_unit_num, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged) lstm2_merged = concatenate([lstm_2, lstm_2b]) # (None, 32, 1024) lstm_merged = BatchNormalization()(lstm2_merged) #lstm_merged = Dropout(0.1)(lstm_merged) # transforms RNN output to character activations: inner = Dense(nclass, kernel_initializer='he_normal', name='dense2')(lstm2_merged) #(None, 32, 63) y_pred = Activation('softmax', name='softmax')(inner) labels = Input(name='the_labels', shape=[None], dtype='float32') # (None ,8) input_length = Input(name='input_length', shape=[1], dtype='int64') # (None, 1) label_length = Input(name='label_length', shape=[1], dtype='int64') # (None, 1) # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')([y_pred, labels, input_length, label_length]) #(None, 1) model = None if training: model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out) else: model = Model(inputs=inputs, outputs=y_pred) return model, model # the else branch returns the plain inference model twice; the training path continues below model.summary() multi_model = multi_gpu_model(model, gpus=GPU_NUM) save_model = model ada = Adadelta() #multi_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam', metrics=['accuracy']) multi_model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=ada, metrics=['accuracy']) return save_model, multi_model
def resnet8_MDN(img_width, img_height, img_channels, output_dim): """ Define model architecture. # Arguments img_width: Target image widht. img_height: Target image height. img_channels: Target image channels. output_dim: Dimension of model output. # Returns model: A Model instance. """ # Input img_input = Input(shape=(img_height, img_width, img_channels)) x1 = Conv2D(32, (5, 5), strides=[2, 2], padding='same')(img_input) x1 = MaxPooling2D(pool_size=(3, 3), strides=[2, 2])(x1) # First residual block x2 = keras.layers.normalization.BatchNormalization()(x1) x2 = Activation('relu')(x2) x2 = Conv2D(32, (3, 3), strides=[2, 2], padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x2) x2 = keras.layers.normalization.BatchNormalization()(x2) x2 = Activation('relu')(x2) x2 = Conv2D(32, (3, 3), padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x2) x1 = Conv2D(32, (1, 1), strides=[2, 2], padding='same')(x1) x3 = add([x1, x2]) # Second residual block x4 = keras.layers.normalization.BatchNormalization()(x3) x4 = Activation('relu')(x4) x4 = Conv2D(64, (3, 3), strides=[2, 2], padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x4) x4 = keras.layers.normalization.BatchNormalization()(x4) x4 = Activation('relu')(x4) x4 = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x4) x4_ = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3) x5 = add([x4_, x4]) x4_out = Flatten()(x5) x4_out = Activation('relu')(x4_out) # # # # Third residual block x6 = keras.layers.normalization.BatchNormalization()(x3) x6 = Activation('relu')(x6) x6 = Conv2D(64, (3, 3), strides=[2, 2], padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x6) x6 = keras.layers.normalization.BatchNormalization()(x6) x6 = Activation('relu')(x6) x6 = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(1e-4))(x6) x6_ = Conv2D(64, (1, 1), strides=[2, 2], padding='same')(x3) x7 = add([x6_, x6]) x6_out = Flatten()(x7) x6_out = Activation('relu')(x6_out) # # Collision channel trans = Dense(500, activation='relu')(x6_out) trans = keras.layers.normalization.BatchNormalization()(trans) trans = Dropout(0.5)(trans) trans = Dense(output_dim, name='trans_output')(trans) # coll = Activation('sigmoid')(coll) dense1_1 = Dense(500, activation='relu')(x4_out) dense1_1 = keras.layers.normalization.BatchNormalization()(dense1_1) dense1_1 = Dropout(0.2)(dense1_1) dense2_1 = Dense(100, activation='relu')(dense1_1) dense2_1 = keras.layers.normalization.BatchNormalization()(dense2_1) dense2_1 = Dropout(0.2)(dense2_1) FC_mus = Dense(c * m, activation='tanh')(dense2_1) # FC_sigmas = Dense(m, activation=elu_plus_one_plus_epsilon)(dense1_1) # Keras.exp, W_regularizer=l2(1e-3) FC_alphas = Dense(m)(dense2_1) outputs = concatenate([FC_mus, FC_alphas], name='direct_output') # outputs = Dense((c+1)*m)(dense2_1) model = Model(inputs=[img_input], outputs=[outputs, trans]) # model = Model(inputs=[img_input], outputs=[outputs]) # Define steering-collision model # model = Model(inputs=[img_input], outputs=[steer, coll]) print(model.summary()) return model
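resnet8_MDN relies on module-level globals c and m (per-component output dimension and number of mixture components), and the commented-out sigma layer references elu_plus_one_plus_epsilon, which is not defined in this snippet. A common definition from MDN tutorials (an assumption here, not taken from this source) keeps predicted standard deviations strictly positive:

from keras import backend as K

def elu_plus_one_plus_epsilon(x):
    """ELU shifted into (0, inf) so it can parameterize a mixture std-dev."""
    return K.elu(x) + 1 + K.epsilon()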
question_encoder = Embedding(input_dim=vocab_size, output_dim=EMBEDDING_SIZE, input_length=question_maxlen)(question_input) question_encoder = Dropout(0.3)(question_encoder) # match between story and question match = dot([story_encoder, question_encoder], axes=[2, 2]) # encode story into vector space of question story_encoder_c = Embedding(input_dim=vocab_size, output_dim=question_maxlen, input_length=story_maxlen)(story_input) story_encoder_c = Dropout(0.3)(story_encoder_c) # combine match and story vectors response = add([match, story_encoder_c]) response = Permute((2, 1))(response) # combine response and question vectors answer = concatenate([response, question_encoder], axis=-1) answer = LSTM(LATENT_SIZE)(answer) answer = Dropout(0.3)(answer) answer = Dense(vocab_size)(answer) output = Activation("softmax")(answer) model = Model(inputs=[story_input, question_input], outputs=output) model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]) # train model history = model.fit([Xstrain, Xqtrain], [Ytrain], batch_size=BATCH_SIZE,
def FCN(basenet='vgg16', trainable_base=False, num_output=21, input_shape=(None, None, 3), weights='imagenet'): """Instantiate the FCN8s architecture with keras. # Arguments basenet: type of basene {'vgg16'} trainable_base: Bool whether the basenet weights are trainable num_output: number of classes input_shape: input image shape weights: pre-trained weights to load (None for training from scratch) # Returns A Keras model instance """ _handle_data_format() basenet = _get_basenet(basenet) # input input = Input(shape=input_shape) # Get skip_layers=[drop7, pool4, pool3] from the base net: VGG16 skip_layers = basenet(skip_architecture=True)(input) drop7 = skip_layers[0] score_fr = Conv2D(filters=num_output, kernel_size=(1, 1), padding='valid', name='score_fr')(drop7) upscore2 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4), strides=(2, 2), padding='valid', use_bias=False, data_format=K.image_data_format(), name='upscore2')(score_fr) # scale pool4 skip for compatibility pool4 = skip_layers[1] scale_pool4 = Lambda(lambda x: x * 0.01, name='scale_pool4')(pool4) score_pool4 = Conv2D(filters=num_output, kernel_size=(1, 1), padding='valid', name='score_pool4')(scale_pool4) score_pool4c = _crop(upscore2, offset=(5, 5), name='score_pool4c')(score_pool4) fuse_pool4 = add([upscore2, score_pool4c]) upscore_pool4 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4), strides=(2, 2), padding='valid', use_bias=False, data_format=K.image_data_format(), name='upscore_pool4')(fuse_pool4) # scale pool3 skip for compatibility pool3 = skip_layers[2] scale_pool3 = Lambda(lambda x: x * 0.0001, name='scale_pool3')(pool3) score_pool3 = Conv2D(filters=num_output, kernel_size=(1, 1), padding='valid', name='score_pool3')(scale_pool3) score_pool3c = _crop(upscore_pool4, offset=(9, 9), name='score_pool3c')(score_pool3) fuse_pool3 = add([upscore_pool4, score_pool3c]) # score upscore8 = Conv2DTranspose(filters=num_output, kernel_size=(16, 16), strides=(8, 8), padding='valid', use_bias=False, data_format=K.image_data_format(), name='upscore8')(fuse_pool3) score = _crop(input, offset=(31, 31), name='score')(upscore8) # model model = Model(input, score, name='fcn_vgg16') # load weights if weights == 'imagenet': weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5', basenet.WEIGHTS_PATH, cache_subdir='models') layer_names = load_weights(model, weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) # Freezing basenet weights if not trainable_base: for layer in model.layers: if layer.name in layer_names: layer.trainable = False return model
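The snippet does not show how the Conv2DTranspose upscore layers are seeded; FCN implementations conventionally initialize them with fixed bilinear-interpolation kernels. A hedged sketch of such an initializer (an assumption, not part of this source):

import numpy as np

def bilinear_kernel(size, num_channels):
    """Bilinear upsampling weights of shape (size, size, num_channels, num_channels)."""
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    filt = ((1 - abs(og[0] - center) / factor) *
            (1 - abs(og[1] - center) / factor))
    weights = np.zeros((size, size, num_channels, num_channels), dtype='float32')
    for i in range(num_channels):
        weights[:, :, i, i] = filt  # upsample each channel independently
    return weights

# e.g. model.get_layer('upscore2').set_weights([bilinear_kernel(4, 21)])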
def encoder_model(): inputs = Input(shape=(int(VIDEO_LENGTH / 2), 128, 208, 3)) # 10x128x128 conv_1 = Conv3D(filters=128, strides=(1, 4, 4), dilation_rate=(1, 1, 1), kernel_size=(3, 11, 11), padding='same')(inputs) x = TimeDistributed(BatchNormalization())(conv_1) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_1 = TimeDistributed(Dropout(0.5))(x) conv_2a = Conv3D(filters=64, strides=(1, 1, 1), dilation_rate=(2, 1, 1), kernel_size=(2, 5, 5), padding='same')(out_1) x = TimeDistributed(BatchNormalization())(conv_2a) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_2a = TimeDistributed(Dropout(0.5))(x) conv_2b = Conv3D(filters=64, strides=(1, 1, 1), dilation_rate=(2, 1, 1), kernel_size=(2, 5, 5), padding='same')(out_2a) x = TimeDistributed(BatchNormalization())(conv_2b) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_2b = TimeDistributed(Dropout(0.5))(x) res_1 = add([out_2a, out_2b]) # res_1 = LeakyReLU(alpha=0.2)(res_1) conv_3 = Conv3D(filters=64, strides=(1, 2, 2), dilation_rate=(1, 1, 1), kernel_size=(3, 5, 5), padding='same')(res_1) x = TimeDistributed(BatchNormalization())(conv_3) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_3 = TimeDistributed(Dropout(0.5))(x) # 10x16x16 conv_4a = Conv3D(filters=64, strides=(1, 1, 1), dilation_rate=(2, 1, 1), kernel_size=(2, 3, 3), padding='same')(out_3) x = TimeDistributed(BatchNormalization())(conv_4a) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_4a = TimeDistributed(Dropout(0.5))(x) conv_4b = Conv3D(filters=64, strides=(1, 1, 1), dilation_rate=(2, 1, 1), kernel_size=(2, 3, 3), padding='same')(out_4a) x = TimeDistributed(BatchNormalization())(conv_4b) x = TimeDistributed(LeakyReLU(alpha=0.2))(x) out_4b = TimeDistributed(Dropout(0.5))(x) z = add([out_4a, out_4b]) # res_1 = LeakyReLU(alpha=0.2)(res_1) model = Model(inputs=inputs, outputs=z) return model
def train(run_name, start_epoch, stop_epoch, img_w, type_t): # Input Parameters img_h = 64 words_per_epoch = 16000 val_split = 0.2 val_words = int(words_per_epoch * (val_split)) # Network parameters conv_filters = 16 kernel_size = (3, 3) pool_size = 2 time_dense_size = 32 rnn_size = 512 minibatch_size = 32 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, img_h) else: input_shape = (img_w, img_h, 1) fdir = os.path.dirname( get_file('wordlists.tgz', origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True)) img_gen = TextImageGenerator( monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'), bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'), minibatch_size=minibatch_size, img_w=img_w, img_h=img_h, downsample_factor=(pool_size**2), val_split=words_per_epoch - val_words, type_t=type_t) act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner2 = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner2) conv_to_rnn_dims = (img_w // (pool_size**2), (img_h // (pool_size**2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(FindOutPutShape(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) Model(inputs=input_data, outputs=y_pred) labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) model.summary() # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=sgd, metrics=['accuracy']) if start_epoch > 0: weight_file = os.path.join( OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1))) # model.load_weights(weight_file) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) 
viz_cb = VizCallback(run_name, test_func, img_gen.next_val()) # model.load_weights('weightswithresize.h5') model.load_weights('weights56.h5') # history = model.fit_generator(generator=img_gen.next_train(), # steps_per_epoch=(words_per_epoch - val_words) // minibatch_size, # epochs=stop_epoch, # validation_data=img_gen.next_val(), # validation_steps=val_words // minibatch_size, # callbacks=[viz_cb, img_gen], # initial_epoch=start_epoch) imgwide = 564 strn = "new.png" # img = Image.open('test2.png') # img = img.resize((imgwide, 64), Image.ANTIALIAS) img = cv2.imread(strn) img = cv2.resize(img, (imgwide, 64)) kernel = np.ones((3, 3), np.float32) / 50 # smoothing kernel used by filter2D below img = cv2.filter2D(img, -1, kernel) print(img.shape) for i in range(50): img = np.insert(img, 0, 255, axis=1) for i in range(50): img = np.insert(img, img.shape[1], 255, axis=1) for i in range(25): img = np.insert(img, 0, [255], axis=0) for i in range(25): img = np.insert(img, img.shape[0], 255, axis=0) img = cv2.resize(img, (imgwide, 64)) print(img.shape) img = np.asarray(img) img = img[:, :, 0] # grab single channel im = img # plt.imshow(img,cmap='gray') # plt.show() # im = speckle(img) # img = img.astype(np.float32) / 255 img = cv2.adaptiveThreshold(img, 1, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2) plt.imshow(img, cmap='gray') plt.show() # a = a.astype(np.float32) / 255 img = np.expand_dims(img, 0) data = np.reshape(img, (1, 64, imgwide)) X_data = np.ones([1, imgwide, 64, 1]) X_data[0, 0:imgwide, :, 0] = data[0, :, :].T # decode_batch(test_func, X_data)
def train(img_w, train_data, val_data): # Input Parameters img_h = 64 # Network parameters conv_filters = 16 kernel_size = (3, 3) pool_size = 2 time_dense_size = 32 rnn_size = 512 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, img_h) else: input_shape = (img_w, img_h, 1) batch_size = 32 downsample_factor = pool_size**2 tiger_train = ImageGenerator(train_data, img_w, img_h, batch_size, downsample_factor) tiger_train.build_data() tiger_val = ImageGenerator(val_data, img_w, img_h, batch_size, downsample_factor) tiger_val.build_data() act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (pool_size**2), (img_h // (pool_size**2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirecitonal GRUs gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(tiger_train.get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) Model(inputs=input_data, outputs=y_pred).summary() labels = Input(name='the_labels', shape=[tiger_train.max_text_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) model.fit_generator(generator=tiger_train.next_batch(), steps_per_epoch=tiger_train.n, epochs=1, validation_data=tiger_val.next_batch(), validation_steps=tiger_val.n) return model
def build_model(self): print('building model...') if K.image_data_format() == 'channels_first': self.input_shape = (1, self.img_w, self.img_h) else: self.input_shape = (self.img_w, self.img_h, 1) self.ds.build_data() self.valid.build_data() act = 'relu' self.input_data = Input(name='the_input', shape=self.input_shape, dtype='float32') inner = Conv2D(self.conv_filters, self.kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(self.input_data) inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size), name='max1')(inner) inner = Dropout(0.2, name='drop1')(inner) inner = Conv2D(self.conv_filters, self.kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = Dropout(0.2, name='drop2')(inner) inner = Conv2D(self.conv_filters, self.kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv3')(inner) inner = BatchNormalization()(inner) inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size), name='max2')(inner) conv_to_rnn_dims = (self.img_w // (self.pool_size**2), (self.img_h // (self.pool_size**2)) * self.conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(self.time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(self.rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(self.rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(self.rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(self.rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) self.y_pred = Activation('softmax', name='softmax')(inner) # Model(inputs=self.input_data, outputs=self.y_pred).summary() labels = Input(name='the_labels', shape=[self.max_text_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')( [self.y_pred, labels, input_length, label_length]) # clipnorm seems to speed up convergence sgd = SGD(lr=0.002, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) self.model = Model( inputs=[self.input_data, labels, input_length, label_length], outputs=loss_out) self.model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=sgd)
def joint_TCN_resnet(n_classes, max_len, art, img_size=112, gap=1, dropout=0.0, activation="relu"): """Revised TK's TCN model. num_block = 2. initial_conv_num=64. Args: n_classes: number of classes for this kind of label. feat_dim: the dimension of the feature. max_len: the number of frames for each video. Returns: model: uncompiled model.""" if K.image_dim_ordering() == 'tf': ROW_AXIS = 1 CHANNEL_AXIS = 2 else: ROW_AXIS = 2 CHANNEL_AXIS = 1 if art == 'V1': initial_stride = 1 initial_filter_dim = 4 initial_num = 64 config = [[(1, 4, 64)], [(1, 4, 64)], [(1, 4, 64)], [(2, 4, 128)], [(1, 4, 128)], [(1, 4, 128)], [(2, 4, 256)], [(1, 4, 256)], [(1, 4, 256)]] elif art == 'V2': initial_stride = 1 initial_filter_dim = 2 initial_num = 256 config = [ [(1, 2, initial_num)], [(1, 2, initial_num)], [(2, 2, initial_num * 2)], [(1, 2, initial_num * 2)], ] elif art == 'V3': initial_stride = 1 initial_filter_dim = 2 initial_num = 128 config = [ [(1, 2, initial_num)], [(1, 2, initial_num)], [(2, 2, initial_num * 2)], [(1, 2, initial_num * 2)], ] elif art == 'V4': initial_stride = 1 initial_filter_dim = 4 initial_num = 64 config = [ [(1, 4, initial_num)], [(1, 4, initial_num)], [(2, 4, initial_num * 2)], [(1, 4, initial_num * 2)], [(2, 4, initial_num * 4)], [(1, 4, initial_num * 4)], ] elif art == 'V5': initial_stride = 1 initial_filter_dim = 4 initial_num = 64 config = [ [(1, 4, initial_num)], [(1, 4, initial_num)], [(1, 4, initial_num)], [(1, 4, initial_num)], [(2, 4, initial_num * 2)], [(1, 4, initial_num * 2)], [(1, 4, initial_num * 2)], [(1, 4, initial_num * 2)], [(2, 4, initial_num * 4)], [(1, 4, initial_num * 4)], [(1, 4, initial_num * 4)], [(1, 4, initial_num * 4)], ] elif art == 'V6': initial_stride = 1 initial_filter_dim = 6 initial_num = 64 config = [ [(1, 6, initial_num)], [(1, 6, initial_num)], [(1, 6, initial_num)], [(2, 6, initial_num * 2)], [(1, 6, initial_num * 2)], [(1, 6, initial_num * 2)], [(2, 6, initial_num * 4)], [(1, 6, initial_num * 4)], [(1, 6, initial_num * 4)], ] elif art == 'V7': initial_stride = 1 initial_filter_dim = 3 initial_num = 64 config = [ [(1, 3, initial_num)], [(1, 3, initial_num)], [(1, 3, initial_num)], [(2, 3, initial_num * 2)], [(1, 3, initial_num * 2)], [(1, 3, initial_num * 2)], [(2, 3, initial_num * 4)], [(1, 3, initial_num * 4)], [(1, 3, initial_num * 4)], ] def slice(x, index): return x[:, index, :, :, :] input = Input(shape=(max_len, img_size, img_size, 3)) video = input # feature = K.placeholder((None,1,487)) feature = [] # video_batch = K.permute_dimensions(video, (1,0,2,3,4)) # video_batch = Permute() # video_batch = Reshape((max_len,img_size,img_size,3))(video) # print 'video_batch', video_batch.shape # frame = video[0] print('video shape', video.shape) for i in range(max_len): frame = Lambda(slice, output_shape=(112, 112, 3), arguments={'index': i})(video) print('frame.shape: ', frame.shape) frame = Convolution2D(64, 3, activation='relu', border_mode='same', name='conv1' + str(i), input_shape=(img_size, img_size, 3))(frame) frame = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool1' + str(i))(frame) # 2nd layer group frame = Convolution2D(128, 3, activation='relu', border_mode='same', name='conv2' + str(i))(frame) frame = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool2' + str(i))(frame) # 3rd layer group frame = Convolution2D(256, 3, activation='relu', border_mode='same', name='conv3a' + str(i))(frame) frame = Convolution2D(256, 3, activation='relu', border_mode='same', name='conv3b' +
str(i))(frame) frame = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool3' + str(i))(frame) # 4th layer group frame = Convolution2D(512, 3, activation='relu', border_mode='same', name='conv4a' + str(i))(frame) frame = Convolution2D(512, 3, activation='relu', border_mode='same', name='conv4b' + str(i))(frame) frame = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool4' + str(i))(frame) # 5th layer group frame = Convolution2D(512, 3, activation='relu', border_mode='same', name='conv5a' + str(i))(frame) frame = Convolution2D(512, 3, activation='relu', border_mode='same', name='conv5b' + str(i))(frame) frame = ZeroPadding2D(padding=((0, 1), (0, 1)), name='zeropad5' + str(i))(frame) frame = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode='valid', name='pool5' + str(i))(frame) frame = Flatten()(frame) # FC layers group frame = Dense(4096, activation='relu', name='fc6' + str(i))(frame) frame = Dropout(.5)(frame) frame = Dense(4096, activation='relu', name='fc7' + str(i))(frame) frame = Dropout(.5)(frame) frame = Dense(487, activation='softmax', name='fc8' + str(i))(frame) print('Dense ', frame.shape) # frame = K.expand_dims(frame, axis=1) frame = Reshape((-1, 487))(frame) print('expand_dims ', frame.shape) # print 'frame shape after expend dim ', frame.shape # feature = K.concatenate(frame, axis=1) feature.append(frame) # feature = K.concatenate(feature, axis=1) # feature = Concatenate(axis=1)(feature) feature = concatenate(feature, axis=1) # feature = frame # print 'feature.shape ', feature.output_shape model = feature # model = K.expand_dims(feature, axis=0) print('model', model.shape) model = Conv1D(initial_num, initial_filter_dim, strides=initial_stride, padding="same", kernel_initializer="he_normal")(model) for depth in range(0, len(config)): for stride, filter_dim, num in config[depth]: bn = BatchNormalization(axis=CHANNEL_AXIS)(model) relu = Activation(activation)(bn) dr = Dropout(dropout)(relu) res = Conv1D(num, filter_dim, strides=stride, padding="same", kernel_initializer="he_normal")(dr) res_shape = K.int_shape(res) model_shape = K.int_shape(model) if res_shape[CHANNEL_AXIS] != model_shape[CHANNEL_AXIS]: model = Conv1D(num, 1, strides=stride, padding="same", kernel_initializer="he_normal")(model) model = add([model, res]) bn = BatchNormalization(axis=CHANNEL_AXIS)(model) model = Activation(activation)(bn) if gap: pool_window_shape = K.int_shape(model) gap = AveragePooling1D(pool_window_shape[ROW_AXIS], strides=1)(model) flatten = Flatten()(gap) else: flatten = Flatten()(model) dense = Dense(units=n_classes, activation="softmax", kernel_initializer="he_normal")(flatten) print('dense', dense.shape) model = Model(inputs=video, outputs=dense) return model
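Note that the per-frame loop above instantiates a separate conv tower for every frame (name='conv1' + str(i) and so on), so parameter count grows linearly with max_len. A common alternative, sketched here under assumed toy shapes, shares a single tower across time with TimeDistributed:

from keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Model

max_len, img_size = 16, 112  # assumed values for illustration
frames = Input(shape=(max_len, img_size, img_size, 3))
x = TimeDistributed(Conv2D(64, 3, activation='relu', padding='same'))(frames)
x = TimeDistributed(MaxPooling2D(2))(x)
x = TimeDistributed(Flatten())(x)
x = TimeDistributed(Dense(487, activation='softmax'))(x)  # one shared per-frame head
shared_tower = Model(inputs=frames, outputs=x)  # output: (None, max_len, 487)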
def __init__(self, img_w=512, labeltype_hinting=True, verbose=1): # Input Parameters self.img_h = 64 self.words_per_epoch = 10 self.val_split = 0.2 self.val_words = int(self.words_per_epoch * (self.val_split)) # Network parameters self.conv_filters = 16 self.kernel_size = (3, 3) self.pool_size = 2 self.time_dense_size = 32 self.rnn_size = 512 self.minibatch_size = 32 if K.image_data_format() == 'channels_first': input_shape = (1, img_w, self.img_h) else: input_shape = (img_w, self.img_h, 1) self.img_gen = TextImageGenerator( monogram_file=os.path.join(os.getcwd(), 'wordlist.txt'), bigram_file=os.path.join(os.getcwd(), 'bigram_wordlist.txt'), minibatch_size=32, img_w=img_w, img_h=self.img_h, downsample_factor=(self.pool_size**2), val_split=self.words_per_epoch - self.val_words) act = 'relu' self.input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(self.conv_filters, self.kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(self.input_data) inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size), name='max1')(inner) inner = Conv2D(self.conv_filters, self.kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(self.pool_size, self.pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (self.pool_size**2), (self.img_h // (self.pool_size**2)) * self.conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(self.time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirecitonal GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(self.rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(self.rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(self.rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(self.rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: self.inner = Dense(self.img_gen.get_output_size(), kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(self.inner) Model(inputs=self.input_data, outputs=y_pred).summary() labels = Input(name='the_labels', shape=[self.img_gen.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')( [y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence #use one of these two sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) #adam= Adam(lr=0.02, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6, clipnorm=5) self.model = Model( inputs=[self.input_data, labels, input_length, label_length], outputs=loss_out) self.test_func = K.function([self.input_data], [y_pred])
def build_model( baseline_cnn = False ): #Based on kernel https://www.kaggle.com/devm2024/keras-model-for-beginners-0-210-on-lb-eda-r-d image_input = Input( shape = (75, 75, 3), name = 'images' ) angle_input = Input( shape = [1], name = 'angle' ) activation = 'elu' bn_momentum = 0.99 # Simple CNN as baseline model if baseline_cnn: model = Sequential() model.add( Conv2D(16, kernel_size = (3, 3), activation = 'relu', input_shape = (75, 75, 3)) ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( MaxPooling2D(pool_size = (3, 3), strides = (2, 2)) ) model.add( Dropout(0.2) ) model.add( Conv2D(32, kernel_size = (3, 3), activation = 'relu') ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) ) model.add( Dropout(0.2) ) model.add( Conv2D(64, kernel_size = (3, 3), activation = 'relu') ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) ) model.add( Dropout(0.2) ) model.add( Conv2D(128, kernel_size = (3, 3), activation = 'relu') ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) ) model.add( Dropout(0.2) ) model.add( Flatten() ) model.add( Dense(256, activation = 'relu') ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( Dropout(0.3) ) model.add( Dense(128, activation = 'relu') ) model.add( BatchNormalization(momentum = bn_momentum) ) model.add( Dropout(0.3) ) model.add( Dense(1, activation = 'sigmoid') ) opt = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 ) model.compile( loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'] ) model.summary() else: img_1 = Conv2D( 32, kernel_size = (3, 3), activation = activation, padding = 'same' ) ((BatchNormalization(momentum=bn_momentum) ) ( image_input) ) img_1 = MaxPooling2D( (2,2)) (img_1 ) img_1 = Dropout( 0.2 )( img_1 ) img_1 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) ) img_1 = MaxPooling2D( (2,2) ) ( img_1 ) img_1 = Dropout( 0.2 )( img_1 ) # Residual block img_2 = Conv2D( 128, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_1) ) img_2 = Dropout(0.2) ( img_2 ) img_2 = Conv2D( 64, kernel_size = (3, 3), activation = activation, padding = 'same' ) ( (BatchNormalization(momentum=bn_momentum)) (img_2) ) img_2 = Dropout(0.2) ( img_2 ) img_res = add( [img_1, img_2] ) # Filter resudial output img_res = Conv2D( 128, kernel_size = (3, 3), activation = activation ) ( (BatchNormalization(momentum=bn_momentum)) (img_res) ) img_res = MaxPooling2D( (2,2) ) ( img_res ) img_res = Dropout( 0.2 )( img_res ) img_res = GlobalMaxPooling2D() ( img_res ) cnn_out = ( Concatenate()( [img_res, BatchNormalization(momentum=bn_momentum)(angle_input)]) ) dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(256, activation = activation) (cnn_out)) ) dense_layer = Dropout( 0.5 ) ( BatchNormalization(momentum=bn_momentum) (Dense(64, activation = activation) (dense_layer)) ) output = Dense( 1, activation = 'sigmoid' ) ( dense_layer ) model = Model( [image_input, angle_input], output ) opt = Adam( lr = 1e-3, beta_1 = .9, beta_2 = .999, decay = 1e-3 ) model.compile( loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'] ) model.summary() return model
def RCL_block(l_settings, l, pool=True, increase_dim=False, layer_num=None):
    # if layer_num == 1:
    #     print("\nCreating Recurrent blocks ...")
    input_num_filters = l_settings.output_shape[-1]  # channels_last: filters on the last axis
    if increase_dim:
        out_num_filters = input_num_filters * 2
    else:
        out_num_filters = input_num_filters

    filtersize = (3, 3)

    # Feed-forward convolution; its output is re-injected into every
    # unrolled recurrent step below
    conv1 = Conv2D(out_num_filters, filtersize, strides=(1, 1), padding='same',
                   data_format='channels_last')
    stack1 = conv1(l)
    stack2 = BatchNormalization()(stack1)
    stack3 = PReLU()(stack2)

    # First unrolled recurrent step
    conv2 = Conv2D(out_num_filters, filtersize, strides=(1, 1), padding='same',
                   kernel_initializer='he_normal', data_format='channels_last')
    stack4 = conv2(stack3)
    stack5 = add([stack1, stack4])
    stack6 = BatchNormalization()(stack5)
    stack7 = PReLU()(stack6)

    # Further unrolled steps; note weights=conv2.get_weights() only copies
    # conv2's initial values, it does not tie the weights across steps
    conv3 = Conv2D(out_num_filters, filtersize, strides=(1, 1), padding='same',
                   weights=conv2.get_weights(), data_format='channels_last')
    stack8 = conv3(stack7)
    stack9 = add([stack1, stack8])
    stack10 = BatchNormalization()(stack9)
    stack11 = PReLU()(stack10)

    conv4 = Conv2D(out_num_filters, filtersize, strides=(1, 1), padding='same',
                   weights=conv2.get_weights(), data_format='channels_last')
    stack12 = conv4(stack11)
    stack13 = add([stack1, stack12])
    stack14 = BatchNormalization()(stack13)
    stack15 = PReLU()(stack14)

    # pool when the caller marks this recurrent layer for downsampling
    if pool:
        stack16 = MaxPooling2D((2, 2), padding='same')(stack15)
        stack17 = Dropout(0.1)(stack16)
    else:
        stack17 = Dropout(0.1)(stack15)

    return stack17

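# A hedged usage sketch for RCL_block. The CIFAR-sized input, stem width, and
# classifier head are illustrative assumptions; `l_settings` just needs to be
# a layer whose output_shape reports the current channel count.
inputs = Input(shape=(32, 32, 3))
stem = Conv2D(64, (3, 3), padding='same', data_format='channels_last')
x = stem(inputs)
x = RCL_block(stem, x, pool=False, layer_num=1)
x = RCL_block(stem, x, pool=True, layer_num=2)

features = GlobalMaxPooling2D()(x)
outputs = Dense(10, activation='softmax')(features)
rcnn = Model(inputs, outputs)
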
"sequence_len": tf.constant(batch_size*[max_len]) }, signature="tokens", as_dict=True)["elmo"] from keras.models import Model, Input from keras.layers.merge import add from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda input_text = Input(shape=(max_len,), dtype=tf.string) embedding = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input_text) x = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(embedding) x_rnn = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(x) x = add([x, x_rnn]) # residual connection to the first biLSTM out = TimeDistributed(Dense(n_tags, activation="softmax"))(x) model = Model(input_text, out) model.load_weights('my_model_weights.h5', by_name=True) model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) X_tr, X_val = X_tr[:1213*batch_size], X_tr[-135*batch_size:] y_tr, y_val = y_tr[:1213*batch_size], y_tr[-135*batch_size:] y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1) y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1) history = model.fit(np.array(X_tr), y_tr, validation_data=(np.array(X_val), y_val), batch_size=batch_size, epochs=3, verbose=1) from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
def train(x_train, y_train, x_val, y_val, img_w, epochs=10, batch_size=64):
    # Input parameters
    img_h = 64
    words_per_epoch = 50000
    val_split = 0.2
    val_words = int(words_per_epoch * val_split)

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 200

    # Alternatively, split the data here:
    # x_train, y_train, x_val, y_val = get_train_test_data(X, Y, .25)

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    # fdir = os.path.dirname(get_file('wordlists.tgz',
    #                                 origin='wordlists.tgz', untar=True))
    fdir = 'wordlists'

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well as, if not better than, LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss functions with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    # the loss is computed inside the ctc lambda layer, so the compile-time
    # loss just passes y_pred through
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    test_func = K.function([input_data], [y_pred])

    filepath = "weight.best.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    csv_logger = CSVLogger('training.csv')
    callback_list = [checkpoint, csv_logger]
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                        validation_data=(x_val, y_val), callbacks=callback_list)
    return model, test_func

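# A hedged decoding sketch for the (model, test_func) pair returned above,
# using greedy CTC decoding. `alphabet` is an assumed index -> character
# table that excludes the blank; Keras's ctc_batch_cost treats the last
# class as the CTC blank, so indices >= len(alphabet) are dropped.
import itertools
import numpy as np

def decode_batch(test_func, word_batch, alphabet):
    out = test_func([word_batch])[0]          # (batch, timesteps, classes)
    results = []
    for j in range(out.shape[0]):
        best_path = np.argmax(out[j], axis=1)
        # collapse repeated labels, then drop blanks
        collapsed = [k for k, _ in itertools.groupby(best_path)]
        chars = [alphabet[k] for k in collapsed if k < len(alphabet)]
        results.append(''.join(chars))
    return results
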
def get_Model(training):
    input_shape = (img_w, img_h, 1)  # (128, 64, 1)

    # Make network
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')  # (None, 128, 64, 1)

    # Convolution layers (VGG-style)
    inner = Conv2D(64, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(inputs)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # (None, 64, 32, 64)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4', kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5', kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal', name='conv7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=(32, 2048), name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    # RNN layers
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)  # (None, 32, 256)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 256)
    lstm1_merged = BatchNormalization()(lstm1_merged)
    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 512)
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal', name='dense2')(lstm2_merged)  # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')  # (None, max_text_len)
    input_length = Input(name='input_length', shape=[1], dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss functions with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])  # (None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)

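# A hedged sketch of how the training/inference split above is typically
# consumed; the optimizer choice and weights filename are assumptions.
train_model = get_Model(training=True)
# loss comes out of the ctc lambda layer, so just pass y_pred through:
train_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam')

# Inference graph shares layer names with the training graph, so trained
# weights saved from train_model can be restored by name:
predict_model = get_Model(training=False)
# predict_model.load_weights('trained_weights.h5', by_name=True)  # assumed filename
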
def decoder_model():
    inputs = Input(shape=(int(VIDEO_LENGTH / 2), 16, 26, 64))

    # 10 x 16 x 26 feature maps
    convlstm_1 = ConvLSTM2D(filters=128, kernel_size=(3, 3), strides=(1, 1),
                            padding='same', return_sequences=True,
                            recurrent_dropout=0.2)(inputs)
    x = TimeDistributed(BatchNormalization())(convlstm_1)
    out_1 = TimeDistributed(Activation('tanh'))(x)
    # out_1 = TimeDistributed(LeakyReLU(alpha=0.2))(x)  # alternative activation

    convlstm_2 = ConvLSTM2D(filters=128, kernel_size=(3, 3), strides=(1, 1),
                            padding='same', return_sequences=True,
                            recurrent_dropout=0.2)(out_1)
    x = TimeDistributed(BatchNormalization())(convlstm_2)
    out_2 = TimeDistributed(Activation('tanh'))(x)

    res_1 = add([out_1, out_2])
    res_1 = UpSampling3D(size=(1, 2, 2))(res_1)

    # 10 x 32 x 52
    convlstm_3a = ConvLSTM2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                             padding='same', return_sequences=True,
                             recurrent_dropout=0.2)(res_1)
    x = TimeDistributed(BatchNormalization())(convlstm_3a)
    out_3a = TimeDistributed(Activation('tanh'))(x)

    convlstm_3b = ConvLSTM2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                             padding='same', return_sequences=True,
                             recurrent_dropout=0.2)(out_3a)
    x = TimeDistributed(BatchNormalization())(convlstm_3b)
    out_3b = TimeDistributed(Activation('tanh'))(x)

    res_2 = add([out_3a, out_3b])
    res_2 = UpSampling3D(size=(1, 2, 2))(res_2)

    # 10 x 64 x 104
    convlstm_4a = ConvLSTM2D(filters=16, kernel_size=(3, 3), strides=(1, 1),
                             padding='same', return_sequences=True,
                             recurrent_dropout=0.2)(res_2)
    x = TimeDistributed(BatchNormalization())(convlstm_4a)
    out_4a = TimeDistributed(Activation('tanh'))(x)

    convlstm_4b = ConvLSTM2D(filters=16, kernel_size=(3, 3), strides=(1, 1),
                             padding='same', return_sequences=True,
                             recurrent_dropout=0.2)(out_4a)
    x = TimeDistributed(BatchNormalization())(convlstm_4b)
    out_4b = TimeDistributed(Activation('tanh'))(x)

    res_3 = add([out_4a, out_4b])
    res_3 = UpSampling3D(size=(1, 2, 2))(res_3)

    # 10 x 128 x 208
    convlstm_5 = ConvLSTM2D(filters=3, kernel_size=(3, 3), strides=(1, 1),
                            padding='same', return_sequences=True,
                            recurrent_dropout=0.2)(res_3)
    predictions = TimeDistributed(Activation('tanh'))(convlstm_5)

    model = Model(inputs=inputs, outputs=predictions)
    return model

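# A hedged smoke test for the decoder. VIDEO_LENGTH = 20 is an assumption
# chosen so the decoder sees 10 frames, matching the shape comments above;
# with the 16x26 input grid and three 2x upsamplings, the output shape
# printed should be (None, 10, 128, 208, 3).
VIDEO_LENGTH = 20
decoder = decoder_model()
decoder.summary()
print(decoder.output_shape)
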
def get_model(img_w, img_h, minibatch_size, pool_size):
    conv_filters = 20
    kernel_size = (2, 2)
    time_dense_size = 32
    rnn_size = 256
    input_shape = (img_w, img_h, 1)

    img_gen = util.TextImageGenerator(minibatch_size=minibatch_size,
                                      img_w=img_w,
                                      img_h=img_h,
                                      downsample_factor=(pool_size ** 2),
                                      absolute_max_string_len=12)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    test_func = K.function([input_data], [y_pred])
    return model, test_func