Example 1
def basic_cnn(nb_words, EMBEDDING_DIM, \
              embedding_matrix, MAX_SEQUENCE_LENGTH, \
              num_rnn, num_dense, rate_drop_rnn, \
              rate_drop_dense, act):
    '''
    This is the basic CNN model.

    model: input layer; embedding layer; several CNN layers; dense layer; output layer
    '''
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)

    conv1 = Conv1D(filters=128,
                   kernel_size=1,
                   padding='same',
                   activation='relu')
    conv2 = Conv1D(filters=128,
                   kernel_size=2,
                   padding='same',
                   activation='relu')
    conv3 = Conv1D(filters=128,
                   kernel_size=3,
                   padding='same',
                   activation='relu')
    conv4 = Conv1D(filters=128,
                   kernel_size=4,
                   padding='same',
                   activation='relu')
    conv5 = Conv1D(filters=32,
                   kernel_size=5,
                   padding='same',
                   activation='relu')
    conv6 = Conv1D(filters=32,
                   kernel_size=6,
                   padding='same',
                   activation='relu')

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)

    conv1a = conv1(embedded_sequences_1)
    glob1a = GlobalAveragePooling1D()(conv1a)
    conv1b = conv1(embedded_sequences_2)
    glob1b = GlobalAveragePooling1D()(conv1b)

    conv2a = conv2(embedded_sequences_1)
    glob2a = GlobalAveragePooling1D()(conv2a)
    conv2b = conv2(embedded_sequences_2)
    glob2b = GlobalAveragePooling1D()(conv2b)

    conv3a = conv3(embedded_sequences_1)
    glob3a = GlobalAveragePooling1D()(conv3a)
    conv3b = conv3(embedded_sequences_2)
    glob3b = GlobalAveragePooling1D()(conv3b)

    conv4a = conv4(embedded_sequences_1)
    glob4a = GlobalAveragePooling1D()(conv4a)
    conv4b = conv4(embedded_sequences_2)
    glob4b = GlobalAveragePooling1D()(conv4b)

    conv5a = conv5(embedded_sequences_1)
    glob5a = GlobalAveragePooling1D()(conv5a)
    conv5b = conv5(embedded_sequences_2)
    glob5b = GlobalAveragePooling1D()(conv5b)

    conv6a = conv6(embedded_sequences_1)
    glob6a = GlobalAveragePooling1D()(conv6a)
    conv6b = conv6(embedded_sequences_2)
    glob6b = GlobalAveragePooling1D()(conv6b)

    mergea = concatenate([glob1a, glob2a, glob3a, glob4a, glob5a, glob6a])
    mergeb = concatenate([glob1b, glob2b, glob3b, glob4b, glob5b, glob6b])

    # We take the explicit absolute difference between the two sentence vectors,
    # and also their element-wise product, as a second measure of similarity
    diff = Lambda(lambda x: K.abs(x[0] - x[1]),
                  output_shape=(4 * 128 + 2 * 32, ))([mergea, mergeb])
    mul = Lambda(lambda x: x[0] * x[1],
                 output_shape=(4 * 128 + 2 * 32, ))([mergea, mergeb])

    merge = concatenate([diff, mul])

    # The MLP that determines the outcome
    x = Dropout(0.2)(merge)
    x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(x)

    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    preds = Dense(3, activation='softmax')(x)

    ########################################
    ## compile the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    # print(STAMP)
    return model
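
A minimal usage sketch (sizes are hypothetical; the num_rnn, num_dense and rate_drop_* arguments are unused by basic_cnn and only kept for a shared signature):

import numpy as np

nb_words, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH = 20000, 300, 30
embedding_matrix = np.random.normal(size=(nb_words, EMBEDDING_DIM))
model = basic_cnn(nb_words, EMBEDDING_DIM, embedding_matrix,
                  MAX_SEQUENCE_LENGTH, num_rnn=0, num_dense=0,
                  rate_drop_rnn=0.0, rate_drop_dense=0.0, act='relu')
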
Example 2
def train_test():
    # generate_img()
    imgs, labels, labels_encode = load_img()
    
    # labels_input = Input([None], dtype='int32')

    img_w = 156
    img_h = 64
    conv_filters = 16
    kernel_size = (3, 3)
    input_shape = (img_w, img_h, 1)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(len(chars) + 1, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    base_model = Model(inputs=input_data, outputs=y_pred)

    # note: this Input shadows the `labels` array returned by load_img() above
    labels = Input(name='the_labels', shape=[4], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    fit_model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    fit_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)




    # adadelta = Adadelta(lr=0.05)
    # fit_model.compile(
    #     loss=lambda y_true, y_pred: y_pred,
    #     optimizer=adadelta)
    # fit_model.summary()
    # import sys
    # sys.exit()
    
    fit_model.fit_generator(generate_data(imgs, labels_encode, 32),
                            epochs=10,
                            steps_per_epoch=100,
                            verbose=1)
    fit_model.save('fit_model.h5')
    base_model.save('model.h5')
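
The snippet assumes a ctc_lambda_func helper that is not shown; a standard definition, as in the Keras image_ocr example, is:

from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # K.ctc_batch_cost computes the CTC loss for each batch element
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
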
Example 3

    def build_nn_model(
        self,
        element_dim=103,
        conv_window=3,
        conv_filters=64,
        rnn_dim=64,
        recipe_latent_dim=8,
        intermediate_dim=64,
        latent_dim=8,
        max_material_length=10,
        charset_size=50,
    ):

        self.latent_dim = latent_dim
        self.recipe_latent_dim = recipe_latent_dim
        self.original_dim = max_material_length * charset_size

        x_mat = Input(shape=(max_material_length, charset_size),
                      name="material_in")
        conv_x1 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_1')(x_mat)
        conv_x2 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_2')(conv_x1)
        conv_x3 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_3')(conv_x2)
        h_flatten = Flatten()(conv_x3)
        h = Dense(intermediate_dim, activation="relu",
                  name="hidden_enc")(h_flatten)

        z_mean_func = Dense(latent_dim, name="means_enc")
        z_log_var_func = Dense(latent_dim, name="vars_enc")

        z_mean = z_mean_func(h)
        z_log_var = z_log_var_func(h)

        def sample(args):
            z_mean, z_log_var = args
            epsilon = K.random_normal(shape=(latent_dim, ),
                                      mean=0.0,
                                      stddev=1.0)
            return z_mean + K.exp(z_log_var / 2) * epsilon

        z = Lambda(sample, name="lambda_sample")([z_mean, z_log_var])
        c_element = Input(shape=(element_dim, ), name="cond_element_in")
        c_latent_recipe = Input(shape=(recipe_latent_dim, ),
                                name="cond_latent_recipe_in")

        z_conditional = Concatenate(name="concat_cond")(
            [z, c_latent_recipe, c_element])

        decoder_h = Dense(intermediate_dim,
                          activation="relu",
                          name="hidden_dec")
        decoder_h_repeat = RepeatVector(max_material_length, name="h_rep_dec")
        decoder_h_gru_1 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_1")
        decoder_h_gru_2 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_2")
        decoder_h_gru_3 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_3")
        decoder_mat = TimeDistributed(Dense(charset_size,
                                            activation='softmax'),
                                      name="means_material_dec")

        h_decoded = decoder_h(z_conditional)
        h_decode_repeat = decoder_h_repeat(h_decoded)
        gru_h_decode_1 = decoder_h_gru_1(h_decode_repeat)
        gru_h_decode_2 = decoder_h_gru_2(gru_h_decode_1)
        gru_h_decode_3 = decoder_h_gru_3(gru_h_decode_2)
        x_decoded_mat = decoder_mat(gru_h_decode_3)

        def vae_xent_loss(x, x_decoded_mean):
            x = K.flatten(x)
            x_decoded_mean = K.flatten(x_decoded_mean)
            rec_loss = self.original_dim * metrics.binary_crossentropy(
                x, x_decoded_mean)
            kl_loss = -0.5 * K.mean(
                1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return rec_loss + kl_loss

        encoder = Model(inputs=[x_mat], outputs=[z_mean])

        decoder_x_input = Input(shape=(latent_dim, ))

        decoder_inputs = Concatenate(name="concat_cond_dec")(
            [decoder_x_input, c_latent_recipe, c_element])
        _h_decoded = decoder_h(decoder_inputs)
        _h_decode_repeat = decoder_h_repeat(_h_decoded)
        _gru_h_decode_1 = decoder_h_gru_1(_h_decode_repeat)
        _gru_h_decode_2 = decoder_h_gru_2(_gru_h_decode_1)
        _gru_h_decode_3 = decoder_h_gru_3(_gru_h_decode_2)
        _x_decoded_mat = decoder_mat(_gru_h_decode_3)

        decoder = Model(inputs=[decoder_x_input, c_latent_recipe, c_element],
                        outputs=[_x_decoded_mat])

        vae = Model(inputs=[x_mat, c_latent_recipe, c_element],
                    outputs=[x_decoded_mat])

        vae.compile(optimizer=Adam(lr=0.001,
                                   beta_1=0.9,
                                   beta_2=0.999,
                                   epsilon=None,
                                   decay=0.0,
                                   amsgrad=True),
                    loss=vae_xent_loss,
                    metrics=['categorical_accuracy'])

        self.vae = vae
        self.encoder = encoder
        self.decoder = decoder
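
A decoding sketch for a trained instance (the name m, the all-zero conditioning vectors, and the element_dim of 103 are illustrative assumptions):

import numpy as np

z = np.random.normal(size=(1, m.latent_dim))
c_recipe = np.zeros((1, m.recipe_latent_dim))
c_element = np.zeros((1, 103))  # element_dim defaults to 103
material = m.decoder.predict([z, c_recipe, c_element])
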
Example 4
def gamba_unet():

    from keras import regularizers
    from keras.activations import softmax
    from keras.layers import Input, Conv2D, Conv2DTranspose, BatchNormalization, Concatenate, Lambda, Activation, Reshape, Add
    from keras.models import Model

    inputs = Input(shape=(432, 432, 2))
    weight_matrix = Lambda(lambda z: z[:, :, :, 1])(inputs)
    weight_matrix = Reshape((432, 432, 1))(weight_matrix)
    reshape = Lambda(lambda z: z[:, :, :, 0])(inputs)
    reshape = Reshape((432, 432, 1))(reshape)

    reg = 0.01

    #reshape=Dropout(0.2)(reshape)   ## Hyperparameter optimization only on visible layer
    Level1_l = Conv2D(filters=32,
                      kernel_size=(1, 1),
                      strides=1,
                      kernel_regularizer=regularizers.l2(reg))(reshape)
    Level1_l = BatchNormalization(axis=-1)(Level1_l)
    Level1_l_shortcut = Level1_l  #Level1_l#
    Level1_l = Activation('relu')(Level1_l)
    Level1_l = Conv2D(filters=32,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level1_l)
    # kernel_initializer='glorot_uniform' is the default
    Level1_l = BatchNormalization(axis=-1)(Level1_l)
    #Level1_l=InstanceNormalization(axis=-1)(Level1_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level1_l = Activation('relu')(Level1_l)
    #Level1_l=Dropout(0.5)(Level1_l)
    Level1_l = Conv2D(filters=32,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level1_l)
    Level1_l = BatchNormalization(axis=-1)(Level1_l)
    #Level1_l=InstanceNormalization(axis=-1)(Level1_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level1_l = Add()([Level1_l, Level1_l_shortcut])
    Level1_l = Activation('relu')(Level1_l)

    Level2_l = Conv2D(filters=64,
                      kernel_size=(2, 2),
                      strides=2,
                      kernel_regularizer=regularizers.l2(reg))(Level1_l)
    Level2_l = BatchNormalization(axis=-1)(Level2_l)
    Level2_l_shortcut = Level2_l
    Level2_l = Activation('relu')(Level2_l)
    Level2_l = Conv2D(filters=64,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level2_l)
    Level2_l = BatchNormalization(axis=-1)(Level2_l)
    #Level2_l=InstanceNormalization(axis=-1)(Level2_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level2_l = Activation('relu')(Level2_l)
    #Level2_l=Dropout(0.5)(Level2_l)
    Level2_l = Conv2D(filters=64,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level2_l)
    Level2_l = BatchNormalization(axis=-1)(Level2_l)
    #Level2_l=InstanceNormalization(axis=-1)(Level2_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level2_l = Add()([Level2_l, Level2_l_shortcut])
    Level2_l = Activation('relu')(Level2_l)

    Level3_l = Conv2D(filters=128,
                      kernel_size=(2, 2),
                      strides=2,
                      kernel_regularizer=regularizers.l2(reg))(Level2_l)
    Level3_l = BatchNormalization(axis=-1)(Level3_l)
    Level3_l_shortcut = Level3_l
    Level3_l = Activation('relu')(Level3_l)
    Level3_l = Conv2D(filters=128,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level3_l)
    Level3_l = BatchNormalization(axis=-1)(Level3_l)
    #Level3_l=InstanceNormalization(axis=-1)(Level3_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level3_l = Activation('relu')(Level3_l)
    #Level3_l=Dropout(0.5)(Level3_l)
    Level3_l = Conv2D(filters=128,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level3_l)
    Level3_l = BatchNormalization(axis=-1)(Level3_l)
    #Level3_l=InstanceNormalization(axis=-1)(Level3_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level3_l = Add()([Level3_l, Level3_l_shortcut])
    Level3_l = Activation('relu')(Level3_l)

    Level4_l = Conv2D(filters=256,
                      kernel_size=(2, 2),
                      strides=2,
                      kernel_regularizer=regularizers.l2(reg))(Level3_l)
    Level4_l = BatchNormalization(axis=-1)(Level4_l)
    Level4_l_shortcut = Level4_l
    Level4_l = Activation('relu')(Level4_l)
    Level4_l = Conv2D(filters=256,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level4_l)
    Level4_l = BatchNormalization(axis=-1)(Level4_l)
    #Level4_l=InstanceNormalization(axis=-1)(Level4_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level4_l = Activation('relu')(Level4_l)
    #Level4_l=Dropout(0.5)(Level4_l)
    Level4_l = Conv2D(filters=256,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level4_l)
    Level4_l = BatchNormalization(axis=-1)(Level4_l)
    #Level4_l=InstanceNormalization(axis=-1)(Level4_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level4_l = Add()([Level4_l, Level4_l_shortcut])
    Level4_l = Activation('relu')(Level4_l)

    Level5_l = Conv2D(filters=512,
                      kernel_size=(2, 2),
                      strides=2,
                      kernel_regularizer=regularizers.l2(reg))(Level4_l)
    Level5_l = BatchNormalization(axis=-1)(Level5_l)
    Level5_l_shortcut = Level5_l
    Level5_l = Activation('relu')(Level5_l)
    Level5_l = Conv2D(filters=512,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level5_l)
    Level5_l = BatchNormalization(axis=-1)(Level5_l)
    #Level5_l=InstanceNormalization(axis=-1)(Level5_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level5_l = Activation('relu')(Level5_l)
    #Level5_l=Dropout(0.5)(Level5_l)
    Level5_l = Conv2D(filters=512,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level5_l)
    Level5_l = BatchNormalization(axis=-1)(Level5_l)
    #Level5_l=InstanceNormalization(axis=-1)(Level5_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level5_l = Add()([Level5_l, Level5_l_shortcut])
    Level5_l = Activation('relu')(Level5_l)

    Level6_l = Conv2D(filters=1024,
                      kernel_size=(3, 3),
                      strides=3,
                      kernel_regularizer=regularizers.l2(reg))(Level5_l)
    Level6_l = BatchNormalization(axis=-1)(Level6_l)
    Level6_l_shortcut = Level6_l
    Level6_l = Activation('relu')(Level6_l)
    Level6_l = Conv2D(filters=1024,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level6_l)
    Level6_l = BatchNormalization(axis=-1)(Level6_l)
    #Level5_l=InstanceNormalization(axis=-1)(Level5_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level6_l = Activation('relu')(Level6_l)
    #Level5_l=Dropout(0.5)(Level5_l)
    Level6_l = Conv2D(filters=1024,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level6_l)
    Level6_l = BatchNormalization(axis=-1)(Level6_l)
    #Level5_l=InstanceNormalization(axis=-1)(Level5_l)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level6_l = Add()([Level6_l, Level6_l_shortcut])
    Level6_l = Activation('relu')(Level6_l)

    Level5_r = Conv2DTranspose(
        filters=512,
        kernel_size=(3, 3),
        strides=3,
        kernel_regularizer=regularizers.l2(reg))(Level6_l)
    Level5_r = BatchNormalization(axis=-1)(Level5_r)
    Level5_r_shortcut = Level5_r
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level5_r = Activation('relu')(Level5_r)
    merge5 = Concatenate(axis=-1)([Level5_l, Level5_r])
    Level5_r = Conv2D(filters=512,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(merge5)
    Level5_r = BatchNormalization(axis=-1)(Level5_r)
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level5_r = Activation('relu')(Level5_r)
    #Level4_r=Dropout(0.5)(Level4_r)
    Level5_r = Conv2D(filters=512,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level5_r)
    Level5_r = BatchNormalization(axis=-1)(Level5_r)
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level5_r = Add()([Level5_r, Level5_r_shortcut])
    Level5_r = Activation('relu')(Level5_r)

    Level4_r = Conv2DTranspose(
        filters=256,
        kernel_size=(2, 2),
        strides=2,
        kernel_regularizer=regularizers.l2(reg))(Level5_r)
    Level4_r = BatchNormalization(axis=-1)(Level4_r)
    Level4_r_shortcut = Level4_r
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level4_r = Activation('relu')(Level4_r)
    merge4 = Concatenate(axis=-1)([Level4_l, Level4_r])
    Level4_r = Conv2D(filters=256,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(merge4)
    Level4_r = BatchNormalization(axis=-1)(Level4_r)
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level4_r = Activation('relu')(Level4_r)
    #Level4_r=Dropout(0.5)(Level4_r)
    Level4_r = Conv2D(filters=256,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level4_r)
    Level4_r = BatchNormalization(axis=-1)(Level4_r)
    #Level4_r=InstanceNormalization(axis=-1)(Level4_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level4_r = Add()([Level4_r, Level4_r_shortcut])
    Level4_r = Activation('relu')(Level4_r)

    Level3_r = Conv2DTranspose(
        filters=128,
        kernel_size=(2, 2),
        strides=2,
        kernel_regularizer=regularizers.l2(reg))(Level4_r)
    Level3_r = BatchNormalization(axis=-1)(Level3_r)
    Level3_r_shortcut = Level3_r
    #Level3_r=InstanceNormalization(axis=-1)(Level3_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level3_r = Activation('relu')(Level3_r)
    merge3 = Concatenate(axis=-1)([Level3_l, Level3_r])
    Level3_r = Conv2D(filters=128,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(merge3)
    Level3_r = BatchNormalization(axis=-1)(Level3_r)
    #Level3_r=InstanceNormalization(axis=-1)(Level3_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level3_r = Activation('relu')(Level3_r)
    #Level3_r=Dropout(0.5)(Level3_r)
    Level3_r = Conv2D(filters=128,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level3_r)
    Level3_r = BatchNormalization(axis=-1)(Level3_r)
    #Level3_r=InstanceNormalization(axis=-1)(Level3_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level3_r = Add()([Level3_r, Level3_r_shortcut])
    Level3_r = Activation('relu')(Level3_r)

    Level2_r = Conv2DTranspose(
        filters=64,
        kernel_size=(2, 2),
        strides=2,
        kernel_regularizer=regularizers.l2(reg))(Level3_r)
    Level2_r = BatchNormalization(axis=-1)(Level2_r)
    Level2_r_shortcut = Level2_r
    #Level2_r=InstanceNormalization(axis=-1)(Level2_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level2_r = Activation('relu')(Level2_r)
    merge2 = Concatenate(axis=-1)([Level2_l, Level2_r])
    Level2_r = Conv2D(filters=64,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(merge2)
    Level2_r = BatchNormalization(axis=-1)(Level2_r)
    #Level2_r=InstanceNormalization(axis=-1)(Level2_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level2_r = Activation('relu')(Level2_r)
    #Level2_r=Dropout(0.5)(Level2_r)
    Level2_r = Conv2D(filters=64,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level2_r)
    Level2_r = BatchNormalization(axis=-1)(Level2_r)
    #Level2_r=InstanceNormalization(axis=-1)(Level2_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level2_r = Add()([Level2_r, Level2_r_shortcut])
    Level2_r = Activation('relu')(Level2_r)

    Level1_r = Conv2DTranspose(
        filters=32,
        kernel_size=(2, 2),
        strides=2,
        kernel_regularizer=regularizers.l2(reg))(Level2_r)
    Level1_r = BatchNormalization(axis=-1)(Level1_r)
    Level1_r_shortcut = Level1_r
    #Level1_r=InstanceNormalization(axis=-1)(Level1_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level1_r = Activation('relu')(Level1_r)
    merge1 = Concatenate(axis=-1)([Level1_l, Level1_r])
    Level1_r = Conv2D(filters=32,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(merge1)
    Level1_r = BatchNormalization(axis=-1)(Level1_r)
    #Level1_r=InstanceNormalization(axis=-1)(Level1_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level1_r = Activation('relu')(Level1_r)
    #Level1_r=Dropout(0.5)(Level1_r)
    Level1_r = Conv2D(filters=32,
                      kernel_size=(3, 3),
                      strides=1,
                      padding='same',
                      kernel_regularizer=regularizers.l2(reg))(Level1_r)
    Level1_r = BatchNormalization()(Level1_r)
    #Level1_r=InstanceNormalization(axis=-1)(Level1_r)  ## Instance Normalization. Use InstanceNormalization() for Layer Normalization.
    Level1_r = Add()([Level1_r, Level1_r_shortcut])
    Level1_r = Activation('relu')(Level1_r)
    output = Conv2D(filters=7,
                    kernel_size=(1, 1),
                    strides=1,
                    kernel_regularizer=regularizers.l2(reg))(Level1_r)
    #output=BatchNormalization(axis=-1)(output)
    output = Lambda(lambda x: softmax(x, axis=-1))(output)
    output = Concatenate(axis=-1)([output, weight_matrix])
    model = Model(inputs=inputs, outputs=output)
    return model
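
Because the forward pass appends weight_matrix as an extra channel of the output, the matching loss has to split it off again; a hedged sketch of such a per-pixel weighted cross-entropy (the function name is hypothetical):

from keras import backend as K

def weighted_categorical_crossentropy(y_true, y_pred):
    weights = y_pred[:, :, :, 7]   # the appended per-pixel weight map
    probs = y_pred[:, :, :, :7]    # the 7-class softmax output
    loss = K.categorical_crossentropy(y_true, probs)
    return K.mean(loss * weights)
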
Example 5
def create_model(anchors,
                 class_names,
                 feature_extractor='darknet19',
                 load_pretrained=False,
                 pretrained_path=None,
                 freeze_body=False):
    '''
    returns the body of the model and the model

    # Params:

    load_pretrained: whether or not to load the pretrained model or initialize all weights

    freeze_body: whether or not to freeze all weights except for the last layer's

    # Returns:

    model_body: YOLOv2 with new output layer

    model: YOLOv2 with custom loss Lambda layer

    '''
    num_anchors = len(anchors)

    detectors_mask_shape = (FEAT_H, FEAT_W, num_anchors, 1)
    matching_boxes_shape = (FEAT_H, FEAT_W, num_anchors, 5)

    # Create model input layers.
    image_input = Input(shape=(IMAGE_H, IMAGE_W, 3))
    boxes_input = Input(shape=(None, 5))

    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    # Create model body.
    if feature_extractor == 'darknet19':
        yolo_model = yolo_body_darknet(
            image_input,
            len(anchors),
            len(class_names),
            network_config=[SHALLOW_DETECTOR, USE_X0_FEATURE])
    elif feature_extractor == 'mobilenet':
        yolo_model = yolo_body_mobilenet(
            image_input,
            len(anchors),
            len(class_names),
            network_config=[SHALLOW_DETECTOR, USE_X0_FEATURE])
    else:
        raise ValueError('Unknown feature_extractor: ' + feature_extractor)

    # yolo_model.summary()

    if load_pretrained:
        if pretrained_path:
            yolo_model.load_weights(pretrained_path)
        else:
            print('No pretrained weights!')

    if freeze_body:
        for layer in yolo_model.layers:
            layer.trainable = False

    model_body = Model(image_input, yolo_model.output)
    # model_body.summary()
    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(yolo_loss,
                            output_shape=(1, ),
                            name='yolo_loss',
                            arguments={
                                'anchors': anchors,
                                'num_classes': len(class_names)
                            })([
                                model_body.output, boxes_input,
                                detectors_mask_input, matching_boxes_input
                            ])

    model = Model([
        model_body.input, boxes_input, detectors_mask_input,
        matching_boxes_input
    ], model_loss)
    return model_body, model
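
As in the CTC example above, the loss is computed inside the graph, so a typical compile step (a sketch, not taken from the original source) uses a pass-through loss on the 'yolo_loss' output:

model.compile(optimizer='adam',
              loss={'yolo_loss': lambda y_true, y_pred: y_pred})
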
Example 6

from keras.models import Sequential, Model
from keras.layers import Cropping2D, Lambda
from keras.layers import Dense, Flatten, Dropout, MaxPooling2D
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint

# Adding the original NVIDIA model to train on the sample data for completing track 1

model = Sequential()
# trim image to only see section with road
# Did not help much
#model.add(Cropping2D(cropping=((50,20), (0,0))))

# preprocessing
model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=(160, 320, 3)))

model.add(Conv2D(24, (5, 5), activation='elu', strides=(2, 2)))
model.add(Conv2D(36, (5, 5), activation='elu', strides=(2, 2)))
model.add(Conv2D(48, (5, 5), activation='elu', strides=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='elu'))
model.add(Conv2D(64, (3, 3), activation='elu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(100, activation='elu'))
model.add(Dense(50, activation='elu'))
model.add(Dense(10, activation='elu'))
model.add(Dense(1))

# Adding checkpoint
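The snippet stops at the checkpoint comment; a plausible continuation with the already-imported ModelCheckpoint (the filename pattern is hypothetical):

checkpoint = ModelCheckpoint('model-{epoch:02d}.h5',
                             monitor='val_loss',
                             save_best_only=True)
# then pass callbacks=[checkpoint] to model.fit(...)
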
Example 7

# compile and train the model using the generator function
train_generator = generator(train_samples_x, train_samples_y, batch_size=128)
validation_generator = generator(validation_samples_x,
                                 validation_samples_y,
                                 batch_size=128)

# Definition of Hyper Parameters
epochs = 4
ch, row, col = 3, 65, 320  # Trimmed image format
drop_rate = 0.2

# Layout of model
model = Sequential()
model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(160, 320, 3)))
model.add(Cropping2D(cropping=((70, 25), (0, 0))))
model.add(Convolution2D(24, (5, 5), strides=(2, 2), activation='relu'))
model.add(Convolution2D(36, (5, 5), strides=(2, 2), activation='relu'))
model.add(Convolution2D(48, (5, 5), strides=(2, 2), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(100))
model.add(Dropout(drop_rate))
model.add(Dense(50))
model.add(Dropout(drop_rate))
model.add(Dense(10))
model.add(Dropout(drop_rate))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
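
A training sketch wiring up the two generators defined above (the step counts follow the batch size of 128):

model.fit_generator(train_generator,
                    steps_per_epoch=len(train_samples_x) // 128,
                    validation_data=validation_generator,
                    validation_steps=len(validation_samples_x) // 128,
                    epochs=epochs)
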
Example 8
def myCrossLayer(nb_flow=2,
                 map_height=16,
                 map_width=8,
                 nb_layers=3,
                 window_len=12,
                 nb_filter=64,
                 external_dim=None,
                 filter_size=3):
    """
    the final model
    :param nb_flow: number of measurements, also number of channels of each picture sample
    :param map_height: grid map height, here is 16
    :param map_width: grid map width, here is 8
    :param nb_layers: number of cnn layers
    :return:
    """
    window_len_pic_fea = []
    main_inputs = []
    if external_dim is None:
        for i in range(window_len):
            inputs = Input(shape=(nb_flow, map_height, map_width))
            main_inputs.append(inputs)
            cnn_fea = dense_conv3D(nb_filter=nb_filter,
                                   nb_col=filter_size,
                                   nb_row=filter_size,
                                   padding='same',
                                   nb_layers=nb_layers,
                                   dense_units=1024,
                                   dropout_rate=0.5)(inputs)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)
    # add external feature here
    if external_dim is not None and external_dim > 0:
        for i in range(window_len):
            # todo: use two tensors to represent the data and meta_data
            # respectively (a nested tuple is not a valid Keras Input
            # shape, so the picture data and the external features need
            # separate Inputs)
            inputs_0 = Input(shape=(nb_flow, map_height, map_width))
            inputs_1 = Input(shape=(external_dim, ))
            main_inputs.append(inputs_0)
            cnn_fea = dense_conv3D(nb_filter=nb_filter,
                                   nb_col=filter_size,
                                   nb_row=filter_size,
                                   padding='same',
                                   nb_layers=nb_layers,
                                   dense_units=1024,
                                   dropout_rate=0.5)(inputs_0)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)

        external_input = inputs_1
        # external_input = Input(shape=(external_dim,))
        main_inputs.append(external_input)
        # todo: change the code here
        embedding = Dense(nb_layers * 1024, activation='relu')(external_input)
        external_out = Lambda(expand_dim_backend)(embedding)
        new_concatenate_fea = []
        for pic_fea in window_len_pic_fea:
            tmp_con = Concatenate(axis=-1)([pic_fea, external_out])
            new_concatenate_fea.append(tmp_con)
        window_len_pic_fea = new_concatenate_fea

    outputs = add_densenet(nb_flow=nb_flow,
                           map_height=map_height,
                           map_width=map_width)(window_len_pic_fea)
    # outputs = add_lstm(nb_flow=nb_flow, map_height=map_height, map_width=map_width)(window_len_pic_fea)
    # outputs = attention_after_LSTM(nb_flow=nb_flow, map_height=map_height,
    #                                map_width=map_width, window_len=window_len)(window_len_pic_fea)
    model = Model(inputs=main_inputs, outputs=outputs)
    return model
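
expand_dim_backend is not shown; a likely definition, assumed from the commented-out expand_dims call above:

from keras import backend as K

def expand_dim_backend(x):
    # add a length-1 time axis so the per-frame features can be stacked
    return K.expand_dims(x, axis=1)
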
Example 9
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0.,
                              stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon


z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])

decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

##### KEY HERE

x_decoded_mean2 = Reshape([28, 28, 1])(x_decoded_mean)


def generator_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(
        x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

Example 10

    def build_model(self):
        self.f_enc = self.build_encoder(output_dims=self.z_dims * 2)
        self.f_dec = self.build_decoder()
        self.f_dis = self.build_discriminator()
        self.f_cls = self.build_classifier()

        # Algorithm
        x_r = Input(shape=self.input_shape)
        c = Input(shape=(self.num_attrs, ))
        z_params = self.f_enc([x_r, c])

        z_avg = Lambda(lambda x: x[:, :self.z_dims],
                       output_shape=(self.z_dims, ))(z_params)
        z_log_var = Lambda(lambda x: x[:, self.z_dims:],
                           output_shape=(self.z_dims, ))(z_params)
        z = Lambda(sample_normal,
                   output_shape=(self.z_dims, ))([z_avg, z_log_var])

        kl_loss = KLLossLayer()([z_avg, z_log_var])

        z_p = Input(shape=(self.z_dims, ))

        x_f = self.f_dec([z, c])
        x_p = self.f_dec([z_p, c])

        y_r, f_D_x_r = self.f_dis(x_r)
        y_f, f_D_x_f = self.f_dis(x_f)
        y_p, f_D_x_p = self.f_dis(x_p)

        d_loss = DiscriminatorLossLayer()([y_r, y_f, y_p])

        c_r, f_C_x_r = self.f_cls(x_r)
        c_f, f_C_x_f = self.f_cls(x_f)
        c_p, f_C_x_p = self.f_cls(x_p)

        g_loss = GeneratorLossLayer()(
            [x_r, x_f, f_D_x_r, f_D_x_f, f_C_x_r, f_C_x_f])
        gd_loss = FeatureMatchingLayer()([f_D_x_r, f_D_x_p])
        gc_loss = FeatureMatchingLayer()([f_C_x_r, f_C_x_p])

        c_loss = ClassifierLossLayer()([c, c_r])

        # Build classifier trainer
        set_trainable(self.f_enc, False)
        set_trainable(self.f_dec, False)
        set_trainable(self.f_dis, False)
        set_trainable(self.f_cls, True)

        self.cls_trainer = Model(inputs=[x_r, c], outputs=[c_loss])
        self.cls_trainer = multi_gpu_model(self.cls_trainer, gpus=self.gpus)
        self.cls_trainer.compile(loss=[zero_loss],
                                 optimizer=Adam(lr=1.0e-4, beta_1=0.9))
        self.cls_trainer.summary()

        # Build discriminator trainer
        set_trainable(self.f_enc, False)
        set_trainable(self.f_dec, False)
        set_trainable(self.f_dis, True)
        set_trainable(self.f_cls, False)

        self.dis_trainer = Model(inputs=[x_r, c, z_p], outputs=[d_loss])
        self.dis_trainer = multi_gpu_model(self.dis_trainer, gpus=self.gpus)
        self.dis_trainer.compile(
            loss=[zero_loss],
            optimizer=Adam(lr=1.0e-4, beta_1=0.9),
            metrics=[discriminator_accuracy(y_r, y_f, y_p)])
        self.dis_trainer.summary()

        # Build generator trainer
        set_trainable(self.f_enc, False)
        set_trainable(self.f_dec, True)
        set_trainable(self.f_dis, False)
        set_trainable(self.f_cls, False)

        self.dec_trainer = Model(inputs=[x_r, c, z_p],
                                 outputs=[g_loss, gd_loss, gc_loss])
        self.dec_trainer = multi_gpu_model(self.dec_trainer, gpus=self.gpus)
        self.dec_trainer.compile(loss=[zero_loss, zero_loss, zero_loss],
                                 optimizer=Adam(lr=1.0e-4, beta_1=0.9),
                                 metrics=[generator_accuracy(y_p, y_f)])

        # Build autoencoder
        set_trainable(self.f_enc, True)
        set_trainable(self.f_dec, False)
        set_trainable(self.f_dis, False)
        set_trainable(self.f_cls, False)

        self.enc_trainer = Model(inputs=[x_r, c, z_p],
                                 outputs=[g_loss, kl_loss])
        self.enc_trainer = multi_gpu_model(self.enc_trainer, gpus=self.gpus)
        self.enc_trainer.compile(loss=[zero_loss, zero_loss],
                                 optimizer=Adam(lr=1.0e-4, beta_1=0.9))
        self.enc_trainer.summary()

        # Store trainers
        self.store_to_save('cls_trainer')
        self.store_to_save('dis_trainer')
        self.store_to_save('dec_trainer')
        self.store_to_save('enc_trainer')
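
zero_loss is assumed to be defined elsewhere in the class; a common definition when the real losses are contributed by custom layers via add_loss:

from keras import backend as K

def zero_loss(y_true, y_pred):
    # the loss layers already add their terms; give compile() a no-op objective
    return K.zeros_like(y_pred)
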
Example 11
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(np.prod(img_shape)))
    model.add(Activation('sigmoid'))
    model.add(Reshape(img_shape))

    return model


encoder = encoder_model()
generator = generator_model()

x = Input(shape=img_shape)

z_mean, z_log_var = encoder(x)

z = Lambda(sampling)([z_mean, z_log_var])

recon_x = generator(z)

# instantiate VAE model
vae = Model(x, recon_x)

# Compute VAE loss
xent_loss = K.sum(metrics.binary_crossentropy(x, recon_x), axis=1)
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                       axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
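
Since the loss was attached with add_loss, fit() needs no explicit targets (a sketch assuming flattened MNIST-style arrays x_train and x_test):

vae.fit(x_train,
        shuffle=True,
        epochs=50,
        batch_size=128,
        validation_data=(x_test, None))
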
Example 12
x = Dense(2)(x)
ip1 = PReLU(name='ip1')(x)
ip2 = Dense(num_classes, activation='softmax')(ip1)

model = Model(inputs=inputs, outputs=[ip2])
model.compile(loss="categorical_crossentropy",
              optimizer=SGD(lr=0.05),
              metrics=['accuracy'])

if isCenterloss:
    lambda_c = 0.2
    input_target = Input(shape=(1, ))  # single value ground truth labels as inputs
    centers = Embedding(10, 2)(input_target)
    l2_loss = Lambda(
        lambda x: K.sum(K.square(x[0] - x[1][:, 0]), 1, keepdims=True),
        name='l2_loss')([ip1, centers])
    model_centerloss = Model(inputs=[inputs, input_target],
                             outputs=[ip2, l2_loss])
    model_centerloss.compile(
        optimizer=SGD(lr=0.05),
        loss=["categorical_crossentropy", lambda y_true, y_pred: y_pred],
        loss_weights=[1, lambda_c],
        metrics=['accuracy'])

# prepare callback
histories = TYY_callbacks.Histories(isCenterloss)

# fit
if isCenterloss:
    random_y_train = np.random.rand(x_train.shape[0], 1)
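
A fit sketch for the center-loss branch (y_train_value, the integer class labels fed to input_target, is a hypothetical name; random_y_train is the dummy target for the l2_loss output):

model_centerloss.fit([x_train, y_train_value],
                     [y_train, random_y_train],
                     batch_size=64,
                     epochs=10,
                     callbacks=[histories])
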
Example 13
  print("loaded model",model.layers[0].input_shape[1])
#  ml = model.layers[0].input_shape[1]
#  if (ml != max_length):
#    print("model length",ml,"different from data length",max_length)
#    max_length = ml
else:
#  model = Sequential()
#  model.add(Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=False, input_shape=(max_length,)))
#  model.add(LSTM_use(hidden_size, return_sequences=True))
#  model.add(LSTM_use(max_output + 1, return_sequences=False))
#  model.add(Dense(max_output +1))
#  model.add(Activation('softmax'))
  
  inputs = Input(shape=(None,None))
  print("k",inputs.shape)
  x0 = Lambda(lambda x : x[:,0,:])(inputs)
  x1 = Lambda(lambda x : x[:,1,:])(inputs)
  x2 = Lambda(lambda x : x[:,2,:])(inputs)
  x3 = Lambda(lambda x : x[:,3,:])(inputs)
#  x4 = Lambda(lambda x : x[:,4,:])(inputs)
#  x5 = Lambda(lambda x : x[:,5,:])(inputs)
#  x6 = Lambda(lambda x : x[:,6,:])(inputs)
#  x7 = Lambda(lambda x : x[:,7,:])(inputs)
#  x8 = Lambda(lambda x : x[:,8,:])(inputs)
#  x9 = Lambda(lambda x : x[:,9,:])(inputs)
  embeds0 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x0)
  embeds1 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x1)
  embeds2 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x2)
  embeds3 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x3)
#  embeds4 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=True)(x4)
#  embeds5 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=True)(x5)
Example 14
def basic_attention(nb_words, EMBEDDING_DIM, \
                    embedding_matrix, MAX_SEQUENCE_LENGTH, \
                    num_rnn, num_dense, rate_drop_rnn, \
                    rate_drop_dense, act):
    '''
    This is the basic attention model.

    model: input layer; embedding layer; RNN layer; attention layer; dense layer; output layer
    '''
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    rnn_layer = Bidirectional(
        GRU(num_rnn,
            dropout=rate_drop_rnn,
            recurrent_dropout=rate_drop_rnn,
            return_sequences=True))
    attention_W = TimeDistributed(Dense(350, activation='tanh'))
    attention_w = TimeDistributed(Dense(1))
    attention_softmax = Activation('softmax')
    attention_sum = Lambda(lambda x: K.sum(x, axis=1))

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)
    x1 = rnn_layer(embedded_sequences_1)

    attention1 = attention_W(x1)
    attention1 = attention_w(attention1)
    attention1 = attention_softmax(attention1)
    attention1 = Permute([2, 1])(attention1)
    x1 = Permute([2, 1])(x1)
    x1 = multiply([attention1, x1])
    x1 = Permute([2, 1])(x1)
    x1 = attention_sum(x1)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)
    x2 = rnn_layer(embedded_sequences_2)

    attention2 = attention_W(x2)
    attention2 = attention_w(attention2)
    attention2 = attention_softmax(attention2)
    attention2 = Permute([2, 1])(attention2)
    x2 = Permute([2, 1])(x2)
    x2 = multiply([attention2, x2])
    x2 = Permute([2, 1])(x2)
    x2 = attention_sum(x2)

    merged = multiply([x1, x2])
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    preds = Dense(3, activation='softmax')(merged)

    ########################################
    ## compile the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    # print(STAMP)
    return model
Example 15
def f(x):
    return Lambda(batchnorm,
                  output_shape=tuple([xx for xx in x._keras_shape
                                      if xx is not None]))(x)

Example 16

def generate_train_batch():
    # mirrors generate_validation_batch below, over the training split
    # (train_part is assumed by symmetry with validation_part)
    while True:
        for i in range(train_part.shape[0] // batch_size):
            train_data_batch = train_part.iloc[i * batch_size:(i + 1) *
                                               batch_size]
            yield get_matrices(train_data_batch)


def generate_validation_batch():
    while True:
        for i in range(validation_part.shape[0] // batch_size):
            validation_data_batch = validation_part.iloc[i *
                                                         batch_size:(i + 1) *
                                                         batch_size]
            yield get_matrices(validation_data_batch)


inp = Input(shape=(4, None, 300))

get_context_2 = Lambda(lambda batch: batch[:, 0, :, :])(inp)
get_context_1 = Lambda(lambda batch: batch[:, 1, :, :])(inp)
get_context_0 = Lambda(lambda batch: batch[:, 2, :, :])(inp)
get_reply = Lambda(lambda batch: batch[:, 3, :, :])(inp)

shared_lstm = LSTM(100)

encoded_context_2 = shared_lstm(get_context_2)
encoded_context_1 = shared_lstm(get_context_1)
encoded_context_0 = shared_lstm(get_context_0)
encoded_reply = shared_lstm(get_reply)

stacked = keras.layers.concatenate(
    [encoded_context_2, encoded_context_1, encoded_context_0, encoded_reply])

drop0 = Dropout(0.4)(stacked)
Example 17
def separable_inception_resnet_block(x,
                                     scale,
                                     block_type,
                                     block_idx,
                                     activation='relu'):
    """Adds a Inception-ResNet block.

    This function builds 3 types of Inception-ResNet blocks mentioned
    in the paper, controlled by the `block_type` argument (which is the
    block name used in the official TF-slim implementation):
        - Inception-ResNet-A: `block_type='block35'`
        - Inception-ResNet-B: `block_type='block17'`
        - Inception-ResNet-C: `block_type='block8'`

    # Arguments
        x: input tensor.
        scale: scaling factor to scale the residuals (i.e., the output of
            passing `x` through an inception module) before adding them
            to the shortcut branch. Let `r` be the output from the residual branch,
            the output of this block will be `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`, determines
            the network structure in the residual branch.
        block_idx: an `int` used for generating layer names. The Inception-ResNet blocks
            are repeated many times in this network. We use `block_idx` to identify
            each of the repetitions. For example, the first Inception-ResNet-A block
            will have `block_type='block35', block_idx=0`, and the layer names will have
            a common prefix `'block35_0'`.
        activation: activation function to use at the end of the block
            (see [activations](../activations.md)).
            When `activation=None`, no activation is applied
            (i.e., "linear" activation: `a(x) = x`).

    # Returns
        Output tensor for the block.

    # Raises
        ValueError: if `block_type` is not one of `'block35'`,
            `'block17'` or `'block8'`.
    """
    if block_type == 'block35':
        branch_0 = conv2d_bn(x, 32, 1)
        branch_1 = conv2d_bn(x, 32, 1)
        branch_1 = SeparableConv2D(filters=32, kernel_size=3,
                                   padding='same')(branch_1)
        branch_2 = conv2d_bn(x, 32, 1)
        branch_2 = SeparableConv2D(filters=48, kernel_size=3,
                                   padding='same')(branch_2)
        branch_2 = SeparableConv2D(filters=64, kernel_size=3,
                                   padding='same')(branch_2)
        branches = [branch_0, branch_1, branch_2]
    elif block_type == 'block17':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 128, 1)
        branch_1 = SeparableConv2D(filters=160,
                                   kernel_size=[1, 7],
                                   padding='same')(branch_1)
        branch_1 = SeparableConv2D(filters=192,
                                   kernel_size=[7, 1],
                                   padding='same')(branch_1)
        branches = [branch_0, branch_1]
    elif block_type == 'block8':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 192, 1)
        branch_1 = SeparableConv2D(filters=224,
                                   kernel_size=[1, 3],
                                   padding='same')(branch_1)
        branch_1 = SeparableConv2D(filters=256,
                                   kernel_size=[3, 1],
                                   padding='same')(branch_1)
        branches = [branch_0, branch_1]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    block_name = block_type + '_' + str(block_idx)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    mixed = Concatenate(axis=channel_axis,
                        name=block_name + '_mixed')(branches)
    up = conv2d_bn(mixed,
                   K.int_shape(x)[channel_axis],
                   1,
                   activation=None,
                   use_bias=True,
                   name=block_name + '_conv')

    x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
               output_shape=K.int_shape(x)[1:],
               arguments={'scale': scale},
               name=block_name)([x, up])
    if activation is not None:
        x = Activation(activation, name=block_name + '_ac')(x)
    return x
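
conv2d_bn is assumed to match the stock Inception-ResNet-v2 implementation; a corresponding sketch:

from keras import backend as K
from keras.layers import Activation, BatchNormalization, Conv2D

def conv2d_bn(x, filters, kernel_size, strides=1, padding='same',
              activation='relu', use_bias=False, name=None):
    # conv -> batch norm -> activation, as in keras-applications
    x = Conv2D(filters, kernel_size, strides=strides, padding=padding,
               use_bias=use_bias, name=name)(x)
    if not use_bias:
        bn_axis = 1 if K.image_data_format() == 'channels_first' else 3
        bn_name = None if name is None else name + '_bn'
        x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
    if activation is not None:
        ac_name = None if name is None else name + '_ac'
        x = Activation(activation, name=ac_name)(x)
    return x
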
Example 18
    def build_model(self,
                    relu_target,
                    input_tensor,
                    style_encoded_tensor=None,
                    batch_size=8,
                    feature_weight=1,
                    pixel_weight=1,
                    tv_weight=0,
                    learning_rate=1e-4,
                    lr_decay=5e-5):
        '''Build the EncoderDecoder architecture for a given relu layer.

            Args:
                relu_target: Layer of VGG to decode from
                input_tensor: If None then a placeholder will be created, else use this tensor as the input to the encoder
                style_encoded_tensor: Tensor for style image features at the same relu layer. Used only at test time.
                batch_size: Batch size for training
                feature_weight: Float weight for feature reconstruction loss
                pixel_weight: Float weight for pixel reconstruction loss
                tv_weight: Float weight for total variation loss
                learning_rate: Float LR
                lr_decay: Float linear decay for training
            Returns:
                EncoderDecoder namedtuple with input/encoding/output tensors and ops for training.
        '''
        with tf.name_scope('encoder_decoder_' + relu_target):

            ### Build encoder for reluX_1
            with tf.name_scope('content_encoder_' + relu_target):
                if input_tensor is None:
                    # This is the first level encoder that takes original content imgs
                    content_imgs = tf.placeholder_with_default(
                        tf.constant([[[[0., 0., 0.]]]]),
                        shape=(None, None, None, 3),
                        name='content_imgs')
                else:
                    # This is an intermediate-level encoder that takes output tensor from previous level as input
                    content_imgs = input_tensor

                # Build content layer encoding model
                content_layer = self.vgg_model.get_layer(relu_target).output
                content_encoder_model = Model(inputs=self.vgg_model.input,
                                              outputs=content_layer)

                # Setup content layer encodings for content images
                content_encoded = content_encoder_model(content_imgs)

            ### Build style encoder & WCT if test mode
            if self.mode != 'train':
                # Apply WCT if flag is set to true. Otherwise, pass content_encoded along unchanged.
                with tf.name_scope('wct_' + relu_target):
                    decoder_input = tf.cond(
                        self.apply_wct, lambda: wct_tf(
                            content_encoded, style_encoded_tensor, self.alpha),
                        lambda: content_encoded)
            else:
                decoder_input = content_encoded

            ### Build decoder
            with tf.name_scope('decoder_' + relu_target):
                n_channels = content_encoded.get_shape()[-1].value
                decoder_model = self.build_decoder(input_shape=(None, None,
                                                                n_channels),
                                                   relu_target=relu_target)

                # Wrap the decoder_input tensor so that it has the proper shape for decoder_model
                decoder_input_wrapped = tf.placeholder_with_default(
                    decoder_input, shape=[None, None, None, n_channels])

                # Reconstruct/decode from encoding
                decoded = decoder_model(
                    Lambda(lambda x: x)(decoder_input_wrapped)
                )  # Lambda converts TF tensor to Keras

            # Content layer encoding for stylized out
            decoded_encoded = content_encoder_model(decoded)

        if self.mode == 'train':  # Train & summary ops only needed for training phase
            ### Losses
            with tf.name_scope('losses_' + relu_target):
                # Feature loss between encodings of original & reconstructed
                feature_loss = feature_weight * mse(decoded_encoded,
                                                    content_encoded)

                # Pixel reconstruction loss between decoded/reconstructed img and original
                pixel_loss = pixel_weight * mse(decoded, content_imgs)

                # Total Variation loss
                if tv_weight > 0:
                    tv_loss = tv_weight * tf.reduce_mean(
                        tf.image.total_variation(decoded))
                else:
                    tv_loss = tf.constant(0.)

                total_loss = feature_loss + pixel_loss + tv_loss

            ### Training ops
            with tf.name_scope('train_' + relu_target):
                global_step = tf.Variable(0,
                                          name='global_step_train',
                                          trainable=False)
                # self.learning_rate = tf.train.exponential_decay(learning_rate, self.global_step, 100, 0.96, staircase=False)
                learning_rate = torch_decay(learning_rate, global_step,
                                            lr_decay)
                d_optimizer = tf.train.AdamOptimizer(learning_rate,
                                                     beta1=0.9,
                                                     beta2=0.999)

                # Only train decoder vars, encoder is frozen
                d_vars = [
                    var for var in tf.trainable_variables()
                    if 'decoder_' + relu_target in var.name
                ]

                train_op = d_optimizer.minimize(total_loss,
                                                var_list=d_vars,
                                                global_step=global_step)

            ### Loss & image summaries
            with tf.name_scope('summary_' + relu_target):
                feature_loss_summary = tf.summary.scalar(
                    'feature_loss', feature_loss)
                pixel_loss_summary = tf.summary.scalar('pixel_loss',
                                                       pixel_loss)
                tv_loss_summary = tf.summary.scalar('tv_loss', tv_loss)
                total_loss_summary = tf.summary.scalar('total_loss',
                                                       total_loss)

                content_imgs_summary = tf.summary.image(
                    'content_imgs', content_imgs)
                decoded_images_summary = tf.summary.image(
                    'decoded_images', clip(decoded))

                for var in d_vars:
                    tf.summary.histogram(var.op.name, var)

                summary_op = tf.summary.merge_all()
        else:
            # For inference, set unneeded ops to None
            pixel_loss, feature_loss, tv_loss, total_loss, train_op, global_step, learning_rate, summary_op = [
                None
            ] * 8

        # Put it all together
        encoder_decoder = EncoderDecoder(
            content_input=content_imgs,
            content_encoder_model=content_encoder_model,
            content_encoded=content_encoded,
            style_encoded=style_encoded_tensor,
            decoder_input=decoder_input,
            decoder_model=decoder_model,
            decoded=decoded,
            decoded_encoded=decoded_encoded,
            pixel_loss=pixel_loss,
            feature_loss=feature_loss,
            tv_loss=tv_loss,
            total_loss=total_loss,
            train_op=train_op,
            global_step=global_step,
            learning_rate=learning_rate,
            summary_op=summary_op)

        return encoder_decoder
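
# `torch_decay` used in the training ops above is not defined in this snippet.
# A minimal sketch of Torch-style linear learning-rate decay,
# lr_t = lr / (1 + t * decay), consistent with the "Float linear decay"
# docstring (an assumption, not necessarily the original implementation):
import tensorflow as tf

def torch_decay(learning_rate, global_step, decay_rate):
    # The rate shrinks as global_step grows; decay_rate controls the slope.
    return learning_rate / (1.0 + tf.cast(global_step, tf.float32) * decay_rate)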
Example n. 19
                center_angle = float(batch_sample[3])
                images.append(img)
                angles.append(center_angle)

            # trim image to only see section with road
            X_train = np.array(images)
            y_train = np.array(angles)
            yield sklearn.utils.shuffle(X_train, y_train)


# compile and train the model using the generator function
train_generator = generator(train_samples, batch_size=batch_size)
validation_generator = generator(validation_samples, batch_size=batch_size)
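
# A hedged sketch (not part of the original snippet) of how these generators
# are typically consumed once the `model` defined below has been compiled:
#
#   model.compile(loss='mse', optimizer='adam')
#   model.fit_generator(train_generator,
#                       steps_per_epoch=len(train_samples) // batch_size,
#                       validation_data=validation_generator,
#                       validation_steps=len(validation_samples) // batch_size,
#                       epochs=5)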

model = Sequential([
    Lambda(lambda x: x / 255.0 - 0.5, input_shape=(160, 320, 3)),
    Cropping2D(cropping=((65, 25), (0, 0))),
    Conv2D(filters=24,
           kernel_size=(5, 5),
           strides=strides_1,
           padding=padding,
           activation="elu"),
    MaxPool2D(strides=(2, 2)),
    Conv2D(filters=36,
           kernel_size=(5, 5),
           strides=strides_1,
           padding=padding,
           activation="elu"),
    MaxPool2D(strides=(2, 2)),
    Conv2D(filters=48,
           kernel_size=(5, 5),
Example n. 20
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):
	"""Instantiates the Inflated 3D Inception v1 architecture.
    Optionally loads weights pre-trained
    on Kinetics. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format='channels_last'` in your Keras config
    at ~/.keras/keras.json.
    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.
    Note that the default input frame (image) size for this model is 224x224.
    # Arguments
        include_top: whether to include the classification
            layer at the top of the network.
        weights: one of `None` (random initialization),
            'kinetics_only' (pre-training on Kinetics dataset only),
            or 'imagenet_and_kinetics' (pre-training on ImageNet and Kinetics datasets).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(NUM_FRAMES, 224, 224, 3)` (with `channels_last` data format)
            or `(NUM_FRAMES, 3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels.
            NUM_FRAMES should be no smaller than 8. The authors used 64
            frames per example for training and testing on the Kinetics dataset.
            Also, width and height should be no smaller than 32.
            E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional, dropout probability applied in the dropout layer
            after the global average pooling layer.
            0.0 means no dropout is applied, 1.0 means dropout is applied to all features.
            Note: Since Dropout is applied just before the classification
            layer, it is only useful when `include_top` is set to True.
        endpoint_logit: (boolean) optional. If True, the model's forward pass
            ends at producing logits. Otherwise, softmax is applied after the
            logits to produce the class probability predictions. Setting this parameter
            to True is particularly useful when you want to combine the results of the
            RGB model and the optical flow model.
            - `True`: end the model's forward pass at the logit output
            - `False`: go further after the logits to produce softmax predictions
            Note: This parameter is only useful when `include_top` is set to True.
        classes: optional number of classes to classify videos
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
	if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
		raise ValueError('The `weights` argument should be either '
		                 '`None` (random initialization), one of %s, '
		                 'or a valid path to a file containing `weights` values'
		                 % str(WEIGHTS_NAME))

	if weights in WEIGHTS_NAME and include_top and classes != 400:
		raise ValueError('If using `weights` as one of these %s, with `include_top`'
		                 ' as True, `classes` should be 400' % str(WEIGHTS_NAME))

	# Determine proper input shape
	input_shape = _obtain_input_shape(
		input_shape,
		default_frame_size=224,
		min_frame_size=32,
		default_num_frames=64,
		min_num_frames=8,
		data_format=K.image_data_format(),
		require_flatten=include_top,
		weights=weights)

	if input_tensor is None:
		img_input = Input(shape=input_shape)
	else:
		if not K.is_keras_tensor(input_tensor):
			img_input = Input(tensor=input_tensor, shape=input_shape)
		else:
			img_input = input_tensor

	if K.image_data_format() == 'channels_first':
		channel_axis = 1
	else:
		channel_axis = 4

	# Downsampling via convolution (spatial and temporal)
	x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7')

	# Downsampling (spatial only)
	x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x)
	x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1')
	x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3')

	# Downsampling (spatial only)
	x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x)

	# Mixed 3b
	branch_0 = conv3d_bn(x, 64, 1, 1, 1, padding='same', name='Conv3d_3b_0a_1x1')

	branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_3b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 128, 3, 3, 3, padding='same', name='Conv3d_3b_1b_3x3')

	branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_3b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 32, 3, 3, 3, padding='same', name='Conv3d_3b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 32, 1, 1, 1, padding='same', name='Conv3d_3b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_3b')

	# Mixed 3c
	branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_0a_1x1')

	branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 192, 3, 3, 3, padding='same', name='Conv3d_3c_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_3c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 96, 3, 3, 3, padding='same', name='Conv3d_3c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_3c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_3c')

	# Downsampling (spatial and temporal)
	x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x)

	# Mixed 4b
	branch_0 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_4b_0a_1x1')

	branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_4b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 208, 3, 3, 3, padding='same', name='Conv3d_4b_1b_3x3')

	branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_4b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 48, 3, 3, 3, padding='same', name='Conv3d_4b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4b')

	# Mixed 4c
	branch_0 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4c_0a_1x1')

	branch_1 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 224, 3, 3, 3, padding='same', name='Conv3d_4c_1b_3x3')

	branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4c')

	# Mixed 4d
	branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_0a_1x1')

	branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 256, 3, 3, 3, padding='same', name='Conv3d_4d_1b_3x3')

	branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4d_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4d_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4d_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4d_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4d')

	# Mixed 4e
	branch_0 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4e_0a_1x1')

	branch_1 = conv3d_bn(x, 144, 1, 1, 1, padding='same', name='Conv3d_4e_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 288, 3, 3, 3, padding='same', name='Conv3d_4e_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4e_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4e_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4e_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4e_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4e')

	# Mixed 4f
	branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_4f_0a_1x1')

	branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4f_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_4f_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4f_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_4f_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4f_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_4f_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4f')

	# Downsampling (spatial and temporal)
	x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x)

	# Mixed 5b
	branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_5b_0a_1x1')

	branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_5b')

	# Mixed 5c
	branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1')

	branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3')

	branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_5c')

	if include_top:
		# Classification block
		x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)
		x = Dropout(dropout_prob)(x)

		x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
		              use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1')

		num_frames_remaining = int(x.shape[1])
		x = Reshape((num_frames_remaining, classes))(x)

		# logits (raw scores for each class)
		x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
		           output_shape=lambda s: (s[0], s[2]))(x)

		if not endpoint_logit:
			x = Activation('softmax', name='prediction')(x)
	else:
		h = int(x.shape[2])
		w = int(x.shape[3])
		x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)

	inputs = img_input
	# create model
	model = Model(inputs, x, name='i3d_inception')

	# load weights
	if weights in WEIGHTS_NAME:
		if weights == WEIGHTS_NAME[0]:  # rgb_kinetics_only
			if include_top:
				weights_url = WEIGHTS_PATH['rgb_kinetics_only']
				model_name = 'i3d_inception_rgb_kinetics_only.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only']
				model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5'

		elif weights == WEIGHTS_NAME[1]:  # flow_kinetics_only
			if include_top:
				weights_url = WEIGHTS_PATH['flow_kinetics_only']
				model_name = 'i3d_inception_flow_kinetics_only.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['flow_kinetics_only']
				model_name = 'i3d_inception_flow_kinetics_only_no_top.h5'

		elif weights == WEIGHTS_NAME[2]:  # rgb_imagenet_and_kinetics
			if include_top:
				weights_url = WEIGHTS_PATH['rgb_imagenet_and_kinetics']
				model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['rgb_imagenet_and_kinetics']
				model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5'

		elif weights == WEIGHTS_NAME[3]:  # flow_imagenet_and_kinetics
			if include_top:
				weights_url = WEIGHTS_PATH['flow_imagenet_and_kinetics']
				model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['flow_imagenet_and_kinetics']
				model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5'

		downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models')
		model.load_weights(downloaded_weights_path)

		if K.backend() == 'theano':
			layer_utils.convert_all_kernels_in_model(model)

		if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
			warnings.warn('You are using the TensorFlow backend, yet you '
			              'are using the Theano '
			              'image data format convention '
			              '(`image_data_format="channels_first"`). '
			              'For best performance, set '
			              '`image_data_format="channels_last"` in '
			              'your keras config '
			              'at ~/.keras/keras.json.')

	elif weights is not None:
		model.load_weights(weights)

	return model
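
# Hypothetical usage of the constructor above; the weight name comes from the
# WEIGHTS_NAME handling in the loader, and the clip shape follows the
# docstring defaults (64 frames at 224x224):
import numpy as np

rgb_model = Inception_Inflated3d(include_top=True,
                                 weights='rgb_imagenet_and_kinetics',
                                 classes=400)
clip = np.random.rand(1, 64, 224, 224, 3).astype('float32')
logits = rgb_model.predict(clip)  # shape (1, 400); raw logits since endpoint_logit=True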
Example n. 21
            def YOLOMODEL(path):
                input_image = Input(shape=(input_size, input_size, 3))
                true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

                # Layer 1
                x = Conv2D(32, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_1',
                           use_bias=False)(input_image)
                x = BatchNormalization(name='norm_1')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 2
                x = Conv2D(64, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_2',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_2')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 3
                x = Conv2D(128, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_3',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_3')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 4
                x = Conv2D(64, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_4',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_4')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 5
                x = Conv2D(128, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_5',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_5')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 6
                x = Conv2D(256, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_6',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_6')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 7
                x = Conv2D(128, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_7',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_7')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 8
                x = Conv2D(256, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_8',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_8')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 9
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_9',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_9')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 10
                x = Conv2D(256, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_10',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_10')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 11
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_11',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_11')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 12
                x = Conv2D(256, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_12',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_12')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 13
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_13',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_13')(x)
                x = LeakyReLU(alpha=0.1)(x)

                skip_connection = x

                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 14
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_14',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_14')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 15
                x = Conv2D(512, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_15',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_15')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 16
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_16',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_16')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 17
                x = Conv2D(512, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_17',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_17')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 18
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_18',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_18')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 19
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_19',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_19')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 20
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_20',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_20')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 21
                skip_connection = Conv2D(64, (1, 1),
                                         strides=(1, 1),
                                         padding='same',
                                         name='conv_21',
                                         use_bias=False)(skip_connection)
                skip_connection = BatchNormalization(
                    name='norm_21')(skip_connection)
                skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
                skip_connection = Lambda(space_to_depth_x2)(skip_connection)

                x = concatenate([skip_connection, x])

                # Layer 22
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_22',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_22')(x)
                x = LeakyReLU(alpha=0.1)(x)
                feature_extractor = Model(input_image, x, name='FULLYOLO')
                features = feature_extractor(input_image)

                grid_h, grid_w = feature_extractor.get_output_shape_at(-1)[1:3]

                # make the object detection layer
                output = Conv2D(nb_box * (4 + 1 + nb_class), (1, 1),
                                strides=(1, 1),
                                padding='same',
                                name='conv_23',
                                kernel_initializer='lecun_normal')(features)
                output = Reshape(
                    (grid_h, grid_w, nb_box, 4 + 1 + nb_class))(output)
                output = Lambda(lambda args: args[0])([output, true_boxes])

                model = Model([input_image, true_boxes], output)

                # initialize the weights of the detection layer
                layer = model.layers[-4]
                weights = layer.get_weights()

                new_kernel = np.random.normal(
                    size=weights[0].shape) / (grid_h * grid_w)
                new_bias = np.random.normal(size=weights[1].shape) / (grid_h *
                                                                      grid_w)

                layer.set_weights([new_kernel, new_bias])

                model.load_weights(path)

                #print(model.summary())
                return model
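
# `space_to_depth_x2` (wrapped in the Lambda above) is not defined in this
# snippet; a minimal sketch on top of TF1's built-in op:
import tensorflow as tf

def space_to_depth_x2(x):
    # Rearrange each 2x2 spatial block into channels: (H, W, C) -> (H/2, W/2, 4C).
    return tf.space_to_depth(x, block_size=2)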
Example n. 22
def _tf_grid_mask_frame(frame, image_size, n_neighbor_pixels, grid_side):
    """Compute a grid mask for TensorFlow.

    :param frame: tensor of shape (max_n_peds, 3); each row is (pid, x, y)
    :param image_size: image dimensions used to scale the neighborhood extent
    :param n_neighbor_pixels: size of the neighborhood, in pixels
    :param grid_side: number of grid cells along each side of the grid
    :return: grid mask of shape (max_n_peds, max_n_peds, grid_side ** 2)
    """
    max_n_peds = frame.shape.as_list()[0]
    pids = frame[:, 0]

    # --------------------
    # compute id_mask
    # --------------------

    def compute_id_mask(pids):
        id_mask = tf.tensordot(tf.expand_dims(pids, axis=1),
                               tf.transpose(tf.expand_dims(pids, axis=1)),
                               axes=(1, 0))
        id_mask = tf.cast(id_mask, tf.bool)
        # mask self-to-self (diagonal elements)
        id_mask = tf.logical_and(
            tf.logical_not(tf.cast(tf.eye(max_n_peds), tf.bool)), id_mask)
        id_mask = tf.expand_dims(id_mask, axis=2)
        id_mask = tf.cast(id_mask, tf.float32)

        return id_mask

    id_mask = Lambda(compute_id_mask)(pids)

    bound = n_neighbor_pixels / np.array(image_size)
    pos = frame[:, 1:]
    tl = pos - bound / 2
    br = pos + bound / 2

    frame_mask = []
    for self_index in range(max_n_peds):
        is_neighbor = Lambda(lambda pos: tf.cast(
            tf.reduce_all(tf.concat(
                [tl[self_index] <= pos, pos < br[self_index]], axis=1),
                          axis=1), np.int32))(pos)

        cell_xy = Lambda(lambda pos: tf.cast(
            tf.floor(((pos - tl[self_index]) / bound) * grid_side), tf.int32))(
                pos)

        cell_index = cell_xy[:, 0] + cell_xy[:, 1] * grid_side
        cell_index = cell_index * is_neighbor

        self_frame_mask = tf.stack(tf.map_fn(
            lambda c: tf.eye(grid_side**2, dtype=np.int32)[c], cell_index),
                                   axis=0)
        self_frame_mask *= tf.expand_dims(is_neighbor, 1)
        frame_mask.append(self_frame_mask)

    frame_mask = tf.stack(frame_mask, axis=0)
    frame_mask = tf.cast(frame_mask, tf.float32)
    # mask not exist elements & self-to-self pair
    frame_mask *= id_mask
    return frame_mask
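
# Hypothetical usage sketch (TF1-style; the (pid, x, y) row layout follows the
# docstring above, and all values here are made up):
import tensorflow as tf

frame = tf.constant([[1.0, 0.20, 0.30],
                     [2.0, 0.25, 0.35],
                     [0.0, 0.00, 0.00]])  # pid == 0 marks an empty slot
mask = _tf_grid_mask_frame(frame, image_size=(1.0, 1.0),
                           n_neighbor_pixels=0.4, grid_side=4)
# mask has shape (3, 3, 16): (self ped, other ped, flattened 4x4 grid cell)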
Example n. 23
def yolo_body_mobilenet(inputs,
                        num_anchors,
                        num_classes,
                        weights='imagenet',
                        network_config=[False, False]):
    """
    Mobile detector implementation on top of a MobileNet backbone.
    :param inputs: input tensor to the network
    :param num_anchors: number of anchor boxes per grid cell
    :param num_classes: number of object classes to predict
    :param weights: 'imagenet' to copy pretrained MobileNet weights, else None
    :param network_config: [shallow_detector, use_x0] configuration flags
    :return: the Keras detector model
    """
    fine_grained_layers = [17, 27, 43]
    shallow_detector, use_x0 = network_config

    if shallow_detector:
        fine_grained_layers = fine_grained_layers[0:2]
        num_final_layers = 512
        final_feature_layer = 69
    else:
        fine_grained_layers = fine_grained_layers[1:]
        num_final_layers = 1024
        final_feature_layer = -1

    feature_model = MobileNet(input_tensor=inputs,
                              include_top=False,
                              weights=None)
    feature_model = Model(
        inputs=feature_model.input,
        outputs=feature_model.layers[final_feature_layer].output)

    if weights == 'imagenet':
        print('Loading pretrained weights from ImageNet...')
        trained_model = MobileNet(input_shape=(224, 224, 3),
                                  include_top=False,
                                  weights='imagenet')
        trained_layers = trained_model.layers
        feature_layers = feature_model.layers
        for i in range(0, min(len(feature_layers), len(trained_layers))):
            weights = trained_layers[i].get_weights()
            feature_layers[i].set_weights(weights)

    x2 = feature_model.output
    x1 = feature_model.layers[fine_grained_layers[1]].output
    x0 = feature_model.layers[fine_grained_layers[0]].output

    x2 = _depthwise_conv_block(x2, num_final_layers, 1.0, block_id=14)
    x2 = _depthwise_conv_block(x2, num_final_layers, 1.0, block_id=15)

    # Reroute x1
    x1 = Conv2D(64, (1, 1), padding='same', use_bias=False, strides=(1, 1))(x1)
    x1 = BatchNormalization()(x1)
    # To keep keras to tensorflow conversion happy
    x1 = Lambda(relu_6)(x1)

    x1_reshaped = Lambda(space_to_depth_x2,
                         output_shape=space_to_depth_x2_output_shape,
                         name='space_to_depth_x2')(x1)

    # Reroute x0
    x0 = Conv2D(16, (1, 1), padding='same', use_bias=False, strides=(1, 1))(x0)
    x0 = BatchNormalization()(x0)
    x0 = Lambda(relu_6)(x0)

    x0_reshaped = Lambda(space_to_depth_x4,
                         output_shape=space_to_depth_x4_output_shape,
                         name='space_to_depth_x4')(x0)

    if use_x0:
        x = concatenate([x0_reshaped, x1_reshaped, x2])
    else:
        x = concatenate([x1_reshaped, x2])

    x = _depthwise_conv_block(x, num_final_layers, 1.0, block_id=16)

    x = Conv2D(num_anchors * (num_classes + 5), (1, 1))(x)

    model = Model(inputs=feature_model.input, outputs=x)
    return model
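
# `relu_6` (wrapped in the Lambdas above) is not defined in this snippet; a
# minimal sketch using the Keras backend, which keeps the Keras-to-TensorFlow
# conversion mentioned in the comment happy:
from keras import backend as K

def relu_6(x):
    # ReLU clipped at 6, as in the original MobileNet
    return K.relu(x, max_value=6)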
Example n. 24
    std = (para_pred[:, :, 1])
    print("mean.shape: ", mean.shape)
    print('std.shape: ', std.shape)
    likelihood = log_gaussian(y_true[:, :, 0], mean, std)
    print("likelihood.shape: ", likelihood.shape)
    print('==end of custom loss===')
    result = K.mean(likelihood)
    return -result
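
# `log_gaussian` used in the loss above is not defined in this snippet; a
# standard Gaussian log-density sketch in Keras backend ops (assumes std > 0):
import math
from keras import backend as K

def log_gaussian(x, mean, std):
    # Elementwise log N(x; mean, std^2)
    return (-0.5 * math.log(2.0 * math.pi)
            - K.log(std)
            - K.square(x - mean) / (2.0 * K.square(std)))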


#aux_in = Input(shape=(input_window_length,n_dims, ), name='aux_input')
aux_in = Input(shape=(None, ), name='aux_input', dtype='int32')

# in salute to https://gist.github.com/bzamecnik/a33052ec46ee7efeb217856d98a4fb5f
aux_in_full = Lambda(K.one_hot,
                     arguments={'num_classes': n_dims},
                     output_shape=(None, n_dims))(aux_in)
x = Dense(20, activation='sigmoid')(aux_in_full)

#x = Embedding(input_dim=370, output_dim=20, input_length = 192)(aux_in)
main_in = Input(shape=(
    None,
    n_features,
), name="main_input")
input1 = layers.concatenate([main_in, x])
lstm_out1 = LSTM(40, return_sequences=True)(input1)
drop_out1 = Dropout(0.2)(lstm_out1)
lstm_out2 = LSTM(40, return_sequences=True)(drop_out1)
drop_out2 = Dropout(0.2)(lstm_out2)
lstm_out3 = LSTM(40, return_sequences=True)(drop_out2)
drop_out3 = Dropout(0.2)(lstm_out3)
Example n. 25
def yolo_body_darknet(inputs,
                      num_anchors,
                      num_classes,
                      weights='yolov2',
                      network_config=[False, False]):
    """Create YOLO_V2 model CNN body in Keras."""
    fine_grained_layers = [17, 27, 43]

    shallow_detector, use_x0 = network_config

    if shallow_detector:
        fine_grained_layers = fine_grained_layers[0:2]
        num_final_layers = 512
        final_feature_layer = 43
    else:
        fine_grained_layers = fine_grained_layers[1:]
        num_final_layers = 1024
        final_feature_layer = -1

    feature_model = darknet19(inputs, include_top=False)
    feature_model = Model(
        inputs=feature_model.input,
        outputs=feature_model.layers[final_feature_layer].output)

    if weights == 'yolov2':
        print("Loading pre-trained yolov2 weights")
        # Save topless yolo:
        yolo_path = os.path.join('model_data', 'yolo.h5')
        trained_model = load_model(yolo_path)
        # trained_model = Model(trained_model.inputs, trained_model.output)
        trained_layers = trained_model.layers
        feature_layers = feature_model.layers
        for i in range(0, min(len(feature_layers), len(trained_layers))):
            weights = trained_layers[i].get_weights()
            feature_layers[i].set_weights(weights)

    x2 = feature_model.output
    x1 = feature_model.layers[fine_grained_layers[1]].output
    x0 = feature_model.layers[fine_grained_layers[0]].output

    x2 = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x2)
    x2 = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x2)

    x1 = DarknetConv2D_BN_Leaky(64, (1, 1))(x1)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    x1_reshaped = Lambda(space_to_depth_x2,
                         output_shape=space_to_depth_x2_output_shape,
                         name='space_to_depth_x2')(x1)

    x0 = DarknetConv2D_BN_Leaky(16, (1, 1))(x0)
    # TODO: #304 Allow Keras Lambda to use func arguments for output_shape?
    x0_reshaped = Lambda(space_to_depth_x4,
                         output_shape=space_to_depth_x4_output_shape,
                         name='space_to_depth_x4')(x0)

    if use_x0:
        x = concatenate([x0_reshaped, x1_reshaped, x2])
    else:
        x = concatenate([x1_reshaped, x2])
    x = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x)

    x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
    return Model(feature_model.inputs, x)
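
# The space-to-depth helpers referenced by the Lambdas above are not shown;
# hedged sketches consistent with block sizes 2 and 4 (signatures assumed):
import tensorflow as tf

def space_to_depth_x4(x):
    return tf.space_to_depth(x, block_size=4)

def space_to_depth_x2_output_shape(input_shape):
    # (batch, H, W, C) -> (batch, H/2, W/2, 4C); H and W may be None
    batch, height, width, channels = input_shape
    return (batch, height // 2 if height else None,
            width // 2 if width else None, 4 * channels)

def space_to_depth_x4_output_shape(input_shape):
    batch, height, width, channels = input_shape
    return (batch, height // 4 if height else None,
            width // 4 if width else None, 16 * channels)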
Example n. 26
def ssc_300(image_size,
            n_classes,
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            predictors=[
                'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
            ],
            hidden_size=[250, 250, 100],
            output_activation=False,
            lstm=False,
            condense_predictors=False):
    """
    Build a Keras model with SSC300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper. Most of the arguments that this function takes are
    only needed for the anchor box layers, in case you're training the network.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    References: https://arxiv.org/abs/1512.02325v5

    :param tuple image_size: The input image size in the format `(height, width, channels)`.
    :param int n_classes: The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    :param float l2_regularization: The L2-regularization rate. Applies to all convolutional layers.
        Set to zero to deactivate L2-regularization.
    :param float min_scale: The smallest scaling factor for the size of the anchor boxes as a fraction
        of the shorter side of the input images.
    :param float max_scale: The largest scaling factor for the size of the anchor boxes as a fraction
        of the shorter side of the input images. All scaling factors between the smallest and the
        largest will be linearly interpolated. Note that the second to last of the linearly interpolated
        scaling factors will actually be the scaling factor for the last predictor layer, while the last
        scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
        if `two_boxes_for_ar1` is `True`.
    :param list scales: A list of floats containing scaling factors per convolutional predictor layer.
        This list must be one element longer than the number of predictor layers. The first `k` elements are the
        scaling factors for the `k` predictor layers, while the last element is used for the second box
        for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
        last scaling factor must be passed either way, even if it is not being used. If a list is passed,
        this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
    :param list aspect_ratios_global: The list of aspect ratios for which anchor boxes are to be
        generated. This list is valid for all prediction layers.
    :param list aspect_ratios_per_layer: A list containing one aspect ratio list for each prediction layer.
        This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
        original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
    :param bool two_boxes_for_ar1: Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
        If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
        using the scaling factor for the respective layer, the second one will be generated using
        geometric mean of said scaling factor and next bigger scaling factor.
    :param list steps: `None` or a list with as many elements as there are predictor layers. The elements can be
        either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
        pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
        the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
        If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
        If no steps are provided, then they will be computed such that the anchor box center points will form an
        equidistant grid within the image dimensions.
    :param list offsets: `None` or a list with as many elements as there are predictor layers. The elements can be
        either floats or tuples of two floats. These numbers represent for each predictor layer how many
        pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
        as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
        of the step size specified in the `steps` argument. If the list contains floats, then that value will
        be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
        `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
    :param list subtract_mean: `None` or an array-like object of integers or floating point values
        of any shape that is broadcast-compatible with the image shape. The elements of this array will be
        subtracted from the image pixel intensity values. For example, pass a list of three integers
        to perform per-channel mean normalization for color images.
    :param list divide_by_stddev: `None` or an array-like object of non-zero integers or
        floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
        intensity values will be divided by the elements of this array. For example, pass a list
        of three integers to perform per-channel standard deviation normalization for color images.
    :param list swap_channels: Either `False` or a list of integers representing the desired order in which the input
        image channels should be swapped.
    :param list predictors: names of the convolutional layers used as predictors
    :param list hidden_size: number of neurons for the 3 hidden fully-connected layers
    :param bool output_activation: whether to include or not the softplus activation function after the hidden layers
    :param bool lstm: whether to add or not an LSTM cell on top of the hidden layer
    :param bool condense_predictors: whether to condense or not the predictors in a single prediction

    :return model: The Keras SSC300 model.
    """

    # The number of predictor conv layers in the network is 6 for the original SSD300.
    n_predictor_layers = len(predictors)
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(hidden_size) != 3:
        raise ValueError(
            "3 hidden size values must be passed, but {} values were received."
            .format(len(hidden_size)))
    hidden_size = np.array(hidden_size)
    if np.any(hidden_size <= 0):
        raise ValueError(
            "All hidden sizes must be >0, but the sizes given are {}".format(
                hidden_size))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) and two_boxes_for_ar1:
                # +1 for the second box for aspect ratio 1
                n_boxes.append(len(ar) + 1)
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) and two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
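    # Worked example (illustrative): with aspect_ratios_global = [1.0, 2.0, 0.5]
    # and two_boxes_for_ar1 = True, each predictor layer gets
    # len([1.0, 2.0, 0.5]) + 1 = 4 anchor boxes per cell.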

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)

    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)

    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)

    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)

    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)

    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    conv_features = {
        'conv4_3': conv4_3_norm,
        'fc7': fc7,
        'conv6_2': conv6_2,
        'conv7_2': conv7_2,
        'conv8_2': conv8_2,
        'conv9_2': conv9_2
    }
    predictor_layers = []

    ### Build the predictor layers on top of the base network
    for predictor in predictors:
        flatten = Flatten(name='{}_flat'.format(predictor))(
            conv_features[predictor])
        d1 = Dense(hidden_size[0], name='{}_d1'.format(predictor))(flatten)
        d1bn = BatchNormalization(name='{}_bn1'.format(predictor))(d1)
        r1 = Activation(activation='relu',
                        name='{}_r1'.format(predictor))(d1bn)
        d2 = Dense(hidden_size[1], name='{}_d2'.format(predictor))(r1)
        d2bn = BatchNormalization(name='{}_bn2'.format(predictor))(d2)
        r2 = Activation(activation='relu',
                        name='{}_r2'.format(predictor))(d2bn)
        d3 = Dense(hidden_size[2], name='{}_d3'.format(predictor))(r2)
        d3bn = BatchNormalization(name='{}_bn3'.format(predictor))(d3)
        r3 = Activation(activation='relu',
                        name='{}_r3'.format(predictor))(d3bn)
        pred = Dense(n_classes, name='{}_pred'.format(predictor))(r3)
        predictor_layers.append(pred)

    # Concatenate the outputs of the different predictors
    # Output shape of `predictions`: (batch, n_predictors * n_classes)
    predictions = Concatenate(axis=1, name='predictions1')(predictor_layers)
    if output_activation:
        # NOTE: the activation is hard-coded to softplus regardless of the
        # value passed in `output_activation`
        predictions = Activation(activation='softplus')(predictions)
    if lstm:
        predictions = Reshape((n_predictor_layers, n_classes),
                              name='lstm_predictions_res')(predictions)
        predictions = Bidirectional(LSTM(20, return_sequences=False),
                                    name='lstm_predictions')(predictions)
    if condense_predictors:
        predictions = Dense(n_classes,
                            name='predictions_condensed')(predictions)

    return Model(inputs=x, outputs=predictions)
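
# The `L2Normalization` layer used above is not a built-in Keras layer. Below is
# a minimal sketch, assuming the common SSD-style implementation: the conv4_3
# activations are L2-normalized along the channel axis and rescaled by a
# trainable per-channel gamma initialized from `gamma_init`.
import numpy as np
from keras import backend as K
from keras.engine.topology import Layer

class L2Normalization(Layer):
    def __init__(self, gamma_init=20, **kwargs):
        self.gamma_init = gamma_init
        super(L2Normalization, self).__init__(**kwargs)

    def build(self, input_shape):
        # One trainable scale per channel (channels-last data format assumed).
        gamma = self.gamma_init * np.ones((input_shape[-1],))
        self.gamma = K.variable(gamma, name='{}_gamma'.format(self.name))
        self.trainable_weights = [self.gamma]
        super(L2Normalization, self).build(input_shape)

    def call(self, x, mask=None):
        # Normalize to unit L2 norm along channels, then rescale.
        return K.l2_normalize(x, axis=-1) * self.gamma
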
#directory of the resized test images; the evaluation set is built from the files stored there
test_dir = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Test_Resized"
#load the labels pickle into a dataframe, keyed by filename so labels line up with the images
test_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Testing_Input_Resized.pkl"

test_labels = pd.read_pickle(test_labels_file)
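
#`image_transform` is referenced below but not defined in this snippet; a
#hypothetical identity placeholder (the real preprocessing lives elsewhere)
#keeps the example self-contained:
def image_transform(img):
    #placeholder only: return the image unchanged
    return img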

test_datagen = ImageDataGenerator(rescale=1./255,
                                  preprocessing_function=image_transform)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_labels,
    directory=test_dir,
    target_size=(108, 192),
    x_col='Filename',
    y_col=['Right Ankle x', 'Right Knee x', 'Right Hip x', 'Left Hip x',
           'Left Knee x', 'Left Ankle x', 'Pelvis x', 'Thorax x',
           'Upper Neck x', 'Head Top x', 'Right Wrist x', 'Right Elbow x',
           'Right Shoulder x', 'Left Shoulder x', 'Left Elbow x',
           'Left Wrist x', 'Right Ankle y', 'Right Knee y', 'Right Hip y',
           'Left Hip y', 'Left Knee y', 'Left Ankle y', 'Pelvis y',
           'Thorax y', 'Upper Neck y', 'Head Top y', 'Right Wrist y',
           'Right Elbow y', 'Right Shoulder y', 'Left Shoulder y',
           'Left Elbow y', 'Left Wrist y'],
    class_mode='other',
    batch_size=8)

#create model
model = Sequential()

#add model layers
model.add(Conv2D(1, kernel_size=1, input_shape=(108,192,3), activation='relu'))
model.add(Lambda(image_transform))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128, kernel_size=3, activation='relu'))
model.add(Conv2D(128, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))

#compile model using accuracy to measure model performance
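#the actual compile call is missing from this snippet; a plausible completion
#for this coordinate-regression head (assumed, not from the source) is:
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])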
def get_cnn_model(image_size,
                  n_classes,
                  mode='training'):
    n_classes += 1 # add one background class
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
    l2_reg = 0.0005 # L2 regularization

    ## input preprocessing settings
    subtract_mean=[123, 117, 104]
    divide_by_stddev=None
    swap_channels=[2, 1, 0]

    # the following four functions are used by Lambda layers
    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]]], axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]], tensor[...,swap_channels[3]]], axis=-1)
    
    x = Input(shape=(img_height, img_width, img_channels))

    # the identity_layer is optional
    x1 = Lambda(identity_layer, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization, output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization, output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1)
        
    ## modified VGG16 implementation
    conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same', name='pool5')(conv5_3)


    flatten = Flatten(name='flatten')(pool5)
    fc6 = Dense(512, activation='relu', name='fc6')(flatten)
    dropout = Dropout(0.5)(fc6)
    fc7 = Dense(n_classes, activation='softmax', name='fc7')(dropout)

    model = Model(inputs=x, outputs=fc7)
    return model
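
# Hypothetical usage (values assumed, not from the source): a 300x300 RGB
# input and 20 foreground classes
# model = get_cnn_model(image_size=(300, 300, 3), n_classes=20)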
Example n. 29
    def create_model(self):
        self._set_model_params()
        act = 'relu'
        input_data = Input(name='the_input',
                           shape=self.input_shape,
                           dtype='float32')
        inner = Conv2D(self.conv_num_filters,
                       (self.filter_size, self.filter_size),
                       padding='same',
                       activation=act,
                       name='conv1')(input_data)

        inner = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1),
                             name='max1')(inner)
        inner = Conv2D(self.conv_num_filters,
                       (self.filter_size, self.filter_size),
                       padding='same',
                       activation=act,
                       name='conv2')(inner)
        inner = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2),
                             name='max2')(inner)
        conv_to_rnn_dims = (int(
            (self.img_h /
             (self.pool_size_1 * self.pool_size_2)) * self.conv_num_filters),
                            int(self.img_w /
                                (self.pool_size_1 * self.pool_size_2)))
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
        inner = Permute(dims=(2, 1), name='permute')(inner)

        # cuts down input size going into RNN:
        inner = TimeDistributed(
            Dense(self.time_dense_size, activation=act, name='dense1'))(inner)

        # Two layers of bidirectional GRUs
        # GRU seems to work as well as, if not better than, LSTM:
        gru_1 = GRU(self.rnn_size, return_sequences=True, name='gru1')(inner)
        gru_1b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(self.rnn_size, return_sequences=True,
                    name='gru2')(gru1_merged)
        gru_2b = GRU(self.rnn_size, return_sequences=True,
                     go_backwards=True)(gru1_merged)

        # transforms RNN output to character activations:
        inner = TimeDistributed(Dense(self.output_size, name='dense2'))(
            concatenate([gru_2, gru_2b]))

        y_pred = Activation('softmax', name='softmax')(inner)
        # Model(input=[input_data], output=y_pred).summary()
        labels = Input(name='the_labels',
                       shape=[self.absolute_max_string_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name="ctc")(
            [y_pred, labels, input_length, label_length])
        lr = 0.03
        # clipnorm seems to speed up convergence
        clipnorm = 5
        sgd = SGD(lr=lr,
                  decay=3e-7,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=clipnorm)
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=[loss_out])
        # model.summary()
        # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
        if self.weight_file is not None:
            model.load_weights(self.weight_file)

        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=sgd)
        self.model = model

        self._predictor = K.function([input_data], [y_pred])

        return model
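
# `ctc_lambda_func` is referenced above but not shown in this example. A minimal
# sketch, assuming the standard Keras image-OCR pattern: the first two RNN
# output timesteps are dropped (they tend to be garbage) before the CTC batch
# cost is computed by the backend.
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, 2:, :]  # skip the first two output timesteps
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)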
Example n. 30
def cnn_rnn(nb_words, EMBEDDING_DIM, \
            embedding_matrix, MAX_SEQUENCE_LENGTH, \
            num_rnn, num_dense, rate_drop_rnn, \
            rate_drop_dense, act):
    '''
    This is the basic CNN-RNN model.

    model: input layer; embedding layer; CNN-based attention layer; RNN layer; dense layer; output layer
    '''

    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)

    rnn_layer = Bidirectional(
        GRU(num_rnn, dropout=rate_drop_rnn, recurrent_dropout=rate_drop_rnn))
    cnn_layer = Conv1D(activation="relu",
                       padding="valid",
                       strides=1,
                       filters=128,
                       kernel_size=2)
    # cnn_layer1 = Conv1D(activation="relu", padding="valid", strides=1, filters=64, kernel_size=4)
    pooling_layer = GlobalMaxPooling1D()
    cnn_dense = Dense(300)
    cnn_dropout1 = Dropout(0.35)
    cnn_dropout2 = Dropout(0.35)
    cnn_batchnormalization = BatchNormalization()
    cnn_repeatvector = RepeatVector(EMBEDDING_DIM)
    cnn_dense1 = Dense(300)
    cnn_timedistributed = TimeDistributed(Dense(1))

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)

    cnn_1 = cnn_layer(embedded_sequences_1)
    # cnn_1 = cnn_layer1(cnn_1)
    cnn_1 = pooling_layer(cnn_1)
    cnn_1 = cnn_dropout1(cnn_1)
    cnn_1 = cnn_dense(cnn_1)
    cnn_1 = cnn_dropout2(cnn_1)
    cnn_1 = cnn_batchnormalization(cnn_1)

    cnn_2 = cnn_layer(embedded_sequences_2)
    # cnn_2 = cnn_layer1(cnn_2)
    cnn_2 = pooling_layer(cnn_2)
    cnn_2 = cnn_dropout1(cnn_2)
    cnn_2 = cnn_dense(cnn_2)
    cnn_2 = cnn_dropout2(cnn_2)
    cnn_2 = cnn_batchnormalization(cnn_2)

    # cnn_1 = cnn_repeatvector(cnn_1)
    # cnn_2 = cnn_repeatvector(cnn_2)

    cnn_1_t = cnn_dense1(cnn_1)
    cnn_2_t = cnn_dense1(cnn_2)

    # cnn_1_t = cnn_timedistributed(cnn_1)
    # cnn_2_t = cnn_timedistributed(cnn_2)

    # cnn_1_t = Permute([2, 1])(cnn_1_t)
    # cnn_2_t = Permute([2, 1])(cnn_2_t)

    # CNN-derived attention: score the embedding timesteps against the CNN
    # summary, softmax over timesteps, then reweight the embeddings below
    a1 = multiply([cnn_1_t, embedded_sequences_1])
    a2 = multiply([cnn_2_t, embedded_sequences_2])

    a1 = Permute([2, 1])(a1)
    a2 = Permute([2, 1])(a2)

    a1 = Lambda(lambda x: K.sum(x, axis=1))(a1)
    a2 = Lambda(lambda x: K.sum(x, axis=1))(a2)

    a1 = Activation('softmax')(a1)
    a2 = Activation('softmax')(a2)

    embedded_sequences_1 = Permute([2, 1])(embedded_sequences_1)
    embedded_sequences_2 = Permute([2, 1])(embedded_sequences_2)

    x1 = multiply([a1, embedded_sequences_1])
    x2 = multiply([a2, embedded_sequences_2])

    x1 = Permute([2, 1])(x1)
    x2 = Permute([2, 1])(x2)

    x1 = rnn_layer(x1)
    x2 = rnn_layer(x2)

    merged = multiply([x1, x2])
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    preds = Dense(3, activation='softmax')(merged)

    # x1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(embedded_sequences_1)
    # x1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(x1)

    # y1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(embedded_sequences_2)
    # y1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(y1)

    ########################################
    ## train the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    # print(STAMP)
    return model
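
# Hypothetical usage (values assumed, not from the source):
# model = cnn_rnn(nb_words, 300, embedding_matrix, 30,
#                 num_rnn=256, num_dense=300,
#                 rate_drop_rnn=0.3, rate_drop_dense=0.3, act='relu')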