Example 1
    def build(self):
        assert self.config['question_len'] == self.config['answer_len']

        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # cnn
        cnns = [Convolution1D(filter_length=filter_length,
                              nb_filter=500,
                              activation='tanh',
                              border_mode='same') for filter_length in [2, 3, 5, 7]]
        question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat')
        answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat')

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        enc = Dense(100, activation='tanh')
        question_pool = enc(maxpool(question_cnn))
        answer_pool = enc(maxpool(answer_cnn))

        return question_pool, answer_pool
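Each build() variant in these examples returns a (question_pool, answer_pool) pair rather than a finished model. A minimal sketch of the usual next step, under the assumption that the two encodings are scored by cosine similarity (Keras 2 functional API; question and answer are the underlying Input tensors):

# A hedged sketch, not part of the original code: score a question/answer
# pair by the cosine similarity of the two pooled encodings.
from keras.layers import Dot
from keras.models import Model

def similarity_model(question, answer, question_pool, answer_pool):
    # normalize=True makes Dot compute cosine similarity along the last axis
    score = Dot(axes=-1, normalize=True)([question_pool, answer_pool])
    return Model(inputs=[question, answer], outputs=score)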
Example 2
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              # mask_zero=True,
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # question rnn part
        f_rnn = LSTM(141, return_sequences=True, consume_less='mem')
        b_rnn = LSTM(141, return_sequences=True, consume_less='mem', go_backwards=True)
        question_f_rnn = f_rnn(question_embedding)
        question_b_rnn = b_rnn(question_embedding)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = merge([maxpool(question_f_rnn), maxpool(question_b_rnn)], mode='concat', concat_axis=-1)

        # answer rnn part
        from attention_lstm import AttentionLSTMWrapper
        f_rnn = AttentionLSTMWrapper(f_rnn, question_pool, single_attention_param=True)
        b_rnn = AttentionLSTMWrapper(b_rnn, question_pool, single_attention_param=True)

        answer_f_rnn = f_rnn(answer_embedding)
        answer_b_rnn = b_rnn(answer_embedding)
        answer_pool = merge([maxpool(answer_f_rnn), maxpool(answer_b_rnn)], mode='concat', concat_axis=-1)

        return question_pool, answer_pool
Example 3
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        f_rnn = LSTM(141, return_sequences=True, implementation=1)
        b_rnn = LSTM(141, return_sequences=True, implementation=1, go_backwards=True)

        qf_rnn = f_rnn(question_embedding)
        qb_rnn = b_rnn(question_embedding)
        # question_pool = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)
        question_pool = concatenate([qf_rnn, qb_rnn], axis=-1)

        af_rnn = f_rnn(answer_embedding)
        ab_rnn = b_rnn(answer_embedding)
        # answer_pool = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)
        answer_pool = concatenate([af_rnn, ab_rnn], axis=-1)

        # cnn
        cnns = [Conv1D(kernel_size=kernel_size,
                       filters=500,
                       activation='tanh',
                       padding='same') for kernel_size in [1, 2, 3, 5]]
        # question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
        question_cnn = concatenate([cnn(question_pool) for cnn in cnns], axis=-1)
        # answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')
        answer_cnn = concatenate([cnn(answer_pool) for cnn in cnns], axis=-1)

        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = maxpool(question_cnn)
        answer_pool = maxpool(answer_cnn)

        return question_pool, answer_pool
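The manual forward/backward LSTM pair above can also be written with the built-in Bidirectional wrapper. One caveat: go_backwards=True alone returns the backward outputs in reversed time order, while Bidirectional re-reverses them; since both variants max-pool over the time axis afterwards, the pooled result is the same. A sketch, assuming Keras 2:

from keras.layers import LSTM, Bidirectional

# Runs the LSTM in both directions and concatenates the two output
# sequences along the feature axis, giving shape (batch, time, 282).
bi_rnn = Bidirectional(LSTM(141, return_sequences=True), merge_mode='concat')
# question_pool = bi_rnn(question_embedding)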
Example 4
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              mask_zero=True,
                              # dropout=0.2,
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = maxpool(question_embedding)
        answer_pool = maxpool(answer_embedding)

        return question_pool, answer_pool
Example 5
    def build(self):
        assert self.config['question_len'] == self.config['answer_len']

        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        hidden_layer = TimeDistributed(Dense(200, activation='tanh'))

        question_hl = hidden_layer(question_embedding)
        answer_hl = hidden_layer(answer_embedding)

        # cnn
        cnns = [Conv1D(kernel_size=kernel_size,
                       filters=1000,
                       activation='tanh',
                       padding='same') for kernel_size in [2, 3, 5, 7]]
        # question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat')
        question_cnn = concatenate([cnn(question_hl) for cnn in cnns], axis=-1)
        # answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat')
        answer_cnn = concatenate([cnn(answer_hl) for cnn in cnns], axis=-1)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        # enc = Dense(100, activation='tanh')
        # question_pool = enc(maxpool(question_cnn))
        # answer_pool = enc(maxpool(answer_cnn))
        question_pool = maxpool(question_cnn)
        answer_pool = maxpool(answer_cnn)

        return question_pool, answer_pool
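A design note on the TimeDistributed(Dense(200)) projection above: it applies the same 200-unit dense layer independently at every timestep, which is interchangeable with a width-1 convolution. A sketch of the equivalent form:

from keras.layers import Conv1D

# Equivalent to TimeDistributed(Dense(200, activation='tanh')): a
# kernel_size=1 Conv1D applies the same projection at every timestep.
hidden_layer = Conv1D(filters=200, kernel_size=1, activation='tanh')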
Example 6
    def build(self):
        question = self.question
        answer = self.get_answer()

        rnn_model = get_model(question_maxlen=self.model_params.get('question_len', 20),
                              answer_maxlen=self.model_params.get('answer_len', 60),
                              vocab_len=self.config['n_words'], n_hidden=256, load_save=True)
        rnn_model.trainable = False

        answer_inverted = rnn_model(answer)
        argmax = Lambda(lambda x: K.argmax(x, axis=2), output_shape=lambda x: (x[0], x[1]))
        argmax.trainable = False
        answer_argmax = argmax(answer_inverted)

        # add embedding layers
        weights = self.model_params.get('initial_embed_weights', None)
        weights = weights if weights is None else [weights]
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=self.model_params.get('n_embed_dims', 100),
                              # W_regularizer=regularizers.activity_l1(1e-4),
                              W_constraint=constraints.nonneg(),
                              dropout=0.5,
                              weights=weights,
                              mask_zero=True)
        question_embedding = embedding(question)
        answer_embedding = embedding(answer_argmax)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        question_maxpool = maxpool(question_embedding)
        answer_maxpool = maxpool(answer_embedding)

        # activation
        activation = Activation('linear')
        question_output = activation(question_maxpool)
        answer_output = activation(answer_maxpool)

        return question_output, answer_output
Example 7
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = np.load(self.config['initial_embed_weights'])
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=weights.shape[1],
                              weights=[weights])
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        f_rnn = LSTM(141, return_sequences=True, consume_less='mem')
        b_rnn = LSTM(141, return_sequences=True, consume_less='mem', go_backwards=True)

        qf_rnn = f_rnn(question_embedding)
        qb_rnn = b_rnn(question_embedding)
        question_pool = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)

        af_rnn = f_rnn(answer_embedding)
        ab_rnn = b_rnn(answer_embedding)
        answer_pool = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)

        # cnn
        cnns = [Convolution1D(filter_length=filter_length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same') for filter_length in [1, 2, 3, 5]]
        question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
        answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')

        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = maxpool(question_cnn)
        answer_pool = maxpool(answer_cnn)

        return question_pool, answer_pool
Example 8
        # block = BatchNormalization()(block)
        block = Dropout(0.1)(block)
        if pool_length[i]:
            block = MaxPooling1D(pool_length=pool_length[i])(block)

    block = Lambda(max_1d, output_shape=(nb_filter[-1],))(block)
    block = Dense(128, activation='relu')(block)
    return block

max_features = len(chars) + 1
char_embedding = 40

document = Input(shape=(max_sentences, maxlen), dtype='int64')
in_sentence = Input(shape=(maxlen, ), dtype='int64')

embedded = Lambda(binarize, output_shape=binarize_outshape)(in_sentence)

block2 = char_block(embedded, [100, 200, 200], filter_length=[5, 3, 3], subsample=[1, 1, 1], pool_length=[2, 2, 2])
block3 = char_block(embedded, [200, 300, 300], filter_length=[7, 3, 3], subsample=[1, 1, 1], pool_length=[2, 2, 2])

sent_encode = merge([block2, block3], mode='concat', concat_axis=-1)
sent_encode = Dropout(0.4)(sent_encode)

encoder = Model(input=in_sentence, output=sent_encode)
encoded = TimeDistributed(encoder)(document)

lstm_h = 80
forwards = LSTM(lstm_h, return_sequences=False, dropout_W=0.15, dropout_U=0.15,
                consume_less='gpu')(encoded)
backwards = LSTM(lstm_h, return_sequences=False, dropout_W=0.15, dropout_U=0.15,
                 consume_less='gpu', go_backwards=True)(encoded)
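This snippet leans on three helpers (binarize, binarize_outshape, max_1d) that are defined earlier in the original script and not shown here. A plausible reconstruction, offered as an assumption rather than the original code:

import keras.backend as K

def binarize(x):
    # assumed helper: one-hot encode a sequence of character ids on the fly
    return K.one_hot(K.cast(x, 'int32'), max_features)

def binarize_outshape(in_shape):
    # assumed helper: shape of the on-the-fly one-hot encoding
    return in_shape[0], in_shape[1], max_features

def max_1d(x):
    # assumed helper: global max pooling over the time axis
    return K.max(x, axis=1)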
Example 9
from dlnn.tests.ml.conv_test import layer_1_conv, layer_3_conv, layer_6_conv
from dlnn.tests.ml.elm_func_test import layer_9_flatten, layer_10_a_dense, layer_12_a_dense, layer_10_b_dense, \
    layer_12_b_dense, layer_10_c_dense, layer_12_c_dense, layer_13_concatenate, layer_14_reshape, \
    layer_15_merge_categorical
from dlnn.tests.ml.pooling_test import layer_5_pool, layer_8_pool
from dlnn.tests.ml.repos_helper import corpus_data, label_init, corpus_label, normalized, corr_step_1, corr_step_2, \
    corr_step_3, corr_step_4, corr_step_5, corr_step_6, corr_step_7, corr_step_8, corr_step_8_full, corr_step_9, \
    corr_step_10_a_bias_init, corr_step_10_a_kernel_init, corr_step_10_b_kernel_init, corr_step_10_b_bias_init, \
    corr_step_10_c_bias_init, corr_step_10_c_kernel_init, corr_step_10_a, corr_step_10_b, corr_step_10_c, \
    corr_step_11_a, corr_step_11_b, corr_step_11_c
from dlnn.tests.ml.testcase import TestCase
from dlnn.util import to_categorical
from dlnn.util.Initializers import Unifinv

inputs = Input(shape=(corpus_data.shape[-1], ))
scale = Lambda(lambda x: x * 1.0 / 300.0)(inputs)
reshape = Reshape([1, 1, 4])(scale)
tile = Lambda(lambda x: K.tile(x, (1, 1, 4, 1)))(reshape)
step_1_conv = layer_1_conv()(tile)
step_2_activation = layer_2_activation()(step_1_conv)
step_3_conv = layer_3_conv()(step_2_activation)
step_4_activation = layer_4_activation()(step_3_conv)
step_5_pool = layer_5_pool(2)(step_4_activation)
step_6_conv = layer_6_conv()(step_5_pool)
step_7_activation = layer_7_activation()(step_6_conv)
step_8_pool = layer_8_pool(1)(step_7_activation)
step_9_flatten = layer_9_flatten()(step_8_pool)
step_10_a_dense = layer_10_a_dense()(step_9_flatten)
step_11_a_activation = layer_11_a_activation()(step_10_a_dense)
step_12_a_dense = layer_12_a_dense()(step_11_a_activation)
step_10_b_dense = layer_10_b_dense()(step_9_flatten)
Example 10
    def __init__(self,
                 image_size,
                 channels,
                 conv_layers,
                 feature_maps,
                 filter_shapes,
                 strides,
                 dense_layers,
                 dense_neurons,
                 dense_dropouts,
                 latent_dim,
                 activation='relu',
                 eps_mean=0.0,
                 eps_std=1.0):

        self.history = LossHistory()

        # check that arguments are proper length;
        if len(filter_shapes) != conv_layers:
            raise Exception(
                "number of convolutional layers must equal length of filter_shapes list"
            )
        if len(strides) != conv_layers:
            raise Exception(
                "number of convolutional layers must equal length of strides list"
            )
        if len(feature_maps) != conv_layers:
            raise Exception(
                "number of convolutional layers must equal length of feature_maps list"
            )
        if len(dense_neurons) != dense_layers:
            raise Exception(
                "number of dense layers must equal length of dense_neurons list"
            )
        if len(dense_dropouts) != dense_layers:
            raise Exception(
                "number of dense layers must equal length of dense_dropouts list"
            )

        # even shaped filters may cause problems in theano backend;
        even_filters = [
            f for pair in filter_shapes for f in pair if f % 2 == 0
        ]
        if K.image_dim_ordering() in ('th', 'channels_first') and len(even_filters) > 0:
            warnings.warn(
                'Even shaped filters may cause problems in Theano backend')

        self.eps_mean = eps_mean
        self.eps_std = eps_std
        self.image_size = image_size

        # define input layer;
        if K.image_dim_ordering() in ('th', 'channels_first'):
            self.input = Input(shape=(channels, image_size[0], image_size[1]))
        else:
            self.input = Input(shape=(image_size[0], image_size[1], channels))

        # define convolutional encoding layers;
        self.encode_conv = []
        layer = Convolution2D(feature_maps[0],
                              filter_shapes[0],
                              padding='same',
                              activation=activation,
                              strides=strides[0])(self.input)
        self.encode_conv.append(layer)
        for i in range(1, conv_layers):
            layer = Convolution2D(feature_maps[i],
                                  filter_shapes[i],
                                  padding='same',
                                  activation=activation,
                                  strides=strides[i])(self.encode_conv[i - 1])
            self.encode_conv.append(layer)

        # define dense encoding layers;
        self.flat = Flatten()(self.encode_conv[-1])
        self.encode_dense = []
        layer = Dense(dense_neurons[0], activation=activation)(Dropout(
            dense_dropouts[0])(self.flat))
        self.encode_dense.append(layer)
        for i in range(1, dense_layers):
            layer = Dense(dense_neurons[i], activation=activation)(Dropout(
                dense_dropouts[i])(self.encode_dense[i - 1]))
            self.encode_dense.append(layer)

        # define embedding layer;
        self.z_mean = Dense(latent_dim)(self.encode_dense[-1])
        self.z_log_var = Dense(latent_dim)(self.encode_dense[-1])
        self.z = Lambda(self._sampling, output_shape=(latent_dim, ))(
            [self.z_mean, self.z_log_var])

        # save all decoding layers for generation model;
        self.all_decoding = []

        # define dense decoding layers;
        self.decode_dense = []
        layer = Dense(dense_neurons[-1], activation=activation)
        self.all_decoding.append(layer)
        self.decode_dense.append(layer(self.z))
        for i in range(1, dense_layers):
            layer = Dense(dense_neurons[-i - 1], activation=activation)
            self.all_decoding.append(layer)
            self.decode_dense.append(layer(self.decode_dense[i - 1]))

        # dummy model to get image size after encoding convolutions;
        self.decode_conv = []
        if K.image_dim_ordering() in ('th', 'channels_first'):
            dummy_input = np.ones((1, channels, image_size[0], image_size[1]))
        else:
            dummy_input = np.ones((1, image_size[0], image_size[1], channels))
        dummy = Model(self.input, self.encode_conv[-1])
        conv_size = dummy.predict(dummy_input).shape
        layer = Dense(conv_size[1] * conv_size[2] * conv_size[3],
                      activation=activation)
        self.all_decoding.append(layer)
        self.decode_dense.append(layer(self.decode_dense[-1]))
        reshape = Reshape(conv_size[1:])
        self.all_decoding.append(reshape)
        self.decode_conv.append(reshape(self.decode_dense[-1]))

        # define deconvolutional decoding layers;
        for i in range(1, conv_layers):
            if K.image_dim_ordering() in ('th', 'channels_first'):
                dummy_input = np.ones(
                    (1, channels, image_size[0], image_size[1]))
            else:
                dummy_input = np.ones(
                    (1, image_size[0], image_size[1], channels))
            dummy = Model(self.input, self.encode_conv[-i - 1])
            conv_size = list(dummy.predict(dummy_input).shape)

            if K.image_dim_ordering() in ('th', 'channels_first'):
                conv_size[1] = feature_maps[-i]
            else:
                conv_size[3] = feature_maps[-i]

            layer = Conv2DTranspose(feature_maps[-i - 1],
                                    filter_shapes[-i],
                                    padding='same',
                                    activation=activation,
                                    strides=strides[-i])
            self.all_decoding.append(layer)
            self.decode_conv.append(layer(self.decode_conv[i - 1]))

        layer = Conv2DTranspose(channels,
                                filter_shapes[0],
                                padding='same',
                                activation='sigmoid',
                                strides=strides[0])
        self.all_decoding.append(layer)
        self.output = layer(self.decode_conv[-1])

        # build model;
        self.model = Model(self.input, self.output)
        self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
        self.model.compile(optimizer=self.optimizer, loss=self._vae_loss)
        #         print "model summary:"
        #         self.model.summary()

        # model for embeddings;
        self.embedder = Model(self.input, self.z_mean)

        # model for generation;
        self.decoder_input = Input(shape=(latent_dim, ))
        self.generation = []
        self.generation.append(self.all_decoding[0](self.decoder_input))
        for i in range(1, len(self.all_decoding)):
            self.generation.append(self.all_decoding[i](self.generation[i - 1]))
        self.generator = Model(self.decoder_input, self.generation[-1])
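The constructor references self._sampling and self._vae_loss, which are defined elsewhere in the class. For orientation, a reparameterization-trick sampler consistent with the eps_mean/eps_std attributes above would look like this (a sketch, not the original method):

def _sampling(self, args):
    # draw z = mean + sigma * epsilon, with epsilon ~ N(eps_mean, eps_std)
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean),
                              mean=self.eps_mean, stddev=self.eps_std)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon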
Example 11
def f(x):
    if use_prelu:
        x = PReLU()(x)
    else:
        x = Lambda(relu)(x)
    return x
Example 12
def get_siamese_model(input_shape):
    """
        Model architecture
    """
    def initialize_weights():
        return 'glorot_uniform'

    def initialize_bias():
        return 'zeros'

    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)
    # Convolutional Neural Network
    model = Sequential()
    model.add(
        Conv2D(64, (10, 10),
               activation='relu',
               input_shape=input_shape,
               kernel_initializer=initialize_weights(),
               kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(128, (7, 7),
               activation='relu',
               kernel_initializer=initialize_weights(),
               bias_initializer=initialize_bias(),
               kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(128, (4, 4),
               activation='relu',
               kernel_initializer=initialize_weights(),
               bias_initializer=initialize_bias(),
               kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(
        Conv2D(256, (4, 4),
               activation='relu',
               kernel_initializer=initialize_weights(),
               bias_initializer=initialize_bias(),
               kernel_regularizer=regularizers.l2(2e-4)))
    model.add(Flatten())
    model.add(Dense(4096, activation='sigmoid'))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Add a dense layer with a sigmoid unit to generate the similarity score
    prediction = Dense(1,
                       activation='sigmoid',
                       bias_initializer=initialize_bias())(L1_distance)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input, right_input], outputs=prediction)

    # return the model
    return siamese_net
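A usage sketch (the 105x105x1 input shape follows the Omniglot setup this architecture comes from; the compile settings are illustrative, not from the original snippet):

model = get_siamese_model((105, 105, 1))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.fit([pairs_left, pairs_right], labels, batch_size=32, epochs=10)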
Example 13
        plt.imshow(original_image)
        plt.savefig('./examples/original_image.png')
        flipped_image = cv2.flip(image, 1)
        plt.figure()
        plt.imshow(flipped_image)
        plt.savefig('./examples/flipped_image.png')
        
    image_taken += 1
    
# get the training data
X_train = np.array(argumented_images)
y_train = np.array(argumented_steering_angles)

# build the model
model = Sequential()
model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape=(160, 320, 3)))
model.add(Cropping2D(cropping=((70, 25), (0, 0))))

model.add(Convolution2D(24, 5, 5, subsample=(2, 2), activation="relu"))
model.add(Convolution2D(36, 5, 5, subsample=(2, 2), activation="relu"))
model.add(Convolution2D(48, 5, 5, subsample=(2, 2), activation="relu"))

model.add(Convolution2D(64, 3, 3, activation="relu"))
model.add(Convolution2D(64, 3, 3, activation="relu"))


model.add(Flatten())
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(50))
model.add(Activation('relu'))
Example 14
def create_model(filter_kernels, dense_outputs, maxlen, vocab_size, nb_filter,
                 cat_output):
    initializer = RandomNormal(mean=0.0, stddev=0.05, seed=None)

    # Define what the input shape looks like
    inputs = Input(shape=(maxlen, ), dtype='int64')

    # Option one:
    # Use a Lambda layer to create a one-hot encoding of a sequence of characters on the fly.
    # Holding one-hot encodings in memory is very inefficient.
    # The output shape of the embedded layer will be: batch x maxlen x vocab_size
    #
    import tensorflow as tf

    def one_hot(x):
        return tf.one_hot(x,
                          vocab_size,
                          on_value=1.0,
                          off_value=0.0,
                          axis=-1,
                          dtype=tf.float32)

    def one_hot_outshape(in_shape):
        return in_shape[0], in_shape[1], vocab_size

    embedded = Lambda(one_hot, output_shape=one_hot_outshape)(inputs)

    # Option two:
    # Or, simply use Embedding layer as following instead of use lambda to create one-hot layer
    # Think of it as a one-hot embedding and a linear layer mashed into a single layer.
    # See discussion here: https://github.com/keras-team/keras/issues/4838
    # Note this will introduce one extra layer of weights (of size vocab_size x vocab_size = 69*69 = 4761)
    # embedded = Embedding(input_dim=vocab_size, output_dim=vocab_size)(inputs)

    # All the convolutional layers...
    conv = Convolution1D(filters=nb_filter,
                         kernel_size=filter_kernels[0],
                         kernel_initializer=initializer,
                         padding='valid',
                         activation='relu',
                         input_shape=(maxlen, vocab_size),
                         name='Conv1')(embedded)
    conv = MaxPooling1D(pool_size=3, name='MaxPool1')(conv)

    conv1 = Convolution1D(filters=nb_filter,
                          kernel_size=filter_kernels[1],
                          kernel_initializer=initializer,
                          padding='valid',
                          activation='relu',
                          name='Conv2')(conv)
    conv1 = MaxPooling1D(pool_size=3, name='MaxPool2')(conv1)

    conv2 = Convolution1D(filters=nb_filter,
                          kernel_size=filter_kernels[2],
                          kernel_initializer=initializer,
                          padding='valid',
                          activation='relu',
                          name='Conv3')(conv1)

    conv3 = Convolution1D(filters=nb_filter,
                          kernel_size=filter_kernels[3],
                          kernel_initializer=initializer,
                          padding='valid',
                          activation='relu',
                          name='Conv4')(conv2)

    conv4 = Convolution1D(filters=nb_filter,
                          kernel_size=filter_kernels[4],
                          kernel_initializer=initializer,
                          padding='valid',
                          activation='relu',
                          name='Conv5')(conv3)

    conv5 = Convolution1D(filters=nb_filter,
                          kernel_size=filter_kernels[5],
                          kernel_initializer=initializer,
                          padding='valid',
                          activation='relu',
                          name='Conv6')(conv4)
    conv5 = MaxPooling1D(pool_size=3, name='MaxPool3')(conv5)
    k = 40

    # K-max pooling
    def kmax_outshape(in_shape):
        return (in_shape[0], in_shape[2] * k)

    def KMaxPooling(inputs):
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=k, sorted=True, name='TopK')[0]
        return top_k

    # conv5 = Lambda(KMaxPooling, output_shape=kmax_outshape)(conv5)
    conv5 = Flatten()(conv5)

    # Two dense layers with dropout of .5
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(conv5))
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(z))

    # Output dense layer with softmax activation
    pred = Dense(cat_output, activation='softmax', name='output')(z)

    model = Model(inputs=inputs, outputs=pred)
    print(model.summary())
    sgd = SGD(lr=0.01, momentum=0.9)
    # Feel free to use SGD above. I found Adam with lr=0.001 is faster than SGD with lr=0.01.
    adam = Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    return model
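A usage sketch with the hyperparameters of the Zhang et al. character-level CNN that this architecture follows (illustrative values, not taken from the snippet):

model = create_model(filter_kernels=[7, 7, 3, 3, 3, 3], dense_outputs=1024,
                     maxlen=1014, vocab_size=69, nb_filter=256, cat_output=4)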
Example 15
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                clip_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):

    n_predictor_layers = 3  # The number of predictor conv layers in the network
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:  # We need one variance value for each of the four box coordinates
        raise ValueError(
            "4 variance values must be passed, but {} values were received.".format(
                len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1 = Conv2D(32, (5, 5),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv1')(x1)
    # TensorFlow uses the filter format [filter_height, filter_width, in_channels, out_channels], hence axis=3
    conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1)
    conv1 = ELU(name='elu1')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)

    conv2 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv2')(pool1)
    conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
    conv2 = ELU(name='elu2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)

    conv3 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv3')(pool2)
    conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
    conv3 = ELU(name='elu3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    conv4 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv4')(pool3)
    conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
    conv4 = ELU(name='elu4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)

    conv5 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv5')(pool4)
    conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
    conv5 = ELU(name='elu5')(conv5)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)

    conv6 = Conv2D(32, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv6')(pool5)
    conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
    conv6 = ELU(name='elu6')(conv6)

    classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes4')(conv4)
    classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes5')(conv5)
    classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes6')(conv6)

    boxes4 = Conv2D(n_boxes[0] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes4')(conv4)
    boxes5 = Conv2D(n_boxes[1] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes5')(conv5)
    boxes6 = Conv2D(n_boxes[2] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes6')(conv6)

    # Generate the anchor boxes
    anchors4 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[0],
                           next_scale=scales[1],
                           aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[0],
                           this_offsets=offsets[0],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors4')(boxes4)
    anchors5 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[1],
                           next_scale=scales[2],
                           aspect_ratios=aspect_ratios[1],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[1],
                           this_offsets=offsets[1],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors5')(boxes5)
    anchors6 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[2],
                           next_scale=scales[3],
                           aspect_ratios=aspect_ratios[2],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[2],
                           this_offsets=offsets[2],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors6')(boxes6)

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    classes4_reshaped = Reshape((-1, n_classes),
                                name='classes4_reshape')(classes4)
    classes5_reshaped = Reshape((-1, n_classes),
                                name='classes5_reshape')(classes5)
    classes6_reshaped = Reshape((-1, n_classes),
                                name='classes6_reshape')(classes6)

    # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
    boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
    boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)

    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
    anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
    anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)

    # Concatenate the predictions from the different layers and the associated anchor box tensors.
    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1.
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1, name='classes_concat')(
        [classes4_reshaped, classes5_reshaped, classes6_reshaped])

    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')(
        [boxes4_reshaped, boxes5_reshaped, boxes6_reshaped])

    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1, name='anchors_concat')(
        [anchors4_reshaped, anchors5_reshaped, anchors6_reshaped])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    classes_softmax = Activation('softmax',
                                 name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        # The spatial dimensions are the same for the `classes` and `boxes` predictor layers.
        predictor_sizes = np.array([
            classes4._keras_shape[1:3], classes5._keras_shape[1:3],
            classes6._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
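A usage sketch for the three-predictor-layer network above (all values illustrative; note that scales needs n_predictor_layers + 1 = 4 entries):

model = build_model(image_size=(300, 480, 3), n_classes=5, mode='training',
                    scales=[0.08, 0.16, 0.32, 0.64],
                    aspect_ratios_global=[0.5, 1.0, 2.0])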
Example 16
embedder = Embedding(MAX_TOKENS + 1,
                     embedding_dim,
                     weights=[embeddings],
                     trainable=False)
doc_embedding = embedder(document)
l_embedding = embedder(left_context)
r_embedding = embedder(right_context)

# I use LSTM RNNs instead of vanilla RNNs as described in the paper.
forward = LSTM(hidden_dim_1,
               return_sequences=True)(l_embedding)  # See equation (1).
backward = LSTM(hidden_dim_1, return_sequences=True,
                go_backwards=True)(r_embedding)  # See equation (2).
# Keras returns the output sequences in reverse order.
backward = Lambda(lambda x: backend.reverse(x, axes=1))(backward)
together = concatenate([forward, doc_embedding, backward],
                       axis=2)  # See equation (3).

semantic = TimeDistributed(Dense(hidden_dim_2, activation="tanh"))(
    together)  # See equation (4).

# Keras provides its own max-pooling layers, but they cannot handle variable length input
# (as far as I can tell). As a result, I define my own max-pooling layer here.
pool_rnn = Lambda(lambda x: backend.max(x, axis=1),
                  output_shape=(hidden_dim_2, ))(semantic)  # See equation (5).

output = Dense(NUM_CLASSES, input_dim=hidden_dim_2,
               activation="softmax")(pool_rnn)  # See equations (6) and (7).

model = Model(inputs=[document, left_context, right_context], outputs=output)
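The snippet assumes document, left_context, and right_context were defined before the code above. A plausible reconstruction (a sketch; the variable-length None dimension matches the comment about variable-length input):

from keras.layers import Input

document = Input(shape=(None,), dtype='int32')
left_context = Input(shape=(None,), dtype='int32')
right_context = Input(shape=(None,), dtype='int32')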
Example 17
    def __init__(self, input_size, weights=None):
        input_image = Input(shape=(input_size, input_size, 3))

        # the function to implement the organization layer (thanks to github.com/allanzelener/YAD2K)
        def space_to_depth_x2(x):
            return tf.space_to_depth(x, block_size=2)

        # Layer 1
        x = Conv2D(32, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_1',
                   use_bias=False)(input_image)
        x = BatchNormalization(name='norm_1')(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        # Layer 2
        x = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_2',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_2')(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        # Layer 3
        x = Conv2D(128, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_3',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_3')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 4
        x = Conv2D(64, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_4',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_4')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 5
        x = Conv2D(128, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_5',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_5')(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        # Layer 6
        x = Conv2D(256, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_6',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_6')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 7
        x = Conv2D(128, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_7',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_7')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 8
        x = Conv2D(256, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_8',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_8')(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

        # Layer 9
        x = Conv2D(512, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_9',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_9')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 10
        x = Conv2D(256, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_10',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_10')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 11
        x = Conv2D(512, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_11',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_11')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 12
        x = Conv2D(256, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_12',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_12')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 13
        x = Conv2D(512, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_13',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_13')(x)
        x = LeakyReLU(alpha=0.1)(x)

        skip_connection = x

        x = MaxPooling2D(pool_size=(2, 2))(x)

        # Layer 14
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_14',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_14')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 15
        x = Conv2D(512, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_15',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_15')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 16
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_16',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_16')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 17
        x = Conv2D(512, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='conv_17',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_17')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 18
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_18',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_18')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 19
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_19',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_19')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 20
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_20',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_20')(x)
        x = LeakyReLU(alpha=0.1)(x)

        # Layer 21
        skip_connection = Conv2D(64, (1, 1),
                                 strides=(1, 1),
                                 padding='same',
                                 name='conv_21',
                                 use_bias=False)(skip_connection)
        skip_connection = BatchNormalization(name='norm_21')(skip_connection)
        skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
        skip_connection = Lambda(space_to_depth_x2)(skip_connection)

        x = concatenate([skip_connection, x])

        # Layer 22
        x = Conv2D(1024, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   name='conv_22',
                   use_bias=False)(x)
        x = BatchNormalization(name='norm_22')(x)
        x = LeakyReLU(alpha=0.1)(x)

        self.feature_extractor = Model(input_image, x)

        if weights == 'imagenet':
            print(
                'ImageNet weights for the YOLO backend are not available yet, defaulting to random weights'
            )
        elif weights is None:
            pass
        else:
            print('Loading backend weights: ' + weights)
            self.feature_extractor.load_weights(weights)
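What the Lambda(space_to_depth_x2) on the skip connection does, shown on shapes (a sketch assuming a 416x416 input; TF2 renames the op to tf.nn.space_to_depth):

import numpy as np
import tensorflow as tf

# space_to_depth with block_size=2 moves each 2x2 spatial block into the
# channel axis: height and width halve, channels multiply by 4, so the
# 26x26x64 skip tensor becomes 13x13x256 and can be concatenated with the
# 13x13x1024 main path.
t = tf.constant(np.zeros((1, 26, 26, 64), dtype=np.float32))
print(tf.nn.space_to_depth(t, block_size=2).shape)  # (1, 13, 13, 256)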
Example 18
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):
    """Instantiates the Inflated 3D Inception v1 architecture.

    Optionally loads weights pre-trained
    on Kinetics. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format='channels_last'` in your Keras config
    at ~/.keras/keras.json.
    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.
    Note that the default input frame(image) size for this model is 224x224.

    # Arguments
        include_top: whether to include the classification
            layer at the top of the network.
        weights: one of `None` (random initialization),
            'kinetics_only' (pre-training on the Kinetics dataset only),
            or 'imagenet_and_kinetics' (pre-training on the ImageNet and Kinetics datasets).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(NUM_FRAMES, 224, 224, 3)` with `channels_last`
            data format, or `(NUM_FRAMES, 3, 224, 224)` with
            `channels_first` data format).
            It should have exactly 3 input channels, and NUM_FRAMES
            should be no smaller than 8. The authors used 64 frames per
            example for training and testing on the Kinetics dataset.
            Width and height should be no smaller than 32.
            E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional dropout probability, applied in the dropout
            layer after the global average pooling layer.
            0.0 means no dropout is applied; 1.0 means all features are dropped.
            Note: Since Dropout is applied just before the classification
            layer, it is only useful when `include_top` is set to True.
        endpoint_logit: (boolean) optional. If True, the model's forward pass
            will end at producing logits. Otherwise, softmax is applied after producing
            the logits to produce the class probability predictions. Setting this parameter
            to True is particularly useful when you want to combine the results of the
            RGB model and the optical-flow model.
            - `True` end model forward pass at logit output
            - `False` go further after logit to produce softmax predictions
            Note: This parameter is only useful when `include_top` is set to True.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in WEIGHTS_NAME or weights is None
            or os.path.exists(weights)):
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' '
            'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError(
            'If using `weights` as one of these %s, with `include_top`'
            ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_frame_size=224,
                                      min_frame_size=32,
                                      default_num_frames=64,
                                      min_num_frames=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input,
                  64,
                  7,
                  7,
                  7,
                  strides=(2, 2, 2),
                  padding='same',
                  name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3),
                     strides=(1, 2, 2),
                     padding='same',
                     name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x,
                  64,
                  1,
                  1,
                  1,
                  strides=(1, 1, 1),
                  padding='same',
                  name='Conv3d_2b_1x1')
    x = conv3d_bn(x,
                  192,
                  3,
                  3,
                  3,
                  strides=(1, 1, 1),
                  padding='same',
                  name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3),
                     strides=(1, 2, 2),
                     padding='same',
                     name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b
    branch_0 = conv3d_bn(x,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         96,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         16,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         32,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_3b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_3b')

    # Mixed 3c
    branch_0 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         192,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         96,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_3c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_3c')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3),
                     strides=(2, 2, 2),
                     padding='same',
                     name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b
    branch_0 = conv3d_bn(x,
                         192,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         96,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         208,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         16,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         48,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4b')

    # Mixed 4c
    branch_0 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         112,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         224,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         24,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4c')

    # Mixed 4d
    branch_0 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_0a_1x1')

    branch_1 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         256,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4d_1b_3x3')

    branch_2 = conv3d_bn(x,
                         24,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4d_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4d_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4d')

    # Mixed 4e
    branch_0 = conv3d_bn(x,
                         112,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_0a_1x1')

    branch_1 = conv3d_bn(x,
                         144,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         288,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4e_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4e_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4e_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4e')

    # Mixed 4f
    branch_0 = conv3d_bn(x,
                         256,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_0a_1x1')

    branch_1 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         320,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4f_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4f_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4f_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4f')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2),
                     strides=(2, 2, 2),
                     padding='same',
                     name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b
    branch_0 = conv3d_bn(x,
                         256,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         320,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_5b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_5b')

    # Mixed 5c
    branch_0 = conv3d_bn(x,
                         384,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         192,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         384,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         48,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_5c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_5c')

    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7),
                             strides=(1, 1, 1),
                             padding='valid',
                             name='global_avg_pool')(x)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x,
                      classes,
                      1,
                      1,
                      1,
                      padding='same',
                      use_bias=True,
                      use_activation_fn=False,
                      use_bn=False,
                      name='Conv3d_6a_1x1')

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)

        # logits (raw scores for each class)
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        h = int(x.shape[2])
        w = int(x.shape[3])
        x = AveragePooling3D((2, h, w),
                             strides=(1, 1, 1),
                             padding='valid',
                             name='global_avg_pool')(x)

    inputs = img_input
    # create model
    model = Model(inputs, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        if weights == WEIGHTS_NAME[0]:  # rgb_kinetics_only
            if include_top:
                weights_url = WEIGHTS_PATH['rgb_kinetics_only']
                model_name = 'i3d_inception_rgb_kinetics_only.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only']
                model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5'

        elif weights == WEIGHTS_NAME[1]:  # flow_kinetics_only
            if include_top:
                weights_url = WEIGHTS_PATH['flow_kinetics_only']
                model_name = 'i3d_inception_flow_kinetics_only.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['flow_kinetics_only']
                model_name = 'i3d_inception_flow_kinetics_only_no_top.h5'

        elif weights == WEIGHTS_NAME[2]:  # rgb_imagenet_and_kinetics
            if include_top:
                weights_url = WEIGHTS_PATH['rgb_imagenet_and_kinetics']
                model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['rgb_imagenet_and_kinetics']
                model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5'

        elif weights == WEIGHTS_NAME[3]:  # flow_imagenet_and_kinetics
            if include_top:
                weights_url = WEIGHTS_PATH['flow_imagenet_and_kinetics']
                model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['flow_imagenet_and_kinetics']
                model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5'

        downloaded_weights_path = get_file(model_name,
                                           weights_url,
                                           cache_subdir='models')
        model.load_weights(downloaded_weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend(
        ) == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights)

    return model
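
# A minimal usage sketch (illustrative, assuming the weight files listed in
# WEIGHTS_PATH are reachable): build the RGB model with Kinetics-only
# weights and score a dummy 64-frame clip.
import numpy as np

rgb_model = Inception_Inflated3d(include_top=True,
                                 weights='rgb_kinetics_only',
                                 input_shape=(64, 224, 224, 3),
                                 classes=400)
dummy_clip = np.zeros((1, 64, 224, 224, 3), dtype='float32')
logits = rgb_model.predict(dummy_clip)  # shape: (1, 400)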
Example no. 19
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0.,
                              stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon


# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])

# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim,
                  activation='relu')  # Deepen decoder after this
decoder_mean = Dense(original_dim, activation='sigmoid')

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)


# Custom loss layer
class CustomVariationalLayer(Layer):
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)
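
    # (The listing cuts the class off here; in the canonical Keras VAE
    # example it continues as below -- a sketch assuming `original_dim`,
    # `z_mean`, `z_log_var` from above and `from keras import metrics`.)
    def vae_loss(self, x, x_decoded_mean):
        xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
        kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x, x_decoded_mean = inputs
        self.add_loss(self.vae_loss(x, x_decoded_mean), inputs=inputs)
        return x  # the output is unused; the layer only registers the loss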
# embeddingPrompt('char')
embedding_layer_c = Embedding(len(char.index) + 1,
                              CHAR_EMBEDDING_DIM,
                              weights=[char_embedding_matrix],
                              input_length=MAX_WORD_LENGTH,
                              trainable=trainable,
                              mask_zero=mask)

sequence_input_c = Input(shape=(
    padsize,
    MAX_WORD_LENGTH,
), dtype='int32')

embedded_sequences_c = embedding_layer_c(sequence_input_c)

rone = Lambda(reshape_one)(embedded_sequences_c)

merge_m = 'sum'  # raw_input('Enter merge mode for character GRU: ')
merge_m_c = merge_m
dropout = 0.2  # input('Enter dropout for GRU: ')
rec_dropout = dropout  # input('Enter character GRU recurrent dropout: ')
gru_karakter = Bidirectional(GRU(CHAR_EMBEDDING_DIM,
                                 return_sequences=False,
                                 dropout=dropout,
                                 recurrent_dropout=rec_dropout),
                             merge_mode=merge_m,
                             weights=None)(rone)

rtwo = Lambda(reshape_two)(gru_karakter)
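
# Hypothetical sketches of the undefined reshape_one / reshape_two helpers
# used above: fold the word axis into the batch axis so the character GRU
# runs once per word, then unfold it again (an assumption, not the source).
def reshape_one(t):
    # (batch, padsize, MAX_WORD_LENGTH, dim) -> (batch * padsize, MAX_WORD_LENGTH, dim)
    return K.reshape(t, (-1, MAX_WORD_LENGTH, CHAR_EMBEDDING_DIM))

def reshape_two(t):
    # (batch * padsize, dim) -> (batch, padsize, dim)
    return K.reshape(t, (-1, padsize, CHAR_EMBEDDING_DIM))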
"""
Combine word + char model
Example no. 21
def make_decoder_layer(all_anchors, num_classes, input_shape):
    # Lambda layer for postprocessing YOLOv3 output
    def decode(yolo_outputs):
        num_scales = len(yolo_outputs)
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                       ] if num_scales == 3 else [[3, 4, 5], [1, 2, 3]]

        b_min_max_list = []
        box_confidence_list = []
        class_probs_list = []

        for scale_idx in range(num_scales):  # iterate over the actual number of scales (was hard-coded to 3)
            anchors = all_anchors[anchor_mask[scale_idx]]
            output = yolo_outputs[scale_idx]
            num_anchors = len(anchors)

            batch_size = K.shape(output)[0]
            grid_shape = K.shape(output)[1:3]
            grid_height = grid_shape[0]  # height
            grid_width = grid_shape[1]  # width

            # reshape to tensor of dimensions batch_size, grid_height, grid_width, num_anchors, 5 + num_classes
            # the five box parameters are:
            #   t_x, t_y determine the center point of the box
            #   t_w, t_h determine the width and height of the box
            #   the box confidence indicates the confidence that the box contains an object and that the box is accurate
            output = K.reshape(
                output,
                [-1, grid_height, grid_width, num_anchors, 5 + num_classes])

            # compute b_x, b_y for each cell and each anchor
            c_x = K.tile(K.reshape(K.arange(grid_width), [1, -1, 1, 1]),
                         [grid_height, 1, num_anchors, 1])
            c_y = K.tile(K.reshape(K.arange(grid_height), [-1, 1, 1, 1]),
                         [1, grid_width, num_anchors, 1])
            c_xy = K.concatenate([c_x, c_y])
            c_xy = K.cast(c_xy, K.dtype(output))
            b_xy = (K.sigmoid(output[..., :2]) + c_xy) / K.cast(
                grid_shape[::-1], K.dtype(output))

            # compute b_w and b_h for each cell and each anchor
            p_wh = K.tile(
                K.reshape(K.constant(anchors), [1, 1, num_anchors, 2]),
                [grid_height, grid_width, 1, 1])
            b_wh = p_wh * K.exp(output[..., 2:4]) / K.cast(
                input_shape[::-1], K.dtype(output))

            b_min_max = K.reshape(convert_box_params(
                b_xy, b_wh), [batch_size, -1, 4])  # y_min, x_min, y_max, x_max

            # compute box confidence for each cell and each anchor
            box_confidence = K.reshape(K.sigmoid(output[..., 4]),
                                       [batch_size, -1])

            # compute class probabilities for each cell and each anchor
            class_probs = K.reshape(K.sigmoid(output[..., 5:]),
                                    [batch_size, -1, num_classes])

            b_min_max_list.append(b_min_max)
            box_confidence_list.append(box_confidence)
            class_probs_list.append(class_probs)

        return [
            K.concatenate(b_min_max_list, axis=1),
            K.concatenate(box_confidence_list, axis=1),
            K.concatenate(class_probs_list, axis=1)
        ]

    return Lambda(decode)
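
# Hypothetical usage of the decoder (model and shapes here are assumptions,
# not part of the original): wrap a YOLOv3 model's scale outputs to get
# flattened boxes, confidences, and class probabilities.
decode_layer = make_decoder_layer(all_anchors, num_classes=80,
                                  input_shape=(416, 416))
boxes, confidences, class_probs = decode_layer(yolo_model.outputs)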
        _train_data, _test_data = read.split(feature_data, test_id)
        _train_data = read.remove_class(_train_data, [a_label])
        _train_data = create_train_instances(_train_data)

        _support_data, _test_data = read.support_set_split(
            _test_data, samples_per_class)
        _support_data, _support_labels = read.flatten(_support_data)
        _support_data = np.array(_support_data)

        numsupportset = samples_per_class * classes_per_set
        input1 = Input((numsupportset + 1, feature_length))
        modelinputs = []
        base_network = mlp_embedding()
        for lidx in range(numsupportset):
            modelinputs.append(
                base_network(Lambda(lambda x: x[:, lidx, :])(input1)))
        targetembedding = base_network(Lambda(lambda x: x[:, -1, :])(input1))
        modelinputs.append(targetembedding)
        supportlabels = Input((numsupportset, classes_per_set))
        modelinputs.append(supportlabels)
        knnsimilarity = MatchCosine(nway=classes_per_set,
                                    n_samp=samples_per_class)(modelinputs)

        model = Model(inputs=[input1, supportlabels], outputs=knnsimilarity)
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model.fit([_train_data[0], _train_data[1]],
                  _train_data[2],
                  epochs=epochs,
                  batch_size=batch_size,
Example no. 23
import math

# Use a pre-trained VGG16 model as a starting point.
input_height = 65
input_width = 320
vgg = VGG16(include_top=False,
            weights='imagenet',
            input_shape=(input_height, input_width, 3))

# freeze the weights of all VGG layers
for layer in vgg.layers:
    layer.trainable = False
#vgg.summary()

input_shape = Input(shape=(160, 320, 3))
normalize = Lambda(lambda x: x / 255.0 - 0.5)(input_shape)  # Normalize inputs
crop_input = Cropping2D(cropping=((70, 25), (0, 0)))(normalize)
vgg16 = vgg(crop_input)
flatten = Flatten()(vgg16)
fc1 = Dense(2048, activation='relu')(flatten)
d1 = Dropout(0.5)(fc1)  # dropout regularization
fc2 = Dense(2048, activation='relu')(d1)
d2 = Dropout(0.5)(fc2)  # dropout regularization
prediction = Dense(1)(d2)

model = Model(inputs=input_shape, outputs=prediction)
model.compile(optimizer='Adam', loss='mse')
# model.summary()
model.fit_generator(train_generator,
                    steps_per_epoch=math.ceil(len(train_data) / 32),
                    epochs=5,
def train_lstm(x1_train, x1_test, f1_train, f1_test, x2_train, x2_test,
               f2_train, f2_test, y_train, y_test):
    print('Defining a Simple Keras Model...')
    input_shape = x1_train.shape[1:]
    input_shape2 = f1_train.shape[1:]
    base_network = create_base_network(input_shape, input_shape2)
    Mydot = Lambda(lambda x: K.batch_dot(x[0], x[1]))

    # Q-part: take the mean of the hidden-layer outputs
    input_con = Input(shape=input_shape)
    input_f1 = Input(shape=input_shape2)
    hid_con = base_network([input_con, input_f1])

    ave_con = GlobalAveragePooling1D()(hid_con)
    print('ave_con:', np.shape(ave_con))
    ave_con = Reshape((100, 1))(ave_con)
    print('avg_con_reshape:', np.shape(ave_con))

    # A-part: the hidden-layer outputs
    input_tag = Input(shape=input_shape)
    input_f2 = Input(shape=input_shape2)
    hid_tag = base_network([input_tag, input_f2])

    #A-part Attention1
    tag_at = MyLayer_one()([hid_tag, ave_con])
    print('*****************')
    print(tag_at)
    tag_at = Flatten()(tag_at)
    print(tag_at)
    tag_at = Activation('softmax')(tag_at)
    print(tag_at)
    tag_at = RepeatVector(1)(tag_at)
    print(tag_at)
    print(hid_tag)
    att_tag_mul = Mydot([tag_at, hid_tag])
    print('att_tag_mul:', np.shape(att_tag_mul))

    at_done = Flatten()(att_tag_mul)
    print('out:', np.shape(at_done))
    output = Dropout(0.2)(at_done)
    output = Dense(3)(output)
    output = Activation('softmax')(output)
    model = Model(inputs=[input_con, input_f1, input_tag, input_f2],
                  outputs=output)
    print('Compiling the Model...')
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print("Train...")
    earlyStopping = callbacks.EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            verbose=1,
                                            mode='auto')
    saveBestModel = callbacks.ModelCheckpoint('lstm_model/part_1_relation.h5',
                                              monitor='val_loss',
                                              verbose=1,
                                              save_best_only=True,
                                              mode='auto')
    model.fit([x1_train, f1_train, x2_train, f2_train],
              y_train,
              batch_size=batch_size,
              epochs=n_epoch,
              verbose=1,
              validation_data=([x1_test, f1_test, x2_test, f2_test], y_test),
              callbacks=[earlyStopping, saveBestModel])
def create_model(anchors,
                 class_names,
                 is_use_center_loss,
                 load_pretrained=True,
                 freeze_body=True):
    '''
    returns the body of the model and the model
    # Params:
    load_pretrained: whether or not to load the pretrained model or initialize all weights
    freeze_body: whether or not to freeze all weights except for the last layer's

    # Returns:
    model_body: YOLOv2 with new output layer
    model: YOLOv2 with custom loss Lambda layer
    '''

    # detectors_mask_shape = (13, 13, 5, 1)
    # matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask_shape = (5, 5, 5, 1)
    matching_boxes_shape = (5, 5, 5, 5)

    # Create model input layers.
    image_input = Input(shape=(160, 160, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    # Create model body.
    yolo_model = yolo_body(image_input, len(anchors), len(class_names))
    topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output)

    if load_pretrained:
        # Save topless yolo:
        topless_yolo_path = os.path.join('model_data', 'pretrained_best.h5')
        if not os.path.exists(topless_yolo_path):
            print("CREATING TOPLESS WEIGHTS FILE")
            yolo_path = os.path.join('model_data', 'yolo.h5')
            model_body = load_model(yolo_path)
            model_body = Model(model_body.inputs, model_body.layers[-2].output)
            model_body.save_weights(topless_yolo_path)
        # topless_yolo.load_weights(topless_yolo_path)

    if freeze_body:
        for layer in topless_yolo.layers:
            layer.trainable = False
    final_layer = Conv2D(len(anchors) * (5 + len(class_names)), (1, 1),
                         activation='linear')(topless_yolo.output)

    model_body = Model(image_input, final_layer)
    model_loss = None

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        if not is_use_center_loss:
            model_loss = Lambda(
                yolo_loss,
                output_shape=(1,),
                name='yolo_loss',
                arguments={'anchors': anchors, 'num_classes': len(class_names)}) \
                ([model_body.output, boxes_input, detectors_mask_input, matching_boxes_input])

        else:
            model_center_loss = Lambda(
                yolo_center_loss,
                output_shape=(1,),
                name='yolo_loss',
                arguments={'anchors': anchors, 'num_classes': len(class_names), 'ratio':0.1, 'alpha':0.5}) \
                ([model_body.output, boxes_input, detectors_mask_input, matching_boxes_input])

    model = Model([
        model_body.input, boxes_input, detectors_mask_input,
        matching_boxes_input
    ], model_loss)
    model.load_weights(os.path.join('model_data', 'pretrained_best.h5'))

    return model_body, model
Example no. 26
def createPreProcessingLayers():
    # Creates a model with the initial pre-processing layers.
    model = Sequential()
    model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(160,320,3)))
    model.add(Cropping2D(cropping=((50,20), (0,0))))
    return model
Example no. 27
train_lines = label_lines[:int(len(label_lines) * config.validation_split)]
valid_lines = label_lines[int(len(label_lines) * config.validation_split):]

model_yolo = yolo.DarkNet()(n_class=num_classes, n_anchor=num_anchors)
model_yolo.summary()

h, w = config.image_input_shape
y_true = [
    Input(shape=(h // config.scale_size[l], w // config.scale_size[l],
                 num_anchors // 3, num_classes + 5)) for l in range(3)
]
model_loss = Lambda(yolo.yolo_loss,
                    output_shape=(1, ),
                    name='yolo_loss',
                    arguments={
                        'anchors': anchors,
                        'num_classes': num_classes
                    })([*model_yolo.output, *y_true])

model = Model([model_yolo.input, *y_true], model_loss)
model.compile(optimizer=Adam(1e-3),
              loss={
                  'yolo_loss': lambda y_true, y_pred: y_pred
              })
model.fit_generator(
    generator=data_generator(label_lines=train_lines,
                             batch_size=config.batch_size,
                             input_shape=config.image_input_shape,
                             anchors=anchors,
                             num_classes=num_classes),
    m = tf.add(y1, y2)
    x = tf.divide(n, m)
    return x


embedding_size = 128

# inputs = tf.placeholder(tf.float32, [None, 96, 96, 3], name='input')
inputs = Input((img_size[0], img_size[1], 3), name='group_input')

base_module = InceptionResNetV2(weights=None,
                                input_tensor=inputs,
                                classes=embedding_size)
custom_input = base_module.output
# total number of images in the batch = batch_size * 2
x = Lambda(lambda x: tf.reshape(x, [batch_size, 2, embedding_size]),
           name='prediction_reshape')(custom_input)
# matrix similarity computation (squared-difference similarity)
x = Lambda(similarity, input_shape=[2, embedding_size], name='similarity')(x)
# linear regression head
x = Dense(1, activation='sigmoid', name='final_predict')(x)
model = Model(base_module.input, x)

model.load_weights('face_model_epoch_19.h5')


def get_img_data(img_path):
    img = image.load_img(img_path, target_size=[96, 96])
    img = image.img_to_array(img) / 255.
    return img
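
# Hypothetical usage: score one pair of face crops. The reshape layer above
# hard-codes `batch_size`, so a predict batch must hold exactly
# batch_size * 2 images (this sketch assumes batch_size == 1).
import numpy as np

pair = np.stack([get_img_data('face_a.jpg'), get_img_data('face_b.jpg')])
similarity = model.predict(pair)  # shape: (batch_size, 1)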

Example no. 29
            yield sklearn.utils.shuffle(X_train, y_train)

# compile and train the model using the generator function
train_generator = generator(train_samples, batch_size=16)
validation_generator = generator(validation_samples, batch_size=16)

ch, row, col = 3, 160, 320  # Trimmed image format

from keras.models import Sequential
from keras.layers import Dense, Flatten, Lambda, Cropping2D, Activation, Dropout
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D

model = Sequential()
model.add(Lambda(lambda x: x / 255.0 - 0.5,
          input_shape=(row, col, ch),
          output_shape=(row, col, ch)))
model.add(Cropping2D(cropping=((70, 26), (0, 0))))
model.add(Conv2D(6, (5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(4, 4))
model.add(Conv2D(16, (5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(2, 4))
model.add(Conv2D(33, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(1, 2))
model.add(Flatten())
model.add(Dense(190))
model.add(Dropout(0.75))
model.add(Activation('relu'))
model.add(Dense(84))
model.add(Activation('relu'))
model.add(Dense(1))
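
# The snippet ends before compilation; a typical continuation for this
# steering-angle regressor would be the following (an assumption, not in
# the source):
model.compile(loss='mse', optimizer='adam')
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_samples) // 16,
                    validation_data=validation_generator,
                    validation_steps=len(validation_samples) // 16,
                    epochs=5)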
Example no. 30
    def create_model(self):
        self._set_model_params()
        act = 'relu'
        input_data = Input(name='the_input',
                           shape=self.input_shape,
                           dtype='float32')
        inner = Convolution2D(self.conv_num_filters,
                              self.filter_size,
                              self.filter_size,
                              border_mode='same',
                              activation=act,
                              name='conv1')(input_data)

        inner = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1),
                             name='max1')(inner)
        inner = Convolution2D(self.conv_num_filters,
                              self.filter_size,
                              self.filter_size,
                              border_mode='same',
                              activation=act,
                              name='conv2')(inner)
        inner = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2),
                             name='max2')(inner)
        conv_to_rnn_dims = (int(
            (self.img_h /
             (self.pool_size_1 * self.pool_size_2)) * self.conv_num_filters),
                            int(self.img_w /
                                (self.pool_size_1 * self.pool_size_2)))
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
        inner = Permute(dims=(2, 1), name='permute')(inner)

        # cuts down input size going into RNN:
        inner = TimeDistributed(
            Dense(self.time_dense_size, activation=act, name='dense1'))(inner)

        # Two layers of bidirectional GRUs
        # GRU seems to work as well as, if not better than, LSTM:
        gru_1 = GRU(self.rnn_size, return_sequences=True, name='gru1')(inner)
        gru_1b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     name='gru1_b')(inner)
        gru1_merged = merge([gru_1, gru_1b], mode='sum')
        gru_2 = GRU(self.rnn_size, return_sequences=True,
                    name='gru2')(gru1_merged)
        gru_2b = GRU(self.rnn_size, return_sequences=True,
                     go_backwards=True)(gru1_merged)

        # transforms RNN output to character activations:
        inner = TimeDistributed(Dense(self.output_size,
                                      name='dense2'))(merge([gru_2, gru_2b],
                                                            mode='concat'))

        y_pred = Activation('softmax', name='softmax')(inner)
        # Model(input=[input_data], output=y_pred).summary()
        labels = Input(name='the_labels',
                       shape=[self.absolute_max_string_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name="ctc")(
            [y_pred, labels, input_length, label_length])
        lr = 0.03
        # clipnorm seems to speed up convergence
        clipnorm = 5
        sgd = SGD(lr=lr,
                  decay=3e-7,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=clipnorm)
        model = Model(input=[input_data, labels, input_length, label_length],
                      output=[loss_out])
        # model.summary()
        # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
        if self.weight_file is not None:
            model.load_weights(self.weight_file)

        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=sgd)
        self.model = model

        self._predictor = K.function([input_data], [y_pred])

        return model
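
# `ctc_lambda_func` is referenced above but not shown in this excerpt; the
# canonical definition from the Keras image_ocr example is (a sketch):
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the image_ocr variant also drops the first two timesteps, whose RNN
    # outputs tend to be garbage: y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)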
# directory holding the resized test input images
test_dir = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Test_Resized"
# load the labels pickle into a dataframe, sorted by filename to match the images
test_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Testing_Input_Resized.pkl"

test_labels = pd.read_pickle(test_labels_file)

test_datagen = ImageDataGenerator(rescale=1. / 255,
                                  preprocessing_function=image_transform)
y_cols = ['Right Ankle x', 'Right Knee x', 'Right Hip x', 'Left Hip x',
          'Left Knee x', 'Left Ankle x', 'Pelvis x', 'Thorax x',
          'Upper Neck x', 'Head Top x', 'Right Wrist x', 'Right Elbow x',
          'Right Shoulder x', 'Left Shoulder x', 'Left Elbow x', 'Left Wrist x',
          'Right Ankle y', 'Right Knee y', 'Right Hip y', 'Left Hip y',
          'Left Knee y', 'Left Ankle y', 'Pelvis y', 'Thorax y',
          'Upper Neck y', 'Head Top y', 'Right Wrist y', 'Right Elbow y',
          'Right Shoulder y', 'Left Shoulder y', 'Left Elbow y', 'Left Wrist y']
test_generator = test_datagen.flow_from_dataframe(dataframe=test_labels,
                                                  directory=test_dir,
                                                  target_size=(108, 192),
                                                  x_col='Filename',
                                                  y_col=y_cols,
                                                  class_mode='other',
                                                  batch_size=8)

#create model
model = Sequential()

#add model layers
model.add(Conv2D(1, kernel_size=1, input_shape=(108,192,3), activation='relu'))
model.add(Lambda(image_transform))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128, kernel_size=3, activation='relu'))
model.add(Conv2D(128, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(Conv2D(256, kernel_size=3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))

#compile model using accuracy to measure model performance
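# (truncated in the source; presumably something along these lines)
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])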
Example no. 32
    margin = 1.
    return K.mean((1. - y_true) * K.square(y_pred) +
                  y_true * K.square(K.maximum(margin - y_pred, 0.)))


inp = Input((224, 224, 3))

mobile_model = MobileNetV2(include_top=False,
                           input_shape=(224, 224, 3),
                           input_tensor=inp,
                           pooling='avg')
x = Dense(512, activation='relu')(mobile_model.output)
x = Dropout(0.3)(x)
x = Dense(128)(x)
x = Lambda(lambda x: K.l2_normalize(x, axis=1))(x)

model_top = Model(inputs=inp, outputs=x)
#model_top.summary()

inp_1 = Input((224, 224, 3))
inp_2 = Input((224, 224, 3))

out_1 = model_top(inp_1)
out_2 = model_top(inp_2)

merge_layer = Lambda(euclidean_dist)([out_1, out_2])

model = Model(inputs=[inp_1, inp_2], outputs=merge_layer)
#model.summary()
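
# Sketch of the `euclidean_dist` helper assumed above (the standard
# siamese-network distance from the Keras examples):
def euclidean_dist(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))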
    def CreateModel(self):
        '''
        Define the CNN/LSTM/CTC model using the functional API.
        Input layer: a sequence of 200-dimensional feature vectors; the
            maximum length of one utterance is set to 1600 (about 16 s).
        Hidden layers: convolution + pooling blocks with 3x3 kernels and
            a pooling window of size 2.
        Hidden layer: fully connected layer.
        Output layer: fully connected layer with self.MS_OUTPUT_SIZE
            neurons and softmax activation.
        CTC layer: uses the CTC loss as the loss function to realize
            connectionist temporal classification with multiple outputs.
        '''

        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=False,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # convolution layer
        layer_h1 = Dropout(0.05)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # convolution layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # pooling layer
        # layer_h3 = Dropout(0.2)(layer_h2)  # randomly drop some connections to prevent overfitting
        layer_h3 = Dropout(0.05)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # convolution layer
        layer_h4 = Dropout(0.1)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # convolution layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # pooling layer

        layer_h6 = Dropout(0.1)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # convolution layer
        layer_h7 = Dropout(0.15)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # convolution layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # pooling layer

        layer_h9 = Dropout(0.15)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # convolution layer
        layer_h10 = Dropout(0.2)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # convolution layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # pooling layer

        layer_h12 = Dropout(0.2)(layer_h12)
        layer_h13 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h12)  # convolution layer
        layer_h13 = Dropout(0.2)(layer_h13)
        layer_h14 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h13)  # convolution layer
        layer_h15 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h14)  # pooling layer

        # test=Model(inputs = input_data, outputs = layer_h12)
        # test.summary()

        layer_h16 = Reshape((200, 3200))(layer_h15)  # reshape layer
        # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM layer
        # layer_h6 = Dropout(0.2)(layer_h5)  # randomly drop some connections to prevent overfitting
        layer_h16 = Dropout(0.3)(layer_h16)
        layer_h17 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # fully connected layer
        layer_h17 = Dropout(0.3)(layer_h17)
        layer_h18 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h17)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h18)
        model_data = Model(inputs=input_data, outputs=y_pred)
        # model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speeds up convergence
        # sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        # opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06)
        opt = Adam(lr=0.001,
                   beta_1=0.9,
                   beta_2=0.999,
                   decay=0.0,
                   epsilon=10e-8)
        # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=opt)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        # print('[*Info] Model created successfully, model compiled successfully')
        print('[*Info] Create Model Successful, Compiles Model Successful. ')
        return model, model_data
    def build_nn_model(
        self,
        element_dim=103,
        conv_window=3,
        conv_filters=64,
        rnn_dim=64,
        recipe_latent_dim=8,
        intermediate_dim=64,
        latent_dim=8,
        max_material_length=10,
        charset_size=50,
    ):

        self.latent_dim = latent_dim
        self.recipe_latent_dim = recipe_latent_dim
        self.original_dim = max_material_length * charset_size

        x_mat = Input(shape=(max_material_length, charset_size),
                      name="material_in")
        conv_x1 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_1')(x_mat)
        conv_x2 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_2')(conv_x1)
        conv_x3 = Conv1D(conv_filters,
                         conv_window,
                         padding="valid",
                         activation="relu",
                         name='conv_enc_3')(conv_x2)
        h_flatten = Flatten()(conv_x3)
        h = Dense(intermediate_dim, activation="relu",
                  name="hidden_enc")(h_flatten)

        z_mean_func = Dense(latent_dim, name="means_enc")
        z_log_var_func = Dense(latent_dim, name="vars_enc")

        z_mean = z_mean_func(h)
        z_log_var = z_log_var_func(h)

        def sample(args):
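            # reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I),
            # which keeps sampling differentiable w.r.t. the encoder outputs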
            z_mean, z_log_var = args
            epsilon = K.random_normal(shape=(latent_dim, ),
                                      mean=0.0,
                                      stddev=1.0)
            return z_mean + K.exp(z_log_var / 2) * epsilon

        z = Lambda(sample, name="lambda_sample")([z_mean, z_log_var])
        c_element = Input(shape=(element_dim, ), name="cond_element_in")
        c_latent_recipe = Input(shape=(recipe_latent_dim, ),
                                name="cond_latent_recipe_in")

        z_conditional = Concatenate(name="concat_cond")(
            [z, c_latent_recipe, c_element])

        decoder_h = Dense(intermediate_dim,
                          activation="relu",
                          name="hidden_dec")
        decoder_h_repeat = RepeatVector(max_material_length, name="h_rep_dec")
        decoder_h_gru_1 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_1")
        decoder_h_gru_2 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_2")
        decoder_h_gru_3 = GRU(rnn_dim,
                              return_sequences=True,
                              name="recurrent_dec_3")
        decoder_mat = TimeDistributed(Dense(charset_size,
                                            activation='softmax'),
                                      name="means_material_dec")

        h_decoded = decoder_h(z_conditional)
        h_decode_repeat = decoder_h_repeat(h_decoded)
        gru_h_decode_1 = decoder_h_gru_1(h_decode_repeat)
        gru_h_decode_2 = decoder_h_gru_2(gru_h_decode_1)
        gru_h_decode_3 = decoder_h_gru_3(gru_h_decode_2)
        x_decoded_mat = decoder_mat(gru_h_decode_3)

        def vae_xent_loss(x, x_decoded_mean):
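            # VAE objective: reconstruction term (binary cross-entropy scaled
            # by the flattened input size) plus KL(q(z|x) || N(0, I))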
            x = K.flatten(x)
            x_decoded_mean = K.flatten(x_decoded_mean)
            rec_loss = self.original_dim * metrics.binary_crossentropy(
                x, x_decoded_mean)
            kl_loss = -0.5 * K.mean(
                1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return rec_loss + kl_loss

        encoder = Model(inputs=[x_mat], outputs=[z_mean])

        decoder_x_input = Input(shape=(latent_dim, ))

        decoder_inputs = Concatenate(name="concat_cond_dec")(
            [decoder_x_input, c_latent_recipe, c_element])
        _h_decoded = decoder_h(decoder_inputs)
        _h_decode_repeat = decoder_h_repeat(_h_decoded)
        _gru_h_decode_1 = decoder_h_gru_1(_h_decode_repeat)
        _gru_h_decode_2 = decoder_h_gru_2(_gru_h_decode_1)
        _gru_h_decode_3 = decoder_h_gru_3(_gru_h_decode_2)
        _x_decoded_mat = decoder_mat(_gru_h_decode_3)

        decoder = Model(inputs=[decoder_x_input, c_latent_recipe, c_element],
                        outputs=[_x_decoded_mat])

        vae = Model(inputs=[x_mat, c_latent_recipe, c_element],
                    outputs=[x_decoded_mat])

        vae.compile(optimizer=Adam(lr=0.001,
                                   beta_1=0.9,
                                   beta_2=0.999,
                                   epsilon=None,
                                   decay=0.0,
                                   amsgrad=True),
                    loss=vae_xent_loss,
                    metrics=['categorical_accuracy'])

        self.vae = vae
        self.encoder = encoder
        self.decoder = decoder
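
        # Usage sketch (hypothetical names: assumes this class is instantiated
        # as `cvae` and build_nn_model() was called with the default sizes):
        #
        #     z = cvae.encoder.predict(x_mat_batch)            # (n, latent_dim)
        #     x_rec = cvae.decoder.predict([z, recipe_latents, element_conds])
        #     cvae.vae.fit([x_mat_batch, recipe_latents, element_conds],
        #                  x_mat_batch, batch_size=32, epochs=10)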
Esempio n. 35
0
vgg16 = VGG16(weights='imagenet', include_top=False)

x = vgg16.output
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dropout(0.2, name='drop1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
feature = Dropout(0.2, name='drop2')(x)
base_model = Model(vgg16.input, feature)
cls_out = Dense(751, activation='softmax', name='softmax')(base_model.output)
cls_model = Model(vgg16.input, cls_out)
input1 = Input(shape=input_shape)
input2 = Input(shape=input_shape)
fea1, fea2 = base_model(input1), base_model(input2)
cls1, cls2 = cls_model(input1), cls_model(input2)
distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([fea1, fea2])

model = Model(inputs=[input1, input2], outputs=[distance, cls1, cls2])

# train
rms = RMSprop()
model.compile(loss=[contrastive_loss, 'categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=rms,
              loss_weights=[1., 0.5, 0.5])

model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], [tr_y, tr_c1, tr_c2],
          batch_size=128,
          epochs=epochs)
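
# The three helpers referenced above are not defined in this snippet. A
# minimal sketch following the standard Keras siamese-network example (an
# assumption, not necessarily this author's exact definitions):
def euclidean_distance(vects):
    # L2 distance between the two embeddings; clipped for a stable sqrt
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),
                            K.epsilon()))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    # Hadsell et al. 2006: pull similar pairs (y_true=1) together, push
    # dissimilar pairs (y_true=0) apart up to the margin
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))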

Esempio n. 36
0
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                limit_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):
    '''
    Build a Keras model with SSD architecture, see references.

    The model consists of convolutional feature layers and a number of convolutional
    predictor layers that take their input from different feature layers.
    The model is fully convolutional.

    The implementation found here is a smaller version of the original architecture
    used in the paper (where the base network consists of a modified VGG-16 extended
    by a few convolutional feature layers), but of course it could easily be changed to
    an arbitrarily large SSD architecture by following the general design pattern used here.
    This implementation has 7 convolutional layers and 4 convolutional predictor
    layers that take their input from layers 4, 5, 6, and 7, respectively.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Training currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used.
            Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
            `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all predictor layers. The original implementation uses more aspect ratios
            for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead.
            Defaults to `[0.5, 1.0, 2.0]`.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer.
            This allows you to set the aspect ratios for each predictor layer individually. If a list is passed,
            it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using
            the geometric mean of said scaling factor and the next bigger scaling factor. Defaults to `True`, following the original
            implementation.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size,
            which is also the recommended setting.
        limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image boundaries.
            This would normally be set to `True`, but here it defaults to `False`, following the original
            implementation.
        variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors
            to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply
            no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater
            than 1.0 downscale the encoded predictions. If you want to reproduce the configuration of the original SSD,
            set this to `[0.1, 0.1, 0.2, 0.2]`, provided the coordinate format is 'centroids'.
        coords (str, optional): The box coordinate format to be used. Can be either 'centroids' for the format
            `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (bool, optional): If `True`, the color channel order of the input images will be reversed,
            i.e. if the input color channel order is RGB, the color channels will be swapped to BGR.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 4 # The number of predictor conv layers in the network
    n_classes += 1 # Account for the background class.
    l2_reg = l2_regularization # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError("`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.")
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
    if scales:
        if len(scales) != n_predictor_layers+1:
            raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(n_predictor_layers+1, len(scales)))
    else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers+1)
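        # e.g. the defaults min_scale=0.1, max_scale=0.9 with 4 predictor layers
        # give np.linspace(0.1, 0.9, 5) -> [0.1, 0.3, 0.5, 0.7, 0.9]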

    if len(variances) != 4: # We need one variance value for each of the four box coordinates
        raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError("You must provide at least one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError("You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) and two_boxes_for_ar1:
                n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) and two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
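        # e.g. the default aspect_ratios_global=[0.5, 1.0, 2.0] with
        # two_boxes_for_ar1=True yields n_boxes = [4, 4, 4, 4]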

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(lambda z: z, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(lambda z: z - np.array(subtract_mean), output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev), output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1)
    if swap_channels and (img_channels == 3):
        x1 = Lambda(lambda z: z[...,::-1], output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1)

    conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1)
    conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1) # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3
    conv1 = ELU(name='elu1')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)

    conv2 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1)
    conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
    conv2 = ELU(name='elu2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)

    conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2)
    conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
    conv3 = ELU(name='elu3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3)
    conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
    conv4 = ELU(name='elu4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)

    conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4)
    conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
    conv5 = ELU(name='elu5')(conv5)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)

    conv6 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5)
    conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
    conv6 = ELU(name='elu6')(conv6)
    pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6)

    conv7 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7')(pool6)
    conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7)
    conv7 = ELU(name='elu7')(conv7)

    # The next part is to add the convolutional predictor layers on top of the base network
    # that we defined above. Note that I use the term "base network" differently than the paper does.
    # To me, the base network is everything that is not convolutional predictor layers or anchor
    # box layers. In this case we'll have four predictor layers, but of course you could
    # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of
    # predictor layers on top of the base network by simply following the pattern shown here.

    # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7.
    # We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization)
    # We predict `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes`
    # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4`
    # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)`
    classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4)
    classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5)
    classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6)
    classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes7')(conv7)
    # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)`
    boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4)
    boxes5 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5)
    boxes6 = Conv2D(n_boxes[2] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6)
    boxes7 = Conv2D(n_boxes[3] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes7')(conv7)

    # Generate the anchor boxes
    # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)`
    anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0],
                           limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors4')(boxes4)
    anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1],
                           limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors5')(boxes5)
    anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2],
                           limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors6')(boxes6)
    anchors7 = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3],
                           limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors7')(boxes7)

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    classes4_reshaped = Reshape((-1, n_classes), name='classes4_reshape')(classes4)
    classes5_reshaped = Reshape((-1, n_classes), name='classes5_reshape')(classes5)
    classes6_reshaped = Reshape((-1, n_classes), name='classes6_reshape')(classes6)
    classes7_reshaped = Reshape((-1, n_classes), name='classes7_reshape')(classes7)
    # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
    boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
    boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)
    boxes7_reshaped = Reshape((-1, 4), name='boxes7_reshape')(boxes7)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
    anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
    anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)
    anchors7_reshaped = Reshape((-1, 8), name='anchors7_reshape')(anchors7)

    # Concatenate the predictions from the different layers and the associated anchor box tensors
    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1, name='classes_concat')([classes4_reshaped,
                                                                 classes5_reshaped,
                                                                 classes6_reshaped,
                                                                 classes7_reshaped])

    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')([boxes4_reshaped,
                                                             boxes5_reshaped,
                                                             boxes6_reshaped,
                                                             boxes7_reshaped])

    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1, name='anchors_concat')([anchors4_reshaped,
                                                                 anchors5_reshaped,
                                                                 anchors6_reshaped,
                                                                 anchors7_reshaped])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')([classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh,
                                               iou_threshold=iou_threshold,
                                               top_k=top_k,
                                               nms_max_output_size=nms_max_output_size,
                                               coords=coords,
                                               normalize_coords=normalize_coords,
                                               img_height=img_height,
                                               img_width=img_width,
                                               name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetections2(confidence_thresh=confidence_thresh,
                                                iou_threshold=iou_threshold,
                                                top_k=top_k,
                                                nms_max_output_size=nms_max_output_size,
                                                coords=coords,
                                                normalize_coords=normalize_coords,
                                                img_height=img_height,
                                                img_width=img_width,
                                                name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError("`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'.".format(mode))

    if return_predictor_sizes:
        # Get the spatial dimensions (height, width) of the convolutional predictor layers, we need them to generate the default boxes
        # The spatial dimensions are the same for the `classes` and `boxes` predictors
        predictor_sizes = np.array([classes4._keras_shape[1:3],
                                    classes5._keras_shape[1:3],
                                    classes6._keras_shape[1:3],
                                    classes7._keras_shape[1:3]])
        return model, predictor_sizes
    else:
        return model
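
# Usage sketch (hypothetical sizes; assumes the custom AnchorBoxes and
# DecodeDetections/DecodeDetections2 layers used above are importable):
#
#     model, predictor_sizes = build_model(image_size=(300, 480, 3),
#                                          n_classes=5,
#                                          mode='training',
#                                          scales=[0.08, 0.16, 0.32, 0.64, 0.96],
#                                          return_predictor_sizes=True)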
Esempio n. 37
0
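    # Fragment: this is the body of the per-timestep `recurrence` step function
    # consumed by theano.scan below; `e`, `h`, `y_i`, `encoding_dim`,
    # `output_dim`, `batch_size` and `input_length` come from the enclosing
    # (omitted) code.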
    alpha = K.softmax(e)  # (batch_size, input_length)

    # eqn 5
    c = K.batch_dot(h, alpha, axes=1)  # (batch_size, encoding_dim)

    recurrence_result = K.expand_dims(
        K.concatenate([c, y_i], axis=1),
        dim=1)  # (batch_size, 1, 2 * encoding_dim)

    expanded_h = Input(shape=(1, 2 * encoding_dim),
                       name='expanded_h')
    gru = Sequential([
        GRU(output_dim,
            return_sequences=False,
            input_shape=(1, 2 * encoding_dim))
    ])
    model = Model(input=[expanded_h],
                  output=[gru(expanded_h)])  # (batch_size, 1, output_dim)
    return model(recurrence_result)


output, _ = theano.scan(recurrence,
                        sequences=K.permute_dimensions(y, [1, 0, 2]),
                        non_sequences=h)

layer = Lambda(lambda encoded_state: output,
               output_shape=(batch_size, output_dim))
layer.build((input_length, encoding_dim))

print(K.eval(layer(h)))
Esempio n. 38
0
def train_test():
    # generate_img()
    imgs, labels, labels_encode = load_img()
    
    # labels_input = Input([None], dtype='int32')

    img_w = 156
    img_h = 64
    conv_filters = 16
    kernel_size = (3, 3)
    input_shape = (img_w, img_h, 1)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(len(chars) + 1, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    base_model = Model(inputs=input_data, outputs=y_pred)

    labels = Input(name='the_labels', shape=[4], dtype='float32')
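    # (this Input shadows the `labels` array returned by load_img() above;
    # only labels_encode is used for the training data from here on)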
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    fit_model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    fit_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    # adadelta = Adadelta(lr=0.05)
    # fit_model.compile(
    #     loss=lambda y_true, y_pred: y_pred,
    #     optimizer=adadelta)
    # fit_model.summary()
    # import sys
    # sys.exit()
    
    fit_model.fit_generator(
        generate_data(imgs, labels_encode, 32),
        epochs=10,
        steps_per_epoch=100,
        verbose=1)
    fit_model.save('fit_model.h5')
    base_model.save('model.h5')
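
# A generator matching the model's named inputs (a minimal sketch under
# assumed shapes; the real generate_data()/load_img() preprocessing lives
# elsewhere in this project):
import numpy as np

def generate_data_sketch(imgs, labels_encode, batch_size):
    downsample = 4  # two 2x2 max-pooling layers halve the width twice
    while True:
        idx = np.random.choice(len(imgs), batch_size)
        inputs = {
            'the_input': imgs[idx],                # (batch, 156, 64, 1)
            'the_labels': labels_encode[idx],      # (batch, 4)
            'input_length': np.full((batch_size, 1), 156 // downsample),
            'label_length': np.full((batch_size, 1), 4),
        }
        # dummy target: the CTC loss is already computed inside the graph
        outputs = {'ctc': np.zeros((batch_size,))}
        yield inputs, outputs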