def build(self):
    assert self.config['question_len'] == self.config['answer_len']

    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # cnn
    cnns = [Convolution1D(filter_length=filter_length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same') for filter_length in [2, 3, 5, 7]]
    question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat')

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    enc = Dense(100, activation='tanh')
    question_pool = enc(maxpool(question_cnn))
    answer_pool = enc(maxpool(answer_cnn))

    return question_pool, answer_pool
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          # mask_zero=True,
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # question rnn part
    f_rnn = LSTM(141, return_sequences=True, consume_less='mem')
    b_rnn = LSTM(141, return_sequences=True, consume_less='mem', go_backwards=True)
    question_f_rnn = f_rnn(question_embedding)
    question_b_rnn = b_rnn(question_embedding)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = merge([maxpool(question_f_rnn), maxpool(question_b_rnn)],
                          mode='concat', concat_axis=-1)

    # answer rnn part
    from attention_lstm import AttentionLSTMWrapper
    f_rnn = AttentionLSTMWrapper(f_rnn, question_pool, single_attention_param=True)
    b_rnn = AttentionLSTMWrapper(b_rnn, question_pool, single_attention_param=True)
    answer_f_rnn = f_rnn(answer_embedding)
    answer_b_rnn = b_rnn(answer_embedding)
    answer_pool = merge([maxpool(answer_f_rnn), maxpool(answer_b_rnn)],
                        mode='concat', concat_axis=-1)

    return question_pool, answer_pool
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    f_rnn = LSTM(141, return_sequences=True, implementation=1)
    b_rnn = LSTM(141, return_sequences=True, implementation=1, go_backwards=True)

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    # question_pool = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)
    question_pool = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(answer_embedding)
    ab_rnn = b_rnn(answer_embedding)
    # answer_pool = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)
    answer_pool = concatenate([af_rnn, ab_rnn], axis=-1)

    # cnn
    cnns = [Conv1D(kernel_size=kernel_size,
                   filters=500,
                   activation='tanh',
                   padding='same') for kernel_size in [1, 2, 3, 5]]
    # question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
    question_cnn = concatenate([cnn(question_pool) for cnn in cnns], axis=-1)
    # answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')
    answer_cnn = concatenate([cnn(answer_pool) for cnn in cnns], axis=-1)

    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)

    return question_pool, answer_pool
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          mask_zero=True,
                          # dropout=0.2,
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = maxpool(question_embedding)
    answer_pool = maxpool(answer_embedding)

    return question_pool, answer_pool
def build(self):
    assert self.config['question_len'] == self.config['answer_len']

    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    hidden_layer = TimeDistributed(Dense(200, activation='tanh'))
    question_hl = hidden_layer(question_embedding)
    answer_hl = hidden_layer(answer_embedding)

    # cnn
    cnns = [Conv1D(kernel_size=kernel_size,
                   filters=1000,
                   activation='tanh',
                   padding='same') for kernel_size in [2, 3, 5, 7]]
    # question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat')
    question_cnn = concatenate([cnn(question_hl) for cnn in cnns], axis=-1)
    # answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat')
    answer_cnn = concatenate([cnn(answer_hl) for cnn in cnns], axis=-1)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    # enc = Dense(100, activation='tanh')
    # question_pool = enc(maxpool(question_cnn))
    # answer_pool = enc(maxpool(answer_cnn))
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)

    return question_pool, answer_pool
def build(self):
    question = self.question
    answer = self.get_answer()

    rnn_model = get_model(question_maxlen=self.model_params.get('question_len', 20),
                          answer_maxlen=self.model_params.get('question_len', 60),
                          vocab_len=self.config['n_words'],
                          n_hidden=256,
                          load_save=True)
    rnn_model.trainable = False

    answer_inverted = rnn_model(answer)
    argmax = Lambda(lambda x: K.argmax(x, axis=2),
                    output_shape=lambda x: (x[0], x[1]))
    argmax.trainable = False
    answer_argmax = argmax(answer_inverted)

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          # W_regularizer=regularizers.activity_l1(1e-4),
                          W_constraint=constraints.nonneg(),
                          dropout=0.5,
                          weights=weights,
                          mask_zero=True)
    question_embedding = embedding(question)
    answer_embedding = embedding(answer_argmax)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_maxpool = maxpool(question_embedding)
    answer_maxpool = maxpool(answer_embedding)

    # activation
    activation = Activation('linear')
    question_output = activation(question_maxpool)
    answer_output = activation(answer_maxpool)

    return question_output, answer_output
def build(self):
    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    f_rnn = LSTM(141, return_sequences=True, consume_less='mem')
    b_rnn = LSTM(141, return_sequences=True, consume_less='mem')

    qf_rnn = f_rnn(question_embedding)
    qb_rnn = b_rnn(question_embedding)
    question_pool = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)

    af_rnn = f_rnn(answer_embedding)
    ab_rnn = b_rnn(answer_embedding)
    answer_pool = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)

    # cnn
    cnns = [Convolution1D(filter_length=filter_length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same') for filter_length in [1, 2, 3, 5]]
    question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')

    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = maxpool(question_cnn)
    answer_pool = maxpool(answer_cnn)

    return question_pool, answer_pool
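# Usage sketch (not from the original source): every build() variant above returns a pair
# of encoded tensors (question_pool, answer_pool). One plausible way to turn that pair into
# a trainable similarity model is a cosine-similarity Lambda on top; the input names
# `question_input` and `answer_input` below are hypothetical placeholders for the tensors
# behind self.question and self.get_answer().
def cosine_similarity(tensors):
    q, a = tensors
    q = K.l2_normalize(q, axis=-1)
    a = K.l2_normalize(a, axis=-1)
    return K.sum(q * a, axis=-1, keepdims=True)

# question_pool, answer_pool = self.build()
# similarity = Lambda(cosine_similarity)([question_pool, answer_pool])
# qa_model = Model(inputs=[question_input, answer_input], outputs=similarity)
# qa_model.compile(optimizer='adam', loss='mse')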
    # block = BatchNormalization()(block)
    block = Dropout(0.1)(block)
    if pool_length[i]:
        block = MaxPooling1D(pool_length=pool_length[i])(block)

    block = Lambda(max_1d, output_shape=(nb_filter[-1],))(block)
    block = Dense(128, activation='relu')(block)
    return block


max_features = len(chars) + 1
char_embedding = 40

document = Input(shape=(max_sentences, maxlen), dtype='int64')
in_sentence = Input(shape=(maxlen,), dtype='int64')

embedded = Lambda(binarize, output_shape=binarize_outshape)(in_sentence)

block2 = char_block(embedded, [100, 200, 200], filter_length=[5, 3, 3],
                    subsample=[1, 1, 1], pool_length=[2, 2, 2])
block3 = char_block(embedded, [200, 300, 300], filter_length=[7, 3, 3],
                    subsample=[1, 1, 1], pool_length=[2, 2, 2])

sent_encode = merge([block2, block3], mode='concat', concat_axis=-1)
sent_encode = Dropout(0.4)(sent_encode)

encoder = Model(input=in_sentence, output=sent_encode)
encoded = TimeDistributed(encoder)(document)

lstm_h = 80
forwards = LSTM(lstm_h, return_sequences=False, dropout_W=0.15, dropout_U=0.15,
                consume_less='gpu')(encoded)
backwards = LSTM(lstm_h, return_sequences=False, dropout_W=0.15, dropout_U=0.15,
                 consume_less='gpu', go_backwards=True)(encoded)
from dlnn.tests.ml.conv_test import layer_1_conv, layer_3_conv, layer_6_conv
from dlnn.tests.ml.elm_func_test import layer_9_flatten, layer_10_a_dense, layer_12_a_dense, layer_10_b_dense, \
    layer_12_b_dense, layer_10_c_dense, layer_12_c_dense, layer_13_concatenate, layer_14_reshape, \
    layer_15_merge_categorical
from dlnn.tests.ml.pooling_test import layer_5_pool, layer_8_pool
from dlnn.tests.ml.repos_helper import corpus_data, label_init, corpus_label, normalized, corr_step_1, corr_step_2, \
    corr_step_3, corr_step_4, corr_step_5, corr_step_6, corr_step_7, corr_step_8, corr_step_8_full, corr_step_9, \
    corr_step_10_a_bias_init, corr_step_10_a_kernel_init, corr_step_10_b_kernel_init, corr_step_10_b_bias_init, \
    corr_step_10_c_bias_init, corr_step_10_c_kernel_init, corr_step_10_a, corr_step_10_b, corr_step_10_c, \
    corr_step_11_a, corr_step_11_b, corr_step_11_c
from dlnn.tests.ml.testcase import TestCase
from dlnn.util import to_categorical
from dlnn.util.Initializers import Unifinv

inputs = Input(shape=(corpus_data.shape[-1],))
scale = Lambda(lambda x: x * 1.0 / 300.0)(inputs)
reshape = Reshape([1, 1, 4])(scale)
tile = Lambda(lambda x: K.tile(x, (1, 1, 4, 1)))(reshape)
step_1_conv = layer_1_conv()(tile)
step_2_activation = layer_2_activation()(step_1_conv)
step_3_conv = layer_3_conv()(step_2_activation)
step_4_activation = layer_4_activation()(step_3_conv)
step_5_pool = layer_5_pool(2)(step_4_activation)
step_6_conv = layer_6_conv()(step_5_pool)
step_7_activation = layer_7_activation()(step_6_conv)
step_8_pool = layer_8_pool(1)(step_7_activation)
step_9_flatten = layer_9_flatten()(step_8_pool)
step_10_a_dense = layer_10_a_dense()(step_9_flatten)
step_11_a_activation = layer_11_a_activation()(step_10_a_dense)
step_12_a_dense = layer_12_a_dense()(step_11_a_activation)
step_10_b_dense = layer_10_b_dense()(step_9_flatten)
def __init__(self, image_size, channels, conv_layers, feature_maps,
             filter_shapes, strides, dense_layers, dense_neurons,
             dense_dropouts, latent_dim, activation='relu',
             eps_mean=0.0, eps_std=1.0):

    self.history = LossHistory()

    # check that arguments are proper length;
    if len(filter_shapes) != conv_layers:
        raise Exception("number of convolutional layers must equal length of filter_shapes list")
    if len(strides) != conv_layers:
        raise Exception("number of convolutional layers must equal length of strides list")
    if len(feature_maps) != conv_layers:
        raise Exception("number of convolutional layers must equal length of feature_maps list")
    if len(dense_neurons) != dense_layers:
        raise Exception("number of dense layers must equal length of dense_neurons list")
    if len(dense_dropouts) != dense_layers:
        raise Exception("number of dense layers must equal length of dense_dropouts list")

    # even shaped filters may cause problems in theano backend;
    even_filters = [f for pair in filter_shapes for f in pair if f % 2 == 0]
    if K.image_dim_ordering() == 'th' and len(even_filters) > 0:
        warnings.warn('Even shaped filters may cause problems in Theano backend')
    if K.image_dim_ordering() == 'channels_first' and len(even_filters) > 0:
        warnings.warn('Even shaped filters may cause problems in Theano backend')

    self.eps_mean = eps_mean
    self.eps_std = eps_std
    self.image_size = image_size

    # define input layer;
    if K.image_dim_ordering() == 'th' or K.image_dim_ordering() == 'channels_first':
        self.input = Input(shape=(channels, image_size[0], image_size[1]))
    else:
        self.input = Input(shape=(image_size[0], image_size[1], channels))

    # define convolutional encoding layers;
    self.encode_conv = []
    layer = Convolution2D(feature_maps[0], filter_shapes[0], padding='same',
                          activation=activation, strides=strides[0])(self.input)
    self.encode_conv.append(layer)
    for i in range(1, conv_layers):
        layer = Convolution2D(feature_maps[i], filter_shapes[i], padding='same',
                              activation=activation, strides=strides[i])(self.encode_conv[i - 1])
        self.encode_conv.append(layer)

    # define dense encoding layers;
    self.flat = Flatten()(self.encode_conv[-1])
    self.encode_dense = []
    layer = Dense(dense_neurons[0], activation=activation)(Dropout(dense_dropouts[0])(self.flat))
    self.encode_dense.append(layer)
    for i in range(1, dense_layers):
        layer = Dense(dense_neurons[i], activation=activation)(Dropout(dense_dropouts[i])(self.encode_dense[i - 1]))
        self.encode_dense.append(layer)

    # define embedding layer;
    self.z_mean = Dense(latent_dim)(self.encode_dense[-1])
    self.z_log_var = Dense(latent_dim)(self.encode_dense[-1])
    self.z = Lambda(self._sampling, output_shape=(latent_dim,))([self.z_mean, self.z_log_var])

    # save all decoding layers for generation model;
    self.all_decoding = []

    # define dense decoding layers;
    self.decode_dense = []
    layer = Dense(dense_neurons[-1], activation=activation)
    self.all_decoding.append(layer)
    self.decode_dense.append(layer(self.z))
    for i in range(1, dense_layers):
        layer = Dense(dense_neurons[-i - 1], activation=activation)
        self.all_decoding.append(layer)
        self.decode_dense.append(layer(self.decode_dense[i - 1]))

    # dummy model to get image size after encoding convolutions;
    self.decode_conv = []
    if K.image_dim_ordering() == 'th' or K.image_dim_ordering() == 'channels_first':
        dummy_input = np.ones((1, channels, image_size[0], image_size[1]))
    else:
        dummy_input = np.ones((1, image_size[0], image_size[1], channels))
    dummy = Model(self.input, self.encode_conv[-1])
    conv_size = dummy.predict(dummy_input).shape
    layer = Dense(conv_size[1] * conv_size[2] * conv_size[3], activation=activation)
    self.all_decoding.append(layer)
    self.decode_dense.append(layer(self.decode_dense[-1]))
    reshape = Reshape(conv_size[1:])
    self.all_decoding.append(reshape)
    self.decode_conv.append(reshape(self.decode_dense[-1]))

    # define deconvolutional decoding layers;
    for i in range(1, conv_layers):
        if K.image_dim_ordering() == 'th' or K.image_dim_ordering() == 'channels_first':
            dummy_input = np.ones((1, channels, image_size[0], image_size[1]))
        else:
            dummy_input = np.ones((1, image_size[0], image_size[1], channels))
        dummy = Model(self.input, self.encode_conv[-i - 1])
        conv_size = list(dummy.predict(dummy_input).shape)

        if K.image_dim_ordering() == 'th' or K.image_dim_ordering() == 'channels_first':
            conv_size[1] = feature_maps[-i]
        else:
            conv_size[3] = feature_maps[-i]

        layer = Conv2DTranspose(feature_maps[-i - 1], filter_shapes[-i], padding='same',
                                activation=activation, strides=strides[-i])
        self.all_decoding.append(layer)
        self.decode_conv.append(layer(self.decode_conv[i - 1]))

    layer = Conv2DTranspose(channels, filter_shapes[0], padding='same',
                            activation='sigmoid', strides=strides[0])
    self.all_decoding.append(layer)
    self.output = layer(self.decode_conv[-1])

    # build model;
    self.model = Model(self.input, self.output)
    self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    self.model.compile(optimizer=self.optimizer, loss=self._vae_loss)
    # print "model summary:"
    # self.model.summary()

    # model for embeddings;
    self.embedder = Model(self.input, self.z_mean)

    # model for generation;
    self.decoder_input = Input(shape=(latent_dim,))
    self.generation = []
    self.generation.append(self.all_decoding[0](self.decoder_input))
    for i in range(1, len(self.all_decoding)):
        self.generation.append(self.all_decoding[i](self.generation[i - 1]))
    self.generator = Model(self.decoder_input, self.generation[-1])
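# Usage sketch (not from the original source): instantiating the convolutional VAE whose
# __init__ is shown above. The enclosing class name `ConvVAE` and every hyper-parameter
# value below are assumptions for illustration only.
# vae = ConvVAE(image_size=(64, 64), channels=1, conv_layers=2,
#               feature_maps=[32, 64], filter_shapes=[(3, 3), (3, 3)],
#               strides=[(2, 2), (2, 2)], dense_layers=1, dense_neurons=[128],
#               dense_dropouts=[0.0], latent_dim=10)
# vae.model.fit(x_train, x_train, epochs=10, batch_size=32)  # reconstruct inputs
# codes = vae.embedder.predict(x_train)                      # latent embeddings
# samples = vae.generator.predict(np.random.normal(size=(16, 10)))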
def f(x):
    if use_prelu:
        x = PReLU()(x)
    else:
        x = Lambda(relu)(x)
    return x
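# Usage sketch (not from the original source): the helper above switches between a learnable
# PReLU and a plain ReLU Lambda based on a `use_prelu` flag, so the activation choice can be
# toggled in one place. A minimal self-contained version of the same pattern:
from keras.layers import Input, Dense, PReLU, Lambda
from keras.activations import relu
from keras.models import Model

use_prelu = True  # assumed flag, defined elsewhere in the original code

def activation_block(x):
    if use_prelu:
        x = PReLU()(x)
    else:
        x = Lambda(relu)(x)
    return x

inp = Input(shape=(16,))
out = activation_block(Dense(32)(inp))
toy_model = Model(inp, out)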
def get_siamese_model(input_shape):
    """ Model architecture """

    def initialize_weights():
        return 'glorot_uniform'

    def initialize_bias():
        return 'zeros'

    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Convolutional Neural Network
    model = Sequential()
    model.add(Conv2D(64, (10, 10), activation='relu', input_shape=input_shape,
                     kernel_initializer=initialize_weights(),
                     kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (7, 7), activation='relu',
                     kernel_initializer=initialize_weights(),
                     bias_initializer=initialize_bias(),
                     kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(128, (4, 4), activation='relu',
                     kernel_initializer=initialize_weights(),
                     bias_initializer=initialize_bias(),
                     kernel_regularizer=regularizers.l2(2e-4)))
    model.add(MaxPooling2D())
    model.add(Conv2D(256, (4, 4), activation='relu',
                     kernel_initializer=initialize_weights(),
                     bias_initializer=initialize_bias(),
                     kernel_regularizer=regularizers.l2(2e-4)))
    model.add(Flatten())
    model.add(Dense(4096, activation='sigmoid'))

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Add a dense layer with a sigmoid unit to generate the similarity score
    # (call the initializer helper, like the conv layers above, so Keras gets the string it expects)
    prediction = Dense(1, activation='sigmoid',
                       bias_initializer=initialize_bias())(L1_distance)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input, right_input], outputs=prediction)

    # return the model
    return siamese_net
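# Usage sketch (not from the original source): compiling and smoke-testing the siamese
# network returned above. The 105x105 grayscale input shape and the random pair arrays are
# assumptions used only to exercise the graph; real training would use labelled image pairs.
import numpy as np

siamese_net = get_siamese_model((105, 105, 1))
siamese_net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

pairs_left = np.random.rand(8, 105, 105, 1)
pairs_right = np.random.rand(8, 105, 105, 1)
same_class = np.random.randint(0, 2, size=(8, 1))
siamese_net.fit([pairs_left, pairs_right], same_class, batch_size=4, epochs=1)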
plt.imshow(original_image)
plt.savefig('./examples/original_image.png')

flipped_image = cv2.flip(image, 1)
plt.figure()
plt.imshow(flipped_image)
plt.savefig('./examples/flipped_image.png')
image_taken = image_taken + 1

# get the training data
X_train = np.array(argumented_images)
y_train = np.array(argumented_steering_angles)

# building the model
model = Sequential()
model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape=(160, 320, 3)))
model.add(Cropping2D(cropping=((70, 25), (0, 0))))
model.add(Convolution2D(24, 5, 5, subsample=(2, 2), activation="relu"))
model.add(Convolution2D(36, 5, 5, subsample=(2, 2), activation="relu"))
model.add(Convolution2D(48, 5, 5, subsample=(2, 2), activation="relu"))
model.add(Convolution2D(64, 3, 3, activation="relu"))
model.add(Convolution2D(64, 3, 3, activation="relu"))
model.add(Flatten())
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(50))
model.add(Activation('relu'))
def create_model(filter_kernels, dense_outputs, maxlen, vocab_size, nb_filter, cat_output):
    initializer = RandomNormal(mean=0.0, stddev=0.05, seed=None)

    # Define what the input shape looks like
    inputs = Input(shape=(maxlen,), dtype='int64')

    # Option one:
    # Uncomment following code to use a lambda layer to create a onehot encoding of a sequence of characters on the fly.
    # Holding one-hot encodings in memory is very inefficient.
    # The output_shape of embedded layer will be: batch x maxlen x vocab_size
    # import tensorflow as tf
    def one_hot(x):
        return tf.one_hot(x, vocab_size, on_value=1.0, off_value=0.0, axis=-1, dtype=tf.float32)

    def one_hot_outshape(in_shape):
        return in_shape[0], in_shape[1], vocab_size

    embedded = Lambda(one_hot, output_shape=one_hot_outshape)(inputs)

    # Option two:
    # Or, simply use Embedding layer as following instead of use lambda to create one-hot layer
    # Think of it as a one-hot embedding and a linear layer mashed into a single layer.
    # See discussion here: https://github.com/keras-team/keras/issues/4838
    # Note this will introduce one extra layer of weights (of size vocab_size x vocab_size = 69*69 = 4761)
    # embedded = Embedding(input_dim=vocab_size, output_dim=vocab_size)(inputs)

    # All the convolutional layers...
    conv = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[0],
                         kernel_initializer=initializer,
                         padding='valid', activation='relu',
                         input_shape=(maxlen, vocab_size), name='Conv1')(embedded)
    conv = MaxPooling1D(pool_size=3, name='MaxPool1')(conv)

    conv1 = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[1],
                          kernel_initializer=initializer,
                          padding='valid', activation='relu', name='Conv2')(conv)
    conv1 = MaxPooling1D(pool_size=3, name='MaxPool2')(conv1)

    conv2 = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[2],
                          kernel_initializer=initializer,
                          padding='valid', activation='relu', name='Conv3')(conv1)

    conv3 = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[3],
                          kernel_initializer=initializer,
                          padding='valid', activation='relu', name='Conv4')(conv2)

    conv4 = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[4],
                          kernel_initializer=initializer,
                          padding='valid', activation='relu', name='Conv5')(conv3)

    conv5 = Convolution1D(filters=nb_filter, kernel_size=filter_kernels[5],
                          kernel_initializer=initializer,
                          padding='valid', activation='relu', name='Conv6')(conv4)
    conv5 = MaxPooling1D(pool_size=3, name='MaxPool3')(conv5)

    k = 40  # K-max pooling

    def kmax_outshape(in_shape):
        return (in_shape[0], in_shape[2] * k)

    def KMaxPooling(inputs):
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=k, sorted=True, name='TopK')[0]
        return top_k

    # conv5 = Lambda(KMaxPooling, output_shape=kmax_outshape)(conv5)
    conv5 = Flatten()(conv5)

    # Two dense layers with dropout of .5
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(conv5))
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(z))

    # Output dense layer with softmax activation
    pred = Dense(cat_output, activation='softmax', name='output')(z)

    model = Model(inputs=inputs, outputs=pred)
    print(model.summary())

    sgd = SGD(lr=0.01, momentum=0.9)
    adam = Adam(lr=0.001)  # Feel free to use SGD above. I found Adam with lr=0.001 is faster than SGD with lr=0.01
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

    return model
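# Usage sketch (not from the original source): building the character-level CNN above with
# Crepe-style hyper-parameters. The concrete values (1014-character inputs, a 69-character
# vocabulary, 256 filters, 4 output classes) and the training arrays are assumptions for
# illustration only.
# char_cnn = create_model(filter_kernels=[7, 7, 3, 3, 3, 3], dense_outputs=1024,
#                         maxlen=1014, vocab_size=69, nb_filter=256, cat_output=4)
# char_cnn.fit(x_train_ids, y_train_onehot, batch_size=128, epochs=10)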
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                clip_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):

    n_predictor_layers = 3  # The number of predictor conv layers in the network
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError("`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.")
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:  # We need one variance value for each of the four box coordinates
        raise ValueError("4 variance values must be passed, but {} values were received.".format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError("You must provide at least one step value per predictor layer.")
    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError("You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) + 1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]]], axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]], axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization, output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization, output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1)

    conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1)
    conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1)  # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3
    conv1 = ELU(name='elu1')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)

    conv2 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1)
    conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
    conv2 = ELU(name='elu2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)

    conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2)
    conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
    conv3 = ELU(name='elu3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3)
    conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
    conv4 = ELU(name='elu4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)

    conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4)
    conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
    conv5 = ELU(name='elu5')(conv5)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)

    conv6 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5)
    conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
    conv6 = ELU(name='elu6')(conv6)

    classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4)
    classes5 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5)
    classes6 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6)

    boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4)
    boxes5 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5)
    boxes6 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6)

    # Generate the anchor boxes
    anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0],
                           clip_boxes=clip_boxes, variances=variances, coords=coords,
                           normalize_coords=normalize_coords, name='anchors4')(boxes4)
    anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0],
                           clip_boxes=clip_boxes, variances=variances, coords=coords,
                           normalize_coords=normalize_coords, name='anchors5')(boxes5)
    anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1],
                           two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1],
                           clip_boxes=clip_boxes, variances=variances, coords=coords,
                           normalize_coords=normalize_coords, name='anchors6')(boxes6)

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    classes4_reshaped = Reshape((-1, n_classes), name='classes4_reshape')(classes4)
    classes5_reshaped = Reshape((-1, n_classes), name='classes5_reshape')(classes5)
    classes6_reshaped = Reshape((-1, n_classes), name='classes6_reshape')(classes6)

    # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
    boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
    boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)

    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
    anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
    anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)

    # Concatenate the predictions from the different layers and the associated anchor box tensors
    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1, name='classes_concat')([classes4_reshaped, classes5_reshaped, classes6_reshaped])

    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')([boxes4_reshaped, boxes5_reshaped, boxes6_reshaped])

    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1, name='anchors_concat')([anchors4_reshaped, anchors5_reshaped, anchors6_reshaped])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')([classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh,
                                               iou_threshold=iou_threshold,
                                               top_k=top_k,
                                               nms_max_output_size=nms_max_output_size,
                                               coords=coords,
                                               normalize_coords=normalize_coords,
                                               img_height=img_height,
                                               img_width=img_width,
                                               name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(confidence_thresh=confidence_thresh,
                                                   iou_threshold=iou_threshold,
                                                   top_k=top_k,
                                                   nms_max_output_size=nms_max_output_size,
                                                   coords=coords,
                                                   normalize_coords=normalize_coords,
                                                   img_height=img_height,
                                                   img_width=img_width,
                                                   name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError("`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'.".format(mode))

    if return_predictor_sizes:
        # The spatial dimensions are the same for the `classes` and `boxes` predictor layers.
        predictor_sizes = np.array([classes4._keras_shape[1:3],
                                    classes5._keras_shape[1:3],
                                    classes6._keras_shape[1:3]])
        return model, predictor_sizes
    else:
        return model
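# Usage sketch (not from the original source): building the small SSD-style detector above
# in training mode. The image size, class count, scales, and the SSD loss object are
# assumptions for illustration; a matching loss implementation (e.g. an SSDLoss class)
# comes from the surrounding project and is only referenced here.
# model = build_model(image_size=(300, 480, 3), n_classes=5, mode='training',
#                     l2_regularization=0.0005, scales=[0.08, 0.16, 0.32, 0.96],
#                     aspect_ratios_global=[0.5, 1.0, 2.0], normalize_coords=True)
# model.compile(optimizer='adam', loss=ssd_loss.compute_loss)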
embedder = Embedding(MAX_TOKENS + 1, embedding_dim, weights=[embeddings], trainable=False)

doc_embedding = embedder(document)
l_embedding = embedder(left_context)
r_embedding = embedder(right_context)

# I use LSTM RNNs instead of vanilla RNNs as described in the paper.
forward = LSTM(hidden_dim_1, return_sequences=True)(l_embedding)  # See equation (1).
backward = LSTM(hidden_dim_1, return_sequences=True, go_backwards=True)(r_embedding)  # See equation (2).
# Keras returns the output sequences in reverse order.
backward = Lambda(lambda x: backend.reverse(x, axes=1))(backward)
together = concatenate([forward, doc_embedding, backward], axis=2)  # See equation (3).

semantic = TimeDistributed(Dense(hidden_dim_2, activation="tanh"))(together)  # See equation (4).

# Keras provides its own max-pooling layers, but they cannot handle variable length input
# (as far as I can tell). As a result, I define my own max-pooling layer here.
pool_rnn = Lambda(lambda x: backend.max(x, axis=1), output_shape=(hidden_dim_2,))(semantic)  # See equation (5).

output = Dense(NUM_CLASSES, input_dim=hidden_dim_2, activation="softmax")(pool_rnn)  # See equations (6) and (7).

model = Model(inputs=[document, left_context, right_context], outputs=output)
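# Usage sketch (not from the original source): compiling and fitting the RCNN text
# classifier defined above. The padded id arrays and the label matrix are hypothetical
# names standing in for the project's prepared training data.
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model.fit([doc_ids, left_context_ids, right_context_ids], one_hot_labels,
#           batch_size=32, epochs=5, validation_split=0.1)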
def __init__(self, input_size, weights=None):
    input_image = Input(shape=(input_size, input_size, 3))

    # the function to implement the organization layer (thanks to github.com/allanzelener/YAD2K)
    def space_to_depth_x2(x):
        return tf.space_to_depth(x, block_size=2)

    # Layer 1
    x = Conv2D(32, (3, 3), strides=(1, 1), padding='same', name='conv_1', use_bias=False)(input_image)
    x = BatchNormalization(name='norm_1')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 2
    x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', name='conv_2', use_bias=False)(x)
    x = BatchNormalization(name='norm_2')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 3
    x = Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_3', use_bias=False)(x)
    x = BatchNormalization(name='norm_3')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 4
    x = Conv2D(64, (1, 1), strides=(1, 1), padding='same', name='conv_4', use_bias=False)(x)
    x = BatchNormalization(name='norm_4')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 5
    x = Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_5', use_bias=False)(x)
    x = BatchNormalization(name='norm_5')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 6
    x = Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_6', use_bias=False)(x)
    x = BatchNormalization(name='norm_6')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 7
    x = Conv2D(128, (1, 1), strides=(1, 1), padding='same', name='conv_7', use_bias=False)(x)
    x = BatchNormalization(name='norm_7')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 8
    x = Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_8', use_bias=False)(x)
    x = BatchNormalization(name='norm_8')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 9
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_9', use_bias=False)(x)
    x = BatchNormalization(name='norm_9')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 10
    x = Conv2D(256, (1, 1), strides=(1, 1), padding='same', name='conv_10', use_bias=False)(x)
    x = BatchNormalization(name='norm_10')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 11
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_11', use_bias=False)(x)
    x = BatchNormalization(name='norm_11')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 12
    x = Conv2D(256, (1, 1), strides=(1, 1), padding='same', name='conv_12', use_bias=False)(x)
    x = BatchNormalization(name='norm_12')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 13
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_13', use_bias=False)(x)
    x = BatchNormalization(name='norm_13')(x)
    x = LeakyReLU(alpha=0.1)(x)

    skip_connection = x

    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 14
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_14', use_bias=False)(x)
    x = BatchNormalization(name='norm_14')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 15
    x = Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_15', use_bias=False)(x)
    x = BatchNormalization(name='norm_15')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 16
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_16', use_bias=False)(x)
    x = BatchNormalization(name='norm_16')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 17
    x = Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_17', use_bias=False)(x)
    x = BatchNormalization(name='norm_17')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 18
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_18', use_bias=False)(x)
    x = BatchNormalization(name='norm_18')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 19
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_19', use_bias=False)(x)
    x = BatchNormalization(name='norm_19')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 20
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_20', use_bias=False)(x)
    x = BatchNormalization(name='norm_20')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 21
    skip_connection = Conv2D(64, (1, 1), strides=(1, 1), padding='same', name='conv_21', use_bias=False)(skip_connection)
    skip_connection = BatchNormalization(name='norm_21')(skip_connection)
    skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_22', use_bias=False)(x)
    x = BatchNormalization(name='norm_22')(x)
    x = LeakyReLU(alpha=0.1)(x)

    self.feature_extractor = Model(input_image, x)

    if weights == 'imagenet':
        print('Imagenet weights for the YOLO backend are not available yet, defaulting to random weights')
    elif weights == None:
        pass
    else:
        print('Loaded backend weights: ' + weights)
        self.feature_extractor.load_weights(weights)
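# Usage sketch (not from the original source): the __init__ above builds a Darknet-19 style
# YOLOv2 feature extractor and stores it on self.feature_extractor. Assuming the enclosing
# backend class is called FullYoloFeature (hypothetical name):
# backend = FullYoloFeature(input_size=416, weights='full_yolo_backend.h5')
# features = backend.feature_extractor.predict(np.zeros((1, 416, 416, 3)))
# print(features.shape)  # a 416x416 input is downsampled by 32, so (1, 13, 13, 1024)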
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):
    """Instantiates the Inflated 3D Inception v1 architecture.

    Optionally loads weights pre-trained on Kinetics. Note that when using TensorFlow,
    for best performance you should set `image_data_format='channels_last'` in your
    Keras config at ~/.keras/keras.json. The model and the weights are compatible with
    both TensorFlow and Theano. The data format convention used by the model is the
    one specified in your Keras config file.

    Note that the default input frame (image) size for this model is 224x224.

    # Arguments
        include_top: whether to include the classification layer at the top of the network.
        weights: one of `None` (random initialization),
            'kinetics_only' (pre-training on Kinetics dataset only),
            or 'imagenet_and_kinetics' (pre-training on ImageNet and Kinetics datasets).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified if `include_top` is False
            (otherwise the input shape has to be `(NUM_FRAMES, 224, 224, 3)`
            (with `channels_last` data format) or `(NUM_FRAMES, 3, 224, 224)`
            (with `channels_first` data format). It should have exactly 3 input channels.
            NUM_FRAMES should be no smaller than 8. The authors used 64 frames per example
            for training and testing on the Kinetics dataset. Also, width and height should
            be no smaller than 32. E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional, dropout probability applied in the dropout layer after the
            global average pooling layer. 0.0 means no dropout is applied, 1.0 means dropout
            is applied to all features. Note: Since Dropout is applied just before the
            classification layer, it is only useful when `include_top` is set to True.
        endpoint_logit: (boolean) optional. If True, the model's forward pass ends at
            producing logits. Otherwise, softmax is applied after producing the logits to
            produce the class probabilities prediction. Setting this parameter to True is
            particularly useful when you want to combine the results of the rgb model and
            the optical flow model.
            - `True`: end model forward pass at logit output
            - `False`: go further after logit to produce softmax predictions
            Note: This parameter is only useful when `include_top` is set to True.
        classes: optional number of classes to classify images into, only to be specified
            if `include_top` is True, and if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape.
    """
    if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' '
                         'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError('If using `weights` as one of these %s, with `include_top`'
                         ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_frame_size=224,
                                      min_frame_size=32,
                                      default_num_frames=64,
                                      min_num_frames=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b
    branch_0 = conv3d_bn(x, 64, 1, 1, 1, padding='same', name='Conv3d_3b_0a_1x1')

    branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_3b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 128, 3, 3, 3, padding='same', name='Conv3d_3b_1b_3x3')

    branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_3b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 32, 3, 3, 3, padding='same', name='Conv3d_3b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 32, 1, 1, 1, padding='same', name='Conv3d_3b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_3b')

    # Mixed 3c
    branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_0a_1x1')

    branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 192, 3, 3, 3, padding='same', name='Conv3d_3c_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_3c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 96, 3, 3, 3, padding='same', name='Conv3d_3c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_3c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_3c')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b
    branch_0 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_4b_0a_1x1')

    branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_4b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 208, 3, 3, 3, padding='same', name='Conv3d_4b_1b_3x3')

    branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_4b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 48, 3, 3, 3, padding='same', name='Conv3d_4b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4b')

    # Mixed 4c
    branch_0 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4c_0a_1x1')

    branch_1 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 224, 3, 3, 3, padding='same', name='Conv3d_4c_1b_3x3')

    branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4c')

    # Mixed 4d
    branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_0a_1x1')

    branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 256, 3, 3, 3, padding='same', name='Conv3d_4d_1b_3x3')

    branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4d_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4d_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4d_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4d_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4d')

    # Mixed 4e
    branch_0 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4e_0a_1x1')

    branch_1 = conv3d_bn(x, 144, 1, 1, 1, padding='same', name='Conv3d_4e_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 288, 3, 3, 3, padding='same', name='Conv3d_4e_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4e_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4e_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4e_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4e_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4e')

    # Mixed 4f
    branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_4f_0a_1x1')

    branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4f_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_4f_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4f_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_4f_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4f_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_4f_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_4f')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b
    branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_5b_0a_1x1')

    branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3')

    branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_5b')

    # Mixed 5c
    branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1')

    branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1')
    branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3')

    branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1')
    branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=channel_axis, name='Mixed_5c')

    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same', use_bias=True,
                      use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1')

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)

        # logits (raw scores for each class)
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        h = int(x.shape[2])
        w = int(x.shape[3])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)

    inputs = img_input

    # create model
    model = Model(inputs, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        if weights == WEIGHTS_NAME[0]:  # rgb_kinetics_only
            if include_top:
                weights_url = WEIGHTS_PATH['rgb_kinetics_only']
                model_name = 'i3d_inception_rgb_kinetics_only.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only']
                model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5'
        elif weights == WEIGHTS_NAME[1]:  # flow_kinetics_only
            if include_top:
                weights_url = WEIGHTS_PATH['flow_kinetics_only']
                model_name = 'i3d_inception_flow_kinetics_only.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['flow_kinetics_only']
                model_name = 'i3d_inception_flow_kinetics_only_no_top.h5'
        elif weights == WEIGHTS_NAME[2]:  # rgb_imagenet_and_kinetics
            if include_top:
                weights_url = WEIGHTS_PATH['rgb_imagenet_and_kinetics']
                model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['rgb_imagenet_and_kinetics']
                model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5'
        elif weights == WEIGHTS_NAME[3]:  # flow_imagenet_and_kinetics
            if include_top:
                weights_url = WEIGHTS_PATH['flow_imagenet_and_kinetics']
                model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5'
            else:
                weights_url = WEIGHTS_PATH_NO_TOP['flow_imagenet_and_kinetics']
                model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5'

        downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models')
        model.load_weights(downloaded_weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights)

    return model
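# Usage sketch (not from the original source): instantiating the RGB I3D model above with
# pre-trained weights. The weight identifier matches the names referenced in the
# weight-loading branch; the random clip below is only meant to exercise the graph.
# rgb_model = Inception_Inflated3d(include_top=True,
#                                  weights='rgb_imagenet_and_kinetics',
#                                  classes=400)
# sample_clip = np.random.rand(1, 64, 224, 224, 3)  # (batch, frames, height, width, RGB)
# class_scores = rgb_model.predict(sample_clip)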
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon


# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')  # Deepen decoder after this
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)


# Custom loss layer
class CustomVariationalLayer(Layer):
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)
# embeddingPrompt('char')
embedding_layer_c = Embedding(len(char.index) + 1,
                              CHAR_EMBEDDING_DIM,
                              weights=[char_embedding_matrix],
                              input_length=MAX_WORD_LENGTH,
                              trainable=trainable,
                              mask_zero=mask)

sequence_input_c = Input(shape=(padsize, MAX_WORD_LENGTH,), dtype='int32')
embedded_sequences_c = embedding_layer_c(sequence_input_c)

rone = Lambda(reshape_one)(embedded_sequences_c)

merge_m = 'sum'  # raw_input('Enter merge mode for GRU Karakter: ')
merge_m_c = merge_m
dropout = 0.2  # input('Enter dropout for GRU: ')
rec_dropout = dropout  # input('Enter GRU Karakter recurrent dropout: ')

gru_karakter = Bidirectional(GRU(CHAR_EMBEDDING_DIM,
                                 return_sequences=False,
                                 dropout=dropout,
                                 recurrent_dropout=rec_dropout),
                             merge_mode=merge_m,
                             weights=None)(rone)

rtwo = Lambda(reshape_two)(gru_karakter)

"""
Combine word + char model
def make_decoder_layer(all_anchors, num_classes, input_shape): # Lambda layer for postprocessing YOLOv3 output def decode(yolo_outputs): num_scales = len(yolo_outputs) anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2] ] if num_scales == 3 else [[3, 4, 5], [1, 2, 3]] b_min_max_list = [] box_confidence_list = [] class_probs_list = [] for scale_idx in np.arange(num_scales): anchors = all_anchors[anchor_mask[scale_idx]] output = yolo_outputs[scale_idx] num_anchors = len(anchors) batch_size = K.shape(output)[0] grid_shape = K.shape(output)[1:3] grid_height = grid_shape[0] # height grid_width = grid_shape[1] # width # reshape to tensor of dimensions batch_size, grid_height, grid_width, num_anchors, 5 + num_classes # the five box parameters are: # t_x, t_y determine the center point of the box # t_w, t_h determine the width and height of the box # the box confidence indicates the confidence that the box contains an object and that the box is accurate output = K.reshape( output, [-1, grid_height, grid_width, num_anchors, 5 + num_classes]) # compute b_x, b_y for each cell and each anchor c_x = K.tile(K.reshape(K.arange(grid_width), [1, -1, 1, 1]), [grid_height, 1, num_anchors, 1]) c_y = K.tile(K.reshape(K.arange(grid_height), [-1, 1, 1, 1]), [1, grid_width, num_anchors, 1]) c_xy = K.concatenate([c_x, c_y]) c_xy = K.cast(c_xy, K.dtype(output)) b_xy = (K.sigmoid(output[..., :2]) + c_xy) / K.cast( grid_shape[::-1], K.dtype(output)) # compute b_w and b_h for each cell and each anchor p_wh = K.tile( K.reshape(K.constant(anchors), [1, 1, num_anchors, 2]), [grid_height, grid_width, 1, 1]) b_wh = p_wh * K.exp(output[..., 2:4]) / K.cast( input_shape[::-1], K.dtype(output)) b_min_max = K.reshape(convert_box_params( b_xy, b_wh), [batch_size, -1, 4]) # y_min, x_min, y_max, x_max # compute box confidence for each cell and each anchor box_confidence = K.reshape(K.sigmoid(output[..., 4]), [batch_size, -1]) # compute class probabilities for each cell and each anchor class_probs = K.reshape(K.sigmoid(output[..., 5:]), [batch_size, -1, num_classes]) b_min_max_list.append(b_min_max) box_confidence_list.append(box_confidence) class_probs_list.append(class_probs) return [ K.concatenate(b_min_max_list, axis=1), K.concatenate(box_confidence_list, axis=1), K.concatenate(class_probs_list, axis=1) ] return Lambda(decode)
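# A minimal sketch of the convert_box_params helper assumed by decode() above: it turns box
# centers and sizes into (y_min, x_min, y_max, x_max) corners, matching the comment on
# b_min_max. This is an illustration, not necessarily the original implementation.
def convert_box_params(b_xy, b_wh):
    b_yx = b_xy[..., ::-1]          # (x, y) -> (y, x)
    b_hw = b_wh[..., ::-1]          # (w, h) -> (h, w)
    box_mins = b_yx - b_hw / 2.0    # (y_min, x_min)
    box_maxes = b_yx + b_hw / 2.0   # (y_max, x_max)
    return K.concatenate([box_mins, box_maxes])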
_train_data, _test_data = read.split(feature_data, test_id) _train_data = read.remove_class(_train_data, [a_label]) _train_data = create_train_instances(_train_data) _support_data, _test_data = read.support_set_split( _test_data, samples_per_class) _support_data, _support_labels = read.flatten(_support_data) _support_data = np.array(_support_data) numsupportset = samples_per_class * classes_per_set input1 = Input((numsupportset + 1, feature_length)) modelinputs = [] base_network = mlp_embedding() for lidx in range(numsupportset): modelinputs.append( base_network(Lambda(lambda x: x[:, lidx, :])(input1))) targetembedding = base_network(Lambda(lambda x: x[:, -1, :])(input1)) modelinputs.append(targetembedding) supportlabels = Input((numsupportset, classes_per_set)) modelinputs.append(supportlabels) knnsimilarity = MatchCosine(nway=classes_per_set, n_samp=samples_per_class)(modelinputs) model = Model(inputs=[input1, supportlabels], outputs=knnsimilarity) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) model.fit([_train_data[0], _train_data[1]], _train_data[2], epochs=epochs, batch_size=batch_size,
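# A hedged sketch of the mlp_embedding base network used above: a small fully connected encoder
# over `feature_length`-dimensional inputs. Layer sizes are illustrative only; Sequential, Dense
# and Dropout are assumed to be imported from keras.
def mlp_embedding():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(feature_length,)))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    return model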
import math # Use pre-trained VGG16 model as a start point. input_height = 65 input_width = 320 vgg = VGG16(include_top=False, weights='imagenet', input_shape=(input_height, input_width, 3)) # freeze all layers weight for layer in vgg.layers: layer.trainable = False #vgg.summary() input_shape = Input(shape=(160, 320, 3)) normalize = Lambda(lambda x: x / 255.0 - 0.5)(input_shape) # Normalize inputs crop_input = Cropping2D(cropping=((70, 25), (0, 0)))(normalize) vgg16 = vgg(crop_input) flatten = Flatten()(vgg16) fc1 = Dense(2048, activation='relu')(flatten) d1 = Dropout(0.5)(fc1) # dropout regularization fc2 = Dense(2048, activation='relu')(d1) d2 = Dropout(0.5)(fc2) # dropout regularization prediction = Dense(1)(d2) model = Model(inputs=input_shape, outputs=prediction) model.compile(optimizer='Adam', loss='mse') # model.summary() model.fit_generator(train_generator, steps_per_epoch=math.ceil(len(train_data) / 32), epochs=5,
def train_lstm(x1_train, x1_test, f1_train, f1_test, x2_train, x2_test, f2_train, f2_test, y_train, y_test): print('Defining a Simple Keras Model...') input_shape = x1_train.shape[1:] input_shape2 = f1_train.shape[1:] base_network = create_base_network(input_shape, input_shape2) Mydot = Lambda(lambda x: K.batch_dot(x[0], x[1])) # Q-part -- take the average of the hidden layer input_con = Input(shape=input_shape) input_f1 = Input(shape=input_shape2) hid_con = base_network([input_con, input_f1]) ave_con = GlobalAveragePooling1D()(hid_con) print('ave_con:', np.shape(ave_con)) ave_con = Reshape((100, 1))(ave_con) print('avg_con_reshape:', np.shape(ave_con)) # A-part -- get the hidden layer output input_tag = Input(shape=input_shape) input_f2 = Input(shape=input_shape2) hid_tag = base_network([input_tag, input_f2]) # A-part Attention1 tag_at = MyLayer_one()([hid_tag, ave_con]) print('*****************') print(tag_at) tag_at = Flatten()(tag_at) print(tag_at) tag_at = Activation('softmax')(tag_at) print(tag_at) tag_at = RepeatVector(1)(tag_at) print(tag_at) print(hid_tag) att_tag_mul = Mydot([tag_at, hid_tag]) print('att_tag_mul:', np.shape(att_tag_mul)) at_done = Flatten()(att_tag_mul) print('out:', np.shape(at_done)) output = Dropout(0.2)(at_done) output = Dense(3)(output) output = Activation('softmax')(output) model = Model(inputs=[input_con, input_f1, input_tag, input_f2], outputs=output) print('Compiling the Model...') model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) print("Train...") earlyStopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto') saveBestModel = callbacks.ModelCheckpoint('lstm_model/part_1_relation.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='auto') model.fit([x1_train, f1_train, x2_train, f2_train], y_train, batch_size=batch_size, epochs=n_epoch, verbose=1, validation_data=([x1_test, f1_test, x2_test, f2_test], y_test), callbacks=[earlyStopping, saveBestModel])
def create_model(anchors, class_names, is_use_center_loss, load_pretrained=True, freeze_body=True): ''' returns the body of the model and the model # Params: load_pretrained: whether or not to load the pretrained model or initialize all weights freeze_body: whether or not to freeze all weights except for the last layer's # Returns: model_body: YOLOv2 with new output layer model: YOLOv2 with custom loss Lambda layer ''' # detectors_mask_shape = (13, 13, 5, 1) # matching_boxes_shape = (13, 13, 5, 5) detectors_mask_shape = (5, 5, 5, 1) matching_boxes_shape = (5, 5, 5, 5) # Create model input layers. image_input = Input(shape=(160, 160, 3)) boxes_input = Input(shape=(None, 5)) detectors_mask_input = Input(shape=detectors_mask_shape) matching_boxes_input = Input(shape=matching_boxes_shape) # Create model body. yolo_model = yolo_body(image_input, len(anchors), len(class_names)) topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output) if load_pretrained: # Save topless yolo: topless_yolo_path = os.path.join('model_data', 'pretrained_best.h5') if not os.path.exists(topless_yolo_path): print("CREATING TOPLESS WEIGHTS FILE") yolo_path = os.path.join('model_data', 'yolo.h5') model_body = load_model(yolo_path) model_body = Model(model_body.inputs, model_body.layers[-2].output) model_body.save_weights(topless_yolo_path) # topless_yolo.load_weights(topless_yolo_path) if freeze_body: for layer in topless_yolo.layers: layer.trainable = False final_layer = Conv2D(len(anchors) * (5 + len(class_names)), (1, 1), activation='linear')(topless_yolo.output) model_body = Model(image_input, final_layer) model_loss = None # Place model loss on CPU to reduce GPU memory usage. with tf.device('/cpu:0'): # TODO: Replace Lambda with custom Keras layer for loss. if not is_use_center_loss: model_loss = Lambda( yolo_loss, output_shape=(1,), name='yolo_loss', arguments={'anchors': anchors, 'num_classes': len(class_names)}) \ ([model_body.output, boxes_input, detectors_mask_input, matching_boxes_input]) else: model_center_loss = Lambda( yolo_center_loss, output_shape=(1,), name='yolo_loss', arguments={'anchors': anchors, 'num_classes': len(class_names), 'ratio':0.1, 'alpha':0.5}) \ ([model_body.output, boxes_input, detectors_mask_input, matching_boxes_input]) model = Model([ model_body.input, boxes_input, detectors_mask_input, matching_boxes_input ], model_loss) model.load_weights(os.path.join('model_data', 'pretrained_best.h5')) return model_body, model
def createPreProcessingLayers(): #Creates a model with the initial pre-processing layers. model = Sequential() model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(160,320,3))) model.add(Cropping2D(cropping=((50,20), (0,0)))) return model
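# A hedged usage sketch: extending the pre-processing model above with a small convolutional
# regression head for steering-angle prediction. Layer sizes and the optimizer are illustrative;
# Conv2D, Flatten and Dense are assumed to be imported from keras.layers.
model = createPreProcessingLayers()
model.add(Conv2D(24, (5, 5), strides=(2, 2), activation='relu'))
model.add(Conv2D(36, (5, 5), strides=(2, 2), activation='relu'))
model.add(Conv2D(48, (5, 5), strides=(2, 2), activation='relu'))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')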
train_lines = label_lines[:int(len(label_lines) * config.validation_split)] valid_lines = label_lines[int(len(label_lines) * config.validation_split):] model_yolo = yolo.DarkNet()(n_class=num_classes, n_anchor=num_anchors) model_yolo.summary() h, w = config.image_input_shape y_true = [ Input(shape=(h // config.scale_size[l], w // config.scale_size[l], num_anchors // 3, num_classes + 5)) for l in range(3) ] model_loss = Lambda(yolo.yolo_loss, output_shape=(1, ), name='yolo_loss', arguments={ 'anchors': anchors, 'num_classes': num_classes })([*model_yolo.output, *y_true]) model = Model([model_yolo.input, *y_true], model_loss) model.compile(optimizer=Adam(1e-3), loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) model.fit_generator( generator=data_generator(label_lines=train_lines, batch_size=config.batch_size, input_shape=config.image_input_shape, anchors=anchors, num_classes=num_classes),
m = tf.add(y1, y2) x = tf.divide(n, m) return x embedding_size = 128 # inputs = tf.placeholder(tf.float32, [None, 96, 96, 3], name='input') inputs = Input((img_size[0], img_size[1], 3), name='group_input') base_module = InceptionResNetV2(weights=None, input_tensor=inputs, classes=embedding_size) custom_input = base_module.output # total number of images = batch_size * 2 x = Lambda(lambda x: tf.reshape(x, [batch_size, 2, embedding_size]), name='prediction_reshape')(custom_input) # matrix similarity computation: x^2 similarity x = Lambda(similarity, input_shape=[2, embedding_size], name='similarity')(x) # # linear regression x = Dense(1, activation='sigmoid', name='final_predict')(x) model = Model(base_module.input, x) model.load_weights('face_model_epoch_19.h5') def get_img_data(img_path): img = image.load_img(img_path, target_size=[96, 96]) img = image.img_to_array(img) / 255. return img
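# A hedged usage sketch for the similarity model above, assuming batch_size is 1 pair and
# img_size is (96, 96): the network expects 2 * batch_size face crops stacked along the batch
# axis and returns one sigmoid similarity score per pair. The file names are placeholders.
img_a = get_img_data('face_a.jpg')
img_b = get_img_data('face_b.jpg')
pair = np.stack([img_a, img_b])      # shape (2, 96, 96, 3) -> one pair
score = model.predict(pair)          # shape (1, 1)
print('similarity:', float(score[0][0]))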
yield sklearn.utils.shuffle(X_train, y_train) # compile and train the model using the generator function train_generator = generator(train_samples, batch_size=16) validation_generator = generator(validation_samples, batch_size=16) ch, row, col = 3, 160, 320 # Trimmed image format from keras.models import Sequential from keras.layers import Dense, Flatten, Lambda, Cropping2D, Activation, Dropout, Convolution2D from keras.layers.convolutional import Conv2D from keras.layers.pooling import MaxPooling2D model = Sequential() model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape=(row, col, ch), output_shape=(row, col, ch))) model.add(Cropping2D(cropping=((70, 26), (0, 0)))) model.add(Convolution2D(6, (5, 5), border_mode='same', activation='relu')) model.add(MaxPooling2D(4, 4)) model.add(Convolution2D(16, (5, 5), border_mode='same', activation='relu')) model.add(MaxPooling2D(2, 4)) model.add(Convolution2D(33, (3, 3), border_mode='same', activation='relu')) model.add(MaxPooling2D(1, 2)) model.add(Flatten()) model.add(Dense(190)) model.add(Dropout(0.75)) model.add(Activation('relu')) model.add(Dense(84)) model.add(Activation('relu')) model.add(Dense(1))
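# A hedged continuation: compiling the regression network above and training it with the
# generators defined earlier. The epoch count and step arithmetic are illustrative.
model.compile(loss='mse', optimizer='adam')
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_samples) // 16,
                    validation_data=validation_generator,
                    validation_steps=len(validation_samples) // 16,
                    epochs=5)
model.save('model.h5')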
def create_model(self): self._set_model_params() act = 'relu' input_data = Input(name='the_input', shape=self.input_shape, dtype='float32') inner = Convolution2D(self.conv_num_filters, self.filter_size, self.filter_size, border_mode='same', activation=act, name='conv1')(input_data) inner = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1), name='max1')(inner) inner = Convolution2D(self.conv_num_filters, self.filter_size, self.filter_size, border_mode='same', activation=act, name='conv2')(inner) inner = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2), name='max2')(inner) conv_to_rnn_dims = (int( (self.img_h / (self.pool_size_1 * self.pool_size_2)) * self.conv_num_filters), int(self.img_w / (self.pool_size_1 * self.pool_size_2))) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) inner = Permute(dims=(2, 1), name='permute')(inner) # cuts down input size going into RNN: inner = TimeDistributed( Dense(self.time_dense_size, activation=act, name='dense1'))(inner) # Two layers of bidirecitonal GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(self.rnn_size, return_sequences=True, name='gru1')(inner) gru_1b = GRU(self.rnn_size, return_sequences=True, go_backwards=True, name='gru1_b')(inner) gru1_merged = merge([gru_1, gru_1b], mode='sum') gru_2 = GRU(self.rnn_size, return_sequences=True, name='gru2')(gru1_merged) gru_2b = GRU(self.rnn_size, return_sequences=True, go_backwards=True)(gru1_merged) # transforms RNN output to character activations: inner = TimeDistributed(Dense(self.output_size, name='dense2'))(merge([gru_2, gru_2b], mode='concat')) y_pred = Activation('softmax', name='softmax')(inner) # Model(input=[input_data], output=y_pred).summary() labels = Input(name='the_labels', shape=[self.absolute_max_string_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name="ctc")( [y_pred, labels, input_length, label_length]) lr = 0.03 # clipnorm seems to speeds up convergence clipnorm = 5 sgd = SGD(lr=lr, decay=3e-7, momentum=0.9, nesterov=True, clipnorm=clipnorm) model = Model(input=[input_data, labels, input_length, label_length], output=[loss_out]) # model.summary() # the loss calc occurs elsewhere, so use a dummy lambda func for the loss if self.weight_file is not None: model.load_weights(self.weight_file) model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=sgd) self.model = model self._predictor = K.function([input_data], [y_pred]) return model
#get directory of input images and create array of images and store images in the directory to the array test_dir = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Test_Resized" #get labels pickle and convert to dataframe then sort by the filename to go along with the images test_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Testing_Input_Resized.pkl" test_labels = pd.read_pickle(test_labels_file) test_datagen = ImageDataGenerator(rescale=1./255,preprocessing_function=image_transform) test_generator = test_datagen.flow_from_dataframe(dataframe=test_labels,directory=test_dir,target_size=(108,192),x_col='Filename',y_col=['Right Ankle x','Right Knee x','Right Hip x','Left Hip x','Left Knee x','Left Ankle x','Pelvis x','Thorax x','Upper Neck x','Head Top x','Right Wrist x','Right Elbow x','Right Shoulder x','Left Shoulder x','Left Elbow x','Left Wrist x','Right Ankle y','Right Knee y','Right Hip y','Left Hip y','Left Knee y','Left Ankle y','Pelvis y','Thorax y','Upper Neck y','Head Top y','Right Wrist y','Right Elbow y','Right Shoulder y','Left Shoulder y','Left Elbow y','Left Wrist y'],class_mode='other',batch_size=8) #create model model = Sequential() #add model layers model.add(Conv2D(1, kernel_size=1, input_shape=(108,192,3), activation='relu')) model.add(Lambda(image_transform)) model.add(Conv2D(64, kernel_size=3, activation='relu')) model.add(Conv2D(64, kernel_size=3, activation='relu')) model.add(Conv2D(64, kernel_size=3, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(Conv2D(128, kernel_size=3, activation='relu')) model.add(Conv2D(128, kernel_size=3, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(Conv2D(256, kernel_size=3, activation='relu')) model.add(Conv2D(256, kernel_size=3, activation='relu')) model.add(Conv2D(256, kernel_size=3, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(Flatten()) model.add(Dense(32, activation='relu')) #compile model using accuracy to measure model performance
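# A hedged continuation of the comment above: compiling the keypoint regressor with a
# mean-squared-error loss and accuracy as an auxiliary metric, then training from the generator
# defined earlier (named for the test split here). Epochs and step count are illustrative.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit_generator(test_generator, steps_per_epoch=len(test_labels) // 8, epochs=10)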
margin = 1. return K.mean((1. - y_true) * K.square(y_pred) + y_true * K.square(K.maximum(margin - y_pred, 0.))) inp = Input((224, 224, 3)) mobile_model = MobileNetV2(include_top=False, input_shape=(224, 224, 3), input_tensor=inp, pooling='avg') x = Dense(512, activation='relu')(mobile_model.output) x = Dropout(0.3)(x) x = Dense(128)(x) x = Lambda(lambda x: K.l2_normalize(x, axis=1))(x) model_top = Model(inputs=inp, outputs=x) #model_top.summary() inp_1 = Input((224, 224, 3)) inp_2 = Input((224, 224, 3)) out_1 = model_top(inp_1) out_2 = model_top(inp_2) merge_layer = Lambda(euclidean_dist)([out_1, out_2]) model = Model(inputs=[inp_1, inp_2], outputs=merge_layer) #model.summary()
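# A minimal sketch of the euclidean_dist helper used by the merge Lambda above: it computes the
# L2 distance between the two embeddings, with K.epsilon() guarding the square root. The siamese
# model is then typically compiled with the contrastive loss whose body appears at the top of
# this snippet, e.g. model.compile(loss=contrastive_loss, optimizer='adam').
def euclidean_dist(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))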
def CreateModel(self): ''' Define the CNN/LSTM/CTC model using the Keras functional API. Input layer: a sequence of 200-dimensional feature vectors; the maximum length of one speech sample is set to 1600 frames (about 16 s). Hidden layers: convolution and pooling layers with 3x3 kernels and pooling windows of size 2. Hidden layer: fully connected layer. Output layer: fully connected layer with self.MS_OUTPUT_SIZE neurons and a softmax activation. CTC layer: uses the CTC loss as the loss function to realize connectionist temporal classification with multiple outputs. ''' input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', padding='same', kernel_initializer='he_normal')(input_data) # convolution layer layer_h1 = Dropout(0.05)(layer_h1) layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h1) # convolution layer layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2) # pooling layer # layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections to prevent overfitting layer_h3 = Dropout(0.05)(layer_h3) layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h3) # convolution layer layer_h4 = Dropout(0.1)(layer_h4) layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h4) # convolution layer layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5) # pooling layer layer_h6 = Dropout(0.1)(layer_h6) layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h6) # convolution layer layer_h7 = Dropout(0.15)(layer_h7) layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h7) # convolution layer layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8) # pooling layer layer_h9 = Dropout(0.15)(layer_h9) layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h9) # convolution layer layer_h10 = Dropout(0.2)(layer_h10) layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h10) # convolution layer layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11) # pooling layer layer_h12 = Dropout(0.2)(layer_h12) layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h12) # convolution layer layer_h13 = Dropout(0.2)(layer_h13) layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h13) # convolution layer layer_h15 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h14) # pooling layer # test=Model(inputs = input_data, outputs = layer_h12) # test.summary() layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape layer # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer # layer_h6 = Dropout(0.2)(layer_h5) # randomly drop some connections to prevent overfitting layer_h16 = Dropout(0.3)(layer_h16) layer_h17 = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h16) # fully connected layer layer_h17 = Dropout(0.3)(layer_h17) layer_h18 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h17) # fully connected layer y_pred = Activation('softmax', name='Activation0')(layer_h18) model_data = Model(inputs=input_data, outputs=y_pred) # model_data.summary() labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, 
input_length, label_length])#(layer_h6) # CTC loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ), name='ctc')( [y_pred, labels, input_length, label_length]) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) model.summary() # clipnorm seems to speed up convergence # sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) # opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8) # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=opt) # captures output of softmax so we can decode the output during visualization test_func = K.function([input_data], [y_pred]) # print('[*Hint] Model created successfully, model compiled successfully') print('[*Info] Create Model Successful, Compiles Model Successful. ') return model, model_data
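# A hedged sketch of the ctc_lambda_func method the CTC Lambda above refers to, following the
# standard Keras pattern: it simply wraps K.ctc_batch_cost around the softmax outputs.
def ctc_lambda_func(self, args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)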
def build_nn_model( self, element_dim=103, conv_window=3, conv_filters=64, rnn_dim=64, recipe_latent_dim=8, intermediate_dim=64, latent_dim=8, max_material_length=10, charset_size=50, ): self.latent_dim = latent_dim self.recipe_latent_dim = recipe_latent_dim self.original_dim = max_material_length * charset_size x_mat = Input(shape=(max_material_length, charset_size), name="material_in") conv_x1 = Conv1D(conv_filters, conv_window, padding="valid", activation="relu", name='conv_enc_1')(x_mat) conv_x2 = Conv1D(conv_filters, conv_window, padding="valid", activation="relu", name='conv_enc_2')(conv_x1) conv_x3 = Conv1D(conv_filters, conv_window, padding="valid", activation="relu", name='conv_enc_3')(conv_x2) h_flatten = Flatten()(conv_x3) h = Dense(intermediate_dim, activation="relu", name="hidden_enc")(h_flatten) z_mean_func = Dense(latent_dim, name="means_enc") z_log_var_func = Dense(latent_dim, name="vars_enc") z_mean = z_mean_func(h) z_log_var = z_log_var_func(h) def sample(args): z_mean, z_log_var = args epsilon = K.random_normal(shape=(latent_dim, ), mean=0.0, stddev=1.0) return z_mean + K.exp(z_log_var / 2) * epsilon z = Lambda(sample, name="lambda_sample")([z_mean, z_log_var]) c_element = Input(shape=(element_dim, ), name="cond_element_in") c_latent_recipe = Input(shape=(recipe_latent_dim, ), name="cond_latent_recipe_in") z_conditional = Concatenate(name="concat_cond")( [z, c_latent_recipe, c_element]) decoder_h = Dense(intermediate_dim, activation="relu", name="hidden_dec") decoder_h_repeat = RepeatVector(max_material_length, name="h_rep_dec") decoder_h_gru_1 = GRU(rnn_dim, return_sequences=True, name="recurrent_dec_1") decoder_h_gru_2 = GRU(rnn_dim, return_sequences=True, name="recurrent_dec_2") decoder_h_gru_3 = GRU(rnn_dim, return_sequences=True, name="recurrent_dec_3") decoder_mat = TimeDistributed(Dense(charset_size, activation='softmax'), name="means_material_dec") h_decoded = decoder_h(z_conditional) h_decode_repeat = decoder_h_repeat(h_decoded) gru_h_decode_1 = decoder_h_gru_1(h_decode_repeat) gru_h_decode_2 = decoder_h_gru_2(gru_h_decode_1) gru_h_decode_3 = decoder_h_gru_3(gru_h_decode_2) x_decoded_mat = decoder_mat(gru_h_decode_3) def vae_xent_loss(x, x_decoded_mean): x = K.flatten(x) x_decoded_mean = K.flatten(x_decoded_mean) rec_loss = self.original_dim * metrics.binary_crossentropy( x, x_decoded_mean) kl_loss = -0.5 * K.mean( 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) return rec_loss + kl_loss encoder = Model(inputs=[x_mat], outputs=[z_mean]) decoder_x_input = Input(shape=(latent_dim, )) decoder_inputs = Concatenate(name="concat_cond_dec")( [decoder_x_input, c_latent_recipe, c_element]) _h_decoded = decoder_h(decoder_inputs) _h_decode_repeat = decoder_h_repeat(_h_decoded) _gru_h_decode_1 = decoder_h_gru_1(_h_decode_repeat) _gru_h_decode_2 = decoder_h_gru_2(_gru_h_decode_1) _gru_h_decode_3 = decoder_h_gru_3(_gru_h_decode_2) _x_decoded_mat = decoder_mat(_gru_h_decode_3) decoder = Model(inputs=[decoder_x_input, c_latent_recipe, c_element], outputs=[_x_decoded_mat]) vae = Model(inputs=[x_mat, c_latent_recipe, c_element], outputs=[x_decoded_mat]) vae.compile(optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True), loss=vae_xent_loss, metrics=['categorical_accuracy']) self.vae = vae self.encoder = encoder self.decoder = decoder
vgg16 = VGG16(weights='imagenet', include_top=False) x = vgg16.output x = Flatten(name='flatten')(x) x = Dense(4096, activation = 'relu', name='fc1')(x) x = Dropout(0.2,name='drop1')(x) x = Dense(4096, activation = 'relu', name='fc2')(x) feature = Dropout(0.2,name='drop2')(x) base_model = Model(vgg16.input,feature) cls_out = Dense(751,activation='softmax', name='softmax')(base_model.output) cls_model = Model(vgg16.input,cls_out) input1 = Input(shape=input_shape) input2 = Input(shape=input_shape) fea1,fea2 = base_model(input1), base_model(input2) cls1,cls2 = cls_model(input1), cls_model(input2) distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([fea1, fea2]) model = Model(inputs = [input1, input2], outputs = [distance,cls1,cls2]) # train rms = RMSprop() model.compile(loss=[contrastive_loss,'categorical_crossentropy','categorical_crossentropy'], optimizer=rms, loss_weights=[1.,0.5,0.5]) model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], [tr_y,tr_c1,tr_c2], batch_size=128, epochs=epochs)
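# Hedged sketches of the helpers assumed by the distance Lambda above, following the standard
# Keras siamese-network example: eucl_dist_output_shape declares the (batch, 1) output of the
# distance Lambda, and contrastive_loss is the usual margin-based loss (margin = 1). The
# euclidean_distance function itself is analogous to the euclidean_dist sketch shown earlier.
def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    margin = 1.0
    return K.mean(y_true * K.square(y_pred) +
                  (1.0 - y_true) * K.square(K.maximum(margin - y_pred, 0.0)))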
def build_model(image_size, n_classes, mode='training', l2_regularization=0.0, min_scale=0.1, max_scale=0.9, scales=None, aspect_ratios_global=[0.5, 1.0, 2.0], aspect_ratios_per_layer=None, two_boxes_for_ar1=True, steps=None, offsets=None, limit_boxes=False, variances=[1.0, 1.0, 1.0, 1.0], coords='centroids', normalize_coords=False, subtract_mean=None, divide_by_stddev=None, swap_channels=False, confidence_thresh=0.01, iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False): ''' Build a Keras model with SSD architecture, see references. The model consists of convolutional feature layers and a number of convolutional predictor layers that take their input from different feature layers. The model is fully convolutional. The implementation found here is a smaller version of the original architecture used in the paper (where the base network consists of a modified VGG-16 extended by a few convolutional feature layers), but of course it could easily be changed to an arbitrarily large SSD architecture by following the general design pattern used here. This implementation has 7 convolutional layers and 4 convolutional predictor layers that take their input from layers 4, 5, 6, and 7, respectively. Most of the arguments that this function takes are only needed for the anchor box layers. In case you're training the network, the parameters passed here must be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading trained weights, the parameters passed here must be the same as the ones used to produce the trained weights. Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class. Note: Requires Keras v2.0 or later. Training currently works only with the TensorFlow backend (v1.0 or later). Arguments: image_size (tuple): The input image size in the format `(height, width, channels)`. n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO. mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode, the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes, the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding, non-maximum suppression, and top-k filtering. The difference between latter two modes is that 'inference' follows the exact procedure of the original Caffe implementation, while 'inference_fast' uses a faster prediction decoding procedure. l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers. min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. All scaling factors between the smallest and the largest will be linearly interpolated. Note that the second to last of the linearly interpolated scaling factors will actually be the scaling factor for the last predictor layer, while the last scaling factor is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer. This list must be one element longer than the number of predictor layers. 
The first `k` elements are the scaling factors for the `k` predictor layers, while the last element is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional last scaling factor must be passed either way, even if it is not being used. Defaults to `None`. If a list is passed, this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero. aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be generated. This list is valid for all predictor layers. The original implementation uses more aspect ratios for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead. Defaults to `[0.5, 1.0, 2.0]`. aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer. This allows you to set the aspect ratios for each predictor layer individually. If a list is passed, it overrides `aspect_ratios_global`. two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise. If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated using the scaling factor for the respective layer, the second one will be generated using geometric mean of said scaling factor and next bigger scaling factor. Defaults to `True`, following the original implementation. steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over the image. If the list contains ints/floats, then that value will be used for both spatial dimensions. If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`. If no steps are provided, then they will be computed such that the anchor box center points will form an equidistant grid within the image dimensions. offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be either floats or tuples of two floats. These numbers represent for each predictor layer how many pixels from the top and left boarders of the image the top-most and left-most anchor box center points should be as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions of the step size specified in the `steps` argument. If the list contains floats, then that value will be used for both spatial dimensions. If the list contains tuples of two floats, then they represent `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size, which is also the recommended setting. limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image boundaries. This would normally be set to `True`, but here it defaults to `False`, following the original implementation. variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater than 1.0 downscale the encoded predictions. 
If you want to reproduce the configuration of the original SSD, set this to `[0.1, 0.1, 0.2, 0.2]`, provided the coordinate format is 'centroids'. coords (str, optional): The box coordinate format to be used. Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the format `(xmin, xmax, ymin, ymax)`. normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates, i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values of any shape that is broadcast-compatible with the image shape. The elements of this array will be subtracted from the image pixel intensity values. For example, pass a list of three integers to perform per-channel mean normalization for color images. divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or floating point values of any shape that is broadcast-compatible with the image shape. The image pixel intensity values will be divided by the elements of this array. For example, pass a list of three integers to perform per-channel standard deviation normalization for color images. swap_channels (bool, optional): If `True`, the color channel order of the input images will be reversed, i.e. if the input color channel order is RGB, the color channels will be swapped to BGR. confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific positive class in order to be considered for the non-maximum suppression stage for the respective class. A lower value will result in a larger part of the selection process being done by the non-maximum suppression stage, while a larger value will result in a larger part of the selection process happening in the confidence thresholding stage. iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers to the box's confidence score. top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the non-maximum suppression stage. nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage. return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since you can always get their sizes easily via the Keras API, but it's convenient and less error-prone to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the spatial dimensions of the predictor layers), for inference you don't need them. Returns: model: The Keras SSD model. predictor_sizes (optional): A Numpy array containing the `(height, width)` portion of the output tensor shape for each convolutional predictor layer. During training, the generator function needs this in order to transform the ground truth labels into tensors of identical structure as the output tensors of the model, which is in turn needed for the cost function. References: https://arxiv.org/abs/1512.02325v5 ''' n_predictor_layers = 4 # The number of predictor conv layers in the network n_classes += 1 # Account for the background class. 
l2_reg = l2_regularization # Make the internal name shorter. img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2] ############################################################################ # Get a few exceptions out of the way. ############################################################################ if aspect_ratios_global is None and aspect_ratios_per_layer is None: raise ValueError("`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.") if aspect_ratios_per_layer: if len(aspect_ratios_per_layer) != n_predictor_layers: raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(n_predictor_layers, len(aspect_ratios_per_layer))) if (min_scale is None or max_scale is None) and scales is None: raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.") if scales: if len(scales) != n_predictor_layers+1: raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(n_predictor_layers+1, len(scales))) else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale` scales = np.linspace(min_scale, max_scale, n_predictor_layers+1) if len(variances) != 4: # We need one variance value for each of the four box coordinates raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances))) variances = np.array(variances) if np.any(variances <= 0): raise ValueError("All variances must be >0, but the variances given are {}".format(variances)) if (not (steps is None)) and (len(steps) != n_predictor_layers): raise ValueError("You must provide at least one step value per predictor layer.") if (not (offsets is None)) and (len(offsets) != n_predictor_layers): raise ValueError("You must provide at least one offset value per predictor layer.") ############################################################################ # Compute the anchor box parameters. ############################################################################ # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers. if aspect_ratios_per_layer: aspect_ratios = aspect_ratios_per_layer else: aspect_ratios = [aspect_ratios_global] * n_predictor_layers # Compute the number of boxes to be predicted per cell for each predictor layer. # We need this so that we know how many channels the predictor layers need to have. if aspect_ratios_per_layer: n_boxes = [] for ar in aspect_ratios_per_layer: if (1 in ar) & two_boxes_for_ar1: n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1 else: n_boxes.append(len(ar)) else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer if (1 in aspect_ratios_global) & two_boxes_for_ar1: n_boxes = len(aspect_ratios_global) + 1 else: n_boxes = len(aspect_ratios_global) n_boxes = [n_boxes] * n_predictor_layers if steps is None: steps = [None] * n_predictor_layers if offsets is None: offsets = [None] * n_predictor_layers ############################################################################ # Build the network. ############################################################################ x = Input(shape=(img_height, img_width, img_channels)) # The following identity layer is only needed so that the subsequent lambda layers can be optional. 
x1 = Lambda(lambda z: z, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x) if not (subtract_mean is None): x1 = Lambda(lambda z: z - np.array(subtract_mean), output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1) if not (divide_by_stddev is None): x1 = Lambda(lambda z: z / np.array(divide_by_stddev), output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1) if swap_channels and (img_channels == 3): x1 = Lambda(lambda z: z[...,::-1], output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1) conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1) conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1) # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3 conv1 = ELU(name='elu1')(conv1) pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1) conv2 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1) conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2) conv2 = ELU(name='elu2')(conv2) pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2) conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2) conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3) conv3 = ELU(name='elu3')(conv3) pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3) conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3) conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4) conv4 = ELU(name='elu4')(conv4) pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4) conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4) conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5) conv5 = ELU(name='elu5')(conv5) pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5) conv6 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5) conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6) conv6 = ELU(name='elu6')(conv6) pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6) conv7 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7')(pool6) conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7) conv7 = ELU(name='elu7')(conv7) # The next part is to add the convolutional predictor layers on top of the base network # that we defined above. Note that I use the term "base network" differently than the paper does. # To me, the base network is everything that is not convolutional predictor layers or anchor # box layers. In this case we'll have four predictor layers, but of course you could # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of # predictor layers on top of the base network by simply following the pattern shown here. # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7. 
# We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization) # We predict `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes` # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4` # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)` classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4) classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5) classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6) classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes7')(conv7) # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)` boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4) boxes5 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5) boxes6 = Conv2D(n_boxes[2] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6) boxes7 = Conv2D(n_boxes[3] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes7')(conv7) # Generate the anchor boxes # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)` anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0], limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors4')(boxes4) anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1], limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors5')(boxes5) anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2], limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors6')(boxes6) anchors7 = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3], limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors7')(boxes7) # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)` # We want the classes isolated in the last axis to perform softmax on them classes4_reshaped = Reshape((-1, n_classes), name='classes4_reshape')(classes4) classes5_reshaped = Reshape((-1, n_classes), 
name='classes5_reshape')(classes5) classes6_reshaped = Reshape((-1, n_classes), name='classes6_reshape')(classes6) classes7_reshaped = Reshape((-1, n_classes), name='classes7_reshape')(classes7) # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)` # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4) boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5) boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6) boxes7_reshaped = Reshape((-1, 4), name='boxes7_reshape')(boxes7) # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)` anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4) anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5) anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6) anchors7_reshaped = Reshape((-1, 8), name='anchors7_reshape')(anchors7) # Concatenate the predictions from the different layers and the associated anchor box tensors # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions, # so we want to concatenate along axis 1 # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes) classes_concat = Concatenate(axis=1, name='classes_concat')([classes4_reshaped, classes5_reshaped, classes6_reshaped, classes7_reshaped]) # Output shape of `boxes_concat`: (batch, n_boxes_total, 4) boxes_concat = Concatenate(axis=1, name='boxes_concat')([boxes4_reshaped, boxes5_reshaped, boxes6_reshaped, boxes7_reshaped]) # Output shape of `anchors_concat`: (batch, n_boxes_total, 8) anchors_concat = Concatenate(axis=1, name='anchors_concat')([anchors4_reshaped, anchors5_reshaped, anchors6_reshaped, anchors7_reshaped]) # The box coordinate predictions will go into the loss function just the way they are, # but for the class predictions, we'll apply a softmax activation layer first classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat) # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) predictions = Concatenate(axis=2, name='predictions')([classes_softmax, boxes_concat, anchors_concat]) if mode == 'training': model = Model(inputs=x, outputs=predictions) elif mode == 'inference': decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) elif mode == 'inference_fast': decoded_predictions = DecodeDetections2(confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) else: raise ValueError("`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'.".format(mode)) if return_predictor_sizes: # Get the spatial dimensions (height, width) of the convolutional predictor layers, we need them to generate the default boxes # The spatial dimensions are the same for the 
`classes` and `boxes` predictors predictor_sizes = np.array([classes4._keras_shape[1:3], classes5._keras_shape[1:3], classes6._keras_shape[1:3], classes7._keras_shape[1:3]]) return model, predictor_sizes else: return model
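# A hedged usage sketch for build_model above, with illustrative SSD7-style values; the image
# size, scales, aspect ratios and regularization rate here are examples only, not values taken
# from elsewhere in this document.
model = build_model(image_size=(300, 480, 3),
                    n_classes=5,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=[0.08, 0.16, 0.32, 0.64, 0.96],  # one scale per predictor layer plus one extra
                    aspect_ratios_global=[0.5, 1.0, 2.0],
                    two_boxes_for_ar1=True,
                    variances=[1.0, 1.0, 1.0, 1.0],
                    normalize_coords=True)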
alpha = K.softmax(e) # (batch_size, input_length) # eqn 5 c = K.batch_dot(h, alpha, axes=1) # (batch_size, encoding_dim) recurrence_result = K.expand_dims( K.concatenate([c, y_i], axis=1), dim=1) # (batch_size, 1, 2 * encoding_dim) expanded_h = Input(shape=(1, 2 * encoding_dim), name='expanded_h') gru = Sequential([ GRU(output_dim, return_sequences=False, input_shape=(1, 2 * encoding_dim)) ]) model = Model(input=[expanded_h], output=[gru(expanded_h)]) # (batch_size, 1, output_dim) return model(recurrence_result) output, _ = theano.scan(recurrence, sequences=K.permute_dimensions(y, [1, 0, 2]), non_sequences=h) layer = Lambda(lambda encoded_state: output, output_shape=(batch_size, output_dim)) layer.build((input_length, encoding_dim)) print(K.eval(layer(h)))
def train_test(): # generate_img() imgs, labels, labels_encode = load_img() # labels_input = Input([None], dtype='int32') img_w = 156 img_h = 64 conv_filters = 16 kernel_size = (3, 3) input_shape = (img_w, img_h, 1) pool_size = 2 time_dense_size = 32 rnn_size = 512 act = 'relu' input_data = Input(name='the_input', shape=input_shape, dtype='float32') inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv1')(input_data) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act, kernel_initializer='he_normal', name='conv2')(inner) inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner) conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged) # transforms RNN output to character activations: inner = Dense(len(chars) + 1, kernel_initializer='he_normal', name='dense2')(concatenate([gru_2, gru_2b])) y_pred = Activation('softmax', name='softmax')(inner) base_model = Model(inputs=input_data, outputs=y_pred) labels = Input(name='the_labels', shape=[4], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) # clipnorm seems to speeds up convergence sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) fit_model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss fit_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) # adadelta = Adadelta(lr=0.05) # fit_model.compile( # loss=lambda y_true, y_pred: y_pred, # optimizer=adadelta) # fit_model.summary() # import sys # sys.exit() fit_model.fit_generator( generate_data(imgs, labels_encode, 32), epochs=10, steps_per_epoch=100, verbose=1) fit_model.save('fit_model.h5') base_model.save('model.h5')
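# A hedged inference sketch: reload the saved base model and decode its softmax outputs with
# K.ctc_decode (greedy best-path), mapping label indices back to characters via `chars`.
# Preprocessing of `imgs_batch` is assumed to match the training generator.
from keras.models import load_model

base_model = load_model('model.h5')

def decode_predictions(imgs_batch):
    y_pred = base_model.predict(imgs_batch)                       # (batch, timesteps, len(chars) + 1)
    input_lengths = np.full((y_pred.shape[0],), y_pred.shape[1])
    decoded, _ = K.ctc_decode(y_pred, input_lengths, greedy=True)
    labels_out = K.get_value(decoded[0])                          # -1 marks padding
    return [''.join(chars[i] for i in row if i >= 0) for row in labels_out]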