def basic_cnn(nb_words, EMBEDDING_DIM,
              embedding_matrix, MAX_SEQUENCE_LENGTH,
              num_rnn, num_dense, rate_drop_rnn,
              rate_drop_dense, act):
    '''
    Build and compile the basic siamese CNN for sentence pairs.

    Two int32 token-id inputs share one trainable embedding and six
    Conv1D towers (kernel sizes 1..6). Every tower output is globally
    average-pooled, the pooled vectors of each sentence are concatenated,
    and the absolute difference and element-wise product of the two
    sentence vectors feed a small MLP with a 3-way softmax.

    Note: num_rnn, num_dense, rate_drop_rnn, rate_drop_dense and act are
    accepted but not read by this function.

    Returns the compiled keras Model.
    '''
    shared_embedding = Embedding(nb_words,
                                 EMBEDDING_DIM,
                                 weights=[embedding_matrix],
                                 input_length=MAX_SEQUENCE_LENGTH,
                                 trainable=True)

    # (filters, kernel_size) of every convolutional tower, in build order.
    tower_specs = [(128, 1), (128, 2), (128, 3), (128, 4), (32, 5), (32, 6)]
    towers = [
        Conv1D(filters=n_filters, kernel_size=width, padding='same',
               activation='relu')
        for n_filters, width in tower_specs
    ]

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_1 = shared_embedding(sequence_1_input)
    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_2 = shared_embedding(sequence_2_input)

    # Run both sentences through every shared tower and pool over time.
    pooled_a, pooled_b = [], []
    for tower in towers:
        for embedded, bucket in ((embedded_1, pooled_a),
                                 (embedded_2, pooled_b)):
            feature_map = tower(embedded)
            bucket.append(GlobalAveragePooling1D()(feature_map))

    mergea = concatenate(pooled_a)
    mergeb = concatenate(pooled_b)

    # Width of one sentence vector: 4 * 128 + 2 * 32.
    feature_width = sum(n_filters for n_filters, _ in tower_specs)

    # Explicit absolute difference between the two sentences, plus the
    # element-wise product as a second measure of similarity.
    diff = Lambda(lambda pair: K.abs(pair[0] - pair[1]),
                  output_shape=(feature_width, ))([mergea, mergeb])
    mul = Lambda(lambda pair: pair[0] * pair[1],
                 output_shape=(feature_width, ))([mergea, mergeb])
    merge = concatenate([diff, mul])

    # The MLP that determines the outcome.
    hidden = Dropout(0.2)(merge)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    hidden = BatchNormalization()(hidden)
    preds = Dense(3, activation='softmax')(hidden)

    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    return model
def train_test():
    """Build the conv + stacked-GRU recogniser, train it with CTC, save it.

    Two models share the same layers: ``base_model`` maps an image to
    per-timestep softmax activations, while ``fit_model`` additionally takes
    the labels and sequence lengths and outputs the loss produced by
    ``ctc_lambda_func``. After training, ``fit_model`` is written to
    'fit_model.h5' and ``base_model`` to 'model.h5'.
    """
    # generate_img()
    # The middle value returned by load_img() is not used here.
    imgs, _, labels_encode = load_img()

    img_w, img_h = 156, 64
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    act = 'relu'

    input_data = Input(name='the_input', shape=(img_w, img_h, 1),
                       dtype='float32')

    # Two identical conv + max-pool stages ('conv1'/'max1', 'conv2'/'max2').
    features = input_data
    for stage in (1, 2):
        features = Conv2D(conv_filters, kernel_size, padding='same',
                          activation=act, kernel_initializer='he_normal',
                          name='conv%d' % stage)(features)
        features = MaxPooling2D(pool_size=(pool_size, pool_size),
                                name='max%d' % stage)(features)

    # Each of the two poolings divides both spatial sizes by pool_size.
    shrink = pool_size ** 2
    conv_to_rnn_dims = (img_w // shrink, (img_h // shrink) * conv_filters)
    features = Reshape(target_shape=conv_to_rnn_dims,
                       name='reshape')(features)

    # Cuts down input size going into the RNN.
    features = Dense(time_dense_size, activation=act,
                     name='dense1')(features)

    def gru_pair(tensor, tag):
        """Apply a forward GRU `tag` and a go_backwards GRU `tag`_b."""
        forward = GRU(rnn_size, return_sequences=True,
                      kernel_initializer='he_normal', name=tag)(tensor)
        backward = GRU(rnn_size, return_sequences=True, go_backwards=True,
                       kernel_initializer='he_normal',
                       name=tag + '_b')(tensor)
        return forward, backward

    # Two layers of bidirectional GRUs: the first pair is summed, the
    # second pair is concatenated.
    gru_1, gru_1b = gru_pair(features, 'gru1')
    gru1_merged = add([gru_1, gru_1b])
    gru_2, gru_2b = gru_pair(gru1_merged, 'gru2')

    # Transforms RNN output to character activations (len(chars) + 1 units).
    char_scores = Dense(len(chars) + 1, kernel_initializer='he_normal',
                        name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(char_scores)
    base_model = Model(inputs=input_data, outputs=y_pred)

    labels = Input(name='the_labels', shape=[4], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters,
    # so the CTC loss is implemented in a lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence.
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    fit_model = Model(
        inputs=[input_data, labels, input_length, label_length],
        outputs=loss_out)
    # The loss calc occurs elsewhere, so use a dummy lambda func for the loss.
    fit_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                      optimizer=sgd)

    batches = generate_data(imgs, labels_encode, 32)
    fit_model.fit_generator(batches, epochs=10, steps_per_epoch=100,
                            verbose=1)

    fit_model.save('fit_model.h5')
    base_model.save('model.h5')
def build_nn_model(
        self,
        element_dim=103,
        conv_window=3,
        conv_filters=64,
        rnn_dim=64,
        recipe_latent_dim=8,
        intermediate_dim=64,
        latent_dim=8,
        max_material_length=10,
        charset_size=50,
):
    """Build the conditional VAE over material strings.

    Encoder: three valid-padded Conv1D layers -> Flatten -> Dense ->
    (z_mean, z_log_var). Decoder: the latent sample concatenated with the
    recipe-latent and element condition vectors -> Dense -> RepeatVector ->
    three GRUs -> per-timestep softmax over the charset.

    Args:
        element_dim: width of the element condition input.
        conv_window: kernel size of the encoder convolutions.
        conv_filters: number of filters of the encoder convolutions.
        rnn_dim: units of each decoder GRU.
        recipe_latent_dim: width of the recipe-latent condition input.
        intermediate_dim: units of the encoder/decoder hidden Dense layers.
        latent_dim: size of the latent code.
        max_material_length: number of timesteps of a material sequence.
        charset_size: number of symbols per timestep.

    Side effects:
        Sets self.latent_dim, self.recipe_latent_dim, self.original_dim,
        and stores the three models as self.vae (compiled), self.encoder
        and self.decoder. The decoder shares its layers with self.vae.
    """
    self.latent_dim = latent_dim
    self.recipe_latent_dim = recipe_latent_dim
    self.original_dim = max_material_length * charset_size

    # ---- Encoder -------------------------------------------------------
    x_mat = Input(shape=(max_material_length, charset_size),
                  name="material_in")
    encoded = x_mat
    for idx in (1, 2, 3):
        encoded = Conv1D(conv_filters, conv_window, padding="valid",
                         activation="relu",
                         name='conv_enc_%d' % idx)(encoded)
    h_flatten = Flatten()(encoded)
    h = Dense(intermediate_dim, activation="relu",
              name="hidden_enc")(h_flatten)

    z_mean = Dense(latent_dim, name="means_enc")(h)
    z_log_var = Dense(latent_dim, name="vars_enc")(h)

    def sample(args):
        """Reparameterisation: z = mean + exp(log_var / 2) * noise."""
        z_mean, z_log_var = args
        # Draw independent noise for every row of the batch. The previous
        # shape=(latent_dim,) produced a single noise vector that was
        # broadcast to (and therefore shared by) all samples in a batch.
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                                  mean=0.0, stddev=1.0)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    z = Lambda(sample, name="lambda_sample")([z_mean, z_log_var])

    # ---- Conditioning inputs ------------------------------------------
    c_element = Input(shape=(element_dim, ), name="cond_element_in")
    c_latent_recipe = Input(shape=(recipe_latent_dim, ),
                            name="cond_latent_recipe_in")
    z_conditional = Concatenate(name="concat_cond")(
        [z, c_latent_recipe, c_element])

    # ---- Decoder layers (shared by the VAE and the stand-alone decoder)
    decoder_h = Dense(intermediate_dim, activation="relu", name="hidden_dec")
    decoder_h_repeat = RepeatVector(max_material_length, name="h_rep_dec")
    decoder_grus = [
        GRU(rnn_dim, return_sequences=True, name="recurrent_dec_%d" % idx)
        for idx in (1, 2, 3)
    ]
    decoder_mat = TimeDistributed(Dense(charset_size, activation='softmax'),
                                  name="means_material_dec")

    def decode(conditioned_latent):
        """Apply the shared decoder stack to a conditioned latent tensor."""
        out = decoder_h_repeat(decoder_h(conditioned_latent))
        for gru in decoder_grus:
            out = gru(out)
        return decoder_mat(out)

    x_decoded_mat = decode(z_conditional)

    def vae_xent_loss(x, x_decoded_mean):
        """Scaled binary cross-entropy reconstruction term plus KL term."""
        x = K.flatten(x)
        x_decoded_mean = K.flatten(x_decoded_mean)
        rec_loss = self.original_dim * metrics.binary_crossentropy(
            x, x_decoded_mean)
        kl_loss = -0.5 * K.mean(
            1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return rec_loss + kl_loss

    # The encoder exposes only the latent mean.
    encoder = Model(inputs=[x_mat], outputs=[z_mean])

    # Stand-alone decoder: fed a latent code directly instead of a sample.
    decoder_x_input = Input(shape=(latent_dim, ))
    decoder_inputs = Concatenate(name="concat_cond_dec")(
        [decoder_x_input, c_latent_recipe, c_element])
    _x_decoded_mat = decode(decoder_inputs)
    decoder = Model(inputs=[decoder_x_input, c_latent_recipe, c_element],
                    outputs=[_x_decoded_mat])

    vae = Model(inputs=[x_mat, c_latent_recipe, c_element],
                outputs=[x_decoded_mat])
    vae.compile(optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                               epsilon=None, decay=0.0, amsgrad=True),
                loss=vae_xent_loss,
                metrics=['categorical_accuracy'])

    self.vae = vae
    self.encoder = encoder
    self.decoder = decoder
def gamba_unet():
    """Build the residual U-Net operating on 432x432x2 inputs.

    Input channel 0 is processed by the network. Input channel 1 (called
    ``weight_matrix`` in the code; presumably a per-pixel weight map -
    TODO confirm against the loss function) bypasses the network and is
    concatenated after the 7-channel softmax, giving 8 output channels.

    Every level follows the same residual pattern: a resampling convolution
    + BatchNorm whose output is kept as shortcut, then ReLU and two 3x3
    'same' convolutions with BatchNorm, the shortcut added before the final
    ReLU. Decoder levels concatenate the matching encoder output right
    after the first ReLU. All convolutions use L2 regularisation (0.01).

    Returns the uncompiled keras Model.
    """
    from keras import regularizers
    from keras.activations import softmax
    from keras.layers import Input, Conv2D, Conv2DTranspose, BatchNormalization, Concatenate, Lambda, Activation, Reshape, Add
    from keras.models import Model

    reg = 0.01

    def conv3_bn(tensor, filters):
        """3x3 'same' convolution followed by batch normalisation."""
        tensor = Conv2D(filters=filters, kernel_size=(3, 3), strides=1,
                        padding='same',
                        kernel_regularizer=regularizers.l2(reg))(tensor)
        return BatchNormalization(axis=-1)(tensor)

    def residual_tail(tensor, shortcut, filters):
        """conv-BN-ReLU-conv-BN, add the shortcut, final ReLU."""
        tensor = conv3_bn(tensor, filters)
        tensor = Activation('relu')(tensor)
        tensor = conv3_bn(tensor, filters)
        tensor = Add()([tensor, shortcut])
        return Activation('relu')(tensor)

    def down_level(tensor, filters, step):
        """Encoder level: step x step convolution with stride `step`."""
        tensor = Conv2D(filters=filters, kernel_size=(step, step),
                        strides=step,
                        kernel_regularizer=regularizers.l2(reg))(tensor)
        shortcut = BatchNormalization(axis=-1)(tensor)
        tensor = Activation('relu')(shortcut)
        return residual_tail(tensor, shortcut, filters)

    def up_level(tensor, skip, filters, step):
        """Decoder level: transposed convolution, then merge with `skip`."""
        tensor = Conv2DTranspose(
            filters=filters, kernel_size=(step, step), strides=step,
            kernel_regularizer=regularizers.l2(reg))(tensor)
        shortcut = BatchNormalization(axis=-1)(tensor)
        tensor = Activation('relu')(shortcut)
        tensor = Concatenate(axis=-1)([skip, tensor])
        return residual_tail(tensor, shortcut, filters)

    inputs = Input(shape=(432, 432, 2))

    # Split the two input channels into separate single-channel tensors.
    weight_matrix = Lambda(lambda z: z[:, :, :, 1])(inputs)
    weight_matrix = Reshape((432, 432, 1))(weight_matrix)
    image = Lambda(lambda z: z[:, :, :, 0])(inputs)
    image = Reshape((432, 432, 1))(image)

    # Encoder, spatial size 432 -> 216 -> 108 -> 54 -> 27. Level 1 uses a
    # 1x1 stride-1 convolution, levels 2-5 a 2x2 stride-2 convolution.
    skips = []
    tensor = image
    for filters, step in ((32, 1), (64, 2), (128, 2), (256, 2), (512, 2)):
        tensor = down_level(tensor, filters, step)
        skips.append(tensor)

    # Bottom level: 3x3 stride-3 convolution, 27 -> 9.
    tensor = down_level(tensor, 1024, 3)

    # Decoder mirrors the encoder, consuming the skips deepest-first.
    for filters, step in ((512, 3), (256, 2), (128, 2), (64, 2), (32, 2)):
        tensor = up_level(tensor, skips.pop(), filters, step)

    output = Conv2D(filters=7, kernel_size=(1, 1), strides=1,
                    kernel_regularizer=regularizers.l2(reg))(tensor)
    output = Lambda(lambda x: softmax(x, axis=-1))(output)
    # Carry the untouched second input channel along with the prediction.
    output = Concatenate(axis=-1)([output, weight_matrix])

    model = Model(inputs=inputs, outputs=output)
    return model
def create_model(anchors,
                 class_names,
                 feature_extractor='darknet19',
                 load_pretrained=False,
                 pretrained_path=None,
                 freeze_body=False):
    '''
    returns the body of the model and the model

    # Params:

    anchors: sequence of anchors; only its length is used to size the
        auxiliary inputs, the sequence itself is forwarded to yolo_loss
    class_names: sequence of class names; its length is the class count
    feature_extractor: 'darknet19' or 'mobilenet'
    load_pretrained: whether or not to load the pretrained model or
        initialize all weights
    pretrained_path: weights file used when load_pretrained is true; when
        it is empty a message is printed and no weights are loaded
    freeze_body: when true, every layer of the body is marked
        non-trainable

    # Returns:

    model_body: YOLOv2 with new output layer
    model: YOLOv2 with custom loss Lambda layer

    # Raises:

    ValueError: if feature_extractor is not a supported name
    '''
    # Validate up front with a real exception: the former `assert (False)`
    # is stripped under `python -O`, after which an unknown extractor
    # surfaced as a NameError on `yolo_model`.
    if feature_extractor not in ('darknet19', 'mobilenet'):
        raise ValueError(
            "Unsupported feature_extractor %r; expected 'darknet19' or "
            "'mobilenet'" % (feature_extractor, ))

    num_anchors = len(anchors)
    num_classes = len(class_names)

    detectors_mask_shape = (FEAT_H, FEAT_W, num_anchors, 1)
    matching_boxes_shape = (FEAT_H, FEAT_W, num_anchors, 5)

    # Create model input layers.
    image_input = Input(shape=(IMAGE_H, IMAGE_W, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    # Create model body.
    if feature_extractor == 'darknet19':
        yolo_model = yolo_body_darknet(
            image_input,
            num_anchors,
            num_classes,
            network_config=[SHALLOW_DETECTOR, USE_X0_FEATURE])
    else:  # 'mobilenet'
        yolo_model = yolo_body_mobilenet(
            image_input,
            num_anchors,
            num_classes,
            network_config=[SHALLOW_DETECTOR, USE_X0_FEATURE])

    if load_pretrained:
        if pretrained_path:
            yolo_model.load_weights(pretrained_path)
        else:
            print('No pretrained weights!')

    if freeze_body:
        # NOTE(review): this freezes ALL layers, including the last one.
        # Earlier documentation said "except for the last layer's" -
        # confirm which behaviour is intended before relying on it.
        for layer in yolo_model.layers:
            layer.trainable = False

    model_body = Model(image_input, yolo_model.output)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(yolo_loss,
                            output_shape=(1, ),
                            name='yolo_loss',
                            arguments={
                                'anchors': anchors,
                                'num_classes': num_classes
                            })([
                                model_body.output, boxes_input,
                                detectors_mask_input, matching_boxes_input
                            ])

    model = Model([
        model_body.input, boxes_input, detectors_mask_input,
        matching_boxes_input
    ], model_loss)

    return model_body, model
from keras.models import Sequential, Model
from keras.layers import Cropping2D, Lambda
from keras.layers import Dense, Flatten, Dropout, MaxPooling2D
from keras.layers.convolutional import Conv2D
from keras.callbacks import ModelCheckpoint

# Adding the original Nvidia model to train on the sample data for
# completing track 1
model = Sequential()

# trim image to only see section with road
# Did not help much
#model.add(Cropping2D(cropping=((50,20), (0,0))))

# preprocessing: scale pixel values from [0, 255] to [-1, 1]
model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=(160, 320, 3)))

# Convolutional feature extractor. The calls use the Keras 2 signature
# (kernel_size tuple + strides) instead of the Keras 1 positional
# `nb_row, nb_col` / `subsample` form, which only worked through the
# legacy-argument conversion.
model.add(Conv2D(24, (5, 5), strides=(2, 2), activation='elu'))
model.add(Conv2D(36, (5, 5), strides=(2, 2), activation='elu'))
model.add(Conv2D(48, (5, 5), strides=(2, 2), activation='elu'))
model.add(Conv2D(64, (3, 3), activation='elu'))
model.add(Conv2D(64, (3, 3), activation='elu'))
model.add(Dropout(0.5))

# Fully connected regressor ending in a single linear output.
model.add(Flatten())
model.add(Dense(100, activation='elu'))
model.add(Dense(50, activation='elu'))
model.add(Dense(10, activation='elu'))
model.add(Dense(1))

# Adding checkpoint
# Batch generators feeding the training and validation samples.
train_generator = generator(train_samples_x, train_samples_y, batch_size=128)
validation_generator = generator(validation_samples_x,
                                 validation_samples_y,
                                 batch_size=128)

# Hyper-parameters
epochs = 4
ch, row, col = 3, 65, 320  # Trimmed image format
drop_rate = 0.2

# Model layout: normalise -> crop -> five convolutions -> dense head.
model = Sequential()
model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(160, 320, 3)))
model.add(Cropping2D(cropping=((70, 25), (0, 0))))

# Three 5x5 convolutions with 2x2 subsampling ...
for _depth in (24, 36, 48):
    model.add(Convolution2D(_depth, 5, 5, subsample=(2, 2),
                            activation='relu'))
# ... followed by two 3x3 convolutions without subsampling.
for _ in range(2):
    model.add(Convolution2D(64, 3, 3, activation='relu'))

model.add(Flatten())
# Each hidden Dense layer (no activation given) is followed by dropout.
for _units in (100, 50, 10):
    model.add(Dense(_units))
    model.add(Dropout(drop_rate))
model.add(Dense(1))

model.compile(loss='mse', optimizer='adam')
def myCrossLayer(nb_flow=2, map_height=16, map_width=8, nb_layers=3,
                 window_len=12, nb_filter=64, external_dim=None,
                 filter_size=3):
    """
    the final model

    One Input is created per time step of the window; every input is turned
    into a 1024-wide feature by ``dense_conv3D``, given an extra axis by
    ``expand_dim_backend``, and the list of per-step features is handed to
    ``add_densenet``.

    :param nb_flow: number of measurements, also number of channels of each picture sample
    :param map_height: grid map height, here is 16
    :param map_width: grid map width, here is 8
    :param nb_layers: number of cnn layers
    :param window_len: number of time steps (one model input per step)
    :param nb_filter: number of convolution filters passed to dense_conv3D
    :param external_dim: size of the external feature, or None to build the
        model without external features
    :param filter_size: convolution kernel size (rows and columns)
    :return: the uncompiled keras Model
    """

    def frame_features(frame):
        """CNN feature of one frame, reshaped to 1024 and axis-expanded."""
        cnn_fea = dense_conv3D(nb_filter=nb_filter, nb_col=filter_size,
                               nb_row=filter_size, padding='same',
                               nb_layers=nb_layers, dense_units=1024,
                               dropout_rate=0.5)(frame)
        cnn_fea_flatten = Reshape([1024])(cnn_fea)
        return Lambda(expand_dim_backend)(cnn_fea_flatten)

    window_len_pic_fea = []
    main_inputs = []

    if external_dim is None:
        for _ in range(window_len):
            inputs = Input(shape=(nb_flow, map_height, map_width))
            main_inputs.append(inputs)
            window_len_pic_fea.append(frame_features(inputs))
    elif external_dim > 0:
        # add external feature here
        # NOTE(review): this branch looks unfinished (see the todos). The
        # nested-tuple Input shape and the re-use of the last frame input
        # as the external input are kept exactly as they were - confirm
        # the intended design before relying on it.
        for _ in range(window_len):
            # todo : use two tensor to represent the data and meta_data respectively
            inputs = Input(shape=((nb_flow, map_height, map_width),
                                  external_dim))
            main_inputs.append(inputs)
            window_len_pic_fea.append(frame_features(inputs))

        # The external input is currently the last frame input, so that
        # tensor ends up in main_inputs twice; Keras may reject duplicated
        # model inputs - TODO confirm.
        external_input = inputs
        # external_input = Input(shape=(external_dim,))
        main_inputs.append(external_input)

        # todo: change the code here
        embedding = Dense(nb_layers * 1024,
                          activation='relu')(external_input)
        external_out = Lambda(expand_dim_backend)(embedding)

        # Append the external embedding to every per-step feature.
        window_len_pic_fea = [
            Concatenate(axis=-1)([pic_fea, external_out])
            for pic_fea in window_len_pic_fea
        ]

    outputs = add_densenet(nb_flow=nb_flow, map_height=map_height,
                           map_width=map_width)(window_len_pic_fea)

    model = Model(inputs=main_inputs, outputs=outputs)
    return model
# Encoder: input -> hidden layer -> mean and log-variance of the latent code.
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    """Return mean + exp(log_var / 2) * noise for a (mean, log_var) pair."""
    mu, log_var = args
    noise = K.random_normal(shape=(batch_size, latent_dim),
                            mean=0.,
                            std=epsilon_std)
    sigma = K.exp(log_var / 2)
    return mu + sigma * noise


z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])

# Decoder layers are created first and applied afterwards.
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

##### KEY HERE
# The flat decoder output is reshaped to a 28 x 28 x 1 tensor.
x_decoded_mean2 = Reshape([28, 28, 1])(x_decoded_mean)


def generator_loss(x, x_decoded_mean):
    # NOTE(review): only the reconstruction term is visible here and nothing
    # is returned - the remainder of this function appears to be missing.
    reconstruction = objectives.binary_crossentropy(x, x_decoded_mean)
    xent_loss = original_dim * reconstruction
def build_model(self):
    """Create the sub-networks, wire the shared graph and build the four trainers.

    Builds ``f_enc``, ``f_dec``, ``f_dis`` and ``f_cls`` and connects them on a
    real image input, an attribute input and a prior latent input. It then
    creates ``cls_trainer``, ``dis_trainer``, ``dec_trainer`` and
    ``enc_trainer``. Before each trainer is built, exactly one sub-network is
    marked trainable and the other three are frozen. Every trainer output is a
    loss layer, so each is compiled with ``zero_loss``. All four trainers are
    registered through ``store_to_save``.
    """
    self.f_enc = self.build_encoder(output_dims=self.z_dims * 2)
    self.f_dec = self.build_decoder()
    self.f_dis = self.build_discriminator()
    self.f_cls = self.build_classifier()
    sub_networks = (self.f_enc, self.f_dec, self.f_dis, self.f_cls)

    def trainer_for(active_net, inputs, outputs):
        # Freeze everything except `active_net`, then wrap a fresh Model
        # for multi-GPU use. Flags are set in enc, dec, dis, cls order.
        for net in sub_networks:
            set_trainable(net, net is active_net)
        return multi_gpu_model(Model(inputs=inputs, outputs=outputs),
                               gpus=self.gpus)

    def new_adam():
        # Each trainer gets its own optimizer instance.
        return Adam(lr=1.0e-4, beta_1=0.9)

    # Algorithm: encode the real image, sample z, decode from z and from the prior
    real_images = Input(shape=self.input_shape)
    attrs = Input(shape=(self.num_attrs, ))

    z_params = self.f_enc([real_images, attrs])
    # The encoder emits 2 * z_dims values: first half mean, second half log-variance.
    z_avg = Lambda(lambda x: x[:, :self.z_dims],
                   output_shape=(self.z_dims, ))(z_params)
    z_log_var = Lambda(lambda x: x[:, self.z_dims:],
                       output_shape=(self.z_dims, ))(z_params)
    z = Lambda(sample_normal, output_shape=(self.z_dims, ))([z_avg, z_log_var])
    kl_loss = KLLossLayer()([z_avg, z_log_var])

    prior_z = Input(shape=(self.z_dims, ))
    recon_images = self.f_dec([z, attrs])
    sampled_images = self.f_dec([prior_z, attrs])

    # Discriminator outputs and features for real / reconstructed / sampled images
    y_real, dis_feat_real = self.f_dis(real_images)
    y_recon, dis_feat_recon = self.f_dis(recon_images)
    y_sampled, dis_feat_sampled = self.f_dis(sampled_images)
    d_loss = DiscriminatorLossLayer()([y_real, y_recon, y_sampled])

    # Classifier outputs and features for the same three images
    cls_real, cls_feat_real = self.f_cls(real_images)
    _, cls_feat_recon = self.f_cls(recon_images)
    _, cls_feat_sampled = self.f_cls(sampled_images)

    g_loss = GeneratorLossLayer()([
        real_images, recon_images, dis_feat_real, dis_feat_recon,
        cls_feat_real, cls_feat_recon
    ])
    gd_loss = FeatureMatchingLayer()([dis_feat_real, dis_feat_sampled])
    gc_loss = FeatureMatchingLayer()([cls_feat_real, cls_feat_sampled])
    c_loss = ClassifierLossLayer()([attrs, cls_real])

    all_inputs = [real_images, attrs, prior_z]

    # Build classifier trainer
    self.cls_trainer = trainer_for(self.f_cls, [real_images, attrs], [c_loss])
    self.cls_trainer.compile(loss=[zero_loss], optimizer=new_adam())
    self.cls_trainer.summary()

    # Build discriminator trainer
    self.dis_trainer = trainer_for(self.f_dis, all_inputs, [d_loss])
    self.dis_trainer.compile(
        loss=[zero_loss],
        optimizer=new_adam(),
        metrics=[discriminator_accuracy(y_real, y_recon, y_sampled)])
    self.dis_trainer.summary()

    # Build generator trainer (no summary is printed for this one)
    self.dec_trainer = trainer_for(self.f_dec, all_inputs,
                                   [g_loss, gd_loss, gc_loss])
    self.dec_trainer.compile(loss=[zero_loss, zero_loss, zero_loss],
                             optimizer=new_adam(),
                             metrics=[generator_accuracy(y_sampled, y_recon)])

    # Build autoencoder
    self.enc_trainer = trainer_for(self.f_enc, all_inputs, [g_loss, kl_loss])
    self.enc_trainer.compile(loss=[zero_loss, zero_loss],
                             optimizer=new_adam())
    self.enc_trainer.summary()

    # Store trainers
    for trainer_name in ('cls_trainer', 'dis_trainer', 'dec_trainer',
                         'enc_trainer'):
        self.store_to_save(trainer_name)
model.add(LeakyReLU(alpha=0.2)) model.add(Dense(np.prod(img_shape))) model.add(Activation('sigmoid')) model.add(Reshape(img_shape)) return model encoder = encoder_model() generator = generator_model() x = Input(shape=img_shape) z_mean, z_log_var = encoder(x) z = Lambda(sampling)([z_mean, z_log_var]) recon_x = generator(z) # instantiate VAE model vae = Model(x, recon_x) # Compute VAE loss xent_loss = K.sum(metrics.binary_crossentropy(x, recon_x), axis=1) kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) vae_loss = K.mean(xent_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='rmsprop')
x = Dense(2)(x) ip1 = PReLU(name='ip1')(x) ip2 = Dense(num_classes, activation='softmax')(ip1) model = Model(inputs=inputs, outputs=[ip2]) model.compile(loss="categorical_crossentropy", optimizer=SGD(lr=0.05), metrics=['accuracy']) if isCenterloss: lambda_c = 0.2 input_target = Input( shape=(1, )) # single value ground truth labels as inputs centers = Embedding(10, 2)(input_target) l2_loss = Lambda( lambda x: K.sum(K.square(x[0] - x[1][:, 0]), 1, keepdims=True), name='l2_loss')([ip1, centers]) model_centerloss = Model(inputs=[inputs, input_target], outputs=[ip2, l2_loss]) model_centerloss.compile( optimizer=SGD(lr=0.05), loss=["categorical_crossentropy", lambda y_true, y_pred: y_pred], loss_weights=[1, lambda_c], metrics=['accuracy']) # prepare callback histories = TYY_callbacks.Histories(isCenterloss) # fit if isCenterloss: random_y_train = np.random.rand(x_train.shape[0], 1)
print("loaded model",model.layers[0].input_shape[1]) # ml = model.layers[0].input_shape[1] # if (ml != max_length): # print("model length",ml,"different from data length",max_length) # max_length = ml else: # model = Sequential() # model.add(Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=False, input_shape=(max_length,))) # model.add(LSTM_use(hidden_size, return_sequences=True)) # model.add(LSTM_use(max_output + 1, return_sequences=False)) # model.add(Dense(max_output +1)) # model.add(Activation('softmax')) inputs = Input(shape=(None,None)) print("k",inputs.shape) x0 = Lambda(lambda x : x[:,0,:])(inputs) x1 = Lambda(lambda x : x[:,1,:])(inputs) x2 = Lambda(lambda x : x[:,2,:])(inputs) x3 = Lambda(lambda x : x[:,3,:])(inputs) # x4 = Lambda(lambda x : x[:,4,:])(inputs) # x5 = Lambda(lambda x : x[:,5,:])(inputs) # x6 = Lambda(lambda x : x[:,6,:])(inputs) # x7 = Lambda(lambda x : x[:,7,:])(inputs) # x8 = Lambda(lambda x : x[:,8,:])(inputs) # x9 = Lambda(lambda x : x[:,9,:])(inputs) embeds0 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x0) embeds1 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x1) embeds2 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x2) embeds3 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=not args.embed_not_trainable)(x3) # embeds4 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=True)(x4) # embeds5 = Embedding(len(vocab), len(vocab), embeddings_initializer='identity', trainable=True)(x5)
def basic_attention(nb_words, EMBEDDING_DIM,
                    embedding_matrix, MAX_SEQUENCE_LENGTH,
                    num_rnn, num_dense, rate_drop_rnn,
                    rate_drop_dense, act):
    '''
    This is the basic attention model

    model: input layer; embedding layer; rnn layer; attention layer; dense layer; output layer

    Both sentences go through the same embedding, bidirectional GRU and
    attention layers (weights are shared). The two attended sentence vectors are
    multiplied element-wise and classified into 3 classes by a small MLP.

    Fix: the attention scores have shape (batch, time, 1). The softmax layer
    normalises over the last axis, so applying it directly to that tensor
    normalised over a single element and produced a weight of 1.0 for every
    time step. The scores are now permuted to (batch, 1, time) before the
    softmax so the weights are normalised across time steps.

    Args:
        nb_words: vocabulary size of the embedding layer
        EMBEDDING_DIM: embedding vector size
        embedding_matrix: initial embedding weights (kept trainable)
        MAX_SEQUENCE_LENGTH: length of each input token sequence
        num_rnn: GRU units per direction
        num_dense: units of the hidden dense layer
        rate_drop_rnn: dropout and recurrent dropout rate of the GRU
        rate_drop_dense: dropout rate around the dense layer
        act: activation of the hidden dense layer

    Returns:
        Compiled Keras model taking two int32 sequences and producing a
        3-way softmax.
    '''
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    rnn_layer = Bidirectional(
        GRU(num_rnn,
            dropout=rate_drop_rnn,
            recurrent_dropout=rate_drop_rnn,
            return_sequences=True))
    attention_W = TimeDistributed(Dense(350, activation='tanh'))
    attention_w = TimeDistributed(Dense(1))
    attention_softmax = Activation('softmax')
    attention_sum = Lambda(lambda x: K.sum(x, axis=1))

    def attend(sequence_input):
        # Encode one sentence into a single attention-weighted vector.
        states = rnn_layer(embedding_layer(sequence_input))  # (batch, time, features)
        scores = attention_w(attention_W(states))  # (batch, time, 1)
        # Move time to the last axis BEFORE the softmax, which acts on axis -1.
        scores = Permute([2, 1])(scores)  # (batch, 1, time)
        weights = attention_softmax(scores)
        # Broadcast the weights over the feature axis.
        weighted = multiply([weights, Permute([2, 1])(states)])  # (batch, features, time)
        weighted = Permute([2, 1])(weighted)  # (batch, time, features)
        return attention_sum(weighted)  # sum over time

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    x1 = attend(sequence_1_input)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    x2 = attend(sequence_2_input)

    merged = multiply([x1, x2])
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)
    preds = Dense(3, activation='softmax')(merged)

    ########################################
    ## train the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    return model
def f(x):
    """Apply ``batchnorm`` to ``x`` through a ``Lambda`` layer.

    The declared output shape is ``x._keras_shape`` with every ``None`` entry
    removed.
    """
    known_dims = tuple(dim for dim in x._keras_shape if dim is not None)
    layer = Lambda(batchnorm, output_shape=known_dims)
    return layer(x)
batch_size] yield get_matrices(train_data_batch) def generate_validation_batch(): while True: for i in range(validation_part.shape[0] // batch_size): validation_data_batch = validation_part.iloc[i * batch_size:(i + 1) * batch_size] yield get_matrices(validation_data_batch) inp = Input(shape=(4, None, 300)) get_context_2 = Lambda(lambda batch: batch[:, 0, :, :])(inp) get_context_1 = Lambda(lambda batch: batch[:, 1, :, :])(inp) get_context_0 = Lambda(lambda batch: batch[:, 2, :, :])(inp) get_reply = Lambda(lambda batch: batch[:, 3, :, :])(inp) shared_lstm = LSTM(100) encoded_context_2 = shared_lstm(get_context_2) encoded_context_1 = shared_lstm(get_context_1) encoded_context_0 = shared_lstm(get_context_0) encoded_reply = shared_lstm(get_reply) stacked = keras.layers.concatenate( [encoded_context_2, encoded_context_1, encoded_context_0, encoded_reply]) drop0 = Dropout(0.4)(stacked)
def separable_inception_resnet_block(x,
                                     scale,
                                     block_type,
                                     block_idx,
                                     activation='relu'):
    """Adds an Inception-ResNet block whose inner convolutions are separable.

    Three block layouts are available, selected by `block_type`:
        - `'block35'`: three branches (1x1; 1x1 -> 3x3; 1x1 -> 3x3 -> 3x3)
        - `'block17'`: two branches (1x1; 1x1 -> 1x7 -> 7x1)
        - `'block8'`: two branches (1x1; 1x1 -> 1x3 -> 3x1)
    Every branch starts with a 1x1 `conv2d_bn`; the following convolutions are
    plain `SeparableConv2D` layers with `'same'` padding.

    # Arguments
        x: input tensor.
        scale: factor applied to the residual branch. With `r` the output of
            the residual branch, the block computes `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`.
        block_idx: value appended to `block_type` to form the layer-name
            prefix, e.g. `'block35_0'`.
        activation: activation applied to the block output; `None` applies
            no activation.

    # Returns
        Output tensor for the block.

    # Raises
        ValueError: if `block_type` is not one of `'block35'`,
            `'block17'` or `'block8'`.
    """
    # One entry per branch: (filters of the 1x1 conv2d_bn,
    #                        [(filters, kernel_size) of each SeparableConv2D]).
    if block_type == 'block35':
        layout = [(32, []),
                  (32, [(32, 3)]),
                  (32, [(48, 3), (64, 3)])]
    elif block_type == 'block17':
        layout = [(192, []),
                  (128, [(160, [1, 7]), (192, [7, 1])])]
    elif block_type == 'block8':
        layout = [(192, []),
                  (192, [(224, [1, 3]), (256, [3, 1])])]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    branches = []
    for reduce_filters, separable_specs in layout:
        branch = conv2d_bn(x, reduce_filters, 1)
        for n_filters, kernel in separable_specs:
            branch = SeparableConv2D(filters=n_filters,
                                     kernel_size=kernel,
                                     padding='same')(branch)
        branches.append(branch)

    block_name = block_type + '_' + str(block_idx)
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3

    mixed = Concatenate(axis=channel_axis,
                        name=block_name + '_mixed')(branches)
    # Project back to the channel count of `x` so the residual sum is valid.
    in_channels = K.int_shape(x)[channel_axis]
    up = conv2d_bn(mixed,
                   in_channels,
                   1,
                   activation=None,
                   use_bias=True,
                   name=block_name + '_conv')

    residual_sum = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
                          output_shape=K.int_shape(x)[1:],
                          arguments={'scale': scale},
                          name=block_name)
    x = residual_sum([x, up])
    if activation is None:
        return x
    return Activation(activation, name=block_name + '_ac')(x)
def build_model(self,
                relu_target,
                input_tensor,
                style_encoded_tensor=None,
                batch_size=8,
                feature_weight=1,
                pixel_weight=1,
                tv_weight=0,
                learning_rate=1e-4,
                lr_decay=5e-5):
    '''Build the EncoderDecoder architecture for a given relu layer.

    The content images are encoded with the VGG sub-model ending at
    `relu_target`, decoded back to an image, and the decoded image is encoded
    again. In 'train' mode the losses, train op and summaries are also built;
    in any other mode those fields are returned as None and the decoder input
    goes through a `tf.cond` on `self.apply_wct` that selects `wct_tf(...)` or
    the unchanged encoding.

    Args:
        relu_target: Layer of VGG to decode from
        input_tensor: If None then a placeholder will be created, else use this tensor as the input to the encoder
        style_encoded_tensor: Tensor for style image features at the same relu layer. Used only at test time.
        batch_size: Batch size for training (not referenced in this method body)
        feature_weight: Float weight for feature reconstruction loss
        pixel_weight: Float weight for pixel reconstruction loss
        tv_weight: Float weight for total variation loss; the TV term is a constant 0 unless this is > 0
        learning_rate: Float LR
        lr_decay: Float decay passed to `torch_decay` for training

    Returns:
        EncoderDecoder namedtuple with input/encoding/output tensors and ops for training.
    '''
    # NOTE(review): block nesting below was reconstructed from a source whose
    # indentation had been lost -- confirm the name-scope nesting against the
    # original file.
    with tf.name_scope('encoder_decoder_' + relu_target):

        ### Build encoder for reluX_1
        with tf.name_scope('content_encoder_' + relu_target):
            if input_tensor is None:
                # This is the first level encoder that takes original content imgs
                content_imgs = tf.placeholder_with_default(
                    tf.constant([[[[0., 0., 0.]]]]),
                    shape=(None, None, None, 3),
                    name='content_imgs')
            else:
                # This is an intermediate-level encoder that takes output tensor from previous level as input
                content_imgs = input_tensor

            # Build content layer encoding model
            content_layer = self.vgg_model.get_layer(relu_target).output
            content_encoder_model = Model(inputs=self.vgg_model.input,
                                          outputs=content_layer)

            # Setup content layer encodings for content images
            content_encoded = content_encoder_model(content_imgs)

        ### Build style encoder & WCT if test mode
        if self.mode != 'train':
            # Apply WCT if flag is set to true. Otherwise, pass content_encoded along unchanged.
            with tf.name_scope('wct_' + relu_target):
                decoder_input = tf.cond(
                    self.apply_wct, lambda: wct_tf(
                        content_encoded, style_encoded_tensor, self.alpha),
                    lambda: content_encoded)
        else:
            decoder_input = content_encoded

        ### Build decoder
        with tf.name_scope('decoder_' + relu_target):
            # Channel count of the encoding fixes the decoder's input depth.
            n_channels = content_encoded.get_shape()[-1].value
            decoder_model = self.build_decoder(input_shape=(None, None,
                                                            n_channels),
                                               relu_target=relu_target)

            # Wrap the decoder_input tensor so that it has the proper shape for decoder_model
            decoder_input_wrapped = tf.placeholder_with_default(
                decoder_input, shape=[None, None, None, n_channels])

            # Reconstruct/decode from encoding
            decoded = decoder_model(
                Lambda(lambda x: x)(decoder_input_wrapped)
            )  # Lambda converts TF tensor to Keras

        # Content layer encoding for stylized out
        decoded_encoded = content_encoder_model(decoded)

    if self.mode == 'train':  # Train & summary ops only needed for training phase
        ### Losses
        with tf.name_scope('losses_' + relu_target):
            # Feature loss between encodings of original & reconstructed
            feature_loss = feature_weight * mse(decoded_encoded,
                                                content_encoded)

            # Pixel reconstruction loss between decoded/reconstructed img and original
            pixel_loss = pixel_weight * mse(decoded, content_imgs)

            # Total Variation loss
            if tv_weight > 0:
                tv_loss = tv_weight * tf.reduce_mean(
                    tf.image.total_variation(decoded))
            else:
                tv_loss = tf.constant(0.)

            total_loss = feature_loss + pixel_loss + tv_loss

        ### Training ops
        with tf.name_scope('train_' + relu_target):
            global_step = tf.Variable(0,
                                      name='global_step_train',
                                      trainable=False)
            # self.learning_rate = tf.train.exponential_decay(learning_rate, self.global_step, 100, 0.96, staircase=False)
            # From here on `learning_rate` is whatever torch_decay returns,
            # not the float argument.
            learning_rate = torch_decay(learning_rate, global_step, lr_decay)
            d_optimizer = tf.train.AdamOptimizer(learning_rate,
                                                 beta1=0.9,
                                                 beta2=0.999)

            # Only train decoder vars, encoder is frozen.
            # Selection is by variable name: it relies on the
            # 'decoder_<relu_target>' name scope used above.
            d_vars = [
                var for var in tf.trainable_variables()
                if 'decoder_' + relu_target in var.name
            ]

            train_op = d_optimizer.minimize(total_loss,
                                            var_list=d_vars,
                                            global_step=global_step)

        ### Loss & image summaries
        with tf.name_scope('summary_' + relu_target):
            feature_loss_summary = tf.summary.scalar(
                'feature_loss', feature_loss)
            pixel_loss_summary = tf.summary.scalar('pixel_loss', pixel_loss)
            tv_loss_summary = tf.summary.scalar('tv_loss', tv_loss)
            total_loss_summary = tf.summary.scalar('total_loss', total_loss)

            content_imgs_summary = tf.summary.image(
                'content_imgs', content_imgs)
            decoded_images_summary = tf.summary.image(
                'decoded_images', clip(decoded))

            for var in d_vars:
                tf.summary.histogram(var.op.name, var)

            # merge_all collects every summary registered so far, not only
            # the ones created in this scope.
            summary_op = tf.summary.merge_all()
    else:
        # For inference set unneeded ops to None
        pixel_loss, feature_loss, tv_loss, total_loss, train_op, global_step, learning_rate, summary_op = [
            None
        ] * 8

    # Put it all together
    encoder_decoder = EncoderDecoder(
        content_input=content_imgs,
        content_encoder_model=content_encoder_model,
        content_encoded=content_encoded,
        style_encoded=style_encoded_tensor,
        decoder_input=decoder_input,
        decoder_model=decoder_model,
        decoded=decoded,
        decoded_encoded=decoded_encoded,
        pixel_loss=pixel_loss,
        feature_loss=feature_loss,
        tv_loss=tv_loss,
        total_loss=total_loss,
        train_op=train_op,
        global_step=global_step,
        learning_rate=learning_rate,
        summary_op=summary_op)

    return encoder_decoder
center_angle = float(batch_sample[3]) images.append(img) angles.append(center_angle) # trim image to only see section with road X_train = np.array(images) y_train = np.array(angles) yield sklearn.utils.shuffle(X_train, y_train) # compile and train the model using the generator function train_generator = generator(train_samples, batch_size=batch_size) validation_generator = generator(validation_samples, batch_size=batch_size) model = Sequential([ Lambda(lambda x: x / 255.0 - 0.5, input_shape=(160, 320, 3)), Cropping2D(cropping=((65, 25), (0, 0))), Conv2D(filters=24, kernel_size=(5, 5), strides=strides_1, padding=padding, activation="elu"), MaxPool2D(strides=(2, 2)), Conv2D(filters=36, kernel_size=(5, 5), strides=strides_1, padding=padding, activation="elu"), MaxPool2D(strides=(2, 2)), Conv2D(filters=48, kernel_size=(5, 5),
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):
    """Instantiates the Inflated 3D Inception v1 architecture.

    Optionally loads weights pre-trained on Kinetics. Note that when using
    TensorFlow, for best performance you should set
    `image_data_format='channels_last'` in your Keras config
    at ~/.keras/keras.json.
    The data format convention used by the model is the one specified in your
    Keras config file.

    Note that the default input frame (image) size for this model is 224x224.

    # Arguments
        include_top: whether to include the classification layer at the top
            of the network.
        weights: `None` (random initialization), one of the names listed in
            `WEIGHTS_NAME` (in order: `rgb_kinetics_only`,
            `flow_kinetics_only`, `rgb_imagenet_and_kinetics`,
            `flow_imagenet_and_kinetics`), or a path to an existing weights
            file.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape has to be
            `(NUM_FRAMES, 224, 224, 3)` with `channels_last` data format or
            `(NUM_FRAMES, 3, 224, 224)` with `channels_first` data format).
            NUM_FRAMES should be no smaller than 8 and width and height no
            smaller than 32. E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional, dropout probability applied in the dropout
            layer after the global average pooling layer. Only used when
            `include_top` is True.
        endpoint_logit: (boolean) optional. If True, the model ends at the
            logits; if False, a softmax is applied after the logits.
            Only used when `include_top` is True.
        classes: optional number of classes to classify images into, only to
            be specified if `include_top` is True and no pre-trained
            `weights` name is given.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in WEIGHTS_NAME or weights is None
            or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or %s' %
                         str(WEIGHTS_NAME) + ' '
                         'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError('If using `weights` as one of these %s, with `include_top`'
                         ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_frame_size=224,
                                      min_frame_size=32,
                                      default_num_frames=64,
                                      min_num_frames=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Axis layout: channels_first is (batch, C, T, H, W),
    # channels_last is (batch, T, H, W, C).
    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else 4
    frame_axis = 2 if channels_first else 1

    def mixed_module(tensor, tag, filters):
        # One Inception module. Layers are created in a fixed order and with
        # fixed names so that pre-trained weight files keep matching.
        f_0a, f_1a, f_1b, f_2a, f_2b, f_3b = filters
        branch_0 = conv3d_bn(tensor, f_0a, 1, 1, 1, padding='same',
                             name='Conv3d_%s_0a_1x1' % tag)

        branch_1 = conv3d_bn(tensor, f_1a, 1, 1, 1, padding='same',
                             name='Conv3d_%s_1a_1x1' % tag)
        branch_1 = conv3d_bn(branch_1, f_1b, 3, 3, 3, padding='same',
                             name='Conv3d_%s_1b_3x3' % tag)

        branch_2 = conv3d_bn(tensor, f_2a, 1, 1, 1, padding='same',
                             name='Conv3d_%s_2a_1x1' % tag)
        branch_2 = conv3d_bn(branch_2, f_2b, 3, 3, 3, padding='same',
                             name='Conv3d_%s_2b_3x3' % tag)

        branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                                name='MaxPool2d_%s_3a_3x3' % tag)(tensor)
        branch_3 = conv3d_bn(branch_3, f_3b, 1, 1, 1, padding='same',
                             name='Conv3d_%s_3b_1x1' % tag)

        return layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                                  axis=channel_axis,
                                  name='Mixed_%s' % tag)

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same',
                  name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_3a_3x3')(x)

    # Filters per module: (0a, 1a, 1b, 2a, 2b, 3b)
    x = mixed_module(x, '3b', (64, 96, 128, 16, 32, 32))
    x = mixed_module(x, '3c', (128, 128, 192, 32, 96, 64))

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_4a_3x3')(x)

    x = mixed_module(x, '4b', (192, 96, 208, 16, 48, 64))
    x = mixed_module(x, '4c', (160, 112, 224, 24, 64, 64))
    x = mixed_module(x, '4d', (128, 128, 256, 24, 64, 64))
    x = mixed_module(x, '4e', (112, 144, 288, 32, 64, 64))
    x = mixed_module(x, '4f', (256, 160, 320, 32, 128, 128))

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_5a_2x2')(x)

    x = mixed_module(x, '5b', (256, 160, 320, 32, 128, 128))
    x = mixed_module(x, '5c', (384, 192, 384, 48, 128, 128))

    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
                      use_bias=True, use_activation_fn=False, use_bn=False,
                      name='Conv3d_6a_1x1')

        # The frame axis depends on the data format; it used to be read from
        # axis 1 unconditionally, which is the channel axis in channels_first.
        num_frames_remaining = int(x.shape[frame_axis])
        if channels_first:
            x = Reshape((classes, num_frames_remaining))(x)
            # logits (raw scores for each class), averaged over frames
            x = Lambda(lambda x: K.mean(x, axis=2, keepdims=False),
                       output_shape=lambda s: (s[0], s[1]))(x)
        else:
            x = Reshape((num_frames_remaining, classes))(x)
            # logits (raw scores for each class), averaged over frames
            x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                       output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        # Height and width follow the frame axis in both data formats.
        h = int(x.shape[frame_axis + 1])
        w = int(x.shape[frame_axis + 2])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)

    inputs = img_input
    # create model
    model = Model(inputs, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        # Position in WEIGHTS_NAME selects the key used by the URL tables
        # and the cached file name.
        weight_keys = ('rgb_kinetics_only',
                       'flow_kinetics_only',
                       'rgb_imagenet_and_kinetics',
                       'flow_imagenet_and_kinetics')
        key = weight_keys[WEIGHTS_NAME.index(weights)]
        if include_top:
            weights_url = WEIGHTS_PATH[key]
            model_name = 'i3d_inception_%s.h5' % key
        else:
            weights_url = WEIGHTS_PATH_NO_TOP[key]
            model_name = 'i3d_inception_%s_no_top.h5' % key

        downloaded_weights_path = get_file(model_name, weights_url,
                                           cache_subdir='models')
        model.load_weights(downloaded_weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights)

    return model
def YOLOMODEL(path):
    """Build the full YOLO detection network and load its weights.

    The backbone is a stack of 22 ``Conv2D -> BatchNormalization ->
    LeakyReLU(0.1)`` stages (named ``conv_<i>`` / ``norm_<i>``) with a
    pass-through branch taken after stage 13, wrapped in a sub-model named
    ``FULLYOLO``. A 1x1 detection convolution (``conv_23``) and a reshape to
    ``(grid_h, grid_w, nb_box, 4 + 1 + nb_class)`` are placed on top.

    The function relies on the module-level names ``input_size``,
    ``max_box_per_image``, ``nb_box``, ``nb_class`` and ``space_to_depth_x2``.

    :param path: weight file handed to ``model.load_weights``.
    :return: Keras ``Model`` taking ``[input_image, true_boxes]``.
    """

    def conv_bn_leaky(tensor, index, filters, kernel):
        # One backbone stage; names must stay 'conv_<i>' / 'norm_<i>'.
        tensor = Conv2D(filters, (kernel, kernel),
                        strides=(1, 1),
                        padding='same',
                        name='conv_{}'.format(index),
                        use_bias=False)(tensor)
        tensor = BatchNormalization(name='norm_{}'.format(index))(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    input_image = Input(shape=(input_size, input_size, 3))
    true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    # (stage index, filters, square kernel size) for stages 1..13.
    front_stages = (
        (1, 32, 3), (2, 64, 3), (3, 128, 3), (4, 64, 1), (5, 128, 3),
        (6, 256, 3), (7, 128, 1), (8, 256, 3), (9, 512, 3), (10, 256, 1),
        (11, 512, 3), (12, 256, 1), (13, 512, 3),
    )
    # Stages that are followed by a 2x2 max-pooling.
    pooled_after = {1, 2, 5, 8, 13}

    x = input_image
    skip_connection = None
    for index, filters, kernel in front_stages:
        x = conv_bn_leaky(x, index, filters, kernel)
        if index == 13:
            # The pass-through branch taps the un-pooled stage-13 output.
            skip_connection = x
        if index in pooled_after:
            x = MaxPooling2D(pool_size=(2, 2))(x)

    # Stages 14..20 run at the coarsest resolution.
    back_stages = (
        (14, 1024, 3), (15, 512, 1), (16, 1024, 3), (17, 512, 1),
        (18, 1024, 3), (19, 1024, 3), (20, 1024, 3),
    )
    for index, filters, kernel in back_stages:
        x = conv_bn_leaky(x, index, filters, kernel)

    # Stage 21: squeeze the pass-through branch, rearrange it with
    # space_to_depth_x2 and merge it with the main branch.
    skip_connection = conv_bn_leaky(skip_connection, 21, 64, 1)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)
    x = concatenate([skip_connection, x])

    # Stage 22
    x = conv_bn_leaky(x, 22, 1024, 3)

    feature_extractor = Model(input_image, x, name='FULLYOLO')
    features = feature_extractor(input_image)
    grid_h, grid_w = feature_extractor.get_output_shape_at(-1)[1:3]

    # make the object detection layer
    # Keep a handle on the layer object so it can be re-initialised below
    # without relying on its position inside ``model.layers``.
    detection_layer = Conv2D(nb_box * (4 + 1 + nb_class), (1, 1),
                             strides=(1, 1),
                             padding='same',
                             name='conv_23',
                             kernel_initializer='lecun_normal')
    output = detection_layer(features)
    output = Reshape((grid_h, grid_w, nb_box, 4 + 1 + nb_class))(output)
    # true_boxes is routed through the graph but the Lambda only forwards
    # the first argument (the predictions).
    output = Lambda(lambda args: args[0])([output, true_boxes])

    model = Model([input_image, true_boxes], output)

    # initialize the weights of the detection layer
    weights = detection_layer.get_weights()
    new_kernel = np.random.normal(
        size=weights[0].shape) / (grid_h * grid_w)
    new_bias = np.random.normal(size=weights[1].shape) / (grid_h * grid_w)
    detection_layer.set_weights([new_kernel, new_bias])

    model.load_weights(path)
    return model
def _tf_grid_mask_frame(frame, image_size, n_neighbor_pixels, grid_side):
    """Compute a grid mask for TensorFlow.

    For every row ``i`` of ``frame`` a neighbourhood box of size ``bound``
    centred on that row's position is split into ``grid_side`` x
    ``grid_side`` cells, and every other row ``j`` falling inside the box is
    marked (one-hot) in the cell it occupies.

    :param frame: 2-D tensor with a statically known first dimension
        (``max_n_peds``). Column 0 is read as the id and the remaining
        columns as the position; presumably ``(x, y)`` in image-normalised
        coordinates -- TODO confirm against the caller.
    :param image_size: array-like divisor of ``n_neighbor_pixels``; must
        broadcast against the position columns.
    :param n_neighbor_pixels: side length of the neighbourhood box, divided
        by ``image_size`` to obtain ``bound``.
    :param grid_side: number of cells per side of the neighbourhood grid.
    :return: float32 tensor stacked over rows ``i`` and ``j`` with a last
        axis of length ``grid_side ** 2``, multiplied by the id mask.
    """
    max_n_peds = frame.shape.as_list()[0]
    pids = frame[:, 0]

    # --------------------
    # compute id_mask
    # --------------------
    def compute_id_mask(pids):
        # Outer product of the id column with itself: entry (i, j) is
        # non-zero only when both ids are non-zero.
        id_mask = tf.tensordot(tf.expand_dims(pids, axis=1),
                               tf.transpose(tf.expand_dims(pids, axis=1)),
                               axes=(1, 0))
        id_mask = tf.cast(id_mask, tf.bool)
        # mask self-to-self (diagonal elements)
        id_mask = tf.logical_and(
            tf.logical_not(tf.cast(tf.eye(max_n_peds), tf.bool)), id_mask)
        # Trailing axis of size 1 so the mask broadcasts over grid cells.
        id_mask = tf.expand_dims(id_mask, axis=2)
        id_mask = tf.cast(id_mask, tf.float32)
        return id_mask

    id_mask = Lambda(compute_id_mask)(pids)

    # Neighbourhood box size expressed in the same units as the positions.
    bound = n_neighbor_pixels / np.array(image_size)
    pos = frame[:, 1:]
    # Top-left / bottom-right corners of each row's neighbourhood box.
    tl = pos - bound / 2
    br = pos + bound / 2

    frame_mask = []
    for self_index in range(max_n_peds):
        # 1 where a position lies inside row self_index's box
        # (inclusive at the top-left edge, exclusive at the bottom-right).
        # The lambdas are invoked immediately, so capturing the loop
        # variable self_index is safe here.
        is_neighbor = Lambda(lambda pos: tf.cast(
            tf.reduce_all(tf.concat(
                [tl[self_index] <= pos, pos < br[self_index]], axis=1),
                          axis=1), np.int32))(pos)
        # Integer cell coordinates of every position inside that box.
        cell_xy = Lambda(lambda pos: tf.cast(
            tf.floor(((pos - tl[self_index]) / bound) * grid_side),
            tf.int32))(pos)
        # Flatten the 2-D cell coordinate to a single cell index.
        cell_index = cell_xy[:, 0] + cell_xy[:, 1] * grid_side
        # Non-neighbours get index 0 so the lookup below stays in range;
        # their rows are zeroed out again two statements later.
        cell_index = cell_index * is_neighbor
        # One-hot encode the cell index by picking rows of an identity matrix.
        self_frame_mask = tf.stack(tf.map_fn(
            lambda c: tf.eye(grid_side**2, dtype=np.int32)[c], cell_index),
                                   axis=0)
        self_frame_mask *= tf.expand_dims(is_neighbor, 1)
        frame_mask.append(self_frame_mask)
    frame_mask = tf.stack(frame_mask, axis=0)
    frame_mask = tf.cast(frame_mask, tf.float32)
    # mask not exist elements & self-to-self pair
    frame_mask *= id_mask
    return frame_mask
def yolo_body_mobilenet(inputs,
                        num_anchors,
                        num_classes,
                        weights='imagenet',
                        network_config=(False, False)):
    """Build a YOLO-style detector body on top of a MobileNet backbone.

    :param inputs: tensor passed to ``MobileNet`` as ``input_tensor``.
    :param num_anchors: number of anchors; the final convolution has
        ``num_anchors * (num_classes + 5)`` filters.
    :param num_classes: number of classes.
    :param weights: when equal to ``'imagenet'``, weights are copied
        layer by layer from an ImageNet-pretrained ``MobileNet`` built with
        ``input_shape=(224, 224, 3)``; any other value skips the copy.
    :param network_config: pair ``(shallow_detector, use_x0)``.
        ``shallow_detector`` truncates the backbone at layer 69 and uses
        512 instead of 1024 filters in the head; ``use_x0`` additionally
        concatenates the finest re-routed feature map.
    :return: Keras ``Model`` from the backbone input to the raw detection
        tensor.
    """
    # Backbone layer indices used as fine-grained (re-routed) features.
    # NOTE(review): the indices assume a specific MobileNet layer layout --
    # confirm against the installed Keras version.
    fine_grained_layers = [17, 27, 43]

    shallow_detector, use_x0 = network_config

    if shallow_detector:
        fine_grained_layers = fine_grained_layers[0:2]
        num_final_layers = 512
        final_feature_layer = 69
    else:
        fine_grained_layers = fine_grained_layers[1:]
        num_final_layers = 1024
        final_feature_layer = -1

    feature_model = MobileNet(input_tensor=inputs,
                              include_top=False,
                              weights=None)
    feature_model = Model(
        inputs=feature_model.input,
        outputs=feature_model.layers[final_feature_layer].output)

    if weights == 'imagenet':
        print('Loading pretrained weights from ImageNet...')
        trained_model = MobileNet(input_shape=(224, 224, 3),
                                  include_top=False,
                                  weights='imagenet')
        # Copy weights position by position; zip stops at the shorter
        # model, which matters for the truncated (shallow) backbone.
        for target_layer, source_layer in zip(feature_model.layers,
                                              trained_model.layers):
            target_layer.set_weights(source_layer.get_weights())

    x2 = feature_model.output
    x1 = feature_model.layers[fine_grained_layers[1]].output
    x0 = feature_model.layers[fine_grained_layers[0]].output

    x2 = _depthwise_conv_block(x2, num_final_layers, 1.0, block_id=14)
    x2 = _depthwise_conv_block(x2, num_final_layers, 1.0, block_id=15)

    # Reroute x1
    x1 = Conv2D(64, (1, 1), padding='same', use_bias=False,
                strides=(1, 1))(x1)
    x1 = BatchNormalization()(x1)
    # To keep keras to tensorflow conversion happy
    x1 = Lambda(relu_6)(x1)
    x1_reshaped = Lambda(space_to_depth_x2,
                         output_shape=space_to_depth_x2_output_shape,
                         name='space_to_depth_x2')(x1)

    # Reroute x0
    # NOTE: this branch is built even when use_x0 is False, exactly as
    # before, so the auto-generated names of later layers do not shift.
    x0 = Conv2D(16, (1, 1), padding='same', use_bias=False,
                strides=(1, 1))(x0)
    x0 = BatchNormalization()(x0)
    x0 = Lambda(relu_6)(x0)
    x0_reshaped = Lambda(space_to_depth_x4,
                         output_shape=space_to_depth_x4_output_shape,
                         name='space_to_depth_x4')(x0)

    if use_x0:
        x = concatenate([x0_reshaped, x1_reshaped, x2])
    else:
        x = concatenate([x1_reshaped, x2])

    x = _depthwise_conv_block(x, num_final_layers, 1.0, block_id=16)
    x = Conv2D(num_anchors * (num_classes + 5), (1, 1))(x)

    model = Model(inputs=feature_model.input, outputs=x)
    return model
std = (para_pred[:, :, 1]) print("mean.shape: ", mean.shape) print('std.shape: ', std.shape) likelihood = log_gaussian(y_true[:, :, 0], mean, std) print("likelihood.shape: ", likelihood.shape) print('==end of custom loss===') result = K.mean(likelihood) return -result #aux_in = Input(shape=(input_window_length,n_dims, ), name='aux_input') aux_in = Input(shape=(None, ), name='aux_input', dtype='int32') # in salute to https://gist.github.com/bzamecnik/a33052ec46ee7efeb217856d98a4fb5f aux_in_full = Lambda(K.one_hot, arguments={'num_classes': n_dims}, output_shape=(None, n_dims))(aux_in) x = Dense(20, activation='sigmoid')(aux_in_full) #x = Embedding(input_dim=370, output_dim=20, input_length = 192)(aux_in) main_in = Input(shape=( None, n_features, ), name="main_input") input1 = layers.concatenate([main_in, x]) lstm_out1 = LSTM(40, return_sequences=True)(input1) drop_out1 = Dropout(0.2)(lstm_out1) lstm_out2 = LSTM(40, return_sequences=True)(drop_out1) drop_out2 = Dropout(0.2)(lstm_out2) lstm_out3 = LSTM(40, return_sequences=True)(lstm_out2) drop_out3 = Dropout(0.2)(lstm_out3)
def yolo_body_darknet(inputs,
                      num_anchors,
                      num_classes,
                      weights='yolov2',
                      network_config=(False, False)):
    """Create YOLO_V2 model CNN body in Keras.

    :param inputs: tensor passed to ``darknet19`` as its input.
    :param num_anchors: number of anchors; the final convolution has
        ``num_anchors * (num_classes + 5)`` filters.
    :param num_classes: number of classes.
    :param weights: when equal to ``'yolov2'``, the model stored at
        ``model_data/yolo.h5`` is loaded and its weights are copied layer by
        layer into the backbone; any other value skips the copy.
    :param network_config: pair ``(shallow_detector, use_x0)``.
        ``shallow_detector`` truncates the backbone at layer 43 and uses
        512 instead of 1024 filters in the head; ``use_x0`` additionally
        concatenates the finest re-routed feature map.
    :return: Keras ``Model`` from the backbone inputs to the raw detection
        tensor.
    """
    # Backbone layer indices used as fine-grained (re-routed) features.
    # NOTE(review): the indices assume the layer layout produced by
    # ``darknet19`` -- confirm if that builder changes.
    fine_grained_layers = [17, 27, 43]

    shallow_detector, use_x0 = network_config

    if shallow_detector:
        fine_grained_layers = fine_grained_layers[0:2]
        num_final_layers = 512
        final_feature_layer = 43
    else:
        fine_grained_layers = fine_grained_layers[1:]
        num_final_layers = 1024
        final_feature_layer = -1

    feature_model = darknet19(inputs, include_top=False)
    feature_model = Model(
        inputs=feature_model.input,
        outputs=feature_model.layers[final_feature_layer].output)

    if weights == 'yolov2':
        print("Loading pre-trained yolov2 weights")
        yolo_path = os.path.join('model_data', 'yolo.h5')
        trained_model = load_model(yolo_path)
        # Copy weights position by position; zip stops at the shorter
        # model, so only the layers present in the backbone are filled.
        for target_layer, source_layer in zip(feature_model.layers,
                                              trained_model.layers):
            target_layer.set_weights(source_layer.get_weights())

    x2 = feature_model.output
    x1 = feature_model.layers[fine_grained_layers[1]].output
    x0 = feature_model.layers[fine_grained_layers[0]].output

    x2 = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x2)
    x2 = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x2)

    x1 = DarknetConv2D_BN_Leaky(64, (1, 1))(x1)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    x1_reshaped = Lambda(space_to_depth_x2,
                         output_shape=space_to_depth_x2_output_shape,
                         name='space_to_depth_x2')(x1)

    # NOTE: the x0 branch is built even when use_x0 is False, exactly as
    # before, so the auto-generated names of later layers do not shift.
    x0 = DarknetConv2D_BN_Leaky(16, (1, 1))(x0)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    x0_reshaped = Lambda(space_to_depth_x4,
                         output_shape=space_to_depth_x4_output_shape,
                         name='space_to_depth_x4')(x0)

    if use_x0:
        x = concatenate([x0_reshaped, x1_reshaped, x2])
    else:
        x = concatenate([x1_reshaped, x2])

    x = DarknetConv2D_BN_Leaky(num_final_layers, (3, 3))(x)
    x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
    return Model(feature_model.inputs, x)
def ssc_300(image_size,
            n_classes,
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=((1.0, 2.0, 0.5),
                                     (1.0, 2.0, 0.5, 3.0, 1.0 / 3.0),
                                     (1.0, 2.0, 0.5, 3.0, 1.0 / 3.0),
                                     (1.0, 2.0, 0.5, 3.0, 1.0 / 3.0),
                                     (1.0, 2.0, 0.5),
                                     (1.0, 2.0, 0.5)),
            two_boxes_for_ar1=True,
            steps=(8, 16, 32, 64, 100, 300),
            offsets=None,
            subtract_mean=(123, 117, 104),
            divide_by_stddev=None,
            swap_channels=(2, 1, 0),
            predictors=('conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
                        'conv9_2'),
            hidden_size=(250, 250, 100),
            output_activation=False,
            lstm=False,
            condense_predictors=False):
    """
    Build a Keras model with SSC300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD
    architecture, as described in the paper. On top of every selected feature
    map a small fully-connected head (three hidden layers) predicts
    ``n_classes`` values.

    Most of the arguments that this function takes are only needed for
    anchor box layers. This function builds no anchor box layers: the
    anchor-related arguments (`min_scale`, `max_scale`, `scales`,
    `aspect_ratios_global`, `aspect_ratios_per_layer`, `two_boxes_for_ar1`,
    `steps`, `offsets`) are only validated and do not influence the graph.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    References:
        https://arxiv.org/abs/1512.02325v5

    :param tuple image_size: The input image size in the format
        `(height, width, channels)`.
    :param int n_classes: The number of positive classes, e.g. 20 for Pascal
        VOC, 80 for MS COCO.
    :param float l2_regularization: The L2-regularization rate. Applies to
        all convolutional layers. Set to zero to deactivate
        L2-regularization.
    :param float min_scale: The smallest scaling factor for the size of the
        anchor boxes as a fraction of the shorter side of the input images.
    :param float max_scale: The largest scaling factor for the size of the
        anchor boxes as a fraction of the shorter side of the input images.
        All scaling factors between the smallest and the largest will be
        linearly interpolated. Note that the second to last of the linearly
        interpolated scaling factors will actually be the scaling factor for
        the last predictor layer, while the last scaling factor is used for
        the second box for aspect ratio 1 in the last predictor layer if
        `two_boxes_for_ar1` is `True`.
    :param list scales: A list of floats containing scaling factors per
        convolutional predictor layer. This list must be one element longer
        than the number of predictor layers. The first `k` elements are the
        scaling factors for the `k` predictor layers, while the last element
        is used for the second box for aspect ratio 1 in the last predictor
        layer if `two_boxes_for_ar1` is `True`. This additional last scaling
        factor must be passed either way, even if it is not being used. If a
        list is passed, this argument overrides `min_scale` and `max_scale`.
        All scaling factors must be greater than zero.
    :param list aspect_ratios_global: The list of aspect ratios for which
        anchor boxes are to be generated. This list is valid for all
        prediction layers.
    :param list aspect_ratios_per_layer: A list containing one aspect ratio
        list for each prediction layer. This allows you to set the aspect
        ratios for each predictor layer individually, which is the case for
        the original SSD300 implementation. If a list is passed, it overrides
        `aspect_ratios_global`.
    :param bool two_boxes_for_ar1: Only relevant for aspect ratio lists that
        contain 1. Will be ignored otherwise. If `True`, two anchor boxes
        will be generated for aspect ratio 1. The first will be generated
        using the scaling factor for the respective layer, the second one
        will be generated using geometric mean of said scaling factor and
        next bigger scaling factor.
    :param list steps: `None` or a list with as many elements as there are
        predictor layers. The elements can be either ints/floats or tuples of
        two ints/floats. These numbers represent for each predictor layer how
        many pixels apart the anchor box center points should be vertically
        and horizontally along the spatial grid over the image. If the list
        contains ints/floats, then that value will be used for both spatial
        dimensions. If the list contains tuples of two ints/floats, then they
        represent `(step_height, step_width)`. If no steps are provided, then
        they will be computed such that the anchor box center points will
        form an equidistant grid within the image dimensions.
    :param list offsets: `None` or a list with as many elements as there are
        predictor layers. The elements can be either floats or tuples of two
        floats. These numbers represent for each predictor layer how many
        pixels from the top and left borders of the image the top-most and
        left-most anchor box center points should be as a fraction of
        `steps`. The last bit is important: The offsets are not absolute
        pixel values, but fractions of the step size specified in the `steps`
        argument. If the list contains floats, then that value will be used
        for both spatial dimensions. If the list contains tuples of two
        floats, then they represent `(vertical_offset, horizontal_offset)`.
        If no offsets are provided, then they will default to 0.5 of the step
        size.
    :param list subtract_mean: `None` or an array-like object of integers or
        floating point values of any shape that is broadcast-compatible with
        the image shape. The elements of this array will be subtracted from
        the image pixel intensity values. For example, pass a list of three
        integers to perform per-channel mean normalization for color images.
    :param list divide_by_stddev: `None` or an array-like object of non-zero
        integers or floating point values of any shape that is
        broadcast-compatible with the image shape. The image pixel intensity
        values will be divided by the elements of this array. For example,
        pass a list of three integers to perform per-channel standard
        deviation normalization for color images.
    :param list swap_channels: Either `False` or a list of integers
        representing the desired order in which the input image channels
        should be swapped.
    :param list predictors: names of the convolutional layers used as
        predictors
    :param list hidden_size: number of neurons for the 3 hidden
        fully-connected layers
    :param bool output_activation: whether to include or not the softplus
        activation function after the hidden layers
    :param bool lstm: whether to add or not an LSTM cell on top of the hidden
        layer
    :param bool condense_predictors: whether to condense or not the
        predictors in a single prediction
    :return model: The Keras SSC300 model.
    :raises ValueError: if any of the argument consistency checks below
        fails.
    """
    # The number of predictor conv layers in the network is 6 for the
    # original SSD300.
    n_predictor_layers = len(predictors)
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = (image_size[0], image_size[1],
                                           image_size[2])

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))

    if len(hidden_size) != 3:
        raise ValueError(
            "3 hidden size values must be passed, but {} values were received."
            .format(len(hidden_size)))
    hidden_size = np.array(hidden_size)
    if np.any(hidden_size <= 0):
        raise ValueError(
            "All hidden sizes must be >0, but the sizes given are {}".format(
                hidden_size))

    if steps is not None and len(steps) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if offsets is not None and len(offsets) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        # Re-stack the channels in the requested order (any channel count).
        return K.stack([tensor[..., channel] for channel in swap_channels],
                       axis=-1)

    ############################################################################
    # Small layer factories shared by the whole base network.
    ############################################################################

    def conv(filters, kernel, name, padding='same', **extra):
        # ReLU convolution with he_normal init and L2 kernel regularisation.
        return Conv2D(filters, kernel,
                      activation='relu',
                      padding=padding,
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name=name,
                      **extra)

    def pool(name, pool_size=(2, 2), strides=(2, 2)):
        return MaxPooling2D(pool_size=pool_size,
                            strides=strides,
                            padding='same',
                            name=name)

    def prediction_head(features, name):
        # Flatten -> 3 x (Dense, BatchNorm, ReLU) -> Dense(n_classes).
        out = Flatten(name='{}_flat'.format(name))(features)
        for idx, units in enumerate(hidden_size, start=1):
            out = Dense(units, name='{}_d{}'.format(name, idx))(out)
            out = BatchNormalization(name='{}_bn{}'.format(name, idx))(out)
            out = Activation(activation='relu',
                             name='{}_r{}'.format(name, idx))(out)
        return Dense(n_classes, name='{}_pred'.format(name))(out)

    ############################################################################
    # Build the network.
    ############################################################################

    input_shape = (img_height, img_width, img_channels)
    x = Input(shape=input_shape)

    # The following identity layer is only needed so that the subsequent
    # lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=input_shape,
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization,
                    output_shape=input_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization,
                    output_shape=input_shape,
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=input_shape,
                    name='input_channel_swap')(x1)

    conv1_1 = conv(64, (3, 3), 'conv1_1')(x1)
    conv1_2 = conv(64, (3, 3), 'conv1_2')(conv1_1)
    pool1 = pool('pool1')(conv1_2)

    conv2_1 = conv(128, (3, 3), 'conv2_1')(pool1)
    conv2_2 = conv(128, (3, 3), 'conv2_2')(conv2_1)
    pool2 = pool('pool2')(conv2_2)

    conv3_1 = conv(256, (3, 3), 'conv3_1')(pool2)
    conv3_2 = conv(256, (3, 3), 'conv3_2')(conv3_1)
    conv3_3 = conv(256, (3, 3), 'conv3_3')(conv3_2)
    pool3 = pool('pool3')(conv3_3)

    conv4_1 = conv(512, (3, 3), 'conv4_1')(pool3)
    conv4_2 = conv(512, (3, 3), 'conv4_2')(conv4_1)
    conv4_3 = conv(512, (3, 3), 'conv4_3')(conv4_2)
    pool4 = pool('pool4')(conv4_3)

    conv5_1 = conv(512, (3, 3), 'conv5_1')(pool4)
    conv5_2 = conv(512, (3, 3), 'conv5_2')(conv5_1)
    conv5_3 = conv(512, (3, 3), 'conv5_3')(conv5_2)
    pool5 = pool('pool5', pool_size=(3, 3), strides=(1, 1))(conv5_3)

    # fc6 / fc7 are the convolutional replacements of the VGG dense layers.
    fc6 = conv(1024, (3, 3), 'fc6', dilation_rate=(6, 6))(pool5)
    fc7 = conv(1024, (1, 1), 'fc7')(fc6)

    conv6_1 = conv(256, (1, 1), 'conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = conv(512, (3, 3), 'conv6_2', padding='valid',
                   strides=(2, 2))(conv6_1)

    conv7_1 = conv(128, (1, 1), 'conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = conv(256, (3, 3), 'conv7_2', padding='valid',
                   strides=(2, 2))(conv7_1)

    conv8_1 = conv(128, (1, 1), 'conv8_1')(conv7_2)
    conv8_2 = conv(256, (3, 3), 'conv8_2', padding='valid',
                   strides=(1, 1))(conv8_1)

    conv9_1 = conv(128, (1, 1), 'conv9_1')(conv8_2)
    conv9_2 = conv(256, (3, 3), 'conv9_2', padding='valid',
                   strides=(1, 1))(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20,
                                   name='conv4_3_norm')(conv4_3)

    conv_features = {
        'conv4_3': conv4_3_norm,
        'fc7': fc7,
        'conv6_2': conv6_2,
        'conv7_2': conv7_2,
        'conv8_2': conv8_2,
        'conv9_2': conv9_2
    }

    ### Build the predictor layers on top of the base network
    predictor_layers = [
        prediction_head(conv_features[predictor], predictor)
        for predictor in predictors
    ]

    # Concatenate the output of the different predictors.
    # Every head yields (batch, n_classes), so `predictions` has shape
    # (batch, n_predictors * n_classes) until the optional Reshape below.
    predictions = Concatenate(axis=1, name='predictions1')(predictor_layers)

    if output_activation:
        predictions = Activation(activation='softplus')(predictions)

    if lstm:
        # Present the heads as a sequence of n_predictors steps to the LSTM.
        predictions = Reshape((n_predictor_layers, n_classes),
                              name='lstm_predictions_res')(predictions)
        predictions = Bidirectional(LSTM(20, return_sequences=False),
                                    name='lstm_predictions')(predictions)

    if condense_predictors:
        predictions = Dense(n_classes,
                            name='predictions_condensed')(predictions)

    return Model(inputs=x, outputs=predictions)
# Directory holding the resized test images.
test_dir = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Test_Resized"

# Pickled label table; it is read back as-is and handed to the generator.
test_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Testing_Input_Resized.pkl"
test_labels = pd.read_pickle(test_labels_file)

test_datagen = ImageDataGenerator(rescale=1./255,
                                  preprocessing_function=image_transform)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_labels,
    directory=test_dir,
    target_size=(108, 192),
    x_col='Filename',
    # Target columns: all 16 joint x-coordinates first, then all 16
    # y-coordinates, in the same joint order.
    y_col=['{} {}'.format(joint, axis)
           for axis in ('x', 'y')
           for joint in ('Right Ankle', 'Right Knee', 'Right Hip',
                         'Left Hip', 'Left Knee', 'Left Ankle',
                         'Pelvis', 'Thorax', 'Upper Neck', 'Head Top',
                         'Right Wrist', 'Right Elbow', 'Right Shoulder',
                         'Left Shoulder', 'Left Elbow', 'Left Wrist')],
    class_mode='other',
    batch_size=8)

# Create the model: the layers are listed in the order they are stacked.
model = Sequential([
    Conv2D(1, kernel_size=1, input_shape=(108, 192, 3), activation='relu'),
    Lambda(image_transform),
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=3, activation='relu'),
    Conv2D(128, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(256, kernel_size=3, activation='relu'),
    Conv2D(256, kernel_size=3, activation='relu'),
    Conv2D(256, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    # One output per label column (32 coordinates).
    Dense(32, activation='relu'),
])

# Compile the model using accuracy to measure model performance.
def get_cnn_model(image_size, n_classes, mode='training'):
    '''
    Build a VGG16-style classifier with input preprocessing baked into the graph.

    The input passes through optional Lambda layers (mean subtraction, stddev
    division, channel reordering), then five blocks of 3x3 convolutions with
    max-pooling, and finally a 512-unit dense layer, dropout, and a softmax
    output.

    Args:
        image_size: Indexable whose first three items are read as
            (height, width, channels).
            NOTE(review): the hard-coded mean has three entries, so this
            presumably expects 3-channel images -- confirm with callers.
        n_classes: Number of foreground classes. One extra output unit is
            added for a background class.
        mode: Accepted for interface compatibility; not used in this function.

    Returns:
        An uncompiled Keras ``Model`` whose output is a softmax over
        ``n_classes + 1`` units.
    '''
    n_classes += 1  # add one background class
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
    input_shape = (img_height, img_width, img_channels)
    l2_reg = 0.0005  # L2 regularization strength for every conv kernel

    # Input adjustment settings.
    subtract_mean = [123, 117, 104]
    divide_by_stddev = None
    swap_channels = [2, 1, 0]

    # The following four functions are used by the Lambda layers below.
    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        # Rebuild the last axis in the order given by swap_channels. Works for
        # any number of channels instead of silently returning None when the
        # list length is neither 3 nor 4.
        return K.stack([tensor[..., c] for c in swap_channels], axis=-1)

    def vgg_conv(filters, name):
        # Every convolution in the network shares these settings.
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg), name=name)

    def pool_2x2(name):
        return MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name=name)

    x = Input(shape=input_shape)

    # The identity layer is optional; it only gives the graph a named entry point.
    x1 = Lambda(identity_layer, output_shape=input_shape, name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization, output_shape=input_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization, output_shape=input_shape,
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap, output_shape=input_shape,
                    name='input_channel_swap')(x1)

    # Modified VGG16 implementation.
    conv1_1 = vgg_conv(64, 'conv1_1')(x1)
    conv1_2 = vgg_conv(64, 'conv1_2')(conv1_1)
    pool1 = pool_2x2('pool1')(conv1_2)

    conv2_1 = vgg_conv(128, 'conv2_1')(pool1)
    conv2_2 = vgg_conv(128, 'conv2_2')(conv2_1)
    pool2 = pool_2x2('pool2')(conv2_2)

    conv3_1 = vgg_conv(256, 'conv3_1')(pool2)
    conv3_2 = vgg_conv(256, 'conv3_2')(conv3_1)
    conv3_3 = vgg_conv(256, 'conv3_3')(conv3_2)
    pool3 = pool_2x2('pool3')(conv3_3)

    conv4_1 = vgg_conv(512, 'conv4_1')(pool3)
    conv4_2 = vgg_conv(512, 'conv4_2')(conv4_1)
    conv4_3 = vgg_conv(512, 'conv4_3')(conv4_2)
    pool4 = pool_2x2('pool4')(conv4_3)

    conv5_1 = vgg_conv(512, 'conv5_1')(pool4)
    conv5_2 = vgg_conv(512, 'conv5_2')(conv5_1)
    conv5_3 = vgg_conv(512, 'conv5_3')(conv5_2)
    # Unlike pool1-pool4, this pool uses a 3x3 window with stride 1.
    pool5 = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same',
                         name='pool5')(conv5_3)

    # Flatten is a layer: instantiate it first, then call it on the tensor.
    flatten = Flatten()(pool5)
    fc6 = Dense(512, activation='relu', name='fc6')(flatten)
    dropout = Dropout(0.5)(fc6)
    # Layer names must be unique within a Model, so the output layer is 'fc7'.
    fc7 = Dense(n_classes, activation='softmax', name='fc7')(dropout)

    model = Model(inputs=x, outputs=fc7)
    return model
def create_model(self):
    '''
    Assemble, optionally restore, and compile the conv + GRU network trained with CTC.

    The graph is: two conv/max-pool stages, a reshape + permute into a
    sequence, a time-distributed dense layer, two levels of forward/backward
    GRUs, and a time-distributed dense + softmax output. The CTC loss is
    computed inside a Lambda layer named "ctc", so the compiled loss simply
    passes that layer's output through.

    Side effects:
        Calls ``self._set_model_params()``, stores the compiled model on
        ``self.model`` and a backend function mapping the image input to the
        softmax output on ``self._predictor``.

    Returns:
        The compiled training model, which takes
        [image, labels, input_length, label_length] as inputs.
    '''
    self._set_model_params()
    activation = 'relu'

    image_in = Input(name='the_input', shape=self.input_shape, dtype='float32')

    # Convolutional front end: two conv + max-pool stages.
    features = Convolution2D(self.conv_num_filters,
                             self.filter_size, self.filter_size,
                             border_mode='same', activation=activation,
                             name='conv1')(image_in)
    features = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1),
                            name='max1')(features)
    features = Convolution2D(self.conv_num_filters,
                             self.filter_size, self.filter_size,
                             border_mode='same', activation=activation,
                             name='conv2')(features)
    features = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2),
                            name='max2')(features)

    # Collapse the conv output to 2-D, then swap the two axes for the RNN.
    total_pool = self.pool_size_1 * self.pool_size_2
    conv_to_rnn_dims = (
        int((self.img_h / total_pool) * self.conv_num_filters),
        int(self.img_w / total_pool),
    )
    features = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(features)
    features = Permute(dims=(2, 1), name='permute')(features)

    # Shrink the per-step feature size before it reaches the RNN.
    step_dense = Dense(self.time_dense_size, activation=activation, name='dense1')
    features = TimeDistributed(step_dense)(features)

    # First forward/backward GRU pair; the two directions are summed.
    # (Original author's note: GRU seems to work as well as, if not better than, LSTM.)
    fwd_1 = GRU(self.rnn_size, return_sequences=True, name='gru1')(features)
    bwd_1 = GRU(self.rnn_size, return_sequences=True, go_backwards=True,
                name='gru1_b')(features)
    summed_1 = merge([fwd_1, bwd_1], mode='sum')

    # Second forward/backward GRU pair; the two directions are concatenated below.
    fwd_2 = GRU(self.rnn_size, return_sequences=True, name='gru2')(summed_1)
    bwd_2 = GRU(self.rnn_size, return_sequences=True, go_backwards=True)(summed_1)

    # Map the RNN output to per-step character activations.
    to_chars = TimeDistributed(Dense(self.output_size, name='dense2'))
    concat_2 = merge([fwd_2, bwd_2], mode='concat')
    char_scores = to_chars(concat_2)
    y_pred = Activation('softmax', name='softmax')(char_scores)

    # Extra inputs that exist only so the CTC loss can be computed in-graph.
    labels = Input(name='the_labels', shape=[self.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras loss functions cannot take extra parameters, so the CTC loss is
    # computed by a Lambda layer that receives everything it needs.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name="ctc")(ctc_inputs)

    # Original author's note: clipnorm seems to speed up convergence.
    lr = 0.03
    clipnorm = 5
    sgd = SGD(lr=lr, decay=3e-7, momentum=0.9, nesterov=True, clipnorm=clipnorm)

    model = Model(input=[image_in, labels, input_length, label_length],
                  output=[loss_out])

    # Restore saved weights, if a weight file was given, before compiling.
    if self.weight_file is not None:
        model.load_weights(self.weight_file)

    # The real loss is the "ctc" layer's output, so the compiled loss just
    # returns the prediction unchanged.
    passthrough_loss = {'ctc': lambda y_true, y_pred: y_pred}
    model.compile(loss=passthrough_loss, optimizer=sgd)

    self.model = model
    self._predictor = K.function([image_in], [y_pred])
    return model
def cnn_rnn(nb_words, EMBEDDING_DIM,
            embedding_matrix, MAX_SEQUENCE_LENGTH,
            num_rnn, num_dense, rate_drop_rnn,
            rate_drop_dense, act):
    '''
    Build and compile the CNN-attention + RNN model for a pair of sequences.

    model: input layer; embedding layer; cnn based attention layer; rnn layer;
    dense layer; output layer

    Both inputs share every layer up to and including the bidirectional GRU.
    For each sequence, a Conv1D + global max-pool summary is projected to
    EMBEDDING_DIM, multiplied with the token embeddings and summed over the
    embedding axis to get one score per position. The softmax of those scores
    re-weights the embeddings before they enter the GRU. The two GRU outputs
    are multiplied element-wise and classified by a small MLP.

    Args:
        nb_words: Vocabulary size passed to the Embedding layer.
        EMBEDDING_DIM: Width of the embedding vectors.
        embedding_matrix: Initial (frozen) embedding weights.
        MAX_SEQUENCE_LENGTH: Length of each integer input sequence.
        num_rnn: Units of the GRU wrapped in the Bidirectional layer.
        num_dense: Units of the hidden dense layer in the classifier.
        rate_drop_rnn: Dropout and recurrent dropout rate of the GRU.
        rate_drop_dense: Dropout rate used in the classifier.
        act: Activation of the hidden dense layer in the classifier.

    Returns:
        A compiled Keras ``Model`` with two int32 inputs and a 3-way softmax
        output (categorical cross-entropy, nadam, accuracy metric).
    '''
    # Layers shared by both input branches.
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)
    rnn_layer = Bidirectional(
        GRU(num_rnn, dropout=rate_drop_rnn, recurrent_dropout=rate_drop_rnn))
    cnn_layer = Conv1D(activation="relu", padding="valid", strides=1,
                       filters=128, kernel_size=2)
    pooling_layer = GlobalMaxPooling1D()
    cnn_dense = Dense(300)
    cnn_dropout1 = Dropout(0.35)
    cnn_dropout2 = Dropout(0.35)
    cnn_batchnormalization = BatchNormalization()
    # This projection is multiplied element-wise with the embeddings, so its
    # width must equal EMBEDDING_DIM (it was hard-coded to 300, which only
    # builds when EMBEDDING_DIM == 300).
    cnn_dense1 = Dense(EMBEDDING_DIM)

    def attend_and_encode(embedded):
        # Summarise the whole sequence with the shared CNN stack.
        summary = cnn_layer(embedded)
        summary = pooling_layer(summary)
        summary = cnn_dropout1(summary)
        summary = cnn_dense(summary)
        summary = cnn_dropout2(summary)
        summary = cnn_batchnormalization(summary)
        query = cnn_dense1(summary)

        # One score per position: multiply the summary with each token
        # embedding, then sum over the embedding axis (axis 1 after the permute).
        scores = multiply([query, embedded])
        scores = Permute([2, 1])(scores)
        scores = Lambda(lambda x: K.sum(x, axis=1))(scores)
        attention = Activation('softmax')(scores)

        # Re-weight the embeddings per position; the permutes put the position
        # axis last so the attention vector broadcasts over it.
        transposed = Permute([2, 1])(embedded)
        weighted = multiply([attention, transposed])
        weighted = Permute([2, 1])(weighted)
        return rnn_layer(weighted)

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)

    x1 = attend_and_encode(embedded_sequences_1)
    x2 = attend_and_encode(embedded_sequences_2)

    # Combine the two encodings and classify.
    merged = multiply([x1, x2])
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)
    preds = Dense(3, activation='softmax')(merged)

    ########################################
    ## train the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])
    model.summary()
    return model