def model(self):
    """Build and compile a small sigmoid MLP classifier over 128-d encoded inputs.

    Returns:
        A compiled Keras Model (128 -> hidden -> NUM_LABELS) using the
        optimizer from self.conf_s and categorical cross-entropy loss.
    """
    encoded_input = Input(shape=(128,))
    hidden_layer = Dense(self.conf_s.NUMBER_OF_NEURONS_IN_HIDDEN_LAYER,
                         activation='sigmoid')(encoded_input)
    # Fix: read the label count from the same config object as the other
    # hyper-parameters (was the bare name `conf`, inconsistent with self.conf_s
    # and likely an undefined name).
    output_layer = Dense(self.conf_s.NUM_LABELS, activation='sigmoid')(hidden_layer)
    classifier = Model(input=encoded_input, output=output_layer)
    classifier.compile(self.conf_s.OPTIMIZER, loss='categorical_crossentropy')
    return classifier
def test1():
    """Smoke-test the RTTN layer: build a tiny fixed-batch model and fit it on
    generated data.

    NOTE(review): uses the Keras 1 API throughout (Model(input=..., output=...),
    fit_generator with samples_per_epoch / nb_epoch). RTTN is a project layer
    that returns a per-timestep branch output and a summary output.
    """
    seq_size = 10
    batch_size = 10
    rnn_size = 1
    # Fixed batch shapes -- presumably RTTN requires a known batch size; TODO confirm.
    xin = Input(batch_shape=(batch_size, seq_size, 1))
    xtop = Input(batch_shape=(batch_size, seq_size))
    xbranch, xsummary = RTTN(rnn_size, return_sequences=True)([xin, xtop])
    model = Model(input=[xin, xtop], output=[xbranch, xsummary])
    model.compile(loss='MSE', optimizer='SGD')
    data_gen = generate_data_batch(batch_size, seq_size)
    model.fit_generator(generator=data_gen,
                        samples_per_epoch=1000,
                        nb_epoch=100)
def train_auto_encoder(self, train_x, test_x, input_dim,
                       out_model_file='encoder_cnn.h5',
                       monitor='val_loss', patience=4):
    """Train a dense 512-256-128 autoencoder to reconstruct its input and save it.

    Args:
        train_x / test_x: feature matrices; the network learns x -> x.
        input_dim: width of one input row.
        out_model_file: path used for both checkpointing and the final save.
        monitor / patience: early-stopping configuration.

    Returns:
        The 128-d bottleneck tensor (symbolic encoder output).
        NOTE(review): this is a tensor, not a Model -- confirm callers expect that.
    """
    early_stop = EarlyStopping(monitor=monitor, patience=patience)
    # Fix: the checkpoint previously referenced the undefined name `out_model`
    # (NameError at call time); it now uses the out_model_file parameter.
    checkpoint = ModelCheckpoint(out_model_file, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    input_data = Input(shape=(input_dim,), name='Input')
    # Encoder: 512 -> 256 -> 128 bottleneck.
    encoder = Dense(512, activation='relu', name='Encoder1')(input_data)
    encoder = Dense(256, activation='relu', name='Encoder2')(encoder)
    encoder = Dense(128, activation='relu', name='Encoder3')(encoder)
    # Decoder mirrors the encoder, linear output for reconstruction.
    decoder = Dense(256, activation='relu', name='Decoder1')(encoder)
    decoder = Dense(512, activation='relu', name='Decoder2')(decoder)
    decoder = Dense(input_dim, activation='linear', name='Output')(decoder)
    autoencoder = Model(input_data, decoder)
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(train_x, train_x,
                    epochs=200, batch_size=512,
                    callbacks=[checkpoint, early_stop],
                    shuffle=True,
                    validation_data=(test_x, test_x),
                    verbose=1)
    autoencoder.save(out_model_file)
    return encoder
def _build_cnn_joint_trainer(self, block_cnn_ori, block_cnn_gen, block_clf_ori, block_clf_gen, lr):
    '''
    Joint training for cnn0 and cnn1
    [arg1, arg2, arg2plus] to [multi-predict0, multi-predict1]
    '''
    # All four sub-blocks are trained jointly, so mark them trainable.
    block_cnn_ori.trainable = True
    block_cnn_gen.trainable = True
    block_clf_ori.trainable = True
    block_clf_gen.trainable = True
    # Three token-index inputs of the same maximum length.
    arg1 = Input(shape=(self.arg_maxlen,), dtype='int32')
    arg2 = Input(shape=(self.arg_maxlen,), dtype='int32')
    arg2plus = Input(shape=(self.arg_maxlen,), dtype='int32')
    # "ori" path sees (arg1, arg2); "gen" path sees (arg1, arg2plus).
    cnn_ori_repr = block_cnn_ori([arg1, arg2])
    cnn_gen_repr = block_cnn_gen([arg1, arg2plus])
    output_ori = block_clf_ori(cnn_ori_repr)
    output_gen = block_clf_gen(cnn_gen_repr)
    # Keras 1 keyword names (input=/output=).
    model = Model(input=[arg1, arg2, arg2plus], output=[output_ori, output_gen])

    # compile
    # The two losses are complementary-weighted by self.lambda_gen.
    # NOTE(review): K.categorical_crossentropy is called as (y_pred, y_true),
    # the reverse of the usual Keras 2 (target, output) order -- confirm this
    # matches the Keras/backend version in use.
    def loss_ori(y_true, y_pred):
        return (1 - self.lambda_gen) * K.mean(K.categorical_crossentropy(y_pred, y_true), axis=-1)

    def loss_gen(y_true, y_pred):
        return self.lambda_gen * K.mean(K.categorical_crossentropy(y_pred, y_true), axis=-1)

    model.compile(loss=[loss_ori, loss_gen],
                  optimizer=self.get_opt(self.cnn_optimizer_name)(lr))
    return model
def sda_training(features, labels):
    """Greedily train a stack of denoising autoencoders (dims 1600 -> 1024 -> 768).

    Each round trains a one-hidden-layer autoencoder with Gaussian input noise,
    then replaces the working features with that encoder's activations before
    training the next level.

    NOTE(review): `stacked_encoder` and the one-hot label arrays are built but
    the function returns nothing, so callers cannot retrieve the trained
    stack -- confirm whether a return statement is missing.
    """
    encoder_dims = [1600, 1024, 768]
    stacked_encoder = []
    int_labels = label_to_category(labels=labels, type='training')
    X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                        test_size=0.2, random_state=10)
    y_train = to_categorical([int_labels[i] for i in y_train])
    y_test = to_categorical([int_labels[i] for i in y_test])
    for encoder_dim in encoder_dims:
        input_dim = X_train.shape[1]
        input_img = Input(shape=(input_dim,))
        # Denoising: corrupt the input with additive Gaussian noise (stddev 0.3).
        n_layer = noise.GaussianNoise(0.3)(input_img)
        encode = Dense(encoder_dim, activation='sigmoid')(n_layer)
        decode = Dense(input_dim, activation='sigmoid')(encode)
        ae = Model(input_img, decode)
        ae.compile(optimizer='adam', loss='mape')
        ae.fit(X_train, X_train, epochs=10, batch_size=32, shuffle=True,
               validation_data=(X_test, X_test))
        # Project the data through the freshly trained encoder for the next level.
        encoder = Model(input_img, encode)
        X_train = encoder.predict(X_train)
        X_test = encoder.predict(X_test)
        stacked_encoder.append(encoder.layers[-1])
def simple_lstm(timesteps, num_features, num_pitches, num_units_lstm, dropout_prob=0.2):
    """Stacked two-layer LSTM followed by a dense softmax pitch classifier.

    Args:
        timesteps: sequence length of the input.
        num_features: feature dimension per timestep.
        num_pitches: number of output pitch classes.
        num_units_lstm: hidden size of both LSTM layers (and the relu head).
        dropout_prob: dropout applied between the two LSTM layers.

    Returns:
        A compiled Model whose softmax output is named 'label'.
    """
    input_seq = Input((timesteps, num_features), name='input_seq')
    repr_input = input_seq
    repr_input = LSTM(num_units_lstm, return_sequences=True)(repr_input)
    repr_input = Dropout(dropout_prob)(repr_input)
    repr_input = LSTM(num_units_lstm, return_sequences=False)(repr_input)
    # NN head: relu projection, pitch logits, softmax.
    output = Dense(num_units_lstm, activation='relu')(repr_input)
    output = Dense(num_pitches)(output)
    preds = Activation('softmax', name='label')(output)
    # Fix: Keras 2 keyword names (the deprecated input=/output= aliases were
    # removed), consistent with the other builders in this file.
    model = Model(inputs=[input_seq], outputs=preds)
    model.compile(optimizer='adam',
                  loss={'label': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
def create_simple_bert_model_with_previous_sentence(input_vector_size, learning_rate, output_dimension):
    """Classify a sentence vector given its predecessor's vector.

    The current sentence embedding is encoded by an LSTM (as a length-1
    sequence), concatenated with the previous sentence's raw embedding, and
    classified with a softmax layer.
    """
    # Current sentence: reshape to a 1-step sequence so the LSTM can consume it.
    sentence_vec = Input(shape=(input_vector_size, ), name='input')
    as_sequence = Reshape((1, input_vector_size))(sentence_vec)
    encoded = LSTM(500, activation="relu", recurrent_dropout=0.2,
                   return_sequences=False)(as_sequence)
    # Previous sentence is used as-is.
    context_vec = Input(shape=(input_vector_size, ), name='previous_input')
    joined = concatenate([encoded, context_vec])
    probs = Dense(output_dimension, name='output', activation='softmax')(joined)
    net = Model(inputs=[sentence_vec, context_vec], outputs=[probs])
    net.summary()
    net.compile(loss='categorical_crossentropy',
                optimizer=Adam(lr=learning_rate),
                metrics=["accuracy", f1])
    return net
def get_model() -> Model:
    """Approximate the (normalised, range [0;1]) Boston house price from input features.

    Architecture: dense projection to 256 units, reshaped into a 16x16
    single-channel "image", one 4x4 convolution, then a linear regression head.
    """
    # here starts your task!
    # implement an ANN that solves the boston house prices task.
    # Fix: name the input layer `inputs` rather than shadowing the builtin `input`.
    inputs = Input(shape=(config.FEATURE_DIMENSIONALITY, ))
    hidden = Dense(units=256, activation='sigmoid')(inputs)
    hidden = Reshape(target_shape=(16, 16, 1))(hidden)
    hidden = Conv2D(filters=16, kernel_size=(4, 4))(hidden)
    hidden = Flatten()(hidden)
    # Single linear unit: regression output.
    output = Dense(units=1, activation='linear')(hidden)
    # create a new model by specifying input/output layer(s)
    model = Model(inputs=[inputs], outputs=[output])
    # optimizer and loss kept as originally chosen (SGD + MSE).
    model.compile(optimizer='sgd', loss='mse')
    return model
def get_model(base_model, layer, input_shape, classes, lr=1e-3,
              activation="sigmoid", dropout=None, pooling="avg",
              weights=None, pretrained="imagenet"):
    """Build a fine-tuning classifier on top of a pretrained backbone.

    Args:
        base_model: callable backbone constructor (e.g. a keras.applications model).
        layer: index below which backbone layers are frozen.
        input_shape: backbone input shape.
        classes: number of output units.
        lr: Adam learning rate.
        activation: output activation (default sigmoid for multi-label).
        dropout: optional dropout rate before the head.
        pooling: 'avg', 'max' or None (flatten).
        weights: optional path of full-model weights to load.
        pretrained: backbone weight set.

    Returns:
        A compiled Model with binary cross-entropy loss.
    """
    base = base_model(input_shape=input_shape, include_top=False, weights=pretrained)
    if pooling == "avg":
        x = GlobalAveragePooling2D()(base.output)
    elif pooling == "max":
        x = GlobalMaxPooling2D()(base.output)
    elif pooling is None:
        x = Flatten()(base.output)
    else:
        # Fix: an unrecognised pooling value previously left `x` unbound and
        # crashed below with a confusing NameError; fail fast instead.
        raise ValueError("pooling must be 'avg', 'max' or None, got %r" % (pooling,))
    if dropout is not None:
        x = Dropout(dropout)(x)
    x = Dense(classes, activation=activation)(x)
    model = Model(inputs=base.input, outputs=x)
    if weights is not None:
        model.load_weights(weights)
    # Freeze everything below the requested layer index.
    for l in model.layers[:layer]:
        l.trainable = False
    model.compile(loss='binary_crossentropy',
                  metrics=["binary_accuracy", auc, precision, recall],
                  optimizer=optimizers.Adam(lr))
    return model
def hrnn_model():
    """Hierarchical attentive LSTM over 5-second EEG epochs.

    A section-level sub-model encodes each flattened 0.5 s section (4096
    samples viewed as 64 timesteps of 64 features) into an attended 32-d
    vector; the epoch-level model applies it to all 10 sections and classifies
    the epoch with a second attentive LSTM and a sigmoid unit.
    """
    # --- section-level encoder: one 0.5 s chunk -> attended 32-d vector ---
    section_in = Input(shape=(4096, ), dtype='float32')
    as_grid = Reshape((64, 64), input_shape=(4096, ))(section_in)
    recurrent = LSTM(32, return_sequences=True, name='recurrent_layer')(as_grid)
    # Dense applied independently at each of the 64 timesteps.
    per_step = TimeDistributed(Dense(32))(recurrent)
    section_vec = AttentionWithContext()(per_step)
    section_encoder = Model(inputs=section_in, outputs=section_vec)
    section_encoder.summary()

    # --- epoch-level classifier: 10 sections -> probability ---
    epoch_in = Input(shape=(10, 4096), dtype='float32')
    # Apply the section encoder to each of the 10 sections in turn.
    section_seq = TimeDistributed(section_encoder)(epoch_in)
    epoch_recurrent = LSTM(32, return_sequences=True)(section_seq)
    epoch_per_step = TimeDistributed(Dense(32))(epoch_recurrent)
    epoch_vec = AttentionWithContext()(epoch_per_step)
    prob = Dense(1, activation='sigmoid')(epoch_vec)

    classifier = Model(inputs=epoch_in, outputs=prob)
    print('Compiling...')
    classifier.compile(loss='binary_crossentropy',
                       optimizer=Adam(lr=0.01),
                       metrics=['accuracy'])
    print("model fitting - Hierachical LSTM")
    classifier.summary()
    return classifier
def test_lkrelu(self):
    """Train a minimal Conv2D + LKReLU classifier on CIFAR-10 and print test scores.

    Logs training to a width/seed/timestamp-named TensorBoard directory.
    Reads self.width, self.metrics, self.epochs and self.seed from the fixture.
    """
    batch_size = 32
    num_classes = 10
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    inputs = Input(shape=x_train.shape[1:])
    x = Conv2D(self.width, (3, 3))(inputs)
    x = LKReLU()(x)  # project-local activation layer under test
    x = Flatten()(x)
    x = Dense(num_classes, activation='softmax', name='fc1000')(x)
    model = Model(inputs=inputs, outputs=x)
    print(model.summary())
    opt = keras.optimizers.sgd()  # default-parameter SGD
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=self.metrics)
    log_dir = 'summaries/width-lkrelu-{}-cifar10-{}-{}'.format(
        self.width, self.seed, datetime.datetime.now())
    # shuffle=False keeps batch order reproducible for a given seed.
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=self.epochs,
        validation_data=(x_test, y_test),
        shuffle=False,
        callbacks=[
            TensorBoard(log_dir=log_dir),
            # ModelCheckpoint(
            #     'checkpoints/width-lkrelu-cifar10-{epoch:02d}-{val_loss:.2f}.hdf5')
        ])
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
def bielugru(word_input_size, word_embedding_size, sequence_embedding_size, n_tags,
             word_dropout, rnn_dropout_W, rnn_dropout_U, l2,
             word_embedding_weights, **kwargs):
    """Bidirectional ELU-GRU sequence tagger: word indices -> per-token IOB tags.

    Args:
        word_input_size: vocabulary size.
        word_embedding_size: embedding dimension.
        sequence_embedding_size: total BiGRU output size (each direction gets half).
        n_tags: number of IOB tag classes.
        word_dropout: dropout on the embedded inputs.
        rnn_dropout_W / rnn_dropout_U: input / recurrent dropout of the GRU.
        l2: L2 regularisation weight on the tag projection.
        word_embedding_weights: pre-trained embedding weights.

    Returns:
        A 1-tuple containing the compiled Model.
    """
    # define network inputs: words only
    text_input = Input(shape=(None, ), dtype='int32', name='text_input')
    # map word indices to vector representation
    word_embeddings = Embedding(input_dim=word_input_size,
                                output_dim=word_embedding_size,
                                weights=word_embedding_weights,
                                name="word_embeddings")(text_input)
    # drop small portion of input vectors
    word_embeddings = Dropout(word_dropout)(word_embeddings)
    sequence_embedding = word_embeddings
    # apply text level BIGRU
    # Fix: integer floor division -- under Python 3, `/ 2` yields a float,
    # which is not a valid unit count for the recurrent layer.
    bidirectional_tag_sequence_output = Bidirectional(
        ELUGRU(sequence_embedding_size // 2,
               return_sequences=True,
               dropout=rnn_dropout_W,
               recurrent_dropout=rnn_dropout_U),
        merge_mode="concat")(sequence_embedding)
    # project hidden states to IOB tags
    tag_sequence_output = TimeDistributed(
        Dense(n_tags, activation='softmax', kernel_regularizer=L1L2(l2=l2)),
        name="aspect_output")(bidirectional_tag_sequence_output)
    # construct Model object and compile
    model = Model(inputs=[text_input], outputs=[tag_sequence_output])
    adam = Adam()
    model.compile(optimizer=adam,
                  loss={'aspect_output': "categorical_crossentropy"},
                  sample_weight_mode="temporal")
    model._make_train_function()
    model._make_predict_function()
    # NOTE(review): returns a 1-tuple (trailing comma) -- kept for caller compatibility.
    return model,
def UndirectedSimpleNetwork(input_shape, pool_size=(2, 2, 2), n_labels=48,
                            initial_learning_rate=0.00001, depth=2,
                            n_base_filters=8, batch_normalization=False):
    '''the network takes in a volume and predicts the direction for the next trace point
    '''
    volume_in = Input(input_shape)
    # Stage 1: two conv blocks at the base filter count, then downsample.
    conv_a = create_convolution_block(input_layer=volume_in,
                                      n_filters=n_base_filters,
                                      batch_normalization=batch_normalization)
    conv_b = create_convolution_block(input_layer=conv_a,
                                      n_filters=n_base_filters,
                                      batch_normalization=batch_normalization)
    pooled_1 = MaxPooling3D(pool_size=pool_size)(conv_b)
    # Stage 2: filter count doubles, then a second downsampling step.
    conv_c = create_convolution_block(input_layer=pooled_1,
                                      n_filters=n_base_filters * 2,
                                      batch_normalization=batch_normalization)
    conv_d = create_convolution_block(input_layer=conv_c,
                                      n_filters=n_base_filters * 2,
                                      batch_normalization=batch_normalization)
    pooled_2 = MaxPooling3D(pool_size=pool_size)(conv_d)
    # Dense head: relu hidden layer, then independent sigmoid per label.
    flattened = Flatten()(pooled_2)
    fc1 = Dense(400, activation='relu', name='fc1')(flattened)
    fc2 = Dense(n_labels, activation='sigmoid', name='fc2')(fc1)
    net = Model(inputs=volume_in, outputs=fc2)
    net.compile(optimizer=Adam(lr=initial_learning_rate),
                loss=binary_crossentropy_weighted,
                metrics=['acc', jaccard_coef_int])
    print(net.summary())
    return net
def cnn_multi_filters(wv, sent_length, nfilters, nb_filters, **kwargs):
    """Multi-width CNN sentence classifier (Keras 1 convolution API).

    Args:
        wv: pre-trained word-embedding matrix handed to embeddings_layer.
        sent_length: fixed token length of each input sentence.
        nfilters: iterable of convolution window sizes -- one branch per size.
        nb_filters: number of feature maps per branch.
        **kwargs: noise, trainable, drop_text_input, drop_conv, activity_l2.

    Returns:
        A compiled Model mapping word-index sequences to 3-way softmax scores.
    """
    noise = kwargs.get("noise", 0)
    trainable = kwargs.get("trainable", False)
    drop_text_input = kwargs.get("drop_text_input", 0.)
    drop_conv = kwargs.get("drop_conv", 0.)
    activity_l2 = kwargs.get("activity_l2", 0.)

    input_text = Input(shape=(sent_length,), dtype='int32')
    emb_text = embeddings_layer(max_length=sent_length, embeddings=wv,
                                trainable=trainable, masking=False)(input_text)
    # Regularise the embeddings: Gaussian noise then dropout.
    emb_text = GaussianNoise(noise)(emb_text)
    emb_text = Dropout(drop_text_input)(emb_text)

    # One conv/pool branch per filter width; branches concatenated below.
    pooling_reps = []
    for i in nfilters:
        feat_maps = Convolution1D(nb_filter=nb_filters,
                                  filter_length=i,
                                  border_mode="valid",
                                  activation="relu",
                                  subsample_length=1)(emb_text)
        pool_vecs = MaxPooling1D(pool_length=2)(feat_maps)
        pool_vecs = Flatten()(pool_vecs)
        # pool_vecs = GlobalMaxPooling1D()(feat_maps)
        pooling_reps.append(pool_vecs)

    representation = concatenate(pooling_reps)
    representation = Dropout(drop_conv)(representation)

    probabilities = Dense(3, activation='softmax',
                          activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=input_text, output=probabilities)
    model.compile(optimizer="adam", loss='categorical_crossentropy')

    return model
def build_network_resnet(self):
    """Build a ResNet50 VGGFace-based emotion classifier and store it in self.model.

    The first 101 backbone layers are frozen; a 4096 -> 1024 dense head and a
    6-way softmax classifier are added on top.
    """
    # Resnet Facenet (v2)
    print('[+] Building CNN')
    vgg_notop = VGGFace(model='resnet50', include_top=False,
                        input_shape=(224, 224, 3), pooling='avg')
    last_layer = vgg_notop.get_layer('avg_pool').output
    x = Flatten(name='flatten')(last_layer)
    x = Dense(4096, activation='relu', name='fc6')(x)
    x = Dense(1024, activation='relu', name='fc7')(x)
    print("Emotions count", len(EMOTIONS))
    # Print every backbone layer with its index (debug aid for picking the freeze point).
    l = 0
    for layer in vgg_notop.layers:
        print(layer, "[" + str(l) + "]")
        l = l + 1
    # Freeze the first 101 layers of the pretrained backbone.
    for i in range(101):
        vgg_notop.layers[i].trainable = False
    out = Dense(6, activation='softmax', name='classifier')(x)
    custom_resnet = Model(vgg_notop.input, out)
    optim = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
    #optim = keras.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # NOTE(review): `optim` is constructed but the model is compiled with the
    # string 'sgd' -- confirm whether optimizer=optim was intended here.
    custom_resnet.compile(optimizer='sgd',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
    plot_model(custom_resnet, to_file='model2.png', show_shapes=True)
    self.model = custom_resnet
def simple_second_model():
    # Define and create a simple Conv2D model
    """Toy second-order CNN: two Conv2D layers, then per-group covariance
    (SecondaryStatistic) branches with stacked O2Transform layers, merged and
    classified by a 10-way Dense layer.

    NOTE: uses the Keras 1 functional API (`merge(..., mode='concat')`) and
    several project-local layers.
    """
    n = 8
    input_tensor = Input(INPUT_SHAPE[1:])
    x = Convolution2D(1024, 3, 3)(input_tensor)
    x = Convolution2D(2048, 3, 3)(x)
    # Split the feature maps into n separate groups, then regroup them.
    list_covs = SeparateConvolutionFeatures(n)(x)
    list_covs = Regrouping(None)(list_covs)
    list_outputs = []
    for cov in list_covs:
        # Second-order (covariance) statistics per group, two O2Transform
        # layers, then a 10-d weighted vectorisation.
        cov = SecondaryStatistic()(cov)
        cov = O2Transform(100)(cov)
        cov = O2Transform(100)(cov)
        list_outputs.append(WeightedVectorization(10)(cov))
    x = merge(list_outputs, mode='concat')
    x = Dense(10)(x)
    model = Model(input_tensor, x)
    model.compile(optimizer='sgd', loss='categorical_crossentropy')
    model.summary()
    return model
def testnet_model(input_shape=(4, 128, 128, 128), optimizer=Adam, initial_learning_rate=5e-4, activation_name="sigmoid", **kwargs): inputs_1 = Input(input_shape) #inputs_2 = Input(input_shape) #pool_1 = testnet_backbone(inputs_1, n_base_filters) #pool_2 = testnet_backbone(inputs_2, n_base_filters) #sf_add = concatenate([pool_1, pool_2], axis=1) sf_return = siam3dunet_backbone(inputs_1, mask_name='mask1', **kwargs) sf_add = GlobalAveragePooling3D()(sf_return) out_pred_score = Dense(1, activation=None)(sf_add) out_pred_score = Lambda(print_output, arguments={'msg': ' output'})(out_pred_score) out_pred_score = Activation(activation_name)(out_pred_score) out_pred_score = Lambda(print_output, arguments={'msg': ' output sigmoid'}, name='score')(out_pred_score) print(initial_learning_rate) print(activation_name) model = Model(inputs=inputs_1, outputs=out_pred_score) #model = Model(inputs=[inputs_1, inputs_2], outputs=out_pred_score) model.compile(optimizer=SGD(lr=initial_learning_rate, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy']) #model.compile(optimizer=optimizer(lr=initial_learning_rate), loss={'score':'binary_crossentropy'}, metrics=['accuracy']) #model.metrics_tensors += model.outputs return model
def nn_architecture_seg_3d(input_shape, pool_size=(2, 2, 2), n_labels=1,
                           initial_learning_rate=0.00001, depth=3, n_base_filters=16,
                           metrics=dice_coefficient, batch_normalization=True):
    """3D U-Net-style segmentation network with `depth` resolution levels.

    Args:
        input_shape: input volume shape (channels first; concat axis is 1).
        pool_size: max-pooling / upsampling factor per level.
        n_labels: number of output channels (sigmoid-activated).
        initial_learning_rate: Adam learning rate.
        depth: number of resolution levels.
        n_base_filters: filters of the first level; doubled per level.
        metrics: a metric or list of metrics for compile().
        batch_normalization: forwarded to create_convolution_block.

    Returns:
        A compiled Model trained with the Dice-coefficient loss.
    """
    inputs = Input(input_shape)
    current_layer = inputs
    levels = list()
    # Contracting path: two conv blocks per level, pooling between levels.
    for layer_depth in range(depth):
        layer1 = create_convolution_block(input_layer=current_layer,
                                          n_filters=n_base_filters * (2**layer_depth),
                                          batch_normalization=batch_normalization)
        layer2 = create_convolution_block(input_layer=layer1,
                                          n_filters=n_base_filters * (2**layer_depth) * 2,
                                          batch_normalization=batch_normalization)
        if layer_depth < depth - 1:
            current_layer = MaxPooling3D(pool_size=pool_size)(layer2)
            levels.append([layer1, layer2, current_layer])
        else:
            current_layer = layer2
            levels.append([layer1, layer2])
    # Expanding path: upsample, concatenate the skip connection, two conv blocks.
    for layer_depth in range(depth - 2, -1, -1):
        # Fix: the UpSampling3D layer was instantiated but never applied to
        # current_layer, so a Layer object (not a tensor) reached concatenate().
        up_convolution = UpSampling3D(size=pool_size)(current_layer)
        concat = concatenate([up_convolution, levels[layer_depth][1]], axis=1)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=concat,
                                                 batch_normalization=batch_normalization)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=current_layer,
                                                 batch_normalization=batch_normalization)
    final_convolution = Conv3D(n_labels, (1, 1, 1))(current_layer)
    act = Activation('sigmoid')(final_convolution)
    model = Model(inputs=inputs, outputs=act)
    if not isinstance(metrics, list):
        metrics = [metrics]
    model.compile(optimizer=Adam(lr=initial_learning_rate),
                  loss=dice_coefficient_loss, metrics=metrics)
    return model
def get_model(self):
    """Build the relation-aware BiLSTM entity tagger.

    Token embeddings are summed with a dense projection of the relation
    one-hot vector, encoded by two stacked bidirectional LSTMs, and projected
    per timestep to softmax tag probabilities.
    """
    token_in = Input(shape=(self.MAX_LENGTH, ), name="main_input")
    token_emb = Embedding(len(self.w2idx), self.embedding_size,
                          mask_zero=True)(token_in)
    # Relation features projected into the embedding space and added per token.
    relation_in = Input(shape=(len(c.RELATIONS), ), name="relation_input")
    relation_emb = Dense(self.embedding_size,
                         input_shape=(None, len(c.RELATIONS)))(relation_in)
    combined = add([token_emb, relation_emb])
    stacked = Bidirectional(LSTM(self.lstm_size, return_sequences=True))(combined)
    stacked = Bidirectional(LSTM(self.lstm_size, return_sequences=True))(stacked)
    logits = TimeDistributed(Dense(len(c.entity_tags), use_bias=True))(stacked)
    logits = Dropout(self.dropout)(logits)
    tags = Activation(activation="softmax")(logits)
    tagger = Model(inputs=[token_in, relation_in], outputs=[tags])
    # learning rate = 0.001 (RMSprop default)
    tagger.compile(loss='categorical_crossentropy',
                   optimizer=RMSprop(),
                   metrics=['accuracy'])
    tagger.summary()
    return tagger
def create_bilstm_model(input_size, we_matrix, learning_rate, emb_training, output_dimension):
    """BiLSTM text classifier initialised from a pre-trained embedding matrix.

    Token indices are embedded (optionally trainable), encoded by a
    bidirectional LSTM, passed through a relu layer with dropout and
    classified with softmax.
    """
    vocab_size = we_matrix.shape[0]
    emb_dim = we_matrix.shape[1]
    tokens_in = Input(shape=(input_size, ), name='input')
    embedded = Embedding(input_dim=vocab_size,
                         weights=[we_matrix],
                         output_dim=emb_dim,
                         input_length=input_size,
                         trainable=emb_training)(tokens_in)
    # Explicit reshape to (input_size, emb_dim) before the recurrent encoder.
    embedded = Reshape((input_size, emb_dim))(embedded)
    encoded = Bidirectional(
        LSTM(100, activation="relu", recurrent_dropout=0.2,
             return_sequences=False))(embedded)
    hidden = Dense(256, activation='relu')(encoded)
    hidden = Dropout(0.2)(hidden)
    probs = Dense(output_dimension, name='output', activation='softmax')(hidden)
    net = Model(inputs=[tokens_in], outputs=[probs])
    net.summary()
    # learning rate 0.001 by convention; taken from the argument here.
    net.compile(loss='categorical_crossentropy',
                optimizer=Adam(lr=learning_rate),
                metrics=["accuracy", f1])
    return net
def vgg16_model(img_width, img_height, nb_epoch, nb_classes):
    """Fine-tune VGG16 (ImageNet weights) on CIFAR-10 and return the trained model.

    Args:
        img_width / img_height: input resolution of the backbone.
        nb_epoch: number of training epochs.
        nb_classes: number of output classes (labels are one-hot encoded).

    Returns:
        The trained Model.
    """
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=(img_width, img_height, 3))
    # load dataset
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)
    # Extract the last layer from third block of vgg16 model
    last = base_model.get_layer('block5_pool').output
    # Add classification layers on top of it
    x = Flatten()(last)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    # Fix: size the output layer from nb_classes (was hard-coded to 10),
    # matching the one-hot label encoding above.
    output = Dense(nb_classes, activation='softmax')(x)
    model = Model(base_model.input, output)
    # model.summary()
    # Fix: multi-class softmax with one-hot labels needs categorical
    # cross-entropy; binary_crossentropy computed the wrong loss/accuracy here.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                  metrics=['accuracy'])
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=nb_epoch, batch_size=100, verbose=1)
    return model
def get_model( embedding_W, dataname, model_type="cnn2cnn", ): assert model_type in ['cnn2cnn', 'cnn2rnn'], 'type in [cnn2cnn,cnn2rnn]' #================sentence======================== sentence_input = Input(shape=(MAX_SENT_LENGTH.get(dataname),), dtype='int32') # word embedding 层 embedding_layer = Embedding(embedding_W.shape[0], EMBEDDING_DIM, weights=[embedding_W], trainable=True) embedding_sequences = embedding_layer(sentence_input) sents_representation = conv_atten(embedding_sequences, nb_filter, filter_lengths,latten_range) sent_model = Model(sentence_input, sents_representation, name='sentModel') # ==============================document================ doc_input = Input(shape=(None, MAX_SENT_LENGTH.get(dataname)), dtype='int32') sent_sequences = TimeDistributed(sent_model)(doc_input) if model_type=='cnn2cnn': doc_representation = conv_atten(sent_sequences,nb_filter,filter_lengths,atten_range=latten_range) elif model_type=='cnn2rnn': doc_representation = lstm_atten(sent_sequences, out_dim=gru_out_dim) fc_out = Dense(units=fc_hidden_dims, activation='relu', name='fcLayer')(doc_representation) fc_out = Dropout(0.5)(fc_out) preds = Dense(classes, activation='softmax')(fc_out) model = Model(doc_input, preds) model.compile( loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'] ) return model
class SmallRes(SiameseNetwork, object):
    """Siamese CNN for pairwise image similarity.

    Two images share one small VGG-style convnet; their feature vectors are
    compared with an elementwise L1 distance and scored by a dense sigmoid head.
    """

    def __init__(self, imageShape, featureShape, name, learningRate):
        """Build and compile the siamese network.

        Args:
            imageShape: input image shape (H, W, C).
            featureShape: sequence; featureShape[0] is the embedding size.
            name: model name stored on the instance.
            learningRate: Adadelta learning rate.
        """
        self.learningRate = learningRate
        self.modelName = name
        # Shared feature extractor, applied to both branches.
        convnet = Sequential()
        convnet.add(Conv2D(32, (3, 3), padding='same', input_shape=imageShape))
        convnet.add(Activation('relu'))
        convnet.add(Conv2D(32, (3, 3)))
        convnet.add(Activation('relu'))
        convnet.add(MaxPooling2D(pool_size=(2, 2)))
        convnet.add(Dropout(0.25))
        convnet.add(Conv2D(64, (3, 3), padding='same'))
        convnet.add(Activation('relu'))
        convnet.add(Conv2D(64, (3, 3)))
        convnet.add(Activation('relu'))
        convnet.add(MaxPooling2D(pool_size=(2, 2)))
        convnet.add(Dropout(0.25))
        convnet.add(Flatten())
        convnet.add(Dense(featureShape[0]))
        convnet.add(Activation('relu'))
        left_input = Input(imageShape)
        right_input = Input(imageShape)
        encoded_l = convnet(left_input)
        encoded_r = convnet(right_input)
        # Elementwise L1 distance between the two embeddings.
        L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
        L1_distance = L1_layer([encoded_l, encoded_r])
        hidden = Dense(512, activation='relu')(L1_distance)
        hidden2 = Dense(64, activation='relu')(hidden)
        prediction = Dense(1, activation='sigmoid')(hidden2)
        self.siamese_net = Model(inputs=[left_input, right_input], outputs=prediction)
        # Compile and prepare network
        self.siamese_net.compile(loss="binary_crossentropy",
                                 optimizer=Adadelta(self.learningRate),
                                 metrics=['accuracy'])
def emb_create_image_gan_merge(config):
    """Build generator, discriminator and combined GAN for image-conditioned
    caption-embedding generation.

    NOTE: Python 2 print statements and the Keras 1 API (merge, input=/output=,
    dropout_W/dropout_U, consume_less). The image vector is merged with noise,
    repeated over the sequence length and decoded by an LSTM into embedding
    sequences; the discriminator scores (image, embedding-sequence) pairs.

    Returns:
        (g_model, d_model, gan_model), each compiled.
    """
    print "Generating image gan MERGE"
    gan_image_input = Input(shape=(config[Conf.IMAGE_DIM], ), name="gan_model_image_input")

    # Generator: [image, noise] -> embedding sequence
    g_lstm_noise_input = Input(shape=(config[Conf.NOISE_SIZE], ), name="g_model_lstm_noise_input")
    g_merge = merge([gan_image_input, g_lstm_noise_input], mode='concat')
    g_lstm_input = RepeatVector(config[Conf.MAX_SEQ_LENGTH])(g_merge)
    g_tensor = LSTM(500, return_sequences=True, consume_less='gpu')(g_lstm_input)
    g_tensor = TimeDistributed(Dense(config[Conf.EMBEDDING_SIZE], activation='tanh'))(g_tensor)
    g_model = Model(input=[gan_image_input, g_lstm_noise_input], output=g_tensor)

    # Discriminator: [image, embedding sequence] -> real/fake probability
    d_lstm_input = Input(shape=(config[Conf.MAX_SEQ_LENGTH], config[Conf.EMBEDDING_SIZE]), name="d_model_lstm_input")
    d_lstm_out = LSTM(100, dropout_W=0.25, dropout_U=0.25, consume_less='gpu')(d_lstm_input)
    # img_input = Input(shape=(config[Conf.IMAGE_DIM],), name="d_model_img_input")
    d_tensor = merge([gan_image_input, d_lstm_out], mode='concat')
    d_tensor = Dropout(0.25)(d_tensor)
    d_tensor = Dense(1, activation='sigmoid')(d_tensor)
    d_model = Model(input=[gan_image_input, d_lstm_input], output=d_tensor, name="d_model")

    # GAN: generator output fed straight into the discriminator
    gan_tensor = d_model([gan_image_input, g_tensor])
    gan_model = Model(input=[gan_image_input, g_lstm_noise_input], output=gan_tensor)

    g_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
    # NOTE(review): discriminator uses SGD while generator/GAN use Adam --
    # presumably deliberate (slows the discriminator); confirm.
    d_model.compile(loss='binary_crossentropy', optimizer="sgd", metrics=['accuracy'])
    gan_model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])

    # from keras.utils.visualize_util import plot
    # plot(g_model, to_file="g_model.png", show_shapes=True)
    # plot(d_model, to_file="d_model.png", show_shapes=True)
    # plot(gan_model, to_file="gan_model.png", show_shapes=True)
    return g_model, d_model, gan_model
class MinimalKerasCNN(KerasModel):
    """Smallest possible Keras model: one convolution layer, compiled per output_type."""

    def load(self, kwargs):
        """
        Parameters
        ----------
        depth : int, optional
            Specified the layers deep the proposed U-Net should go.
            Layer depth is symmetric on both upsampling and downsampling arms.
        max_filter: int, optional
            Specifies the number of filters at the bottom level of the U-Net.

        """
        # NOTE(review): the docstring mentions depth/max_filter but only
        # 'filter_size' is read here -- likely copied from a U-Net class.
        super(MinimalKerasCNN, self).load(kwargs)
        add_parameter(self, kwargs, 'filter_size', 1)

    def build_model(self):
        """Create the single-convolution model and compile it for the configured output type.

        Returns the raw output layer when self.input_tensor is set (the model
        is then embedded in a larger graph); otherwise stores and returns the
        compiled Model for 'regression', 'binary_label' or 'categorical_label'.
        """
        output_layer = DnConv(self.inputs, 1, self.kernel_size,
                              stride_size=(1, ) * self.dim,
                              dim=self.dim,
                              name='minimal_conv',
                              backend='keras')

        # TODO: Brainstorm better way to specify outputs
        if self.input_tensor is not None:
            return output_layer

        if self.output_type == 'regression':
            self.model = Model(inputs=self.inputs, outputs=output_layer)
            self.model.compile(optimizer=Nadam(lr=self.initial_learning_rate),
                               loss='mean_squared_error',
                               metrics=['mean_squared_error'])

        if self.output_type == 'binary_label':
            act = Activation('sigmoid')(output_layer)
            self.model = Model(inputs=self.inputs, outputs=act)
            self.model.compile(optimizer=Nadam(lr=self.initial_learning_rate),
                               loss=dice_coef_loss,
                               metrics=[dice_coef])

        if self.output_type == 'categorical_label':
            act = Activation('softmax')(output_layer)
            self.model = Model(inputs=self.inputs, outputs=act)
            self.model.compile(optimizer=Nadam(lr=self.initial_learning_rate),
                               loss='categorical_crossentropy',
                               metrics=['categorical_accuracy'])

        super(MinimalKerasCNN, self).build()

        return self.model
init='uniform', border_mode='same', dim_ordering='tf')(segmentation) segmentation = Reshape((img_rows_segment, img_cols_segment))(segmentation) model_segment = Model(input=images_segment, output=segmentation) model_segment.summary() print('') print('model init time: {}'.format(time.time() - start_time)) start_time = time.time() model_segment.compile(optimizer='rmsprop', loss=binaryCE, metrics=[dice_coeff]) print('model compile time: {}'.format(time.time() - start_time)) print('') ############################################################################# # TRAINING batch_size = 48 nb_epoch = 100 # Model saving callback checkpointer = ModelCheckpoint(filepath=WEIGHTS_SEGMENT_FILEPATH, verbose=1, save_best_only=True)
def create_model(self, embedding_dimensions, lstm_dimensions, dense_dimensions, optimizer,
                 embeddings=None, embeddings_trainable=True):
    """
    creates the neural network model, optionally using precomputed embeddings applied to the training data

    :return: None -- the compiled models are stored on self
        (model_state, model_action, model_dot_state_action, model).
    """
    num_words = len(self.tokenizer.word_index)
    logger.info('Creating a model based on %s unique tokens.', num_words)

    # create the shared embedding layer (with or without pre-trained weights)
    embedding_shared = None

    if embeddings is None:
        embedding_shared = Embedding(num_words + 1, embedding_dimensions, input_length=None,
                                     mask_zero=True, trainable=embeddings_trainable,
                                     name="embedding_shared")
    else:
        logger.info('Importing pre-trained word embeddings.')
        embeddings_index = load_embeddings(embeddings)

        # indices in word_index start with a 1, 0 is reserved for masking padded value
        embedding_matrix = np.zeros((num_words + 1, embedding_dimensions))
        for word, i in self.tokenizer.word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # words not found in embedding index will be all-zeros.
                embedding_matrix[i] = embedding_vector
            else:
                logger.warning('Word not found in embeddings: %s', word)

        embedding_shared = Embedding(num_words + 1, embedding_dimensions, input_length=None,
                                     mask_zero=True, trainable=embeddings_trainable,
                                     weights=[embedding_matrix], name="embedding_shared")

    # state and action token inputs share the embedding and the LSTM encoder
    input_state = Input(batch_shape=(None, None), name="input_state")
    input_action = Input(batch_shape=(None, None), name="input_action")

    embedding_state = embedding_shared(input_state)
    embedding_action = embedding_shared(input_action)

    lstm_shared = LSTM(lstm_dimensions, name="lstm_shared")
    lstm_state = lstm_shared(embedding_state)
    lstm_action = lstm_shared(embedding_action)

    # separate tanh projections for the state and action embeddings
    dense_state = Dense(dense_dimensions, activation='tanh', name="dense_state")(lstm_state)
    dense_action = Dense(dense_dimensions, activation='tanh', name="dense_action")(lstm_action)

    model_state = Model(inputs=input_state, outputs=dense_state, name="state")
    model_action = Model(inputs=input_action, outputs=dense_action, name="action")

    self.model_state = model_state
    self.model_action = model_action

    # similarity head: normalized dot product (cosine) of the two projections
    input_dot_state = Input(shape=(dense_dimensions,))
    input_dot_action = Input(shape=(dense_dimensions,))
    dot_state_action = Dot(axes=-1, normalize=True,
                           name="dot_state_action")([input_dot_state, input_dot_action])

    model_dot_state_action = Model(inputs=[input_dot_state, input_dot_action],
                                   outputs=dot_state_action, name="dot_state_action")
    self.model_dot_state_action = model_dot_state_action

    # end-to-end model: (state tokens, action tokens) -> similarity, trained with MSE
    model = Model(inputs=[model_state.input, model_action.input],
                  outputs=model_dot_state_action([model_state.output, model_action.output]),
                  name="model")
    model.compile(optimizer=optimizer, loss='mse')
    self.model = model

    print('---------------')
    print('Complete model:')
    model.summary()
    print('---------------')
def train_label_none_label_classification(label_folder, non_label_folder, model_file=None):
    """Train (or resume) a binary label/non-label CNN classifier on 28x28x3 crops.

    Builds a fresh 3-layer CNN when model_file is None, otherwise resumes from
    the saved model. Checkpoints the best weights by validation accuracy during
    training and saves the final model as 'final_model.h5'.
    """
    c = Config()

    # Build or load model
    if model_file is None:
        # create model
        img_input = Input(shape=(28, 28, 3))
        # prediction = model_cnn_2_layer.nn_classify_label_non_label(img_input)
        # prediction = model_cnn_3_layer.nn_classify_label_non_label(img_input)
        prediction = nn_cnn_3_layer.nn_classify_label_non_label(img_input)
        model = Model(inputs=img_input, outputs=prediction)
        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(),
                      metrics=['accuracy'])
    else:
        model = load_model(model_file)
    model.summary()

    # Load and normalize data
    x_train, y_train, x_test, y_test = load_train_validation_data(label_folder, non_label_folder)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    # Subtract the per-channel means, then scale by 255.
    # NOTE(review): means are subtracted before the /255 scaling -- presumably
    # they are expressed on the 0-255 scale; confirm against Config.
    x_train[:, :, :, 0] -= c.img_channel_mean[0]
    x_train[:, :, :, 1] -= c.img_channel_mean[1]
    x_train[:, :, :, 2] -= c.img_channel_mean[2]
    x_test[:, :, :, 0] -= c.img_channel_mean[0]
    x_test[:, :, :, 1] -= c.img_channel_mean[1]
    x_test[:, :, :, 2] -= c.img_channel_mean[2]
    x_train /= 255
    x_test /= 255
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')
    # x_train.reshape(x_train.shape[0], 28, 28, 3)
    # x_test.reshape(x_test.shape[0], 28, 28, 3)

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, 2)
    y_test = keras.utils.to_categorical(y_test, 2)

    # Checkpointing is to save the network weights only when there is an improvement
    # in classification accuracy on the validation dataset (monitor='val_acc' and mode='max').
    file_path = "weights-improvement-{epoch:04d}-{val_acc:.4f}.hdf5"
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    model.fit(x_train, y_train,
              batch_size=128,
              epochs=100,
              verbose=1,
              callbacks=callbacks_list,
              validation_data=(x_test, y_test)
              )
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    model.save('final_model.h5')
def unet_model_3d(input_shape, pool_size=(2, 2, 2), n_labels=1, initial_learning_rate=0.00001, deconvolution=False,
                  depth=4, n_base_filters=32, include_label_wise_dice_coefficients=False, metrics=dice_coefficient,
                  batch_normalization=False, activation_name="sigmoid"):
    """
    Builds the 3D UNet Keras model.

    :param metrics: List of metrics to be calculated during model training (default is dice coefficient).
    :param include_label_wise_dice_coefficients: If True and n_labels is greater than 1, model will report the dice
    coefficient for each label as metric.
    :param n_base_filters: The number of filters that the first layer in the convolution network will have. Following
    layers will contain a multiple of this number. Lowering this number will likely reduce the amount of memory required
    to train the model.
    :param depth: indicates the depth of the U-shape for the model. The greater the depth, the more max pooling
    layers will be added to the model. Lowering the depth may reduce the amount of memory required for training.
    :param input_shape: Shape of the input data (n_chanels, x_size, y_size, z_size). The x, y, and z sizes must be
    divisible by the pool size to the power of the depth of the UNet, that is pool_size^depth.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :param deconvolution: If set to True, will use transpose convolution(deconvolution) instead of up-sampling. This
    increases the amount memory required during training.
    :return: Untrained 3D UNet Model
    """
    inputs = Input(input_shape)
    current_layer = inputs
    levels = list()

    # Contracting path: at each depth two conv blocks, doubling the filter count,
    # followed by max pooling (except at the bottom of the U).
    for layer_depth in range(depth):
        layer1 = create_convolution_block(input_layer=current_layer,
                                          n_filters=n_base_filters * (2**layer_depth),
                                          batch_normalization=batch_normalization)
        layer2 = create_convolution_block(input_layer=layer1,
                                          n_filters=n_base_filters * (2**layer_depth) * 2,
                                          batch_normalization=batch_normalization)
        if layer_depth < depth - 1:
            current_layer = MaxPooling3D(pool_size=pool_size)(layer2)
            # keep the pre-pool activations so the expanding path can concatenate them
            levels.append([layer1, layer2, current_layer])
        else:
            current_layer = layer2
            levels.append([layer1, layer2])

    # Expanding path: upsample (or transpose-convolve), concatenate with the
    # matching contracting-path features, then two conv blocks.
    # NOTE: axis=1 and `_keras_shape[1]` imply channels-first data layout;
    # `_keras_shape` is a Keras 1.x/early-2.x internal attribute.
    for layer_depth in range(depth - 2, -1, -1):
        up_convolution = get_up_convolution(pool_size=pool_size, deconvolution=deconvolution,
                                            n_filters=current_layer._keras_shape[1])(current_layer)
        concat = concatenate([up_convolution, levels[layer_depth][1]], axis=1)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=concat,
                                                 batch_normalization=batch_normalization)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=current_layer,
                                                 batch_normalization=batch_normalization)

    # 1x1x1 convolution maps to n_labels channels, then the output activation.
    final_convolution = Conv3D(n_labels, (1, 1, 1))(current_layer)
    act = Activation(activation_name)(final_convolution)
    model = Model(inputs=inputs, outputs=act)

    if not isinstance(metrics, list):
        metrics = [metrics]

    # Optionally report a dice coefficient per label in addition to the overall metric.
    if include_label_wise_dice_coefficients and n_labels > 1:
        label_wise_dice_metrics = [get_label_dice_coefficient_function(index) for index in range(n_labels)]
        if metrics:
            metrics = metrics + label_wise_dice_metrics
        else:
            metrics = label_wise_dice_metrics

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coefficient_loss, metrics=metrics)
    return model
embedded_sequences = embedding_layer(sequence_input) cnns = [] for filter_length in filter_lengths: x = Conv1D(nb_filter=nb_filter, filter_length=filter_length, border_mode='valid', activation='relu', W_constraint=maxnorm(3), W_regularizer=l2(0.0001), subsample_length=1)(embedded_sequences) x = MaxPooling1D(pool_length=MAX_SEQUENCE_LENGTH - filter_length + 1)(x) x = Flatten()(x) cnns.append(x) x = merge(cnns, mode='concat') x = Dropout(0.2)(x) x = Dense(128, activation='relu')(x) preds = Dense(len(labels_index), activation='softmax')(x) model = Model(sequence_input, preds) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) # happy learning! model.fit(x_train, y_train, validation_data=(x_val, y_val), nb_epoch=5, batch_size=128)
def unet_model_3d(input_shape, downsize_filters_factor=1, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.00001, deconvolution=False):
    """
    Builds the 3D UNet Keras model.

    ## ORIGINAL: :param input_shape: Shape of the input data (n_chanels, x_size, y_size, z_size).
    ## NOW: :param input_shape: Shape of the input data (x_size, y_size, z_size, n_chanels)
    :param downsize_filters_factor: Factor to which to reduce the number of filters. Making this value larger will
    reduce the amount of memory the model will need during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :param deconvolution: If set to True, will use transpose convolution(deconvolution) instead of upsamping. This
    increases the amount memory required during training.
    :return: Untrained 3D UNet Model
    """
    # NOTE(review): the deeper encoder/decoder levels (conv3..conv6) are disabled
    # via the triple-quoted string blocks below, leaving a shallow 2-level U-Net.
    inputs = Input(input_shape)
    conv1 = Conv3D(int(32/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv3D(int(64/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)

    # NOTE(review): these use (1, 1, 1) kernels while every other level uses
    # (3, 3, 3) -- looks like it may be unintentional; confirm before changing.
    conv2 = Conv3D(int(64/downsize_filters_factor), (1, 1, 1), activation='relu', padding='same')(pool1)
    conv2 = Conv3D(int(128/downsize_filters_factor), (1, 1, 1), activation='relu', padding='same')(conv2)
    """
    pool2 = MaxPooling3D(pool_size=pool_size)(conv2)
    conv3 = Conv3D(int(128/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv3D(int(256/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling3D(pool_size=pool_size)(conv3)
    conv4 = Conv3D(int(256/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv3D(int(512/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv4)
    """
    """
    up5 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=2,
                     nb_filters=int(512/downsize_filters_factor), image_shape=input_shape[1:4])(conv4)
    up5 = concatenate([up5, conv3], axis=-1)
    conv5 = Conv3D(int(256/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up5)
    conv5 = Conv3D(int(256/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv5)
    up6 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=1,
                     nb_filters=int(256/downsize_filters_factor), image_shape=input_shape[1:4])(conv5)
    up6 = concatenate([up6, conv2], axis=-1)
    conv6 = Conv3D(int(128/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv3D(int(128/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv6)
    """
    # Expanding path: upsample conv2 back to the input resolution and fuse with conv1.
    # axis=-1 concatenation matches the channels-last shape documented above.
    up7 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=0,
                     nb_filters=int(128/downsize_filters_factor), image_shape=input_shape[1:4])(conv2)
    up7 = concatenate([up7, conv1], axis=-1)
    conv7 = Conv3D(int(64/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv3D(int(64/downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv7)

    conv8 = Conv3D(n_labels, (1, 1, 1))(conv7)
    # Should be softmax?
    act = Activation('softmax')(conv8)
    model = Model(inputs=inputs, outputs=act)

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])
    model.summary()
    return model
def rpn_initialize(options):
    """Load the saved config plus trained weights and build the RPN + classifier models for testing.

    :param options: parsed command-line options; must provide ``config_filename``,
        ``num_rois``, ``rpn_weight_path`` and ``classify_model_path``.
    :return: tuple ``(c, model_rpn, model_classifier)`` where ``c`` is the
        unpickled training config, ``model_rpn`` is the region-proposal network
        with weights loaded, and ``model_classifier`` is the label classifier.
    """
    # NOTE(review): unpickling executes arbitrary code -- only load trusted config files.
    with open(options.config_filename, 'rb') as f_in:
        c = pickle.load(f_in)

    import nn_cnn_3_layer as nn

    # turn off any data augmentation at test time
    c.use_horizontal_flips = False
    c.use_vertical_flips = False
    c.rot_90 = False

    # Ensure a background class exists, then invert the mapping to id -> name.
    class_mapping = c.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)

    c.num_rois = int(options.num_rois)

    # Define the shared base network and the RPN head built on top of it.
    img_input = Input(shape=(None, None, 3))
    shared_layers = nn.nn_base(img_input, trainable=True)
    num_anchors = len(c.anchor_box_scales) * len(c.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    model_rpn = Model(img_input, rpn_layers)

    print('Loading weights from {}'.format(options.rpn_weight_path))
    model_rpn.load_weights(options.rpn_weight_path, by_name=True)
    # Loss/optimizer are placeholders -- the model is only used for prediction here.
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_rpn.summary()

    # The classifier is a complete saved model, loaded as-is.
    model_classifier = load_model(options.classify_model_path)
    model_classifier.summary()

    return c, model_rpn, model_classifier
def train_rpn(model_file=None):
    """Train the region-proposal network (RPN) on the label/panel dataset.

    Parses its own command-line options (paths, epochs, RoI count), builds the
    RPN on top of the shared base net, and runs a manual epoch loop with
    `train_on_batch`, saving weights whenever the epoch loss improves.

    :param model_file: unused here; weights are taken from --input_weight_path.
    """
    parser = OptionParser()
    parser.add_option("--train_path", dest="train_path", help="Path to training data.",
                      default='/Users/jie/projects/PanelSeg/ExpPython/train.txt')
    parser.add_option("--val_path", dest="val_path", help="Path to validation data.",
                      default='/Users/jie/projects/PanelSeg/ExpPython/eval.txt')
    parser.add_option("--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=32)
    parser.add_option("--network", dest="network", help="Base network to use. Supports nn_cnn_3_layer.",
                      default='nn_cnn_3_layer')
    parser.add_option("--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
    parser.add_option("--output_weight_path", dest="output_weight_path", help="Output path for weights.",
                      default='./model_rpn.hdf5')
    parser.add_option("--input_weight_path", dest="input_weight_path",
                      default='/Users/jie/projects/PanelSeg/ExpPython/models/label+bg_rpn_3_layer_color-0.135.hdf5')
    (options, args) = parser.parse_args()

    # set configuration
    c = Config.Config()
    c.model_path = options.output_weight_path
    c.num_rois = int(options.num_rois)
    import nn_cnn_3_layer as nn
    c.base_net_weights = options.input_weight_path

    # Class counts are merged from train+val so the config records the full set.
    val_imgs, val_classes_count = get_label_rpn_data(options.val_path)
    train_imgs, train_classes_count = get_label_rpn_data(options.train_path)
    classes_count = {k: train_classes_count.get(k, 0) + val_classes_count.get(k, 0)
                     for k in set(train_classes_count) | set(val_classes_count)}
    class_mapping = LABEL_MAPPING

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
    c.class_mapping = class_mapping
    inv_map = {v: k for k, v in class_mapping.items()}  # NOTE(review): unused

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    # Persist the config so test-time code can reproduce the training setup.
    config_output_filename = 'config.pickle'
    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(c, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            config_output_filename))

    random.shuffle(train_imgs)
    random.shuffle(val_imgs)

    num_imgs = len(train_imgs) + len(val_imgs)  # NOTE(review): unused
    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = label_rcnn_data_generators.get_anchor_gt(
        train_imgs, classes_count, c, nn.nn_get_img_output_length, mode='train')
    data_gen_val = label_rcnn_data_generators.get_anchor_gt(
        val_imgs, classes_count, c, nn.nn_get_img_output_length, mode='val')  # NOTE(review): never consumed

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers; rpn[:2] = (cls output, regr output)
    num_anchors = len(c.anchor_box_scales) * len(c.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    model_rpn = Model(img_input, rpn[:2])
    # NOTE(review): the detector/classifier branch was commented out in the
    # original; only the RPN is trained here.

    print('loading weights from {}'.format(c.base_net_weights))
    model_rpn.load_weights(c.base_net_weights, by_name=True)
    model_rpn.summary()

    optimizer = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[nn.rpn_loss_cls(num_anchors), nn.rpn_loss_regr(num_anchors)])

    epoch_length = 1000
    num_epochs = int(options.num_epochs)
    iter_num = 0

    # losses columns: 0=rpn_cls, 1=rpn_regr; columns 2-4 were for the (disabled) detector
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    class_mapping_inv = {v: k for k, v in class_mapping.items()}  # NOTE(review): unused
    print('Starting training')
    vis = True  # NOTE(review): unused

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:
                # Periodic health check: how many RPN proposals overlap ground truth.
                if len(rpn_accuracy_rpn_monitor) == epoch_length and c.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                        mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)
                R = label_rcnn_roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], c, K.image_dim_ordering(),
                                                      use_regr=True, overlap_thresh=0.7, max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = label_rcnn_roi_helpers.calc_iou(R, img_data, c, class_mapping)

                if X2 is None:
                    # No usable RoIs for this image -- record zero overlap and move on.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # Last column of Y1 is the background indicator: 1 = negative sample.
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                # sel_samples is only consumed by the disabled classifier branch below.
                if c.num_rois > 1:
                    # Aim for a half-positive / half-negative RoI batch.
                    if len(pos_samples) < c.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, c.num_rois // 2, replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, c.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        # Fewer negatives than needed -- fall back to sampling with replacement.
                        selected_neg_samples = np.random.choice(neg_samples, c.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                iter_num += 1

                progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])),
                                          ('rpn_regr', np.mean(losses[:iter_num, 1]))])

                if iter_num == epoch_length:
                    # End of epoch: report means, reset counters, save if improved.
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if c.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if c.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_rpn.save_weights(c.model_path)

                    break

            except Exception as e:
                # NOTE(review): broad catch keeps training alive on any per-batch
                # failure, but it can also hide real bugs -- consider logging a traceback.
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
def complex_model2(input_length, output_levels, stride, receptive_field, nb_filters_, loading=False, path=""): fnn_init = 'he_uniform' def residual_block(input_): original = input_ tanh_ = AtrousConvolution1D( nb_filter=nb_filters_, filter_length=2, atrous_rate=2**i, init=fnn_init, border_mode='valid', bias=False, causal=True, activation='tanh', name='AtrousConv1D_%d_tanh' % (2**i) )(input_) sigmoid_ = AtrousConvolution1D( nb_filter=nb_filters_, filter_length=2, atrous_rate=2**i, init=fnn_init, border_mode='valid', bias=False, causal=True, activation='sigmoid', name='AtrousConv1D_%d_sigm' % (2**i) )(input_) input_ = Merge(mode='mul')([tanh_, sigmoid_]) res_x = Convolution1D(nb_filter=nb_filters_, filter_length=1, border_mode='same', bias=False)(input_) skip_c = res_x res_x = Merge(mode='sum')([original, res_x]) return res_x, skip_c input = Input(shape=(input_length, output_levels), name='input_part') skip_connections = [] output = input output = AtrousConvolution1D( nb_filter=nb_filters_, filter_length=2, atrous_rate=1, init=fnn_init, activation='relu', border_mode='valid', causal=True, name='initial_AtrousConv1D' )(output) for i in range( int(np.log2( receptive_field ) ) ): output, skip_c = residual_block(output) skip_connections.append(skip_c) out = Merge(mode='sum')(skip_connections) for _ in range(2): out = Activation('relu')(out) out = Convolution1D(output_levels, 1, activation=None, border_mode='same')(out) out = Activation('softmax', name='output_softmax')(out) #out = Reshape((dim1, nb_filters_*dim2))(out) output = TimeDistributed(Dense(output_dim=output_levels, init=fnn_init, activation='softmax'))(out) m = Model(input, output) if loading: m.load_weights(path) print "Weights loaded!" #ADAM = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-05, decay=0.0) m.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) return m
# Learning rate reduction # When we see that the validation loss stopped improving, so that we can start # learning faster and then get more precision rp_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.000001, verbose=1) callbacks.append(rp_callback) # In[ ]: model.compile(optimizer=optimizer, loss=loss, metrics=metrics) model.fit(x=train_dataset, epochs=50, steps_per_epoch=len(train_gen), validation_data=valid_dataset, validation_steps=len(valid_gen), callbacks=callbacks) # In[ ]: # Save the model now = datetime.now().strftime('%b%d_%H-%M-%S') print(str(now)) model_name = os.path.join(models_dir, str(now)) classification_name = str(prediction_dir) + '/' +str(now) model.save(model_name)
def unormalise(x): # outputs in range [0, 1] resized to range [-100, 100] return (x * 200) - 100 last = Lambda(resize_image)(last) last = Lambda(unormalise)(last) def custom_mse(y_true, y_pred): return K.mean(K.square(y_pred - y_true), axis=[1, 2, 3]) model = Model(inputs=[main_input, vgg16.input], output=last) opt = optimizers.Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08) model.compile(optimizer=opt, loss=custom_mse) model.summary() start_from = 75 save_every_n_epoch = 5 n_epochs = 10000 model.load_weights("../weights/implementation7d-relu-70.h5") # start image downloader # ip = ImagePacker("../small_dataset", "../h5_data", "imp7d-relu-", num_images=1024, num_files=None) # ip.start() ip = None g = h5_small_vgg_generator(b_size, "../h5_data", ip) gval = h5_small_vgg_generator(b_size, "../h5_validate", None)
def main():
    """Command-line entry point: parse options, build or load the chorale
    generation models, then run Gibbs sampling against a target chorale and
    display both the target and the generated score.
    """
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--timesteps',
                        help="model's range (default: %(default)s)",
                        type=int, default=16)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size used during training phase (default: %(default)s)',
                        type=int, default=128)
    parser.add_argument('-s', '--samples_per_epoch',
                        help='number of samples per epoch (default: %(default)s)',
                        type=int, default=12800 * 7)
    parser.add_argument('--num_val_samples',
                        help='number of validation samples (default: %(default)s)',
                        type=int, default=1280)
    parser.add_argument('-u', '--num_units_lstm', nargs='+',
                        help='number of lstm units (default: %(default)s)',
                        type=int, default=[200, 200])
    parser.add_argument('-d', '--num_dense',
                        help='size of non recurrent hidden layers (default: %(default)s)',
                        type=int, default=200)
    parser.add_argument('-n', '--name',
                        help='model name (default: %(default)s)',
                        choices=['deepbach', 'skip', 'norelu'],
                        type=str, default='skip')
    parser.add_argument('-i', '--num_iterations',
                        help='number of gibbs iterations (default: %(default)s)',
                        type=int, default=20000)
    parser.add_argument('-t', '--train', nargs='?',
                        help='train models for N epochs (default: 15)',
                        default=0, const=15, type=int)
    parser.add_argument('-p', '--parallel', nargs='?',
                        help='number of parallel updates (default: 16)',
                        type=int, const=16, default=1)
    parser.add_argument('--overwrite',
                        help='overwrite previously computed models',
                        action='store_true')
    parser.add_argument('-m', '--midi_file', nargs='?',
                        help='relative path to midi file',
                        type=str, const='datasets/god_save_the_queen.mid')
    parser.add_argument('-l', '--length',
                        help='length of unconstrained generation',
                        type=int, default=160)
    parser.add_argument('--ext',
                        help='extension of model name',
                        type=str, default='')
    parser.add_argument('-o', '--output_file', nargs='?',
                        help='path to output file',
                        type=str, default='', const='generated_examples/example.mid')
    parser.add_argument('--dataset', nargs='?',
                        help='path to dataset folder',
                        type=str, default='')
    parser.add_argument('-r', '--reharmonization', nargs='?',
                        help='reharmonization of a melody from the corpus identified by its id',
                        type=int)
    args = parser.parse_args()
    print(args)

    if args.ext:
        ext = '_' + args.ext
    else:
        ext = ''
    dataset_path = None
    pickled_dataset = BACH_DATASET
    # metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas(), KeyMetadatas(window_size=1)]
    metadatas = [TickMetadatas(SUBDIVISION), FermataMetadatas()]
    timesteps = args.timesteps
    batch_size = args.batch_size_train
    samples_per_epoch = args.samples_per_epoch
    nb_val_samples = args.num_val_samples
    num_units_lstm = args.num_units_lstm
    model_name = args.name.lower() + ext
    sequence_length = args.length
    batch_size_per_voice = args.parallel
    # NOTE(review): num_units_lstm is assigned twice (duplicate of the line above).
    num_units_lstm = args.num_units_lstm
    num_dense = args.num_dense
    if args.output_file:
        output_file = args.output_file
    else:
        output_file = None
    parallel = batch_size_per_voice > 1
    train = args.train > 0
    num_epochs = args.train
    overwrite = args.overwrite

    # Create pickled dataset
    if not os.path.exists(pickled_dataset):
        initialization(dataset_path, metadatas=metadatas, voice_ids=[SOP_INDEX], BACH_DATASET=BACH_DATASET)

    # load dataset
    # NOTE(review): `metadatas` is rebound here by the unpickled value.
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))

    # dataset dependant variables
    NUM_VOICES = len(voice_ids)
    num_voices = NUM_VOICES
    num_pitches = list(map(len, index2notes))
    num_iterations = args.num_iterations // batch_size_per_voice // num_voices

    # Create, train load models
    if not os.path.exists('models/' + model_name + '_' + str(NUM_VOICES - 1) + '.yaml'):
        create_models(model_name, create_new=overwrite, num_units_lstm=num_units_lstm, num_dense=num_dense,
                      pickled_dataset=pickled_dataset, num_voices=num_voices, metadatas=metadatas,
                      timesteps=timesteps)
    if train:
        models = train_models(model_name=model_name,
                              samples_per_epoch=samples_per_epoch,
                              num_epochs=num_epochs,
                              nb_val_samples=nb_val_samples,
                              timesteps=timesteps,
                              pickled_dataset=pickled_dataset,
                              num_voices=NUM_VOICES,
                              metadatas=metadatas,
                              batch_size=batch_size)
    else:
        models = load_models(model_name, num_voices=NUM_VOICES)
    # todo to remove
    # model_name = 'skip_large'
    # timesteps = 32
    #
    # test_autoencoder(model_name='models/' + model_name + '_0',
    #                  timesteps=timesteps,
    #                  pickled_dataset=pickled_dataset)

    # The hidden-representation model exposes the second layer of the seq2seq
    # distance model (legacy Keras `input=`/`output=` kwargs).
    distance_model = load_model('models/seq2seq_masking')
    # distance_model.compile(optimizer='adam', loss='categorical_crossentropy',
    #                        metrics=['accuracy'])
    hidden_repr_model = Model(input=distance_model.input, output=distance_model.layers[1].output)
    hidden_repr_model.compile(optimizer='adam', loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # create target: features/metadata for chorale 21 around beat 16*4.
    left_features, _, _, _ = all_features(np.transpose(X[21], axes=(1, 0)), voice_index=0, time_index=16 * 4,
                                          timesteps=32, num_pitches=num_pitches, num_voices=num_voices)
    left_metas, central_metas, _ = all_metadatas(X_metadatas[21], time_index=16 * 4, timesteps=32,
                                                 metadatas=metadatas)

    inputs_target_chorale = {
        'left_features': np.array([left_features]),
        'left_metas': np.array([left_metas]),
        'central_metas': np.array([central_metas])
    }

    # show target
    score = indexed_chorale_to_score(X[21][:, 16 * 4 - 32:16 * 4], pickled_dataset=pickled_dataset)
    score.show()

    generated_chorale = gibbs(generation_models=models,
                              hidden_repr_model=hidden_repr_model,
                              inputs_target_chorale=inputs_target_chorale,
                              chorale_metas=X_metadatas[12][:150],
                              num_iterations=200,
                              pickled_dataset=pickled_dataset,
                              timesteps=timesteps)

    # convert the generated index matrix back into a displayable score
    score = indexed_chorale_to_score(np.transpose(generated_chorale, axes=(1, 0)),
                                     pickled_dataset=pickled_dataset)
    score.show()
def test_autoencoder(model_name, timesteps, pickled_dataset=BACH_DATASET):
    """Load a saved model, take one training batch as a query, find its nearest
    chorale fragments in hidden-representation space, and display both the
    query and the concatenated nearest-neighbour result as scores.

    :param model_name: path of the saved Keras model to load
    :param timesteps: sequence length used by the feature generators
    :param pickled_dataset: pickle file with the chorale dataset
    """
    voice_index = 0
    # NOTE(review): num_epochs, samples_per_epoch and nb_val_samples are set but
    # never used in this function.
    num_epochs = 200
    samples_per_epoch = 1024 * 100
    batch_size = 64
    nb_val_samples = 1024
    X, X_metadatas, voice_ids, index2notes, note2indexes, metadatas = pickle.load(
        open(pickled_dataset, 'rb'))
    # sequences
    num_voices = 1
    num_pitches = list(map(len, index2notes))

    # Wrap the raw-dataset generator into the dict format the model expects.
    generator_train = (({'left_features': left_features,
                         'central_features': central_features,
                         'right_features': right_features,
                         'left_metas': left_metas,
                         'right_metas': right_metas,
                         'central_metas': central_metas,
                         },
                        {'pitch_prediction': labels})
                       for ((left_features, central_features, right_features),
                            (left_metas, central_metas, right_metas),
                            labels)
                       in generator_from_raw_dataset(batch_size=batch_size,
                                                     timesteps=timesteps,
                                                     voice_index=voice_index,
                                                     phase='train',
                                                     pickled_dataset=pickled_dataset))

    # Same wrapping, but one example at a time over the whole dataset
    # (used to scan for nearest neighbours).
    generator_unitary = (({'left_features': left_features,
                           'central_features': central_features,
                           'right_features': right_features,
                           'left_metas': left_metas,
                           'right_metas': right_metas,
                           'central_metas': central_metas,
                           },
                          {'pitch_prediction': labels})
                         for ((left_features, central_features, right_features),
                              (left_metas, central_metas, right_metas),
                              labels)
                         in generator_from_raw_dataset(batch_size=1,
                                                       timesteps=timesteps,
                                                       voice_index=voice_index,
                                                       phase='all',
                                                       pickled_dataset=pickled_dataset))

    inputs, outputs = next(generator_train)
    model = load_model(model_name)
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Expose the last layer's activations as the hidden representation
    # (legacy Keras `input=`/`output=` kwargs).
    hidden_repr_model = Model(input=model.input, output=model.layers[-1].output)
    hidden_repr_model.compile(optimizer='adam', loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # create score target: show the first query fragment as sheet music
    chorale_seq = chorale_onehot_to_indexed_chorale(
        onehot_chorale=inputs['left_features'][0],
        num_pitches=num_pitches,
        time_major=False)
    score = indexed_chorale_to_score(chorale_seq, pickled_dataset=pickled_dataset)
    score.show()

    nearest_chorale_inputs, intermediate_results = find_nearest(inputs,
                                                                hidden_repr_model,
                                                                generator_unitary,
                                                                num_elements=20000)

    # concat all results into one long chorale fragment
    nearest_chorale = np.concatenate([np.array(nearest_chorale_inputs[0]['left_features'][0])
                                      for nearest_chorale_inputs in intermediate_results],
                                     axis=0)

    # create score nearest
    nearest_chorale_seq = chorale_onehot_to_indexed_chorale(
        onehot_chorale=nearest_chorale,
        num_pitches=num_pitches,
        time_major=False)
    score_nearest = indexed_chorale_to_score(nearest_chorale_seq,
                                             pickled_dataset=pickled_dataset)
    score_nearest.show()
def test_rpn():
    """Run the trained RPN plus a patch classifier over a list of test images.

    Parses command-line options (image list, pickled training config, model
    weight paths), rebuilds the RPN on the shared base network, proposes ROIs
    for every listed image, classifies a 28x28 crop of each ROI, and displays
    each image until a key is pressed.

    The long commented-out Fast-RCNN classifier/NMS path from the original
    version was removed as dead code (recover it from VCS history if needed).
    """
    parser = OptionParser()
    parser.add_option("-p", "--path", dest="test_path", help="Path to test data.",
                      default='/Users/jie/projects/PanelSeg/ExpRcnn/eval.txt')
    parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
                      help="Number of ROIs per iteration. Higher means more memory use.",
                      default=32)
    parser.add_option("--config_filename", dest="config_filename",
                      help="Location to read the metadata related to the training (generated when training).",
                      default="config.pickle")
    parser.add_option("--network", dest="network",
                      help="Base network to use. Supports nn_cnn_3_layer.",
                      default='nn_cnn_3_layer')
    parser.add_option("--rpn_weight_path", dest="rpn_weight_path",
                      default='/Users/jie/projects/PanelSeg/ExpRcnn/models/model_rpn_3_layer_color-0.0577.hdf5')
    parser.add_option("--classify_model_path", dest="classify_model_path",
                      default='/Users/jie/projects/PanelSeg/ExpRcnn/models/label50+bg_cnn_3_layer_color-0.9910.h5')
    (options, args) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error('Error: path to test data must be specified. Pass --path to command line')

    config_output_filename = options.config_filename
    with open(config_output_filename, 'rb') as f_in:
        c = pickle.load(f_in)

    import nn_cnn_3_layer as nn

    # turn off any data augmentation at test time
    c.use_horizontal_flips = False
    c.use_vertical_flips = False
    c.rot_90 = False

    img_list_path = options.test_path

    def format_img_size(img, C):
        """Resize img so its shorter side equals C.im_size; return (img, ratio).

        Currently unused — format_img skips the resize — kept for parity with
        the original pipeline.
        """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape
        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """Mean-subtract, scale to [0,1] and reorder image to (1, C, H, W) float32."""
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= 255
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """Format an image for model prediction; resizing is disabled, so ratio is 1."""
        img = format_img_channels(img, C)
        return img, 1.0

    def get_real_coordinates(ratio, x1, y1, x2, y2):
        """Map box coordinates back to the original image size (currently unused).

        NOTE(review): floor division by ratio looks like it should be '/' —
        confirm before reusing this helper.
        """
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))
        return (real_x1, real_y1, real_x2, real_y2)

    # Invert the class mapping (id -> name) after adding a background class.
    class_mapping = c.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
    c.num_rois = int(options.num_rois)

    # Rebuild the RPN on top of the shared base network.
    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    shared_layers = nn.nn_base(img_input, trainable=True)
    num_anchors = len(c.anchor_box_scales) * len(c.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    model_rpn = Model(img_input, rpn_layers)

    # Fixed: this message used to print c.model_path even though the weights
    # are loaded from options.rpn_weight_path.
    print('Loading weights from {}'.format(options.rpn_weight_path))
    model_rpn.load_weights(options.rpn_weight_path, by_name=True)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_rpn.summary()

    model_classifier = load_model(options.classify_model_path)
    model_classifier.summary()

    with open(img_list_path) as f:
        lines = f.readlines()

    for file_idx, filepath in enumerate(lines):
        print(filepath)
        st = time.time()
        filepath = filepath.strip()
        figure = Figure(filepath)
        figure.load_image()
        img = figure.image

        X, ratio = format_img(img, c)
        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)
        R = label_rcnn_roi_helpers.rpn_to_roi(Y1, Y2, c, K.image_dim_ordering(),
                                              overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # Crop each proposed ROI and resize it to the classifier's 28x28 input.
        patches = np.empty([R.shape[0], 28, 28, 3], dtype=int)
        # roi_idx renamed from idx: it used to shadow the outer file-loop index.
        for roi_idx, roi in enumerate(R):
            x, y, w, h = roi[0], roi[1], roi[2], roi[3]
            patch = figure.image[y:y + h, x:x + w]
            patches[roi_idx] = cv2.resize(patch, (28, 28))

        patches = patches.astype('float32')
        patches[:, :, :, 0] -= c.img_channel_mean[0]
        patches[:, :, :, 1] -= c.img_channel_mean[1]
        patches[:, :, :, 2] -= c.img_channel_mean[2]
        patches /= 255
        prediction = model_classifier.predict(patches)

        print('Elapsed time = {}'.format(time.time() - st))
        cv2.imshow('img', img)
        cv2.waitKey(0)
def isensee2017_model(input_shape=(4, 128, 128, 128), n_base_filters=16, depth=5, dropout_rate=0.3,
                      n_segmentation_levels=3, n_labels=4, optimizer=Adam, initial_learning_rate=5e-4,
                      loss_function=weighted_dice_coefficient_loss, activation_name="sigmoid"):
    """Build the 3D segmentation network of Isensee et al. (BRATS 2017).

    Reference:
    https://www.cbica.upenn.edu/sbia/Spyridon.Bakas/MICCAI_BraTS/MICCAI_BraTS_2017_proceedings_shortPapers.pdf
    Closely related to Kayalibay et al., "CNN-based Segmentation of Medical
    Imaging Data", 2017: https://arxiv.org/pdf/1701.03056.pdf

    :param input_shape: shape of the input tensor (channels first)
    :param n_base_filters: filters at the first level; doubled per level
    :param depth: number of resolution levels in the contracting path
    :param dropout_rate: dropout used inside each context module
    :param n_segmentation_levels: number of deep-supervision outputs summed
    :param n_labels: number of output segmentation labels
    :param optimizer: optimizer class (instantiated with initial_learning_rate)
    :param initial_learning_rate: learning rate passed to the optimizer
    :param loss_function: training loss
    :param activation_name: final activation applied to the summed outputs
    :return: the compiled Keras model
    """
    inputs = Input(input_shape)

    # --- Contracting path: context modules with residual (additive) skips ---
    skip_connections = []
    filters_per_level = []
    current = inputs
    for level in range(depth):
        filters = n_base_filters * (2 ** level)
        filters_per_level.append(filters)
        # The first level keeps spatial resolution; deeper levels downsample
        # via strided convolution.
        if current is inputs:
            conv_in = create_convolution_block(current, filters)
        else:
            conv_in = create_convolution_block(current, filters, strides=(2, 2, 2))
        context = create_context_module(conv_in, filters, dropout_rate=dropout_rate)
        current = Add()([conv_in, context])
        skip_connections.append(current)

    # --- Expanding path: upsample, concatenate skip, localize ---
    segmentation_outputs = []
    for level in range(depth - 2, -1, -1):
        upsampled = create_up_sampling_module(current, filters_per_level[level])
        merged = concatenate([skip_connections[level], upsampled], axis=1)
        current = create_localization_module(merged, filters_per_level[level])
        # Collect a 1x1x1 segmentation head at the shallowest levels,
        # kept ordered from level 0 upward.
        if level < n_segmentation_levels:
            segmentation_outputs.insert(
                0, create_convolution_block(current, n_filters=n_labels, kernel=(1, 1, 1)))

    # --- Deep supervision: sum the segmentation heads, upsampling as we go ---
    output_layer = None
    for level in reversed(range(n_segmentation_levels)):
        seg = segmentation_outputs[level]
        output_layer = seg if output_layer is None else Add()([output_layer, seg])
        if level > 0:
            output_layer = UpSampling3D(size=(2, 2, 2))(output_layer)

    activation_block = Activation(activation_name)(output_layer)

    model = Model(inputs=inputs, outputs=activation_block)
    # One Dice metric per label for monitoring.
    metrics = [get_label_dice_coefficient_function(index) for index in range(n_labels)]
    model.compile(optimizer=optimizer(lr=initial_learning_rate),
                  loss=loss_function,
                  metrics=metrics)
    return model
def _add_model(self):
    """Assemble the full PHM answer-scoring graph and compile it.

    Builds sentence-level, word-level, sliding-window, dependency and n-gram
    scoring branches over the shared embedding nodes in ``self.nodes``,
    combines them into one per-answer score, and stores the compiled Keras
    model in ``self.graph``.
    """
    nodes = self.nodes
    config = self.model_config['PHM']
    p = config['dropout_p']        # dropout probability shared by the dropout layers below
    mlp_l2 = config['l2']          # L2 weight-decay strength for every MLP
    D = config['mlp_output_dim']   # common output dimensionality of the MLPs
    # Leaky ReLU with a configurable negative slope.
    activation = lambda x: relu(x, alpha=config['leaky_alpha'])

    # SENTENCE LEVEL
    # answer plus question: broadcast the question encoding over all answers
    # and add it to each answer encoding.
    nodes['question_encoding_repeated'] = RepeatVector(self.answer_size)(nodes['question_encoding'])
    nodes['answer_plus_question'] = merge([nodes['answer_encoding'],
                                           nodes['question_encoding_repeated']], mode='sum')

    # story mlp and dropout (also applied to every configured n-gram variant)
    ninputs, noutputs = ['story_encoding1'], ['story_encoding_mlp']
    for ngram in config['ngram_inputs']:
        ninputs.append('story_encoding1_%sgram' % ngram)
        noutputs.append('story_encoding_mlp_%sgram' % ngram)
    story_encoding_mlp = NTimeDistributed(
        Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2),
              trainable=config['trainable_story_encoding_mlp']))
    # NOTE: 'input'/'output' shadow builtins here; left untouched.
    for input, output in zip(ninputs, noutputs):
        nodes[output] = story_encoding_mlp(self._get_node(input))
    qa_encoding_mlp = NTimeDistributed(
        Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2),
              trainable=config['trainable_answer_plus_question_mlp']))
    nodes['answer_plus_question_mlp'] = qa_encoding_mlp(nodes['answer_plus_question'])
    nodes['story_encoding_mlp_dropout'] = Dropout(p)(nodes['story_encoding_mlp'])
    nodes['answer_plus_question_mlp_dropout'] = Dropout(p)(nodes['answer_plus_question_mlp'])

    # norm
    # NOTE(review): the same UnitNormalization instance is applied to both
    # branches — presumably intentional (stateless layer); confirm.
    unit_layer = UnitNormalization()
    nodes['story_encoding_mlp_dropout_norm'] = unit_layer(nodes['story_encoding_mlp_dropout'])
    nodes['answer_plus_question_norm'] = unit_layer(nodes['answer_plus_question_mlp_dropout'])

    # cosine: dot product of the two unit-normalized encodings.
    nodes['story_dot_answer'] = merge([nodes['story_encoding_mlp_dropout_norm'],
                                       nodes['answer_plus_question_norm']],
                                      mode='dot', dot_axes=[2, 2])

    # WORD LEVEL
    # story mlps for word score and distance score; when not trainable, a
    # shared identity (linear) activation is used instead of real MLPs.
    trainable_word_mlp = self.model_config['PHM']['trainable_word_mlp']
    if trainable_word_mlp:
        story_word_dense = NTimeDistributed(
            Dense(D, init="identity", activation=activation,
                  W_regularizer=l2(mlp_l2), trainable=trainable_word_mlp),
            first_n=3)
        # q mlps for word and distance scores
        q_or_a_word_dense = NTimeDistributed(
            Dense(D, init="identity", activation=activation,
                  W_regularizer=l2(mlp_l2), trainable=trainable_word_mlp),
            first_n=3)
    else:
        linear_activation = Activation('linear')
        story_word_dense = linear_activation
        q_or_a_word_dense = linear_activation

    # Select which word-embedding nodes get the story word MLP; a flag of True
    # means "always", otherwise the config switch with that name decides.
    ninputs, noutputs = [], []
    tpls = [(True, 'story_word_embedding1', 'story_word_mlp'),
            ('use_slide_window_inside_sentence', 'reordered_story_word_embedding',
             'reordered_story_word_mlp'),
            ('use_slide_window_word', 'story_attentive_word_embedding',
             'story_attentive_word_embedding_mlp'),
            ('use_slide_window_reordered_word', 'reordered_story_attentive_word_embedding',
             'reordered_story_attentive_word_embedding_mlp')
            ]
    for tpl in tpls:
        a, b, c = tpl
        if a is True or config[a]:
            ninputs.append(b)
            noutputs.append(c)
            # The two plain story embeddings also get per-ngram variants.
            if b in ['reordered_story_word_embedding', 'story_word_embedding1']:
                for ngram in config['ngram_inputs']:
                    ninputs.append('%s_%sgram' % (b, ngram))
                    noutputs.append('%s_%sgram' % (c, ngram))
    for input, output in zip(ninputs, noutputs):
        nodes[output] = story_word_dense(self._get_node(input))
    inputs = ['question_word_embedding', 'answer_word_embedding', 'qa_word_embedding']
    outputs = ['question_word_mlp', 'answer_word_mlp', 'qa_word_mlp']
    for input, output in zip(inputs, outputs):
        nodes[output] = q_or_a_word_dense(self._get_node(input))

    # SIMILARITY MATRICES
    # first for word scores
    # cosine similarity matrix based on sentence and q
    nodes['sim_matrix_q'] = WordByWordMatrix(is_q=True)([nodes['story_word_mlp'],
                                                         nodes['question_word_mlp']])
    # cosine similarity matrix based on sentence and a
    nodes['sim_matrix_a'] = WordByWordMatrix()([nodes['story_word_mlp'],
                                                nodes['answer_word_mlp']])

    # WORD-BY-WORD SCORES
    # q
    nodes['s_q_wbw_score'] = WordByWordScores(trainable=False, is_q=True, alpha=1.,
                                              threshold=0.15,
                                              wordbyword_merge_type=config['wordbyword_merge_type'],
                                              )([nodes['sim_matrix_q'],
                                                 nodes['__w_question_wbw']])
    # a
    nodes['s_a_wbw_score'] = WordByWordScores(trainable=False, alpha=1., threshold=0.15,
                                              wordbyword_merge_type=config['wordbyword_merge_type'],
                                              )(
        [nodes['sim_matrix_a'], nodes['__w_answer_wbw']])
    # mean: combine the q and a word-by-word scores into one word-level score.
    nodes['story_dot_answer_words'] = GeneralizedMean(mean_type=config['mean_type'],
                                                      trainable=config['trainable_story_dot_answer_words']) \
        ([nodes['s_q_wbw_score'], nodes['s_a_wbw_score']])

    # SLIDING WINDOW INSIDE SENTENCE
    if config['use_slide_window_inside_sentence']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        _inputs = [nodes['reordered_story_word_mlp'], nodes['qa_word_mlp']]
        nodes['wordbyword_slide_sum_within_sentence'] = \
            WordByWordSlideSumInsideSentence(len(_inputs),
                                             window_size=config['window_size_word_inside'],
                                             alpha=config['alpha_slide_window_word_inside'],
                                             use_gaussian_window=config['use_gaussian_window_word_inside'],
                                             gaussian_std=config['gaussian_sd_word_inside'],
                                             trainable=config['trainable_slide_window_word_inside'])(_inputs)

    # COMBINE LEVELS
    # sum word-based and sentence-based similarity scores
    inputs = ['story_dot_answer_words', 'story_dot_answer']
    if config['use_slide_window_sentence']:
        inputs.append('story_dot_answer_slide')
        nodes["story_dot_answer_slide"] = SlideSum(alpha=config['alpha_slide_window'],
                                                   use_gaussian_window=config['use_gaussian_window'],
                                                   trainable=config['trainable_slide_window'])(
            nodes['story_dot_answer'])
    if config['use_slide_window_inside_sentence']:
        inputs.append('wordbyword_slide_sum_within_sentence')
    if self.model_config['PHM']['use_depend_score']:
        # SENTENCE-QA DEPENDENCY LEVEL
        inputs.append('lcc_score_matrix')
        nodes['lcc_score_matrix'] = DependencyDistanceScore(config['alpha_depend_score'])(
            self._get_node('input_dep'))

    # sum scores from different component of the model on sentence level.
    # sentence level score merge
    layers_s_input = [nodes[x] for x in inputs]
    weights_s = [1.] * len(layers_s_input)
    nodes['word_plus_sent_sim'] = Combination(len(layers_s_input), input_dim=3,
                                              weights=weights_s,
                                              combination_type=config['sentence_ensemble'],
                                              trainable=config['trainable_sentence_ensemble'])(layers_s_input)

    # extract max over sentences
    nodes['story_dot_answer_max'] = TimeDistributedMerge(mode='max', axis=1)(nodes['word_plus_sent_sim'])

    # word sliding window
    word_sliding_window_output = ['story_dot_answer_max']
    if config['use_slide_window_word']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        temp_inputs = [nodes['story_attentive_word_embedding_mlp'], nodes['qa_word_mlp']]
        if config['use_qa_idf']:
            temp_inputs.append(nodes['__w_question_answer'])
        nodes['wordbyword_slide_sum'] = WordByWordSlideSum(len(temp_inputs),
                                                           window_size=config['window_size_word'],
                                                           alpha=config['alpha_slide_window_word'],
                                                           use_gaussian_window=config['use_gaussian_window_word'],
                                                           gaussian_std=config['gaussian_sd_word'],
                                                           trainable=config['trainable_slide_window_word'])(
            temp_inputs)
        word_sliding_window_output.append('wordbyword_slide_sum')
    if config['use_slide_window_reordered_word']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        temp_inputs = [nodes['reordered_story_attentive_word_embedding_mlp'], nodes['qa_word_mlp']]
        if config['use_qa_idf']:
            temp_inputs.append(nodes['__w_question_answer'])
        nodes['reordered_wordbyword_slide_sum'] = WordByWordSlideSum(len(temp_inputs),
                                                                     window_size=config[
                                                                         'window_size_reordered_word'],
                                                                     alpha=config[
                                                                         'alpha_slide_window_reordered_word'],
                                                                     use_gaussian_window=config[
                                                                         'use_gaussian_window_reordered_word'],
                                                                     gaussian_std=config[
                                                                         'gaussian_sd_reordered_word'],
                                                                     trainable=config[
                                                                         'trainable_slide_window_reordered_word'])(
            temp_inputs
        )
        word_sliding_window_output.append('reordered_wordbyword_slide_sum')

    # Extract top_n sentence for each answer
    if config['top_n_wordbyword']:
        layers_name = ['word_plus_sent_sim', 'story_word_embedding1',
                       'qa_word_embedding', '__w_question_answer']
        layers = [nodes[x] for x in layers_name]
        top_n_name = 'top_n_wordbyword'
        nodes[top_n_name] = TopNWordByWord(top_n=config['top_n'], nodes=nodes,
                                           use_sum=config['top_n_use_sum'], trainable=True)(layers)
        word_sliding_window_output.append(top_n_name)

    # Per-ngram sub-networks contribute one extra score node each.
    ngram_output = [self._add_ngram_network(ngram, story_encoding_mlp)
                    for ngram in config['ngram_inputs']]

    # final score merge
    layers_input = [nodes[x] for x in word_sliding_window_output + ngram_output]
    weights = [1.] * len(layers_input)
    # NOTE(review): this loop rewrites the trailing (ngram) weights with the
    # same value 1., so it is currently a no-op — presumably a hook left in
    # place for weighting the ngram scores differently.
    for i in range(len(ngram_output)):
        weights[-i - 1] = 1.
    """
    # also aggregate scores that were already aggregated on sentence level.
    sentence_level_weight = 0.1
    for layer_name in sentence_level_merge_layers:
        layer_max = layer_name + "_max"
        if layer_max not in nodes:
            add_node(TimeDistributedMergeEnhanced(mode='max'), layer_max, input=layer_name)
        layers_input.append(nodes[layer_max])
        weights.append(sentence_level_weight)"""
    nodes['story_dot_answer_combined_max'] = Combination(len(layers_input), weights=weights,
                                                         combination_type=config['answer_ensemble'],
                                                         trainable=config['trainable_answer_ensemble'])(
        layers_input)

    # apply not-switch: multiply by the negation-question indicator input.
    input_mul = self._get_node('input_negation_questions')
    nodes['story_dot_answer_max_switch'] = merge([nodes['story_dot_answer_combined_max'], input_mul],
                                                 mode='mul')

    # Ranking loss keeps raw scores (linear); otherwise softmax over answers.
    activation_final = Activation('linear', name='y_hat') \
        if self.model_config['optimizer']['loss'] == 'ranking_loss' else Activation(
        'softmax', name='y_hat')
    prediction = activation_final(nodes['story_dot_answer_max_switch'])

    # NOTE(review): dict .values() view (not a list) is passed as model inputs —
    # fine on Python 2 / Keras 1; confirm behavior under Python 3.
    inputs = self.inputs_nodes.values()
    model = Model(input=inputs, output=prediction)
    optimizer = self._get_optimizer()
    model.compile(loss=self._get_loss_dict(), optimizer=optimizer, metrics={'y_hat': 'accuracy'})
    self.graph = model