def google_vgg16_finetune(classes=3, size=256):
    input_layer = Input(shape=(size, size, 3), name='image_input')
    base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_layer)
    x = Conv2D(name='squeeze', filters=256, kernel_size=(1, 1))(base_model.output)  # squeeze channels
    x = Flatten(name='avgpool')(x)
    x = Dense(256, name='features', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(classes, activation='softmax', name='out')(x)
    model = Model(inputs=base_model.input, outputs=x)
    for layer in model.layers:
        if layer.name in ['block5_conv1', 'block5_conv2', 'block5_conv3',
                          'features', 'out']:
            layer.trainable = True
        else:
            layer.trainable = False
    print(model.summary())
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.RMSprop(lr=1e-4),
                  metrics=['accuracy'])
    return model
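# --- Hypothetical smoke test (not part of the original script): builds the
# fine-tuned VGG16 and fits one epoch on random data to check that the
# frozen/trainable wiring and the output shapes are consistent.
import numpy as np

smoke_model = google_vgg16_finetune(classes=3, size=256)
dummy_x = np.random.rand(8, 256, 256, 3).astype('float32')
dummy_y = tf.keras.utils.to_categorical(np.random.randint(0, 3, size=8), num_classes=3)
smoke_model.fit(dummy_x, dummy_y, batch_size=4, epochs=1)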
left_input = Input(shape=(max_seq_length,), dtype='int32')
right_input = Input(shape=(max_seq_length,), dtype='int32')

# Pack it all up into a Manhattan Distance model
malstm_distance = ManDist()([shared_model(left_input), shared_model(right_input)])
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])
# if gpus >= 2:
#     # `multi_gpu_model()` is quite buggy: it breaks the saved model.
#     model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

# Start training
training_start_time = time()
malstm_trained = model.fit(
    [X_train['left'], X_train['right']], Y_train,
    batch_size=batch_size, epochs=n_epoch,
    validation_data=([X_validation['left'], X_validation['right']], Y_validation))
training_end_time = time()
print("Training finished.\n%d epochs in %12.2f s" %
      (n_epoch, training_end_time - training_start_time))
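# `ManDist` is a custom layer whose definition is not shown in this excerpt.
# For reference, a common MaLSTM-style Manhattan-distance similarity layer
# (a sketch, assuming the usual exp(-||h_left - h_right||_1) formulation;
# do not redefine it if the project already provides one):
class ManDist(tf.keras.layers.Layer):
    """Outputs a similarity in (0, 1]; 1.0 means identical encodings."""

    def call(self, inputs):
        left, right = inputs
        return tf.keras.backend.exp(
            -tf.keras.backend.sum(tf.keras.backend.abs(left - right),
                                  axis=1, keepdims=True))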
def vgg_gru(input_image, vgg_conv5):
    """Encoder-decoder GRU with attention on top of VGG conv5 features."""
    # Take VGG's conv5 output and unroll it along the width axis, folding the
    # height dimension into the features:
    # [batch, width, height, channel] => [batch, width, height*channel],
    # i.e. [samples, time steps, features].
    vgg_conv5_shape = [x if x is not None else -1 for x in vgg_conv5.shape.as_list()]
    b, w, h, c = vgg_conv5_shape
    print("(b, w, c*h)", (b, w, c * h))
    rnn_input = Reshape((w, c * h))(vgg_conv5)
    print("rnn_input.shape =", rnn_input)

    # 1. Encoder GRU (bidirectional; the hidden size is hard-coded to 64)
    encoder_gru = Bidirectional(GRU(64,
                                    return_sequences=True,
                                    return_state=True,
                                    name='encoder_gru'),
                                name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(rnn_input)

    # 2. Decoder GRU, using the concatenated encoder states as its initial state.
    decoder_inputs = Input(shape=(5, 64), name='decoder_inputs')
    decoder_gru = GRU(64 * 2, return_sequences=True, return_state=True,
                      name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(
        decoder_inputs,
        initial_state=Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state]))

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    print("encoder_out:", encoder_out)
    print("decoder_out:", decoder_out)
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concatenate the attention output with the decoder GRU output
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense softmax layer, applied at every time step
    dense = Dense(64, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[input_image, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()
    return full_model
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, sp_timesteps, sp_vsize):
    """ Defining a NMT model """
    # Define an input sequence and process it.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, sp_timesteps - 1, sp_vsize),
                               name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(shape=(sp_timesteps - 1, sp_vsize), name='decoder_inputs')

    # Encoder GRU
    encoder_gru = GRU(hidden_size, return_sequences=True, return_state=True,
                      name='encoder_gru')
    encoder_out, encoder_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using `encoder_states` as initial state.
    decoder_gru = GRU(hidden_size, return_sequences=True, return_state=True,
                      name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention input and decoder GRU output
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer
    dense = Dense(sp_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy',
                       metrics=['accuracy'])
    full_model.summary()

    """ Inference model """
    batch_size = 1

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out, encoder_inf_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, sp_vsize),
                               name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, hidden_size),
                               name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(batch_size, hidden_size),
                               name='decoder_init')

    decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs,
                                                     initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    return full_model, encoder_model, decoder_model
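# Hypothetical smoke test for define_nmt (not from the original file): one
# training step on random one-hot sequences. All sizes below are made up, and
# AttentionLayer is assumed to come from the surrounding project.
import numpy as np

en_t, sp_t, en_v, sp_v = 20, 20, 30, 30
full, enc, dec = define_nmt(hidden_size=64, batch_size=None,
                            en_timesteps=en_t, en_vsize=en_v,
                            sp_timesteps=sp_t, sp_vsize=sp_v)
enc_x = np.eye(en_v)[np.random.randint(0, en_v, (4, en_t))]
dec_x = np.eye(sp_v)[np.random.randint(0, sp_v, (4, sp_t))]
# Teacher forcing: the decoder sees tokens 0..T-2 and predicts tokens 1..T-1.
full.train_on_batch([enc_x, dec_x[:, :-1, :]], dec_x[:, 1:, :])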
class JointEmbeddingModel:
    def __init__(self, config):
        self.data_dir = config.data_dir
        self.model_name = config.model_name
        self.methname_len = config.methname_len  # the max length of method name
        self.apiseq_len = config.apiseq_len
        self.tokens_len = config.tokens_len
        self.desc_len = config.desc_len
        self.vocab_size = config.n_words  # the size of vocab
        self.embed_dims = config.embed_dims
        self.lstm_dims = config.lstm_dims
        self.hidden_dims = config.hidden_dims
        self.margin = 0.05

        self.init_embed_weights_methodname = config.init_embed_weights_methodname
        self.init_embed_weights_tokens = config.init_embed_weights_tokens
        self.init_embed_weights_desc = config.init_embed_weights_desc

        self.methodname = Input(shape=(self.methname_len,), dtype='int32', name='methodname')
        self.apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
        self.tokens = Input(shape=(self.tokens_len,), dtype='int32', name='tokens')
        self.desc_good = Input(shape=(self.desc_len,), dtype='int32', name='desc_good')
        self.desc_bad = Input(shape=(self.desc_len,), dtype='int32', name='desc_bad')

        # create path to store model info
        if not os.path.exists(self.data_dir + 'model/' + self.model_name):
            os.makedirs(self.data_dir + 'model/' + self.model_name)

    def build(self):
        # 1 -- CodeNN
        methodname = Input(shape=(self.methname_len,), dtype='int32', name='methodname')
        apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
        tokens = Input(shape=(self.tokens_len,), dtype='int32', name='tokens')

        # method name: embedding layer
        init_emd_weights = np.load(
            self.data_dir + self.init_embed_weights_methodname
        ) if self.init_embed_weights_methodname is not None else None
        init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_methodname')
        methodname_embedding = embedding(methodname)

        # dropout
        dropout = Dropout(0.25, name='dropout_methodname_embed')
        methodname_dropout = dropout(methodname_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      name='lstm_methodname_fw')
        # backward rnn
        bw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      go_backwards=True, name='lstm_methodname_bw')
        methodname_fw = fw_rnn(methodname_dropout)
        methodname_bw = bw_rnn(methodname_dropout)

        dropout = Dropout(0.25, name='dropout_methodname_rnn')
        methodname_fw_dropout = dropout(methodname_fw)
        methodname_bw_dropout = dropout(methodname_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_methodname')
        methodname_pool = Concatenate(name='concat_methodname_lstm')(
            [maxpool(methodname_fw_dropout), maxpool(methodname_bw_dropout)])
        activation = Activation('tanh', name='active_methodname')
        methodname_repr = activation(methodname_pool)

        # apiseq: embedding layer
        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              mask_zero=False,
                              name='embedding_apiseq')
        apiseq_embedding = embedding(apiseq)

        # dropout
        dropout = Dropout(0.25, name='dropout_apiseq_embed')
        apiseq_dropout = dropout(apiseq_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims, return_sequences=True, recurrent_dropout=0.2,
                      name='lstm_apiseq_fw')
        # backward rnn
        bw_rnn = LSTM(self.lstm_dims, return_sequences=True, recurrent_dropout=0.2,
                      go_backwards=True, name='lstm_apiseq_bw')
        apiseq_fw = fw_rnn(apiseq_dropout)
        apiseq_bw = bw_rnn(apiseq_dropout)

        dropout = Dropout(0.25, name='dropout_apiseq_rnn')
        apiseq_fw_dropout = dropout(apiseq_fw)
        apiseq_bw_dropout = dropout(apiseq_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_apiseq')
        apiseq_pool = Concatenate(name='concat_apiseq_lstm')(
            [maxpool(apiseq_fw_dropout), maxpool(apiseq_bw_dropout)])
        activation = Activation('tanh', name='active_apiseq')
        apiseq_repr = activation(apiseq_pool)

        # tokens: embedding layer
        init_emd_weights = np.load(
            self.data_dir + self.init_embed_weights_tokens
        ) if self.init_embed_weights_tokens is not None else None
        init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_tokens')
        tokens_embedding = embedding(tokens)

        # dropout
        dropout = Dropout(0.25, name='dropout_tokens_embed')
        tokens_dropout = dropout(tokens_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      name='lstm_tokens_fw')
        # backward rnn
        bw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      go_backwards=True, name='lstm_tokens_bw')
        tokens_fw = fw_rnn(tokens_dropout)
        tokens_bw = bw_rnn(tokens_dropout)

        dropout = Dropout(0.25, name='dropout_tokens_rnn')
        tokens_fw_dropout = dropout(tokens_fw)
        tokens_bw_dropout = dropout(tokens_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_tokens')
        tokens_pool = Concatenate(name='concat_tokens_lstm')(
            [maxpool(tokens_fw_dropout), maxpool(tokens_bw_dropout)])
        # tokens_pool = maxpool(tokens_dropout)
        activation = Activation('tanh', name='active_tokens')
        tokens_repr = activation(tokens_pool)

        # fusion of methodname, apiseq and tokens
        merge_methname_api = Concatenate(name='merge_methname_api')(
            [methodname_repr, apiseq_repr])
        merge_code_repr = Concatenate(name='merge_code_repr')(
            [merge_methname_api, tokens_repr])
        code_repr = Dense(self.hidden_dims, activation='tanh',
                          name='dense_coderepr')(merge_code_repr)

        self.code_repr_model = Model(inputs=[methodname, apiseq, tokens],
                                     outputs=[code_repr],
                                     name='code_repr_model')
        self.code_repr_model.summary()

        # 2 -- description
        desc = Input(shape=(self.desc_len,), dtype='int32', name='desc')

        # embedding layer
        init_emd_weights = np.load(
            self.data_dir + self.init_embed_weights_desc
        ) if self.init_embed_weights_desc is not None else None
        init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]

        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embed_dims,
                              weights=init_emd_weights,
                              mask_zero=False,
                              name='embedding_desc')
        desc_embedding = embedding(desc)

        # dropout
        dropout = Dropout(0.25, name='dropout_desc_embed')
        desc_dropout = dropout(desc_embedding)

        # forward rnn
        fw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      name='lstm_desc_fw')
        # backward rnn
        bw_rnn = LSTM(self.lstm_dims, recurrent_dropout=0.2, return_sequences=True,
                      go_backwards=True, name='lstm_desc_bw')
        desc_fw = fw_rnn(desc_dropout)
        desc_bw = bw_rnn(desc_dropout)

        dropout = Dropout(0.25, name='dropout_desc_rnn')
        desc_fw_dropout = dropout(desc_fw)
        desc_bw_dropout = dropout(desc_bw)

        # max pooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_desc')
        desc_pool = Concatenate(name='concat_desc_lstm')(
            [maxpool(desc_fw_dropout), maxpool(desc_bw_dropout)])
        activation = Activation('tanh', name='active_desc')
        desc_repr = activation(desc_pool)

        self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr],
                                     name='desc_repr_model')
        self.desc_repr_model.summary()

        # 3 -- cosine similarity
        code_repr = self.code_repr_model([methodname, apiseq, tokens])
        desc_repr = self.desc_repr_model([desc])
        cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

        sim_model = Model(inputs=[methodname, apiseq, tokens, desc],
                          outputs=[cos_sim], name='sim_model')
        self.sim_model = sim_model
        self.sim_model.summary()

        # 4 -- build training model (margin ranking loss between good and bad pairs)
        good_sim = sim_model([self.methodname, self.apiseq, self.tokens, self.desc_good])
        bad_sim = sim_model([self.methodname, self.apiseq, self.tokens, self.desc_bad])
        loss = Lambda(lambda x: K.maximum(1e-6, self.margin - x[0] + x[1]),
                      output_shape=lambda x: x[0],
                      name='loss')([good_sim, bad_sim])

        self.training_model = Model(
            inputs=[self.methodname, self.apiseq, self.tokens,
                    self.desc_good, self.desc_bad],
            outputs=[loss], name='training_model')
        self.training_model.summary()

    def compile(self, optimizer, **kwargs):
        # optimizer = optimizers.Adam(lr=0.001)
        self.code_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
        self.desc_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
        # The ranking loss is already computed inside the graph, so this "loss"
        # just passes the model output through (y_true is a dummy).
        self.training_model.compile(
            loss=lambda y_true, y_pred: y_pred + y_true - y_true,
            optimizer=optimizer, **kwargs)
        self.sim_model.compile(loss='binary_crossentropy', optimizer=optimizer, **kwargs)

    def fit(self, x, **kwargs):
        y = np.zeros(shape=x[0].shape[:1], dtype=np.float32)
        return self.training_model.fit(x, y, **kwargs)

    def repr_code(self, x, **kwargs):
        return self.code_repr_model.predict(x, **kwargs)

    def repr_desc(self, x, **kwargs):
        return self.desc_repr_model.predict(x, **kwargs)

    def predict(self, x, **kwargs):
        return self.sim_model.predict(x, **kwargs)

    def save(self, code_model_file, desc_model_file, **kwargs):
        self.code_repr_model.save_weights(code_model_file, **kwargs)
        self.desc_repr_model.save_weights(desc_model_file, **kwargs)

    def load(self, code_model_file, desc_model_file, **kwargs):
        self.code_repr_model.load_weights(code_model_file, **kwargs)
        self.desc_repr_model.load_weights(desc_model_file, **kwargs)
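# Hypothetical end-to-end sketch (not in the original file): build, compile and
# fit the joint embedding model on random token indices. All `config` fields
# (data_dir, n_words, the *_len sizes, ...) are assumed to be defined elsewhere.
model = JointEmbeddingModel(config)
model.build()
model.compile(optimizer='adam')
x = [np.random.randint(1, config.n_words, size=(32, length))
     for length in (config.methname_len, config.apiseq_len, config.tokens_len,
                    config.desc_len, config.desc_len)]
model.fit(x, epochs=1, batch_size=8)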
                 pooling='avg').outputs[0]
x = Dropout(0.3)(x)
x = Dense(units=2,
          name='area_classifier',
          kernel_initializer="he_normal",
          kernel_regularizer=l2(1e-4),
          activation='softmax')(x)
model = Model(inputs=input_a, outputs=x)
model, epoch = _load_model(folder, model, weights_only=True)
save_model(folder, model)
print('Model built successfully.')

if not os.path.isdir(folder):
    os.makedirs(folder)
log_append = os.path.isfile(logger)
print(model.summary())

from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(featurewise_center=True,
                                   featurewise_std_normalization=True,
                                   rotation_range=15,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   brightness_range=(0.8, 1.2),
                                   zoom_range=0.1,
                                   horizontal_flip=False,
                                   vertical_flip=True,
                                   fill_mode='constant')
test_datagen = ImageDataGenerator()

if os.path.exists(folder) and os.path.exists(
def init_model(self, input_shape, num_classes, **kwargs):
    layers = 5
    filters_size = [64, 128, 256, 512, 512]
    kernel_size = (3, 3)
    pool_size = [(2, 2), (2, 2), (2, 2), (4, 1), (4, 1)]
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])

    melgram_input = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)
    x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)

    # Conv block 1
    x = Convolution2D(filters=filters_size[0], kernel_size=kernel_size,
                      padding='same', name='conv1')(x)
    x = ELU()(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = MaxPooling2D(pool_size=pool_size[0], strides=pool_size[0], name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)
    min_size = min_size // pool_size[0][0]

    for layer in range(1, layers):
        min_size = min_size // pool_size[layer][0]
        if min_size < 1:
            break
        x = Convolution2D(filters=filters_size[layer], kernel_size=kernel_size,
                          padding='same', name=f'conv{layer + 1}')(x)
        x = ELU()(x)
        x = BatchNormalization(axis=channel_axis, name=f'bn{layer + 1}')(x)
        x = MaxPooling2D(pool_size=pool_size[layer], strides=pool_size[layer],
                         name=f'pool{layer + 1}')(x)
        x = Dropout(0.1, name=f'dropout{layer + 1}')(x)

    x = Reshape((-1, channel_size))(x)

    gru_units = 32
    if num_classes > 32:
        gru_units = int(num_classes * 1.5)
    # GRU block 1, 2, output
    x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x)
    x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)

    model = TFModel(inputs=melgram_input, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=1e-4,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def __init__(self):
    self.HOPS = 5
    self.DATASET = 'twitter'  # 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 200
    self.LEARNING_RATE = 0.01
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'recurrent_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'bias_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'kernel_regularizer': regularizers.l2(0.001),
        'recurrent_regularizer': regularizers.l2(0.001),
        'bias_regularizer': regularizers.l2(0.001),
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 40
    self.MAX_ASPECT_LENGTH = 2
    self.ITERATION = 500
    self.BATCH_SIZE = 200

    self.texts_raw_indices, self.texts_left_indices, self.aspects_indices, \
        self.texts_right_indices, \
        self.polarities_matrix, \
        self.embedding_matrix, \
        self.tokenizer = \
        read_dataset(type=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('dmn_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('dmn_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(
            shape=(self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,),
            name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
        memory = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='sentence_embedding')(inputs_sentence)
        memory = Lambda(self.locationed_memory, name='locationed_memory')(memory)
        aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)
        x = Lambda(lambda xin: K.mean(xin, axis=1), name='aspect_mean')(aspect)
        SharedAttention = Attention(name='shared_attention')
        for i in range(self.HOPS):
            x = SharedAttention((memory, x))
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = model
# layer which generates softmax class score for each class
# 3. we compile the final model using an Adam optimizer, with a
#    low learning rate (since we are 'fine-tuning')
net = ResNet50(include_top=False,
               weights='imagenet',
               input_tensor=None,
               input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
x = net.output
x = Flatten()(x)
x = Dropout(0.5)(x)
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)
net_final = Model(inputs=net.input, outputs=output_layer)
for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True
net_final.compile(optimizer=Adam(lr=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
print(net_final.summary())

# train the model
net_final.fit_generator(train_batches,
                        steps_per_epoch=train_batches.samples // BATCH_SIZE,
                        validation_data=valid_batches,
                        validation_steps=valid_batches.samples // BATCH_SIZE,
                        epochs=NUM_EPOCHS)

# save trained weights
net_final.save(WEIGHTS_FINAL)
def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01,
             n_class=3, max_sentence_len=40, l2_reg_val=0.003):
    ############################
    self.DATASET = ['restaurant', 'laptop']
    # 1001-twitter, 1002-restaurant, 1003-laptop, 1004-others, 1005-general
    self.TASK_INDICES = [1002, 1003, 1005]
    self.LOSS_WEIGHTS = {1002: 0.5, 1003: 0.5, 1005: 0.5}
    self.MODEL_TO_LOAD = './models/mtl_absa_att_sh_saved_model.h5'
    ###########################
    self.SCORE_FUNCTION = 'mlp'
    self.EMBEDDING_DIM = embedding_dim
    self.BATCH_SIZE = batch_size
    self.N_HIDDEN = n_hidden
    self.LEARNING_RATE = learning_rate
    self.N_CLASS = n_class
    self.MAX_SENTENCE_LENGTH = max_sentence_len
    self.EPOCHS = 4
    self.L2_REG_VAL = l2_reg_val
    self.MAX_ASPECT_LENGTH = 5
    self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    self.REGULARIZER = regularizers.l2(self.L2_REG_VAL)
    self.LSTM_PARAMS = {
        'units': self.N_HIDDEN,
        'activation': 'tanh',
        'recurrent_activation': 'hard_sigmoid',
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.DENSE_PARAMS = {
        'kernel_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dtype': 'float32',
    }

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, \
        self.texts_left_indices, self.texts_left_with_aspects_indices, \
        self.aspects_indices, self.texts_right_indices, \
        self.texts_right_with_aspects_indices, self.dataset_index, \
        self.polarities_matrix, self.polarities, \
        self.embedding_matrix, \
        self.tokenizer = \
        read_dataset(types=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SENTENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    print('Build model...')
    inputs_l = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    inputs_r = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), dtype='int64',
                          name='inputs_aspect')
    nonzero_count = Lambda(lambda xin: tf.count_nonzero(xin, dtype='float32'))(inputs_aspect)
    input_dataset = Input(shape=(1,), dtype='float32')

    Embedding_Layer = Embedding(input_dim=len(self.embedding_matrix),
                                output_dim=self.EMBEDDING_DIM,
                                input_length=self.MAX_SENTENCE_LENGTH,
                                mask_zero=True,
                                weights=[self.embedding_matrix],
                                trainable=False)
    aspect = Embedding(input_dim=len(self.embedding_matrix),
                       output_dim=self.EMBEDDING_DIM,
                       input_length=self.MAX_ASPECT_LENGTH,
                       mask_zero=True,
                       weights=[self.embedding_matrix],
                       trainable=False,
                       name='aspect_embedding')(inputs_aspect)
    x_l = Embedding_Layer(inputs_l)
    x_r = Embedding_Layer(inputs_r)

    x_aspect = Bidirectional(LSTM(name='aspect', return_sequences=True, **self.LSTM_PARAMS),
                             merge_mode='sum')(aspect)
    x_aspect = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                      name='aspect_mean')([x_aspect, nonzero_count])

    x_l = LSTM(name='sentence_left', return_sequences=True, **self.LSTM_PARAMS)(x_l)
    x_r = LSTM(go_backwards=True, name='sentence_right', return_sequences=True,
               **self.LSTM_PARAMS)(x_r)

    shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                 initializer=self.INITIALIZER,
                                 regularizer=self.REGULARIZER,
                                 name='shared_attention')
    x = Concatenate(name='last_shared', axis=1)([x_l, x_r])
    x = shared_attention((x, x_aspect))
    x = Lambda(lambda x: K.squeeze(x, 1))(x)

    # twitter task layers
    tw_x = Dense(self.N_HIDDEN, name='t1_dense_10', **self.DENSE_PARAMS)(x)
    twitter_x = Dense(self.N_CLASS, name='t1_dense_3', **self.DENSE_PARAMS)(tw_x)
    twitter_x = Concatenate(name="twitter_output")([twitter_x, input_dataset])

    # rest task layers
    rest_x = Dense(self.N_HIDDEN, name='t2_dense_10', **self.DENSE_PARAMS)(x)
    rest_x = Dense(self.N_CLASS, name='t2_dense_3', **self.DENSE_PARAMS)(rest_x)
    rest_x = Concatenate(name="rest_output")([rest_x, input_dataset])

    # general task layers
    general_x = Dense(self.N_HIDDEN, name='t3_dense_10', **self.DENSE_PARAMS)(x)
    general_x = Dense(self.N_CLASS, name='t3_dense_3', **self.DENSE_PARAMS)(general_x)
    general_x = Concatenate(name="general_output")([general_x, input_dataset])

    model = Model(inputs=[inputs_l, inputs_r, input_dataset, inputs_aspect],
                  outputs=[twitter_x, rest_x, general_x])
    model.summary()
    if os.path.exists(self.MODEL_TO_LOAD):
        print('loading saved model...')
        model.load_weights(self.MODEL_TO_LOAD)
    self.model = model
    self.model.compile(
        loss={'twitter_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[0]),
              'rest_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[1]),
              'general_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[2])},
        optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
        metrics=[multitask_accuracy, f1])
def train(self, data):
    """Pretrain the latent layers of the model."""
    # network parameters
    original_dim = data.shape[1]
    input_shape = (original_dim,)

    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    hidden = inputs
    for i, hidden_dim in enumerate(self.hidden_dim, 1):
        hidden = Dense(hidden_dim, activation='sigmoid',
                       name='hidden_e_{}'.format(i))(hidden)
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    z_mean = Dense(params_training.num_latent, activation=None, name='z_mean')(hidden)
    z_log_sigma = Dense(params_training.num_latent, activation=None,
                        name='z_log_sigma')(hidden)
    z = Lambda(self.sampling, output_shape=(params_training.num_latent,),
               name='z')([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')
    self.encoder_z_mean = encoder.predict(data)[0]

    # build decoder model
    latent_inputs = Input(shape=(params_training.num_latent,), name='z_sampling')
    hidden = latent_inputs
    for i, hidden_dim in enumerate(self.hidden_dim[::-1], 1):  # Reverse because decoder.
        hidden = Dense(hidden_dim, activation='sigmoid',
                       name='hidden_d_{}'.format(i))(hidden)
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    # if hidden == latent_inputs:
    #     logger.warning("No Hidden layers hooked up.")
    outputs = Dense(original_dim, activation='sigmoid')(hidden)
    decoder = Model(latent_inputs, outputs, name='decoder')

    # Build the CVAE auto-encoder
    outputs = decoder(encoder(inputs)[2])
    cvae_model = Model(inputs, outputs, name='cvae')
    # Load the pre-trained weights.
    self.load_pretrain_weights(cvae_model)

    reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = 1 + z_log_sigma - tf.square(z_mean) - tf.exp(z_log_sigma)
    kl_loss = -0.5 * tf.reduce_sum(kl_loss, axis=-1)
    cvae_model.add_loss(tf.reduce_mean(reconstruction_loss + kl_loss))
    cvae_model.compile(optimizer='adam')
    cvae_model.summary()

    # First load the weights from the pre-training
    if self.pretrain_weights:
        cvae_model = self.load_pretrain_weights(cvae_model)

    saver = ModelCheckpoint(check_path(TEMPORARY_CVAE_PATH),
                            save_weights_only=True,
                            verbose=1)
    tensorboard_config = TensorBoard(log_dir=check_path(TEMPORARY_CVAE_PATH))

    # train the auto-encoder
    cvae_model.fit(data,
                   epochs=params_training.num_epochs,
                   batch_size=params_training.batch_size,
                   callbacks=[saver, tensorboard_config])
    return self.encoder_z_mean
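# train() above calls self.sampling, whose definition is not shown in this
# excerpt. A standard reparameterization-trick helper (a sketch of what such a
# method usually looks like; the original may differ) would live on the class:
def sampling(self, args):
    z_mean, z_log_sigma = args
    # eps ~ N(0, I); z = mu + exp(log_var / 2) * eps, treating z_log_sigma as
    # the log-variance, consistent with the KL term used in train().
    epsilon = tf.keras.backend.random_normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_sigma) * epsilon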
def generator_model(self):
    input_image = Input(shape=self.input_shape)

    # 32
    layer_1 = Conv2D(self.filter_array[0], kernel_size=(3, 3), padding="SAME")(input_image)
    layer_1 = BatchNormalization()(layer_1)
    layer_1 = PReLU(shared_axes=[1, 2])(layer_1)

    # 128
    layer_2 = Conv2D(self.filter_array[1], kernel_size=(3, 3), padding="SAME")(layer_1)
    layer_2 = BatchNormalization()(layer_2)
    layer_2 = PReLU(shared_axes=[1, 2])(layer_2)

    # 128
    layer_3 = Conv2D(self.filter_array[2], kernel_size=(3, 3), padding="SAME")(layer_2)
    layer_3 = BatchNormalization()(layer_3)
    layer_3 = PReLU(shared_axes=[1, 2])(layer_3)

    # 128
    layer_4 = Conv2D(self.filter_array[3], kernel_size=(3, 3), padding="SAME")(layer_3)
    layer_4 = BatchNormalization()(layer_4)
    layer_4 = Add()([PReLU(shared_axes=[1, 2])(layer_4), layer_3])

    if self.type_no == 3:
        # 128
        layer_5 = Conv2D(self.filter_array[4], kernel_size=(3, 3), padding="SAME")(layer_4)
        layer_5 = BatchNormalization()(layer_5)
        layer_5 = Add()([PReLU(shared_axes=[1, 2])(layer_5), layer_4])

        # 128
        layer_6 = Conv2D(self.filter_array[5], kernel_size=(3, 3), padding="SAME")(layer_5)
        layer_6 = BatchNormalization()(layer_6)
        layer_6 = Add()([PReLU(shared_axes=[1, 2])(layer_6), layer_5])

        # NOTE: this skip connection is discarded; the very next assignment
        # overwrites layer_7 and reads from layer_6 instead.
        layer_7 = Add()([layer_6, layer_2])

        # 128
        layer_7 = Conv2D(self.filter_array[6], kernel_size=(5, 5), padding="SAME")(layer_6)
        layer_7 = BatchNormalization()(layer_7)
        # layer_7 = Add()([PReLU(shared_axes=[1, 2])(layer_7), layer_6])

        # 512
        layer_8 = Conv2D(self.filter_array[7], kernel_size=(5, 5), padding="SAME")(layer_7)
        layer_8 = BatchNormalization()(layer_8)
        layer_8 = PReLU(shared_axes=[1, 2])(layer_8)
        layer_8 = Add()([PReLU(shared_axes=[1, 2])(layer_8), layer_7])
        # if self.inflow_layer is not None:
        #     layer_8 = Add()([layer_8, self.inflow_layer])

        # 512, subpixel upsample x2
        layer_9 = Conv2D(self.filter_array[8], kernel_size=(7, 7), padding="SAME")(layer_8)
        layer_9 = SubpixelConv2D(layer_9.shape, scale=2)(layer_9)
        layer_9 = PReLU(shared_axes=[1, 2])(layer_9)
        layer = layer_9

    elif self.type_no == 1:
        # 128
        layer_4 = Add()([PReLU(shared_axes=[1, 2])(layer_4), layer_2])
        layer_5 = Conv2D(self.filter_array[4], kernel_size=(3, 3), padding="SAME")(layer_4)
        layer_5 = BatchNormalization()(layer_5)
        layer_5 = PReLU(shared_axes=[1, 2])(layer_5)
        # layer_5 = Add()([PReLU(shared_axes=[1, 2])(layer_5), layer_4])

        # 256, subpixel upsample x2
        layer_5 = SubpixelConv2D(self.input_shape, scale=2)(layer_5)
        layer = layer_5

    elif self.type_no == 2:
        layer_4 = Add()([PReLU(shared_axes=[1, 2])(layer_4), layer_2])

        # 256
        layer_5 = Conv2D(self.filter_array[4], kernel_size=(5, 5), padding="SAME")(layer_4)
        layer_5 = BatchNormalization()(layer_5)
        layer_5 = PReLU(shared_axes=[1, 2])(layer_5)

        layer_6 = Conv2D(self.filter_array[4], kernel_size=(5, 5), padding="SAME")(layer_5)
        layer_6 = BatchNormalization()(layer_6)
        layer_6 = PReLU(shared_axes=[1, 2])(layer_6)
        layer_6 = Add()([PReLU(shared_axes=[1, 2])(layer_6), layer_5])
        # if self.inflow_layer is not None:
        #     layer_5 = Add()([layer_5, self.inflow_layer])

        # 512, subpixel upsample x2
        layer_7 = Conv2D(self.filter_array[6], kernel_size=(7, 7), padding="SAME")(layer_6)
        layer_7 = SubpixelConv2D(layer_7.shape, scale=2)(layer_7)
        layer_7 = PReLU(shared_axes=[1, 2])(layer_7)
        layer = layer_7

    # Map back to RGB in [0, 255]
    out_layer = Conv2D(3, kernel_size=(1, 1), activation='tanh')(layer)
    out_layer = Lambda(lambda x: (x + 1) * 127.5)(out_layer)

    model = Model(inputs=input_image, outputs=out_layer)
    outflow_layer = layer

    if self.path is not None:
        model.load_weights(self.path)
        print('Loaded g_%s weights' % self.type_no)
        return model
    if self.flag:
        model.summary()
        return model
    else:
        model.compile(loss=self.loss, optimizer=self.optimizer)
        model.summary()
        # model = load_model('/home/mdo2/sid_codes/new_codes/model.h5')
        # if self.path is not None:
        #     model.load_weights(self.path)
        #     print('Loaded g_%s weights' % self.type_no)
        return model
class TmClassify:
    def __init__(self):
        self.input_shape = (224, 224)
        self.filter_count = 32
        self.kernel_size = (3, 3)
        self.leakrelu_alpha = 0.2
        self.encoder = self.createEncoder()

        Input1 = Input(shape=(224, 224, 3))
        Input2 = Input(shape=(224, 224, 3))
        # target = Dot(axes=1)([self.encoder(Input1), self.encoder(Input2)])
        x = concatenate(inputs=[self.encoder(Input1), self.encoder(Input2)])
        target = Dense(1)(x)  # unused; the discriminator below produces the output
        self.discriminator = self.createDiscriminator()
        y = self.discriminator(x)
        self.model = Model(inputs=[Input1, Input2], outputs=y)
        self.model.summary()
        self.pathlist = pathlist
        self.train_data_count = [len(i) for i in self.pathlist]
        op = Adam(lr=0.0001)
        self.model.compile(optimizer=op, loss='mse', metrics=['accuracy'])
        self.pad_param = 5
        self.rotate_degree_param = 90

    def createEncoder(self):
        base_model = InceptionV3(input_shape=(224, 224, 3), weights=None, include_top=False)
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = LayerNormalization()(x)
        model = Model(inputs=base_model.input, outputs=x)
        return model

    def createDiscriminator(self):
        # shape must be a tuple: the two 2048-d encoder outputs are concatenated
        x = Input(shape=(4096,))
        target = Dense(1)(x)
        model = Model(inputs=x, outputs=target)
        return model

    def gen_data(self, path):
        img_pil = Image.open(path).convert('RGB')
        # img_pil.save("test.jpg")
        pad_top = int(abs(np.random.uniform(0, self.pad_param)))
        pad_bottom = int(abs(np.random.uniform(0, self.pad_param)))
        pad_left = int(abs(np.random.uniform(0, self.pad_param)))
        pad_right = int(abs(np.random.uniform(0, self.pad_param)))
        rotate_param = np.random.uniform(0, self.rotate_degree_param)
        # randint's upper bound is exclusive, so it must be 2 to get a 0/1 coin flip
        flip_flag = np.random.randint(0, 2)
        mirror_flag = np.random.randint(0, 2)
        if flip_flag:
            img_pil = ImageOps.flip(img_pil)
        if mirror_flag:
            img_pil = ImageOps.mirror(img_pil)
        blur_rad = np.random.normal(loc=0.0, scale=1, size=None)
        img_pil = img_pil.filter(ImageFilter.GaussianBlur(blur_rad))
        enhancer_contrat = ImageEnhance.Contrast(img_pil)
        enhancer_brightness = ImageEnhance.Brightness(img_pil)
        enhancer_color = ImageEnhance.Color(img_pil)
        contrast_factor = np.random.normal(loc=1.0, scale=0.25, size=None)
        color_factor = np.max([0, 1 - abs(np.random.normal(loc=0, scale=0.5, size=None))])
        translate_factor_hor = np.random.normal(loc=0, scale=5, size=None)
        translate_factor_ver = np.random.normal(loc=0, scale=5, size=None)
        brightness_factor = np.random.normal(loc=1.0, scale=0.5, size=None)
        img_pil = enhancer_contrat.enhance(contrast_factor)
        img_pil = enhancer_brightness.enhance(brightness_factor)
        img_pil = enhancer_color.enhance(color_factor)
        img_pil = ImageChops.offset(img_pil, int(translate_factor_hor),
                                    int(translate_factor_ver))
        img_pil = img_pil.rotate(rotate_param, resample=Image.BILINEAR, expand=True,
                                 fillcolor=(255))
        img = np.asarray(img_pil)
        img = cv2.copyMakeBorder(img, pad_top, pad_bottom, pad_left, pad_right,
                                 cv2.BORDER_CONSTANT, value=(255, 255, 255))
        img = cv2.resize(img, dsize=self.input_shape)
        img = img / 127.5 - 1
        return img

    def train(self, start_epoch, max_epoch, batch_size, viz_interval):
        max_step = sum([len(i) for i in self.pathlist]) // batch_size
        # permu_ind = list(range(len(self.pathlist)))
        step = 0
        for epoch in range(start_epoch, max_epoch):
            # permu_ind = np.random.permutation(permu_ind)
            real_index = 0
            for step_index in range(max_step):
                batch_img1 = np.zeros((batch_size, self.input_shape[0],
                                       self.input_shape[1], 3))
                batch_img2 = np.zeros((batch_size, self.input_shape[0],
                                       self.input_shape[1], 3))
                batch_target = np.zeros(batch_size)

                # first half of the batch: positive pairs (two images from the same folder)
                for batch_index in range(batch_size // 2 + 1):
                    random_permu = np.random.permutation(len(sdir))
                    filelist = glob2.glob(sdir[batch_index] + '/*')
                    file_permu = np.random.permutation(len(filelist))
                    img1 = self.gen_data(filelist[file_permu[0]])
                    img2 = self.gen_data(filelist[file_permu[1]])
                    batch_target[batch_index] = 1
                    batch_img1[batch_index] = img1
                    batch_img2[batch_index] = img2
                    real_index = real_index + 1

                # second half: negative pairs (images from two different folders)
                for batch_index in range(batch_size // 2, batch_size):
                    random_permu = np.random.permutation(len(sdir))
                    filelist1 = glob2.glob(sdir[batch_index] + '/*')
                    filelist2 = glob2.glob(sdir[-1 - (-1 * batch_index)] + '/*')
                    file_permu1 = np.random.permutation(len(filelist1))
                    file_permu2 = np.random.permutation(len(filelist2))
                    img1 = self.gen_data(filelist1[file_permu1[0]])
                    img2 = self.gen_data(filelist2[file_permu2[0]])
                    batch_target[batch_index] = 0
                    batch_img1[batch_index] = img1
                    batch_img2[batch_index] = img2
                    real_index = real_index + 1

                train_loss = self.model.train_on_batch([batch_img1, batch_img2],
                                                       batch_target)
                with train_summary_writer.as_default():
                    tf.summary.scalar('loss', train_loss[0], step=step)
                    tf.summary.scalar('accuracy', train_loss[1], step=step)
                step = step + 1
                # train_summary_writer = tf.summary.create_file_writer(train_log_dir)
                print('\r epoch ' + str(epoch) + ' / ' + str(max_epoch) + ' ' +
                      'step ' + str(step_index) + ' / ' + str(max_step) +
                      ' loss = ' + str(train_loss))
                if step_index % viz_interval == 0:
                    self.encoder.save('DIPencoder.h5')
                    self.discriminator.save('DIPdiscriminator.h5')
                    self.model.save('DIPMatch.h5')
def get_unet(img_rows, img_cols, first_layer_num_filters=32, num_classes=1):
    inputs = Input((img_rows, img_cols, 3))

    ###### ENCODER BRANCH ######
    leaky_relu = LeakyReLU(alpha=0.2)
    first_conv_block = convolution_block(input_layer=inputs,
                                         num_filters=first_layer_num_filters,
                                         kernel_size=(3, 3),
                                         activation_func=leaky_relu)
    conv_ds_block_1 = conv_downsample_block(input_layer=first_conv_block,
                                            num_filters=first_layer_num_filters * 2,
                                            kernel_size=(3, 3),
                                            activation_func=leaky_relu,
                                            max_pool_shape=(2, 2))
    conv_ds_block_2 = conv_downsample_block(input_layer=conv_ds_block_1,
                                            num_filters=first_layer_num_filters * 4,
                                            kernel_size=(3, 3),
                                            activation_func=leaky_relu,
                                            max_pool_shape=(2, 2))
    conv_ds_block_3 = conv_downsample_block(input_layer=conv_ds_block_2,
                                            num_filters=first_layer_num_filters * 8,
                                            kernel_size=(3, 3),
                                            activation_func=leaky_relu,
                                            max_pool_shape=(2, 2))

    ##### BOTTOM OF U-SHAPE #####
    bottom_conv_block = conv_downsample_block(input_layer=conv_ds_block_3,
                                              num_filters=first_layer_num_filters * 16,
                                              kernel_size=(3, 3),
                                              activation_func=leaky_relu,
                                              max_pool_shape=(2, 2))

    ###### DECODER BRANCH ######
    conv_us_block_1 = conv_upsample_block(input_layer=bottom_conv_block,
                                          skip_connection_layer=conv_ds_block_3,
                                          num_filters=first_layer_num_filters * 8,
                                          kernel_size=(3, 3),
                                          activation_func=leaky_relu,
                                          upsampling_shape=(2, 2))
    conv_us_block_2 = conv_upsample_block(input_layer=conv_us_block_1,
                                          skip_connection_layer=conv_ds_block_2,
                                          num_filters=first_layer_num_filters * 4,
                                          kernel_size=(3, 3),
                                          activation_func=leaky_relu,
                                          upsampling_shape=(2, 2))
    conv_us_block_3 = conv_upsample_block(input_layer=conv_us_block_2,
                                          skip_connection_layer=conv_ds_block_1,
                                          num_filters=first_layer_num_filters * 2,
                                          kernel_size=(3, 3),
                                          activation_func=leaky_relu,
                                          upsampling_shape=(2, 2))
    last_conv_block = conv_upsample_block(input_layer=conv_us_block_3,
                                          skip_connection_layer=first_conv_block,
                                          num_filters=first_layer_num_filters,
                                          kernel_size=(3, 3),
                                          activation_func=leaky_relu,
                                          upsampling_shape=(2, 2))

    output_layer = Conv2D(num_classes, (1, 1), activation='sigmoid')(last_conv_block)
    model = Model(inputs=[inputs], outputs=[output_layer])

    # NOTE: sgd_opt is defined but unused; the model is compiled with Adam below.
    sgd_opt = SGD(lr=0.01, momentum=0.0, decay=0.0001, nesterov=False)
    model.compile(optimizer=Adam(lr=1e-4, decay=1e-5),
                  loss=dice_coef_loss,
                  metrics=['accuracy'])  # decay=1e-5
    print(model.summary())
    return model
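# Hypothetical smoke test (not in the original): one training step on random
# data. Spatial dims must be divisible by 2**4 to survive the four pooling and
# upsampling stages; convolution_block, conv_downsample_block,
# conv_upsample_block and dice_coef_loss come from the surrounding project.
import numpy as np

unet = get_unet(img_rows=128, img_cols=128)
x = np.random.rand(2, 128, 128, 3).astype('float32')
y = (np.random.rand(2, 128, 128, 1) > 0.5).astype('float32')
unet.train_on_batch(x, y)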
class SqueezeNet0(BaseModel):
    # TODO add documentation

    def __init__(self, nb_classes, bypass=False, optimizer="adam", logdir=None):
        # get model's attributes
        super().__init__()
        self.nb_classes = nb_classes
        self.logdir = logdir

        # initialize model (guard against logdir=None, which os.path.exists rejects)
        if self.logdir is not None and not os.path.exists(self.logdir):
            os.mkdir(self.logdir)
        self.model = None
        self.trained = False
        self.history = None

        # build model
        self._build_model(bypass, optimizer)

    def fit(self, train_data, train_label, validation_data, validation_label,
            batch_size, nb_epochs):
        # TODO exploit 'sample_weight'
        # TODO implement resumed training with 'initial_epoch'
        # TODO add documentation
        callbacks = []

        # define checkpoints
        if self.logdir is not None:
            # create checkpoint callback
            checkpoint_path = os.path.join(self.logdir, "cp-{epoch}.ckpt")
            cp_callback = ModelCheckpoint(filepath=checkpoint_path, verbose=1)
            callbacks.append(cp_callback)

        # TODO debug early stopping
        # define early stopping
        early_stop = EarlyStopping(monitor="val_categorical_accuracy",
                                   min_delta=0,
                                   patience=5,
                                   verbose=2)
        callbacks.append(early_stop)

        # fit model
        self.history = self.model.fit(x=train_data,
                                      y=train_label,
                                      batch_size=batch_size,
                                      epochs=nb_epochs,
                                      verbose=2,
                                      callbacks=callbacks,
                                      validation_data=(validation_data, validation_label),
                                      shuffle=True,
                                      sample_weight=None,
                                      initial_epoch=0)

        # update model attribute
        self.trained = True

        return

    def fit_generator(self, train_generator, validation_generator, nb_epochs,
                      nb_workers=1, multiprocessing=False):
        # TODO implement multiprocessing
        # TODO exploit an equivalent of 'sample_weight'
        # TODO implement resumed training with 'initial_epoch'
        # TODO add documentation
        # TODO check distribution strategy during compilation
        # TODO check callbacks parameters

        # check generators (note: the original built a bare `Warning(...)`
        # object, which emits nothing; `warnings.warn` is used instead,
        # assuming `import warnings` at module level)
        if train_generator.nb_epoch_max is not None:
            warnings.warn("Train generator must loop indefinitely over the data. "
                          "The parameter 'nb_epoch_max' is set to None.")
            train_generator.nb_epoch_max = None
        if validation_generator.nb_epoch_max is not None:
            warnings.warn("Validation generator must loop indefinitely over the "
                          "data. The parameter 'nb_epoch_max' is set to None.")
            validation_generator.nb_epoch_max = None

        callbacks = []

        # define checkpoints
        if self.logdir is not None:
            # create checkpoint callback
            checkpoint_path = os.path.join(self.logdir, "cp-{epoch}.ckpt")
            cp_callback = ModelCheckpoint(filepath=checkpoint_path, verbose=1)
            callbacks.append(cp_callback)

        # define early stopping
        early_stop = EarlyStopping(monitor='val_categorical_accuracy',
                                   min_delta=0,
                                   patience=5,
                                   verbose=2)
        callbacks.append(early_stop)

        # fit model from generator
        steps_per_epoch = train_generator.nb_batch_per_epoch
        self.history = self.model.fit_generator(
            generator=train_generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epochs,
            verbose=2,
            callbacks=callbacks,
            validation_data=validation_generator,
            validation_steps=validation_generator.nb_batch_per_epoch,
            max_queue_size=10,
            workers=nb_workers,
            use_multiprocessing=multiprocessing,
            initial_epoch=0)

        # update model attribute
        self.trained = True

        return

    def predict(self, data, return_probability=False):
        # compute probabilities
        probability = self.predict_probability(data=data)

        # make prediction
        prediction = np.argmax(probability, axis=-1)

        if return_probability:
            return prediction, probability
        else:
            return prediction

    def predict_probability(self, data):
        # compute probabilities
        probability = self.model.predict(x=data)

        return probability

    def predict_generator(self, generator, return_probability=False,
                          nb_workers=1, multiprocessing=False, verbose=0):
        # compute probabilities
        probability = self.predict_probability_generator(
            generator=generator,
            nb_workers=nb_workers,
            multiprocessing=multiprocessing,
            verbose=verbose)

        # make prediction
        prediction = np.argmax(probability, axis=-1)

        if return_probability:
            return prediction, probability
        else:
            return prediction

    def predict_probability_generator(self, generator, nb_workers=1,
                                      multiprocessing=False, verbose=0):
        # TODO add multiprocessing
        # compute probabilities
        probability = self.model.predict_generator(
            generator=generator,
            steps=generator.nb_batch_per_epoch,
            workers=nb_workers,
            max_queue_size=1,
            use_multiprocessing=multiprocessing,
            verbose=verbose)

        return probability

    def evaluate(self, data, label, verbose=0):
        # evaluate model
        loss, accuracy = self.model.evaluate(x=data, y=label)
        if verbose > 0:
            print("Loss: {0:.3f} | Accuracy: {1:.3f}".format(loss, 100 * accuracy))

        return loss, accuracy

    def evaluate_generator(self, generator, nb_workers=1, multiprocessing=False,
                           verbose=0):
        # TODO check the outcome 'loss' and 'accuracy'
        # evaluate model
        loss, accuracy = self.model.evaluate_generator(
            generator=generator,
            steps=generator.nb_batch_per_epoch,
            workers=nb_workers,
            max_queue_size=1,
            use_multiprocessing=multiprocessing,
            verbose=verbose)
        if verbose > 0:
            print("Loss: {0:.3f} | Accuracy: {1:.3f}".format(loss, 100 * accuracy))

        return loss, accuracy

    def _build_model(self, bypass, optimizer):
        # build model architecture
        input_ = Input(shape=(224, 224, 3), name="input", dtype="float32")
        logit_ = squeezenet_network_v0(input_tensor=input_,
                                       nb_classes=self.nb_classes,
                                       bypass=bypass)
        output_ = squeezenet_classifier(logit=logit_)
        self.model = Model(inputs=input_, outputs=output_, name="SqueezeNet_v0")

        # get optimizer
        self.optimizer = get_optimizer(optimizer_name=optimizer)

        # compile model
        self.model.compile(optimizer=self.optimizer,
                           loss="categorical_crossentropy",
                           metrics=["categorical_accuracy"])

    def print_model(self):
        print(self.model.summary(), "\n")

    def get_weight(self, latest=True, checkpoint_name="cp.ckpt"):
        # TODO fix the loss of the optimizer state
        # load weights from a training checkpoint if it exists
        if self.logdir is not None and os.path.isdir(self.logdir):
            # the last one...
            if latest:
                checkpoint_path = tf.train.latest_checkpoint(self.logdir)
            # ...or a specific one
            else:
                checkpoint_path = os.path.join(self.logdir, checkpoint_name)
            # load weights
            self.model.load_weights(checkpoint_path)
            self.trained = True
        else:
            raise ValueError("Impossible to load pre-trained weights. The log "
                             "directory is not specified or does not exist.")

    def save_training_history(self):
        """Save the loss and accuracy of the train and validation data over
        the different epochs."""
        if self.logdir is not None:
            path = os.path.join(self.logdir, "history.npz")
            # save each metric under its own key (the original saved the "loss"
            # series four times, which looks like a copy-paste slip)
            np.savez(path,
                     loss=self.history.history["loss"],
                     categorical_accuracy=self.history.history["categorical_accuracy"],
                     val_loss=self.history.history["val_loss"],
                     val_categorical_accuracy=self.history.history["val_categorical_accuracy"])
        return

    def get_feature_map(self, generator, after_average_pooling=True):
        # TODO add documentation
        # get input layer
        input_ = self.model.input

        # get embedding layer
        if after_average_pooling:
            output_ = self.model.layers[-2].output
        else:
            output_ = self.model.layers[-3].output

        # define the steps to compute the feature map
        features_map = function([input_, learning_phase()], [output_])

        # compute the feature map
        if generator.with_label:
            embedding = [features_map([batch, 0])[0] for (batch, _) in generator]
        else:
            embedding = [features_map([batch, 0])[0] for batch in generator]
        embedding = np.array(embedding)
        embedding = np.concatenate(embedding, axis=0)

        if not after_average_pooling:
            a, b, c, d = embedding.shape
            embedding = np.reshape(embedding, (a, b * c * d))

        return embedding
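# Hypothetical usage (not in the original file): train on dummy 224x224 RGB
# data. `squeezenet_network_v0`, `squeezenet_classifier` and `get_optimizer`
# are provided by the surrounding project.
import numpy as np

net = SqueezeNet0(nb_classes=5, logdir="log_squeezenet")
x = np.random.rand(8, 224, 224, 3).astype("float32")
y = np.eye(5)[np.random.randint(0, 5, size=8)]
net.fit(x, y, x, y, batch_size=4, nb_epochs=1)
net.evaluate(x, y, verbose=1)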
class HybridCNN:
    def __init__(self, word_embedding_map, code_embedding_map, word_tokenizer,
                 code_tokenizer, model_config):
        self.model_config = model_config
        self.word_tokenizer = word_tokenizer
        self.code_tokenizer = code_tokenizer
        self.word_embedding_map = word_embedding_map
        self.code_embedding_map = code_embedding_map
        self.model = None

    def create_model(self):
        # Declaration for KimCNN-based word encoder
        word_encoder_input = Input(shape=(self.model_config.max_word_len,), dtype='int32')
        word_embedding_layer = Embedding(len(self.word_tokenizer.word_index) + 1,
                                         self.model_config.word_embedding_dim,
                                         weights=[self.word_embedding_map],
                                         input_length=self.model_config.max_word_len,
                                         trainable=False)
        embedded_word_sequences = word_embedding_layer(word_encoder_input)
        w_conv1 = Conv1D(100, 3, activation='relu', padding='same')(embedded_word_sequences)
        w_pool1 = GlobalMaxPool1D()(w_conv1)
        w_conv2 = Conv1D(100, 4, activation='relu', padding='same')(embedded_word_sequences)
        w_pool2 = GlobalMaxPool1D()(w_conv2)
        w_conv3 = Conv1D(100, 5, activation='relu', padding='same')(embedded_word_sequences)
        w_pool3 = GlobalMaxPool1D()(w_conv3)
        w_concat1 = Concatenate()([w_pool1, w_pool2, w_pool3])
        word_encoder = Model(word_encoder_input, w_concat1)

        # Declaration for KimCNN-based code encoder
        code_encoder_input = Input(shape=(self.model_config.max_code_len,), dtype='int32')
        code_embedding_layer = Embedding(len(self.code_tokenizer.word_index) + 1,
                                         self.model_config.code_embedding_dim,
                                         weights=[self.code_embedding_map],
                                         input_length=self.model_config.max_code_len,
                                         trainable=False)
        embedded_code_sequences = code_embedding_layer(code_encoder_input)
        c_conv1 = Conv1D(100, 3, activation='relu', padding='same')(embedded_code_sequences)
        c_pool1 = GlobalMaxPool1D()(c_conv1)
        c_conv2 = Conv1D(100, 4, activation='relu', padding='same')(embedded_code_sequences)
        c_pool2 = GlobalMaxPool1D()(c_conv2)
        c_conv3 = Conv1D(100, 5, activation='relu', padding='same')(embedded_code_sequences)
        c_pool3 = GlobalMaxPool1D()(c_conv3)
        c_concat1 = Concatenate()([c_pool1, c_pool2, c_pool3])
        code_encoder = Model(code_encoder_input, c_concat1)

        # Similarity classifier using the word and code encoders
        word_input1 = Input(shape=(self.model_config.max_word_len,), dtype='int32')
        word_input2 = Input(shape=(self.model_config.max_word_len,), dtype='int32')
        code_input1 = Input(shape=(self.model_config.max_code_len,), dtype='int32')
        code_input2 = Input(shape=(self.model_config.max_code_len,), dtype='int32')
        l_concat1 = Concatenate()([word_encoder(word_input1), word_encoder(word_input2),
                                   code_encoder(code_input1), code_encoder(code_input2)])
        l_dense1 = Dense(self.model_config.hidden_dim, activation='relu')(l_concat1)
        l_dropout1 = Dropout(self.model_config.dropout)(l_dense1)
        l_dense2 = Dense(self.model_config.hidden_dim, activation='relu')(l_dropout1)
        l_dropout2 = Dropout(self.model_config.dropout)(l_dense2)
        preds = Dense(self.model_config.num_classes, activation='softmax')(l_dropout2)

        self.model = Model([word_input1, word_input2, code_input1, code_input2], preds)
        self.model.compile(loss='categorical_crossentropy', optimizer='adam',
                           metrics=['accuracy'])
        self.model.summary()

    def train(self, train_xw1, train_xw2, train_xc1, train_xc2, train_y,
              evaluate_xw1, evaluate_xw2, evaluate_xc1, evaluate_xc2, evaluate_y,
              **kwargs):
        iteration = 0
        if 'iteration' in kwargs:
            iteration = kwargs['iteration']
        early_stopping_callback = EarlyStopping(patience=self.model_config.patience,
                                                monitor='val_acc')
        checkpoint_callback = ModelCheckpoint(
            filepath="data/checkpoints/hybrid_cnn/%s_%d.hdf5"
                     % (self.model_config.dataset, iteration),
            monitor='val_acc', verbose=1, save_best_only=True)
        self.model.fit([train_xw1, train_xw2, train_xc1, train_xc2], train_y,
                       validation_data=([evaluate_xw1, evaluate_xw2, evaluate_xc1,
                                         evaluate_xc2], evaluate_y),
                       epochs=self.model_config.epochs,
                       batch_size=self.model_config.batch_size,
                       callbacks=[early_stopping_callback, checkpoint_callback])
        self.model.load_weights(filepath="data/checkpoints/hybrid_cnn/%s_%d.hdf5"
                                         % (self.model_config.dataset, iteration))

    def predict(self, predict_xw1, predict_xw2, predict_xc1, predict_xc2):
        return self.model.predict([predict_xw1, predict_xw2, predict_xc1, predict_xc2])

    def evaluate(self, evaluate_xw1, evaluate_xw2, evaluate_xc1, evaluate_xc2, evaluate_y):
        predict_y = self.predict(evaluate_xw1, evaluate_xw2,
                                 evaluate_xc1, evaluate_xc2).argmax(axis=1)
        evaluate_y = evaluate_y.argmax(axis=1)
        return {"individual": precision_recall_fscore_support(evaluate_y, predict_y),
                "micro-average": precision_recall_fscore_support(evaluate_y, predict_y,
                                                                 average="micro")}

    def cross_val(self, data_xw1, data_xw2, data_xc1, data_xc2, data_y, n_splits=5):
        skf = StratifiedKFold(n_splits, shuffle=False, random_state=157)
        print("Performing cross validation (%d-fold)..." % n_splits)
        iteration = 1
        mean_accuracy = 0
        recall_list = [0 for _ in range(self.model_config.num_classes)]
        precision_list = [0 for _ in range(self.model_config.num_classes)]
        for train_index, test_index in skf.split(data_xw1, data_y.argmax(axis=1)):
            self.create_model()
            print("Iteration %d of %d" % (iteration, n_splits))
            self.train(data_xw1[train_index], data_xw2[train_index],
                       data_xc1[train_index], data_xc2[train_index],
                       data_y[train_index],
                       data_xw1[test_index], data_xw2[test_index],
                       data_xc1[test_index], data_xc2[test_index],
                       data_y[test_index],
                       iteration=iteration)
            metrics = self.evaluate(data_xw1[test_index], data_xw2[test_index],
                                    data_xc1[test_index], data_xc2[test_index],
                                    data_y[test_index])
            precision_list = [x + y for x, y in zip(metrics['individual'][0],
                                                    precision_list)]
            recall_list = [x + y for x, y in zip(metrics['individual'][1], recall_list)]
            mean_accuracy += metrics['micro-average'][0]
            print("Precision, Recall, F_Score, Support")
            iteration += 1
            print(metrics)
        print("Mean accuracy: %s Mean precision: %s, Mean recall: %s"
              % (mean_accuracy / n_splits,
                 [precision / n_splits for precision in precision_list],
                 [recall / n_splits for recall in recall_list]))
def test_cyclegan(): mnist_shape, mnist_images = load_mnist() cats = load_input_images() nb_epochs = 50 batch_size = 512 adam_lr = 0.0002 adam_beta_1 = 0.5 adam_decay = 0 # adam_lr/(nb_epochs*120) cyc_multiplier = 10 history_size = int(batch_size * 7 / 3) SAVEPATH = os.path.abspath( os.path.join(os.path.dirname(__file__), 'output')) generation_history_mnist = None generation_history_cats = None adam_optimizer = Adam(lr=adam_lr, beta_1=adam_beta_1, decay=adam_decay) generator_cats = mnist_generator(mnist_shape) generator_cats.compile(optimizer=adam_optimizer, loss='mean_squared_error') generator_cats.summary() discriminator_cats = mnist_discriminator(mnist_shape) discriminator_cats.compile(optimizer=adam_optimizer, loss=discriminator_loss) discriminator_cats.summary() generator_mnist = mnist_generator(mnist_shape) generator_mnist.compile(optimizer=adam_optimizer, loss='mean_squared_error') generator_mnist.summary() discriminator_mnist = mnist_discriminator(mnist_shape) discriminator_mnist.compile(optimizer=adam_optimizer, loss=discriminator_loss) discriminator_mnist.summary() mnist_input = Input(mnist_shape) cat_input = Input(mnist_shape) fake_cat = generator_cats(mnist_input) fake_mnist = generator_mnist(cat_input) # Only train discriminator during first phase # (trainable only affects new models when they are compiled) discriminator_cats.trainable = False discriminator_mnist.trainable = False mnist_gen_trainer = Model(cat_input, discriminator_mnist(fake_mnist)) mnist_gen_trainer.compile(optimizer=adam_optimizer, loss='mean_squared_error') mnist_gen_trainer.summary() cats_gen_trainer = Model(mnist_input, discriminator_cats(fake_cat)) cats_gen_trainer.compile(optimizer=adam_optimizer, loss='mean_squared_error') cats_gen_trainer.summary() mnist_cyc = Model(mnist_input, generator_mnist(fake_cat)) mnist_cyc.compile(optimizer=adam_optimizer, loss=cycle_loss, loss_weights=[cyc_multiplier]) mnist_cyc.summary() cats_cyc = Model(cat_input, generator_cats(fake_mnist)) cats_cyc.compile(optimizer=adam_optimizer, loss=cycle_loss, loss_weights=[cyc_multiplier]) cats_cyc.summary() # training time mnist_discrim_loss = [] cats_discrim_loss = [] mnist_gen_loss = [] cats_gen_loss = [] mnist_cyc_loss = [] cats_cyc_loss = [] if not os.path.exists(SAVEPATH): os.makedirs(os.path.join(SAVEPATH, 'images')) for epoch in range(nb_epochs): print("\n\n================================================") print("Epoch", epoch, '\n') if epoch == 0: # Initialize history for training the discriminator mnist_indices = np.random.choice(mnist_images.shape[0], history_size) generation_history_mnist = mnist_images[mnist_indices] cats_indices = np.random.choice(cats.shape[0], history_size) generation_history_cats = cats[cats_indices] # Make and save a test collage choice = np.random.choice(mnist_images.shape[0]) if k.image_data_format() == 'channels_first': mnist_in = mnist_images[choice].reshape((1, 1, 28, 28)) else: mnist_in = mnist_images[choice].reshape((1, 28, 28, 1)) cat_out = generator_cats.predict(mnist_in) mnist_cyc_out = generator_mnist.predict(cat_out) choice = np.random.choice(cats.shape[0]) if k.image_data_format() == 'channels_first': cat_in = cats[choice].reshape((1, 1, 28, 28)) else: cat_in = cats[choice].reshape((1, 28, 28, 1)) mnist_out = generator_mnist.predict(cat_in) cat_cyc_out = generator_cats.predict(mnist_out) mnist_test_images = np.concatenate( (prettify(mnist_in), prettify(cat_out), prettify(mnist_cyc_out)), axis=1) cat_test_images = np.concatenate( (prettify(cat_in), prettify(mnist_out), 
prettify(cat_cyc_out)), axis=1) test_collage = np.concatenate((mnist_test_images, cat_test_images), axis=0) test_collage = Image.fromarray(test_collage, mode='L') test_collage.save(os.path.join(SAVEPATH, 'images', str(epoch) + '.png')) for batch in range(int(mnist_images.shape[0] / batch_size)): print("\nEpoch", epoch, "| Batch", batch) # Get batch. mnist_indices = np.random.choice(mnist_images.shape[0], batch_size) mnist_batch_real = mnist_images[mnist_indices] cats_indices = np.random.choice(cats.shape[0], batch_size) cats_batch_real = cats[cats_indices] # Update history with new generated images. mnist_batch_gen = generator_mnist.predict_on_batch(cats_batch_real) cats_batch_gen = generator_cats.predict_on_batch(mnist_batch_real) generation_history_mnist = np.concatenate( (generation_history_mnist[batch_size:], mnist_batch_gen)) generation_history_cats = np.concatenate( (generation_history_cats[batch_size:], cats_batch_gen)) # Train discriminators. real_label = np.ones(batch_size) fake_label = np.zeros(batch_size) mnist_discrim_loss.append( discriminator_mnist.train_on_batch( np.concatenate((generation_history_mnist[:batch_size], mnist_batch_real)), np.concatenate((fake_label, real_label)))) print("MNIST Discriminator Loss:", mnist_discrim_loss[-1]) cats_discrim_loss.append( discriminator_cats.train_on_batch( np.concatenate((generation_history_cats[:batch_size], cats_batch_real)), np.concatenate((fake_label, real_label)))) print("Cats Discriminator Loss:", cats_discrim_loss[-1]) # Train generators. mnist_gen_loss.append( mnist_gen_trainer.train_on_batch(cats_batch_real, real_label)) print("MNIST Generator Loss:", mnist_gen_loss[-1]) cats_gen_loss.append( cats_gen_trainer.train_on_batch(mnist_batch_real, real_label)) print("Cats Generator Loss:", cats_gen_loss[-1]) mnist_cyc_loss.append( mnist_cyc.train_on_batch(mnist_batch_real, mnist_batch_real)) print("MNIST Cyclic Loss:", mnist_cyc_loss[-1]) cats_cyc_loss.append( cats_cyc.train_on_batch(cats_batch_real, cats_batch_real)) print("Cats Cyclic Loss:", cats_cyc_loss[-1]) # Save models. generator_cats.save(os.path.join(SAVEPATH, 'generator_cats.h5')) generator_mnist.save(os.path.join(SAVEPATH, 'generator_mnist.h5')) discriminator_cats.save(os.path.join(SAVEPATH, 'discriminator_cats.h5')) discriminator_mnist.save(os.path.join(SAVEPATH, 'discriminator_mnist.h5')) # Save training history. output_dict = { 'mnist_discrim_loss': [str(loss) for loss in mnist_discrim_loss], 'cats_discrim_loss': [str(loss) for loss in cats_discrim_loss], 'mnist_gen_loss': [str(loss) for loss in mnist_gen_loss], 'cats_gen_loss': [str(loss) for loss in cats_gen_loss], 'mnist_cyc_loss': [str(loss) for loss in mnist_cyc_loss], 'cats_cyc_loss': [str(loss) for loss in cats_cyc_loss] } with open(os.path.join(SAVEPATH, 'log.txt'), 'w') as f: json.dump(output_dict, f, indent=4)
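# Sketch of the image-history trick used in the training loop above, in
# isolation: the discriminators are trained on a FIFO buffer of past generator
# outputs rather than only the newest batch, which damps oscillation. The
# shapes here are toy values, not the ones used in test_cyclegan.
import numpy as np

history_size, batch_size = 7, 3
history = np.zeros((history_size, 28, 28, 1))        # seeded with initial samples
new_batch = np.random.rand(batch_size, 28, 28, 1)    # freshly generated images

# Drop the oldest `batch_size` entries and append the new ones (the same update
# applied to generation_history_mnist / generation_history_cats above).
history = np.concatenate((history[batch_size:], new_batch))
assert history.shape[0] == history_size

# The discriminator then sees the oldest slice of the buffer plus a real batch.
fake_for_discriminator = history[:batch_size]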
def build_autoencoder(self, param): autoencoder = None input_img = Input(shape=(param.get('image_size'), param.get('image_size'), param.get('image_channels')), name='input') x = Conv2D(256, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(input_img) # tanh? x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Conv2D(128, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Conv2D(128, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Flatten()(x) encoded = Dense(param.get('code_size'), name='encoded')(x) print('encoded shape ', encoded.shape) ims = 8 first = True x = Dense(int(ims * ims), activation='relu')(encoded) x = Reshape(target_shape=(ims, ims, 1))(x) # -12 while ims != param.get('image_size'): x = Conv2D( int(ims * ims / 2), (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = UpSampling2D((param.get('cae_max_pool_size'), param.get('cae_max_pool_size')))(x) ims = ims * param.get('cae_max_pool_size') decoded = Conv2D( param.get('image_channels'), (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='sigmoid', padding='same', name='decoded')(x) print('decoded shape ', decoded.shape) autoencoder = Model(input_img, decoded) autoencoder.compile(optimizer='adam', loss='mean_squared_error') # Create a separate encoder model encoder = Model(input_img, encoded) encoder.compile(optimizer='adam', loss='mean_squared_error') encoder.summary() # Create a separate decoder model decoder_inp = Input(shape=(param.get('code_size'), )) # decoder_inp = Input(shape=encoded.output_shape) enc_layer_idx = utils.getLayerIndexByName(autoencoder, 'encoded') print('encoder layer idx ', enc_layer_idx) decoder_layer = autoencoder.layers[enc_layer_idx + 1](decoder_inp) for i in range(enc_layer_idx + 2, len(autoencoder.layers)): decoder_layer = autoencoder.layers[i](decoder_layer) decoder = Model(decoder_inp, decoder_layer) decoder.compile(optimizer='adam', loss='mean_squared_error') decoder.summary() return autoencoder, encoder, decoder
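# A hedged usage sketch for build_autoencoder. The parameter values are
# illustrative assumptions; the one hard constraint is that image_size must be
# reachable from 8 by repeated multiplication by cae_max_pool_size, otherwise
# the decoder's `while ims != image_size` loop never terminates.
import numpy as np

params = {'image_size': 32, 'image_channels': 1,
          'cae_conv_size': 3, 'cae_max_pool_size': 2, 'code_size': 32}
autoencoder, encoder, decoder = build_autoencoder(None, params)  # `self` is unused in the body
code = encoder.predict(np.zeros((1, 32, 32, 1)))
print(code.shape)  # (1, 32) under these assumed parameters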
def get_model(weights=None, verbose=True, **kwargs): for k, v in kwargs.items(): assert k in PARAMS PARAMS[k] = v if verbose: print("Model hyper-parameters:", PARAMS) dhw = PARAMS['dhw'] first_scale = PARAMS['first_scale'] first_layer = PARAMS['first_layer'] kernel_initializer = PARAMS['kernel_initializer'] weight_decay = PARAMS['weight_decay'] down_structure = PARAMS['down_structure'] output_size = PARAMS['output_size'] shape = dhw + [1] inputs = Input(shape=shape) if first_scale is not None: scaled = Lambda(first_scale)(inputs) else: scaled = inputs conv = Conv3D(first_layer, kernel_size=(3, 3, 3), padding='same', use_bias=True, kernel_initializer=kernel_initializer, kernel_regularizer=l2_penalty(weight_decay))(scaled) downsample_times = len(down_structure) top_down = [] for l, n in enumerate(down_structure): db = _dense_block(conv, n) top_down.append(db) conv = _transmit_block(db, l == downsample_times - 1) feat = top_down[-1] for top_feat in reversed(top_down[:-1]): *_, f = top_feat.get_shape().as_list() deconv = Conv3DTranspose(filters=f, kernel_size=2, strides=2, use_bias=True, kernel_initializer=kernel_initializer, kernel_regularizer=l2_penalty(weight_decay))(feat) feat = add([top_feat, deconv]) seg_head = Conv3D(1, kernel_size=(1, 1, 1), padding='same', activation='sigmoid', use_bias=True, kernel_initializer=kernel_initializer, kernel_regularizer=l2_penalty(weight_decay), name='seg')(feat) if output_size == 1: last_activation = 'sigmoid' else: last_activation = 'softmax' clf_head = Dense(output_size, activation=last_activation, kernel_regularizer=l2_penalty(weight_decay), kernel_initializer=kernel_initializer, name='clf')(conv) model = Model(inputs, [clf_head, seg_head]) if verbose: model.summary() if weights is not None: model.load_weights(weights) return model
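# Hedged usage sketch for get_model. Keyword overrides must already be keys of
# the module-level PARAMS dict (the assert enforces this). Compile settings are
# not part of get_model, so the ones below are illustrative assumptions for the
# two-head (classification 'clf' + segmentation 'seg') output.
model = get_model()  # or e.g. get_model(weights='run1.h5') to restore a checkpoint
model.compile(optimizer='adam',
              loss={'clf': 'binary_crossentropy', 'seg': 'binary_crossentropy'},
              loss_weights={'clf': 1.0, 'seg': 0.2})  # assumed weighting
# model.predict(volume_batch) -> [clf_probs, seg_mask]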
def __init__(self):
        self.HOPS = 7
        self.SCORE_FUNCTION = 'mlp'  # scaled_dot_product / mlp (concat) / bi_linear (general dot)
        self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
        self.POLARITIES_DIM = 3
        self.EMBEDDING_DIM = 300
        self.LEARNING_RATE = 0.01
        self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
        self.REGULARIZER = regularizers.l2(0.001)
        self.MAX_SEQUENCE_LENGTH = 80
        self.MAX_ASPECT_LENGTH = 10
        self.BATCH_SIZE = 200
        self.EPOCHS = 100
        self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
        self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, \
        self.polarities_matrix, \
        self.embedding_matrix, \
        self.tokenizer = \
            read_dataset(type=self.DATASET,
                         mode='train',
                         embedding_dim=self.EMBEDDING_DIM,
                         max_seq_len=self.MAX_SEQUENCE_LENGTH,
                         max_aspect_len=self.MAX_ASPECT_LENGTH)

        if os.path.exists('dmn_saved_model.h5'):
            print('loading saved model...')
            # The saved graph contains the custom Attention layer and the f1
            # metric, so Keras needs them handed back via custom_objects.
            self.model = load_model('dmn_saved_model.h5',
                                    custom_objects={'Attention': Attention, 'f1': f1})
        else:
            print('Build model...')
            inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH,), name='inputs_sentence')
            inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
            nonzero_count = Lambda(lambda xin: tf.count_nonzero(xin, dtype=tf.float32))(inputs_aspect)
            memory = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                               output_dim=self.EMBEDDING_DIM,
                               input_length=self.MAX_SEQUENCE_LENGTH,
                               mask_zero=True,
                               weights=[self.embedding_matrix],
                               trainable=False,
                               name='sentence_embedding')(inputs_sentence)
            memory = Lambda(self.locationed_memory, name='locationed_memory')(memory)
            aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                               output_dim=self.EMBEDDING_DIM,
                               input_length=self.MAX_ASPECT_LENGTH,
                               mask_zero=True,
                               weights=[self.embedding_matrix],
                               trainable=False,
                               name='aspect_embedding')(inputs_aspect)
            # Mean of the aspect embeddings: sum over time / number of non-pad tokens.
            x = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1], name='aspect_mean')([aspect, nonzero_count])
            shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                         initializer=self.INITIALIZER,
                                         regularizer=self.REGULARIZER,
                                         name='shared_attention')
            for i in range(self.HOPS):
                x = shared_attention((memory, x))
            x = Flatten()(x)
            x = Dense(self.POLARITIES_DIM)(x)
            predictions = Activation('softmax')(x)
            model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
            model.summary()
            model.compile(loss='categorical_crossentropy',
                          optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                          metrics=['acc', f1])
            # plot_model(model, to_file='model.png')
            self.model = model
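# Hedged training sketch for the DMN above. The class name is hypothetical;
# the attribute names come from __init__, but exactly which index arrays feed
# inputs_sentence is an assumption (raw sentence indices plus aspect indices is
# the most direct reading of the two Input layers).
dmn = DMN()  # hypothetical name for the class owning the __init__ above
dmn.model.fit([dmn.texts_raw_indices, dmn.aspects_indices],
              dmn.polarities_matrix,
              batch_size=dmn.BATCH_SIZE,
              epochs=dmn.EPOCHS,
              validation_split=0.1)
dmn.model.save('dmn_saved_model.h5')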
# In[184]:

decoder_output

# In[185]:

model_train = Model(inputs=[encoder_input, decoder_input], outputs=[decoder_output])

# In[186]:

model_train.summary()

# In[187]:

model_encoder = Model(inputs=[encoder_input], outputs=encoder_output)
model_encoder.summary()

# In[188]:

decoder_output = connect_decoder(initial_state=decoder_initial_state)
model_decoder = Model(inputs=[decoder_input] + decoder_initial_state, outputs=[decoder_output])
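# In[189]:

# Hedged greedy-decoding sketch using the inference models above. The token
# ids, maximum length, and the number of state tensors are assumptions; because
# model_decoder only returns decoder_output (not updated states), the decoder
# is re-run on the growing prefix at every step.
import numpy as np

def greedy_decode(input_seq, initial_tokens, end_token=2, max_len=30):
    states = model_encoder.predict(input_seq)
    if not isinstance(states, list):
        states = [states]
    tokens = list(initial_tokens)
    for _ in range(max_len):
        out = model_decoder.predict([np.array([tokens])] + states)
        next_token = int(np.argmax(out[0, -1]))  # most likely token at the last step
        tokens.append(next_token)
        if next_token == end_token:
            break
    return tokens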
class SimpleSeq2Seq(Seq2SeqCore): """A simple Seq2Seq architecture using Word2Vec word embeddings. The encoder can be set to use bidirectional LSTM. """ def __init__(self, n_lstm_units: int, dropout_rate: float, recurrent_dropout_rate: float, **kwargs): super().__init__(**kwargs) self._n_lstm_units = n_lstm_units self._dropout_rate = dropout_rate self._recurrent_dropout_rate = recurrent_dropout_rate self._encoder_inputs = None self._encoder_states = None self._encoder_outputs = None self._decoder_inputs = None self._decoder_embeddings = None self._decoder_lstm = None self._decoder_attention = None self._decoder_dense = None self._decoder_outputs = None def _construct_train_model(self, print_summary: bool, **kwargs) -> None: """Construct the model used at training. :param print_summary: Print model summary after compilation. :param kwargs: - :return: None """ # Encoder self._encoder_inputs = Input(shape=(self._time_steps,), name='encoder_inputs') if self._source_embedding_matrix is None: encoder_embedding = \ Embedding(input_dim=self._source_vocab_size, output_dim=self._embedding_dim, trainable=self._trainable_embeddings, name='encoder_embeddings')(self._encoder_inputs) else: encoder_embedding = \ Embedding(input_dim=self._source_vocab_size, output_dim=self._embedding_dim, weights=[self._source_embedding_matrix], trainable=self._trainable_embeddings, name='encoder_embeddings')(self._encoder_inputs) if self._bidirectional_encoder: self._encoder_outputs, forward_h, forward_c, backward_h, backward_c = \ Bidirectional(layer=LSTM(units=self._n_lstm_units, return_sequences=True, return_state=True, dropout=self._dropout_rate, recurrent_dropout=self._recurrent_dropout_rate), name='encoder_LSTM')(encoder_embedding) encoder_h = Concatenate()([forward_h, backward_h]) encoder_c = Concatenate()([forward_c, backward_c]) else: self._encoder_outputs, encoder_h, encoder_c = \ LSTM(units=self._n_lstm_units, return_sequences=True, return_state=True, name='encoder_LSTM', dropout=self._dropout_rate, recurrent_dropout=self._recurrent_dropout_rate)(encoder_embedding) self._encoder_states = [encoder_h, encoder_c] # Decoder self._decoder_inputs = Input(shape=(self._time_steps,), name='decoder_inputs') if self._target_embedding_matrix is None: self._decoder_embeddings = \ Embedding(input_dim=self._target_vocab_size, output_dim=self._embedding_dim, trainable=self._trainable_embeddings, name='decoder_embeddings')(self._decoder_inputs) else: self._decoder_embeddings = \ Embedding(input_dim=self._target_vocab_size, output_dim=self._embedding_dim, trainable=self._trainable_embeddings, weights=[self._target_embedding_matrix], name='decoder_embeddings')(self._decoder_inputs) n_units = self._n_lstm_units if self._bidirectional_encoder: n_units *= 2 # outputs are concatenated self._decoder_lstm = LSTM(units=n_units, return_sequences=True, return_state=True, dropout=self._dropout_rate, recurrent_dropout=self._recurrent_dropout_rate, name='decoder_LSTM') self._decoder_outputs, _, _ = self._decoder_lstm(inputs=self._decoder_embeddings, initial_state=self._encoder_states) if self._use_attention: self._decoder_attention = AttentionBlock(use_shared_attention_vector=self._use_shared_attention_vector, name='decoder_attention') self._decoder_outputs = self._decoder_attention(self._decoder_outputs) self._decoder_dense = TimeDistributed(layer=Dense(units=self._target_vocab_size, activation='softmax')) self._decoder_outputs = self._decoder_dense(self._decoder_outputs) self._model = Model(inputs=[self._encoder_inputs, 
self._decoder_inputs], outputs=[self._decoder_outputs])
        if print_summary:
            self._model.summary()

    def _construct_inference_model(self, print_summary: bool, **kwargs) -> None:
        """Construct the model used at inference (e.g. use in production).

        :param print_summary: Print model summary after compilation.
        :param kwargs: -
        :return: None
        """
        # Encoder
        self._encoder_inf_model = Model(inputs=self._encoder_inputs, outputs=self._encoder_states)

        # Decoder
        # The decoder LSTM was built with 2 * n_lstm_units when the encoder is
        # bidirectional (its states are concatenated), so the inference state
        # inputs must match that width.
        n_units = self._n_lstm_units
        if self._bidirectional_encoder:
            n_units *= 2
        decoder_state_input_h = Input(shape=(n_units,), name='decoder_inf_input_h')
        decoder_state_input_c = Input(shape=(n_units,), name='decoder_inf_input_c')
        decoder_inf_states_input = [decoder_state_input_h, decoder_state_input_c]
        decoder_inf_output, decoder_inf_h, decoder_inf_c = self._decoder_lstm(inputs=self._decoder_embeddings,
                                                                              initial_state=decoder_inf_states_input)
        decoder_inf_states = [decoder_inf_h, decoder_inf_c]
        if self._use_attention:
            decoder_inf_output = self._decoder_attention(decoder_inf_output)
        decoder_inf_output = self._decoder_dense(decoder_inf_output)
        self._decoder_inf_model = Model(inputs=[self._decoder_inputs] + decoder_inf_states_input,
                                        outputs=[decoder_inf_output] + decoder_inf_states)
        if print_summary:
            print("Encoder model:")
            self._encoder_inf_model.summary()
            print("\nDecoder model:")
            self._decoder_inf_model.summary()

    def predict(self, X_test: Union[np.ndarray, List[np.ndarray]], save_predictions: bool = False,
                path: str = "") -> np.ndarray:
        # TODO: implement prediction
        raise NotImplementedError()
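    # A hedged sketch of what the missing predict() could do with the two
    # inference models above: encode once, then loop the decoder one token at a
    # time, feeding back the predicted id and the returned LSTM states. The
    # BOS/EOS ids and max_len are assumptions, and a single-token step assumes
    # the decoder tolerates length-1 sequences (i.e. _time_steps is not
    # hard-coded into the Input shape at inference time).
    def _greedy_decode_sketch(self, encoder_input, bos_id=1, eos_id=2, max_len=50):
        state_h, state_c = self._encoder_inf_model.predict(encoder_input)
        token = np.array([[bos_id]])
        decoded = []
        for _ in range(max_len):
            output, state_h, state_c = self._decoder_inf_model.predict([token, state_h, state_c])
            next_id = int(np.argmax(output[0, -1]))
            if next_id == eos_id:
                break
            decoded.append(next_id)
            token = np.array([[next_id]])
        return decoded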
def init_model(self, input_shape, num_classes, **kwargs): freq_axis = 2 channel_axis = 3 channel_size = 128 min_size = min(input_shape[:2]) melgram_input = Input(shape=input_shape) # x = ZeroPadding2D(padding=(0, 37))(melgram_input) x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input) x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x) # Conv block 1 x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x) x = ELU()(x) x = BatchNormalization(axis=channel_axis, name='bn1')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x) x = Dropout(0.1, name='dropout1')(x) # Conv block 2 x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x) x = ELU()(x) x = BatchNormalization(axis=channel_axis, name='bn2')(x) x = MaxPooling2D(pool_size=(4, 2), strides=(4, 2), name='pool2')(x) x = Dropout(0.1, name='dropout2')(x) # Conv block 3 x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x) x = ELU()(x) x = BatchNormalization(axis=channel_axis, name='bn3')(x) x = MaxPooling2D(pool_size=(4, 2), strides=(4, 2), name='pool3')(x) x = Dropout(0.1, name='dropout3')(x) if min_size // 32 >= 4: # Conv block 4 x = Convolution2D(channel_size, 3, 1, padding='same', name='conv4')(x) x = ELU()(x) x = BatchNormalization(axis=channel_axis, name='bn4')(x) x = MaxPooling2D(pool_size=(4, 2), strides=(4, 2), name='pool4')(x) x = Dropout(0.1, name='dropout4')(x) x = Reshape((-1, channel_size))(x) gru_units = 32 if num_classes > 32: gru_units = int(num_classes * 1.5) # GRU block 1, 2, output x = CuDNNGRU(gru_units, return_sequences=True, name='gru1')(x) x = CuDNNGRU(gru_units, return_sequences=False, name='gru2')(x) x = Dropout(0.3)(x) outputs = Dense(num_classes, activation='softmax', name='output')(x) model = TFModel(inputs=melgram_input, outputs=outputs) optimizer = optimizers.Adam( # learning_rate=1e-3, lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-4, amsgrad=True) model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=['accuracy']) model.summary() self._model = model self.is_init = True
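# Hedged usage sketch for init_model above. The owning class is not shown in
# this snippet, and the (time, frequency) input shape is an illustrative
# assumption for a mel-spectrogram front end.
# clf = SomeAudioClassifier()                  # hypothetical owner of init_model
# clf.init_model(input_shape=(625, 128), num_classes=10)
# clf._model.fit(x_train, y_train_int, ...)    # sparse integer labels, per the loss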
x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=2,
               activation='relu',
               padding='same')(x)

# Shape info needed to build Decoder Model
shape = K.int_shape(x)

# Generate the latent vector
x = Flatten()(x)
latent = Dense(latent_dim, name='latent_vector')(x)

# Instantiate Encoder Model
encoder = Model(inputs, latent, name='encoder')
encoder.summary()

# Build the Decoder Model
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# Stack of Transposed Conv2D blocks
# Notes:
# 1) Use Batch Normalization before ReLU on deep networks
# 2) Use UpSampling2D as alternative to strides>1
# - faster but not as good as strides>1
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=2,
                        activation='relu',
                        padding='same')(x)  # call closed; the trailing arguments mirror the encoder block above
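# The snippet breaks off inside the decoder stack. A plausible completion,
# modelled on the standard Keras autoencoder example this code follows (the
# exact tail is an assumption, not recovered text):
outputs = Conv2DTranspose(filters=1,
                          kernel_size=kernel_size,
                          activation='sigmoid',
                          padding='same',
                          name='decoder_output')(x)

decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

# Autoencoder = Encoder + Decoder
autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.compile(loss='mse', optimizer='adam')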
def train(TRAIN_TSV, TRAIN_EMB_PIDS, TRAIN_EMB_DIR, EMB_PREFIX, EMB_BATCH_SIZE, epochs,
          model_out_path, plot_path, max_seq_length=20, n_hidden=50):
    # Load training set (skip the TSV header line)
    train_dat = []
    with open(TRAIN_TSV, 'r') as tr:
        first = True
        for l in tr:
            if first:
                first = False
                continue
            train_dat.append([int(l.split('\t')[0]), l.split('\t')[1], l.split('\t')[2]])

    # Build passage-pair embeddings (embedding_dim matches the 768-d vectors
    # produced by make_psg_pair_embeddings)
    embedding_dim = 768
    Y, X, train_pairs = make_psg_pair_embeddings(train_dat, TRAIN_EMB_PIDS, TRAIN_EMB_DIR,
                                                 EMB_PREFIX, EMB_BATCH_SIZE, max_seq_length)

    # Split into train / validation
    validation_size = int(len(X) * 0.1)
    X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size=validation_size)

    # Model variables
    gpus = 2
    batch_size = 1024 * gpus

    # Define the shared model
    x = Sequential()
    # x.add(LSTM(n_hidden))
    x.add(Bidirectional(LSTM(n_hidden)))
    shared_model = x

    # The visible layer
    left_input = Input(shape=(max_seq_length, embedding_dim,), dtype='float32')
    right_input = Input(shape=(max_seq_length, embedding_dim,), dtype='float32')

    # Pack it all up into a Manhattan Distance model
    malstm_distance = ManDist()([shared_model(left_input), shared_model(right_input)])
    # cos_distance = CosineDist()([shared_model(left_input), shared_model(right_input)])
    model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

    # if gpus >= 2:
    #     # `multi_gpu_model()` is quite buggy: it breaks the saved model.
    #     model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])
    model.summary()
    shared_model.summary()

    # Start training
    training_start_time = time()
    # X packs the left and right sequences along the feature axis, so slice
    # them apart again for the two inputs.
    malstm_trained = model.fit([X_train[:, :, :embedding_dim], X_train[:, :, embedding_dim:]], Y_train,
                               batch_size=batch_size, epochs=epochs,
                               validation_data=([X_validation[:, :, :embedding_dim],
                                                 X_validation[:, :, embedding_dim:]], Y_validation))
    training_end_time = time()
    print("Training time finished.\n%d epochs in %12.2f" % (epochs, training_end_time - training_start_time))

    model.save_weights(model_out_path)

    # Plot accuracy (newer Keras logs 'accuracy'/'val_accuracy', older 'acc'/'val_acc')
    plt.subplot(211)
    if 'accuracy' in malstm_trained.history.keys():
        plt.plot(malstm_trained.history['accuracy'])
        plt.plot(malstm_trained.history['val_accuracy'])
    else:
        plt.plot(malstm_trained.history['acc'])
        plt.plot(malstm_trained.history['val_acc'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot loss
    plt.subplot(212)
    plt.plot(malstm_trained.history['loss'])
    plt.plot(malstm_trained.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')

    plt.tight_layout(h_pad=1.0)
    plt.savefig(plot_path)

    if 'accuracy' in malstm_trained.history.keys():
        print(str(malstm_trained.history['val_accuracy'][-1])[:6] +
              "(max: " + str(max(malstm_trained.history['val_accuracy']))[:6] + ")")
    else:
        print(str(malstm_trained.history['val_acc'][-1])[:6] +
              "(max: " + str(max(malstm_trained.history['val_acc']))[:6] + ")")
    print("Done.")
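# `ManDist` is defined elsewhere in the project. For reference, a minimal
# sketch of the usual MaLSTM similarity layer: exp of the negative L1 distance
# between the two sentence encodings, giving a score in (0, 1]. This is an
# assumption about its behaviour, not the project's actual implementation.
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class ManDistSketch(Layer):
    """similarity = exp(-||h_left - h_right||_1)"""

    def call(self, inputs):
        left, right = inputs
        return K.exp(-K.sum(K.abs(left - right), axis=1, keepdims=True))

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], 1)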
class DAE:
    def __init__(self):
        input_1 = Input(shape=(None, None, 3))
        conv_1 = Convolution2D(64, kernel_size=(3, 3), padding='same', activation='relu')(input_1)
        conv_2 = Convolution2D(64, kernel_size=(5, 5), padding='same', activation='relu')(conv_1)
        dconv_1 = Convolution2DTranspose(64, kernel_size=(3, 3), padding='same', activation='relu')(conv_2)
        merge_1 = merge.maximum([dconv_1, conv_2])
        dconv_2 = Convolution2DTranspose(64, kernel_size=(3, 3), padding="same", activation='relu')(merge_1)
        merge_2 = merge.maximum([dconv_2, conv_1])
        conv3 = Convolution2D(3, (5, 5), padding="same", activation='relu')(merge_2)
        self.model = Model(inputs=input_1, outputs=conv3)
        self.model.compile(optimizer='adam', loss='mean_squared_error', metrics=['acc'])
        self.model.summary()
        self.batch_size = 128

    def load_model_weights(self, save_path):
        self.model.load_weights(save_path)

    def save_model(self, save_path):
        self.model.save(save_path)

    def train(self, epochs):
        # Count records so steps_per_epoch covers the whole dataset once.
        n_records = 0
        for _ in tf.python_io.tf_record_iterator('Data/train.tfrecords'):
            n_records += 1
        x, y = self.input_fn('Data/train.tfrecords')
        self.model.fit(x, y, epochs=epochs, steps_per_epoch=n_records // self.batch_size)

    def denoise_patch(self, image_patch):
        image_patch = image_patch[np.newaxis, ...]
        output_t = self.model.predict(image_patch)
        output_t = np.clip(output_t, 0, 255)
        # Drop the leading batch axis so callers get an (H, W, 3) patch that can
        # be assigned straight into the output image.
        return output_t[0]

    def denoise(self, image_array):
        dim = image_array.shape
        img_h = dim[0]
        img_w = dim[1]
        d_image = image_array.copy()  # don't modify the caller's array in place

        # Small images go through the network in one shot.
        if img_w * img_h < 400 * 400:
            image_array = image_array[np.newaxis, ...]
            a = np.clip(self.model.predict(image_array), 0, 255).astype('uint8')
            a = a.squeeze(0)
            img1 = Image.fromarray(a)
            return img1

        # Larger images are denoised in 33x33 tiles; partial tiles at the
        # right/bottom borders are handled case by case.
        for y in range(0, img_w, 33):
            for x in range(0, img_h, 33):
                patch = image_array[x:x + 33, y:y + 33, :]
                if patch.shape[0] == 33 and patch.shape[1] == 33:
                    patch = self.denoise_patch(patch)
                    d_image[x:x + 33, y:y + 33, :] = patch
                elif patch.shape[0] < 33 and patch.shape[1] < 33:
                    patch = self.denoise_patch(patch)
                    d_image[x:, y:, :] = patch
                elif patch.shape[1] < 33 and patch.shape[0] == 33:
                    l = patch.shape[1]
                    patch = self.denoise_patch(patch)
                    d_image[x:x + 33, y:y + l, :] = patch
                elif patch.shape[0] < 33 and patch.shape[1] == 33:
                    l = patch.shape[0]
                    patch = self.denoise_patch(patch)
                    d_image[x:x + l, y:y + 33, :] = patch
        d_image = Image.fromarray(d_image.astype('uint8'))
        return d_image

    def parser(self, record):
        keys_to_feature = {
            "reference": tf.FixedLenFeature([], tf.string),
            "noisy": tf.FixedLenFeature([], tf.string)
        }
        parsed = tf.parse_single_example(record, keys_to_feature)
        target_image = tf.decode_raw(parsed['reference'], tf.uint8)
        target_image = tf.cast(target_image, tf.float32)
        target_image = tf.reshape(target_image, shape=[33, 33, 3])
        noisy_image = tf.decode_raw(parsed['noisy'], tf.uint8)
        noisy_image = tf.cast(noisy_image, tf.float32)
        noisy_image = tf.reshape(noisy_image, shape=[33, 33, 3])
        return noisy_image, target_image

    def input_fn(self, filename):
        dataset = tf.data.TFRecordDataset(filename)
        dataset = dataset.map(self.parser)
        dataset = dataset.repeat()
        dataset = dataset.batch(self.batch_size)
        iterator = dataset.make_one_shot_iterator()
        noisy_batch, target_batch = iterator.get_next()
        return noisy_batch, target_batch
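# Hedged end-to-end sketch for DAE: train on the TFRecords path hard-coded
# above, then denoise one image. The file names are illustrative assumptions.
import numpy as np
from PIL import Image

dae = DAE()
dae.train(epochs=5)
dae.save_model('Data/dae.h5')

noisy = np.array(Image.open('noisy.png').convert('RGB'))  # hypothetical input file
denoised = dae.denoise(noisy)                             # returns a PIL Image
denoised.save('denoised.png')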
class Models: def __init__(self, param, train_images=None): self.time_start = datetime.datetime.now() self.parameters = param # initialise autoencoder, fwd and inv models #self.autoencoder, self.encoder, self.decoder = self.load_autoencoder(self.parameters, train_images = train_images) self.load_autoencoder(self.parameters, train_images=train_images) self.fwd_model = self.load_forward_code_model(self.parameters) self.inv_model = self.load_inverse_code_model(self.parameters) self.goal_som = self.load_som(self.parameters, train_images=train_images) self.reduce_som_learning_rate( self.parameters.get('reduce_som_learning_rate_factor') ) # by a factor of 1/10, if not otherwise specified # initialise memory (one per model - autoencoder is kept fixed for the moment) # how many elements to keep in memory? self.memory_size = self.parameters.get('memory_size') # probability of substituting an element of the memory with the current observation self.prob_update = self.parameters.get('memory_update_probability') self.memory_fwd = memory.Memory(param=self.parameters) self.memory_inv = memory.Memory(param=self.parameters) # initialise loggers self.logger_fwd = model_logger.Logger(param=self.parameters, name='fwd') self.logger_inv = model_logger.Logger(param=self.parameters, name='inv') def activation_positive_tanh(self, x, target_min=0, target_max=1): x02 = K.tanh(x) + 1 # x in range(0,2) scale = (target_max - target_min) / 2. return x02 * scale + target_min def load_autoencoder(self, param, train_images=None): cae_file = param.get('directory_models') + param.get('cae_filename') e_file = param.get('directory_models') + param.get('encoder_filename') d_file = param.get('directory_models') + param.get('decoder_filename') #cae_file = './pretrained_models/' + param.get('cae_filename') #e_file = './pretrained_models/' + param.get('encoder_filename') #d_file = './pretrained_models/' + param.get('decoder_filename') self.autoencoder = [] self.encoder = [] self.decoder = [] # if cae file already exists (i.e. cae has been already trained): if os.path.isfile(cae_file) and os.path.isfile( e_file) and os.path.isfile(d_file): # load convolutional autoencoder print('Loading existing pre-trained autoencoder: ', cae_file) # clear tensorflow graph #utils.clear_tensorflow_graph() self.autoencoder = load_model( cae_file) # keras.load_model function # Create a separate encoder model encoder_inp = Input(shape=(param.get('image_size'), param.get('image_size'), param.get('image_channels'))) encoder_layer = self.autoencoder.layers[1](encoder_inp) enc_layer_idx = utils.getLayerIndexByName(self.autoencoder, 'encoded') for i in range(2, enc_layer_idx + 1): encoder_layer = self.autoencoder.layers[i](encoder_layer) self.encoder = Model(encoder_inp, encoder_layer) if (param.get('verbosity_level') > 2): print(self.encoder.summary()) # Create a separate decoder model decoder_inp = Input(shape=(param.get('code_size'), )) decoder_layer = self.autoencoder.layers[enc_layer_idx + 1](decoder_inp) for i in range(enc_layer_idx + 2, len(self.autoencoder.layers)): decoder_layer = self.autoencoder.layers[i](decoder_layer) self.decoder = Model(decoder_inp, decoder_layer) if (param.get('verbosity_level') > 2): print(self.decoder.summary()) print('Autoencoder loaded') else: # otherwise train a new one print( 'Could not find autoencoder files. Building and training a new one.' 
) self.autoencoder, self.encoder, self.decoder = self.build_autoencoder( param) if param.get('train_cae_offline'): if train_images is None: print('I need some images to train the autoencoder') sys.exit(1) self.train_autoencoder_offline(train_images, param) #return autoencoder, encoder, decoder # build and compile the convolutional autoencoder def build_autoencoder(self, param): autoencoder = None input_img = Input(shape=(param.get('image_size'), param.get('image_size'), param.get('image_channels')), name='input') x = Conv2D(256, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(input_img) # tanh? x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Conv2D(128, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Conv2D(128, (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = MaxPooling2D( (param.get('cae_max_pool_size'), param.get('cae_max_pool_size')), padding='same')(x) x = Flatten()(x) encoded = Dense(param.get('code_size'), activation='sigmoid', name='encoded')(x) print('encoded shape ', encoded.shape) ims = 8 first = True x = Dense(int(ims * ims), activation='relu')(encoded) x = Reshape(target_shape=(ims, ims, 1))(x) # -12 while ims != param.get('image_size'): x = Conv2D( int(ims * ims / 2), (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='relu', padding='same')(x) x = UpSampling2D((param.get('cae_max_pool_size'), param.get('cae_max_pool_size')))(x) ims = ims * param.get('cae_max_pool_size') decoded = Conv2D( param.get('image_channels'), (param.get('cae_conv_size'), param.get('cae_conv_size')), activation='sigmoid', padding='same', name='decoded')(x) print('decoded shape ', decoded.shape) autoencoder = Model(input_img, decoded) autoencoder.compile(optimizer='adam', loss='mean_squared_error') # Create a separate encoder model encoder = Model(input_img, encoded) encoder.compile(optimizer='adam', loss='mean_squared_error') encoder.summary() # Create a separate decoder model decoder_inp = Input(shape=(param.get('code_size'), )) # decoder_inp = Input(shape=encoded.output_shape) enc_layer_idx = utils.getLayerIndexByName(autoencoder, 'encoded') print('encoder layer idx ', enc_layer_idx) decoder_layer = autoencoder.layers[enc_layer_idx + 1](decoder_inp) for i in range(enc_layer_idx + 2, len(autoencoder.layers)): decoder_layer = autoencoder.layers[i](decoder_layer) decoder = Model(decoder_inp, decoder_layer) decoder.compile(optimizer='adam', loss='mean_squared_error') if (param.get('verbosity_level') > 2): decoder.summary() return autoencoder, encoder, decoder def train_autoencoder_offline(self, train_data, param): self.autoencoder.fit(train_data, train_data, epochs=param.get('cae_epochs'), batch_size=param.get('cae_batch_size'), shuffle=True, verbose=1) if not os.path.exists(param.get('directory_pretrained_models')): print('creating folders for pretrained autoencoder models') os.makedirs(param.get('directory_pretrained_models')) self.autoencoder.save( param.get('directory_pretrained_models') + 'autoencoder.h5') self.encoder.save( param.get('directory_pretrained_models') + 'encoder.h5') self.decoder.save( param.get('directory_pretrained_models') + 'decoder.h5') print('autoencoder trained and saved ') def load_forward_code_model(self, param): filename = param.get('directory_models') + 
param.get('fwd_filename') forward_model = [] if os.path.isfile(filename): print('Loading existing pre-trained forward code model: ', filename) forward_model = load_model(filename) print('Forward code model loaded') else: print(' image_size load ', param.get('image_size')) forward_model = self.build_forward_code_model(param) print( 'Forward model does not exist, yet. Built and compiled a new one' ) return forward_model def build_forward_code_model(self, param): print('building forward code model...') # create fwd model layers cmd_fwd_inp = Input(shape=(param.get('romi_input_dim'), ), name='fwd_input') #x = Dense(param.get('code_size'), activation=self.activation_positive_tanh)(cmd_fwd_inp) x = Dense(param.get('code_size'), activation='relu')(cmd_fwd_inp) # x = Dense(param.get('code_size') * 10, activation=self.activation_positive_tanh)(x) # x = Dense(param.get('code_size') * 10, activation=self.activation_positive_tanh)(x) x = Dense(param.get('code_size') * 10, activation='relu')(x) #x = Dense(param.get('code_size'),)(cmd_fwd_inp) #x = Dense(param.get('code_size') * 10)(x) code = Dense(param.get('code_size'), activation='sigmoid', name='output')(x) fwd_model = Model(cmd_fwd_inp, code) #sgd = optimizers.SGD(lr=0.0014, decay=0.0, momentum=0.8, nesterov=True) fwd_model.compile(optimizer='adadelta', loss='mean_squared_error') #fwd_model.compile(optimizer=sgd, loss='mean_squared_error') if (param.get('verbosity_level') > 2): print('forward model') fwd_model.summary() return fwd_model def train_forward_code_model_on_batch(self, positions, codes): # tensorboard_callback = log(TensorBoard_dir='./logs/fwd_code', histogram_freq=0, write_graph=True, write_images=True) self.fwd_model.fit(positions, codes, epochs=self.parameters.get('epochs'), batch_size=self.parameters.get('batch_size'), verbose=1, shuffle=True) # , callbacks=[tensorboard_callback]) print('Forward code model updated') def load_inverse_code_model(self, param): filename = param.get('directory_models') + param.get('inv_filename') # build inverse model if os.path.isfile(filename): print('Loading existing pre-trained inverse code model: ', filename) inverse_model = load_model(filename) print('Inverse model loaded') else: inverse_model = self.build_inverse_code_model(param) print( 'Inverse model does not exist, yet. 
Built and compiled a new one' ) return inverse_model def build_inverse_code_model(self, param): print('building inverse code model...') input_code = Input(shape=(param.get('code_size'), ), name='inv_input') #x = Dense(param.get('code_size'), activation='relu')(input_code) #x = Dense(param.get('code_size') * 10, activation='relu')(x) #x = Dropout(0.2)(x) #x = Dense(param.get('code_size') * 10, activation='relu')(x) x = Dense(param.get('code_size'))(input_code) x = Dropout(0.1)(x) x = Dense(param.get('code_size') * 10)(x) x = Dropout(0.1)(x) x = Dense(param.get('code_size') * 10)(x) #x = Dropout(0.2)(x) #command = Dense(param.get('romi_input_dim'), activation=self.activation_positive_tanh, name='command')(x) #command = Dense(param.get('romi_input_dim'), activation='sigmoid', name='command')(x) #command = Dense(param.get('romi_input_dim'), activation='sigmoid', name='command')(x) command = Dense(param.get('romi_input_dim'), name='command')(x) inv_model = Model(input_code, command) #sgd = optimizers.SGD(lr=0.0014, decay=0.0, momentum=0.8, nesterov=True) #inv_model.compile(optimizer=sgd, loss='mean_squared_error') inv_model.compile(optimizer='adadelta', loss='mean_squared_error') if (param.get('verbosity_level') > 2): print('inverse code model') inv_model.summary() return inv_model def train_inverse_code_model_on_batch(self, codes, motor_cmd): # tensorboard_callback = TensorBoard(log_dir='./logs/inv_code', histogram_freq=0, write_graph=True, write_images=True) self.inv_model.fit( codes, motor_cmd, epochs=self.parameters.get('epochs'), batch_size=self.parameters.get('batch_size'), verbose=1, shuffle=True ) # , callbacks=[tensorboard_callback])#, callbacks=[showLR()]) print('Inverse code model trained on batch') def load_som(self, param, train_images=None): if not param.get('fixed_goal_som'): goal_som = MiniSom(param.get('goal_size'), param.get('goal_size'), param.get('code_size'), sigma=0.5, learning_rate=0.5) print('Initialising goal SOM...') # goal_som.random_weights_init(train_images_codes) return goal_som filename = param.get('directory_models') + param.get('som_filename') #filename = './pretrained_models/' + param.get('som_filename') print('Looking for som file: ', filename) goal_som = None if os.path.isfile(filename): print('Loading existing trained SOM...') h5f = h5py.File(filename, 'r') weights = h5f['goal_som'][:] code_size = len(weights[0][0]) h5f.close() print('code_size read ', code_size) goal_som = MiniSom(param.get('goal_size'), param.get('goal_size'), param.get('code_size')) goal_som._weights = weights print(len(weights)) print('Goal SOM loaded! Number of goals: ', str(param.get('goal_size') * param.get('goal_size'))) else: print('Could not find Goal SOM files.') if self.encoder is None or train_images is None: print( 'I need an encoder and some sample images to train a new SOM!' 
) sys.exit(1) print('Creating a new one') # creating self-organising maps for clustering the image codes <> the image goals # encoding test images print('Encoding train images...') train_images_codes = self.encoder.predict(train_images) code_size = len(train_images_codes[0]) goal_som = MiniSom(param.get('goal_size'), param.get('goal_size'), param.get('code_size'), sigma=0.5, learning_rate=0.5) print('Initialising goal SOM...') goal_som.random_weights_init(train_images_codes) # plot_som_scatter( encoder, goal_som, train_images) print('som quantization error: ', goal_som.quantization_error(train_images_codes)) print("Training goal SOM...") goal_som.train_random(train_images_codes, 100) # random training trained_som_weights = goal_som.get_weights().copy() filename = param.get('directory_pretrained_models') + param.get( 'som_filename') som_file = h5py.File(filename, 'w') som_file.create_dataset('goal_som', data=trained_som_weights) som_file.close() print("SOM trained and saved!") return goal_som def reduce_som_learning_rate(self, factor=10.0): self.goal_som._learning_rate = self.goal_som._learning_rate / factor def update_som(self, data, iterations=2): self.goal_som.train_batch(data, iterations, reinit_T=False) def save_logs(self, show=False): self.logger_fwd.save_log() self.logger_fwd.plot_mse(show=show) self.logger_inv.save_log() self.logger_inv.plot_mse(show=show) def save_models(self): self.autoencoder.save(self.parameters.get('directory_models') + 'autoencoder.h5', overwrite=True) self.encoder.save(self.parameters.get('directory_models') + 'encoder.h5', overwrite=True) self.decoder.save(self.parameters.get('directory_models') + 'decoder.h5', overwrite=True) self.inv_model.save(self.parameters.get('directory_models') + 'inv_model.h5', overwrite=True) self.fwd_model.save(self.parameters.get('directory_models') + 'fwd_model.h5', overwrite=True) # save som som_weights = self.goal_som.get_weights().copy() som_file = h5py.File( self.parameters.get('directory_models') + 'goal_som.h5', 'w') som_file.create_dataset('goal_som', data=som_weights) som_file.close()
class MyModel: def __init__(self): self._create() def _create(self): EMBEDDING_DIMS = 50 GRU_DIMS = 64 DROPOUT_FC = 0.2 DROPOUT_GRU = 0.2 DROPOUT_EMB = 0.2 # Convolution kernel_size = 3 filters = 32 pool_size = 2 print('Creating Model...') # EMBEDDING input_play = Input(shape=(SEQ_LEN, ), dtype='int32', name='input_play') # Keras requires the total_dim to have 2 more dimension for "other" class embedding_layer = Embedding(input_dim=(EMBEDDING_CLASSES + 2), output_dim=EMBEDDING_DIMS, input_length=SEQ_LEN, mask_zero=False, trainable=True, name='emb')(input_play) drop_emb = Dropout(DROPOUT_EMB, name='dropout_emb')(embedding_layer) conv = Conv1D(filters, kernel_size, padding='same', activation='relu', strides=1, name='conv1')(drop_emb) maxpool = MaxPooling1D(pool_size=pool_size, name='maxpool1')(conv) gru = GRU(GRU_DIMS, dropout=DROPOUT_GRU, name='gru1')(maxpool) # TIME_OF_DAY OHE ohe1 = Input(shape=(TOTAL_TOD_BINS, ), name='time_of_day_ohe') # DAY_OF_WEEK OHE ohe2 = Input(shape=(TOTAL_DOW_BINS, ), name='day_of_wk_ohe') # MERGE LAYERS print('Merging features...') merged = concatenate([gru, ohe1, ohe2], axis=1, name='concat') # FULLY CONNECTED LAYERS dense = Dense(128, activation='relu', name='main_dense')(merged) bn = BatchNormalization(name='bn_fc1')(dense) drop = Dropout(DROPOUT_FC, name='dropout1')(bn) dense = Dense(64, activation='relu', name='dense2')(drop) drop = Dropout(DROPOUT_FC, name='dropout2')(dense) dense = Dense(32, activation='relu', name='dense3')(drop) pred = Dense(TARGET_CLASSES, activation='softmax', name='output')(dense) self.model = Model(inputs=[input_play, ohe1, ohe2], outputs=[pred]) print(self.model.summary()) return self.model def compile(self): self.model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=[keras_ndcg(k=5)])
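# Hedged usage sketch for MyModel. SEQ_LEN, EMBEDDING_CLASSES, TOTAL_TOD_BINS,
# TOTAL_DOW_BINS and TARGET_CLASSES are the module-level globals the class
# reads; the toy batch below only has to line up with them.
import numpy as np

m = MyModel()
m.compile()
x_play = np.random.randint(1, EMBEDDING_CLASSES, size=(256, SEQ_LEN))
x_tod = np.eye(TOTAL_TOD_BINS)[np.random.randint(0, TOTAL_TOD_BINS, 256)]   # one-hot time of day
x_dow = np.eye(TOTAL_DOW_BINS)[np.random.randint(0, TOTAL_DOW_BINS, 256)]   # one-hot day of week
y = np.eye(TARGET_CLASSES)[np.random.randint(0, TARGET_CLASSES, 256)]
m.model.fit([x_play, x_tod, x_dow], y, epochs=1, batch_size=64)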
def train_top_model_inceptionV3():
    # InceptionV3 generators
    # Set up generators
    train_batches = ImageDataGenerator(
        preprocessing_function=applications.inception_v3.preprocess_input).flow_from_directory(
        train_path, target_size=(299, 299), batch_size=train_batch_size)
    valid_batches = ImageDataGenerator(
        preprocessing_function=applications.inception_v3.preprocess_input).flow_from_directory(
        valid_path, target_size=(299, 299), batch_size=val_batch_size)
    test_batches = ImageDataGenerator(
        preprocessing_function=applications.inception_v3.preprocess_input).flow_from_directory(
        test_path, target_size=(299, 299), batch_size=test_batch_size, shuffle=False)

    input_tensor = Input(shape=(299, 299, 3))
    # Loading the model
    model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=False)
    # add a global spatial average pooling layer
    x = model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(7, activation='softmax')(x)
    # this is the model we will train
    model = Model(inputs=model.input, outputs=predictions)

    def top_3_accuracy(y_true, y_pred):
        return top_k_categorical_accuracy(y_true, y_pred, k=3)

    def top_2_accuracy(y_true, y_pred):
        return top_k_categorical_accuracy(y_true, y_pred, k=2)

    model.compile(optimizer=optimizers.SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False),
                  loss="categorical_crossentropy",
                  metrics=[categorical_accuracy, top_2_accuracy, top_3_accuracy, "accuracy"])
    print(model.summary())

    # Declare a checkpoint to save the best version of the model
    checkpoint = ModelCheckpoint("modelInceptionV3.h5", monitor='val_categorical_accuracy',
                                 verbose=1, save_best_only=True, mode='max')
    # Reduce the learning rate as the learning stagnates
    reduce_lr = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=2,
                                  verbose=1, mode='max', min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=1, mode='max')
    callbacks_list = [checkpoint, reduce_lr, early_stopping]

    history = model.fit_generator(train_batches,
                                  epochs=epochs,
                                  shuffle=True,
                                  validation_data=valid_batches,
                                  steps_per_epoch=train_steps,
                                  validation_steps=val_steps,
                                  verbose=1,
                                  callbacks=callbacks_list)
    try:
        # summarize history for accuracy
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig("InceptionV3_accuracy_training_plot.png")
        # Start a fresh figure so the loss curves are not drawn on top of the
        # accuracy curves.
        plt.clf()
        # summarize history for loss
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig("InceptionV3_loss_training_plot.png")
    except Exception as e:
        # A bare `except` hid the real failure; at least report it.
        print("Plotting failed:", e)
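# `test_batches` is built above but never consumed. A hedged evaluation sketch
# that would belong at the end of train_top_model_inceptionV3(), with the step
# count derived from the generator itself (an assumption about the surrounding
# globals); the return order follows the compile metrics above:
# test_steps = int(np.ceil(test_batches.samples / test_batches.batch_size))
# test_loss, test_cat_acc, test_top2, test_top3, test_acc = \
#     model.evaluate_generator(test_batches, steps=test_steps)
# print('test_loss:', test_loss, 'test_cat_acc:', test_cat_acc)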
def train_top_model_densenet121():
    # DenseNet generators
    # Set up generators
    train_batches = ImageDataGenerator(
        preprocessing_function=applications.densenet.preprocess_input).flow_from_directory(
        train_path, target_size=(image_size, image_size), batch_size=train_batch_size)
    valid_batches = ImageDataGenerator(
        preprocessing_function=applications.densenet.preprocess_input).flow_from_directory(
        valid_path, target_size=(image_size, image_size), batch_size=val_batch_size)
    test_batches = ImageDataGenerator(
        preprocessing_function=applications.densenet.preprocess_input).flow_from_directory(
        test_path, target_size=(image_size, image_size), batch_size=test_batch_size, shuffle=False)

    input_tensor = Input(shape=(224, 224, 3))
    # Loading the model
    model = DenseNet121(input_tensor=input_tensor, weights='imagenet', include_top=False)
    # add a global spatial average pooling layer
    x = model.output
    x = GlobalAveragePooling2D()(x)
    # add a relu layer
    x = Dense(1024, activation='relu')(x)
    # and a softmax layer for 7 classes
    predictions = Dense(7, activation='softmax')(x)
    # this is the model we will train
    model = Model(inputs=model.input, outputs=predictions)

    def top_3_accuracy(y_true, y_pred):
        return top_k_categorical_accuracy(y_true, y_pred, k=3)

    def top_2_accuracy(y_true, y_pred):
        return top_k_categorical_accuracy(y_true, y_pred, k=2)

    model.compile(optimizer=optimizers.SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False),
                  loss="categorical_crossentropy",
                  metrics=[categorical_accuracy, top_2_accuracy, top_3_accuracy])
    print(model.summary())

    # Declare a checkpoint to save the best version of the model
    checkpoint = ModelCheckpoint("modelDenseNet121.h5", monitor='val_categorical_accuracy',
                                 verbose=1, save_best_only=True, mode='max')
    # Reduce the learning rate as the learning stagnates
    reduce_lr = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.5, patience=2,
                                  verbose=1, mode='max', min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=1, mode='max')
    callbacks_list = [checkpoint, reduce_lr, early_stopping]

    history = model.fit_generator(train_batches,
                                  # class_weight=class_weights,
                                  epochs=epochs,
                                  shuffle=True,
                                  validation_data=valid_batches,
                                  steps_per_epoch=train_steps,
                                  validation_steps=val_steps,
                                  verbose=1,
                                  callbacks=callbacks_list)

    # Evaluation of the best epoch: reload the weights the checkpoint wrote
    # (the filename must match the ModelCheckpoint above).
    model.load_weights('modelDenseNet121.h5')
    val_loss, val_cat_acc, val_top_2_acc, val_top_3_acc = \
        model.evaluate_generator(valid_batches, steps=val_steps)
    print('val_loss:', val_loss)
    print('val_cat_acc:', val_cat_acc)
    print('val_top_2_acc:', val_top_2_acc)
    print('val_top_3_acc:', val_top_3_acc)
class MNISTClassifier(object):
    """MNIST digit classifier using the RBM + Softmax model."""

    # Constants.
    MODEL_PATH = 'digit_classification_model.h5'
    IMAGE_SIZE = 784

    def __init__(self, conf):
        self.conf = conf
        self.hps = self.conf['hps']
        self.nn_arch = self.conf['nn_arch']
        self.model_loading = self.conf['model_loading']

        if self.model_loading:
            self.digit_classification_model = load_model(self.MODEL_PATH, custom_objects={'RBM': RBM})
            self.digit_classification_model.summary()
            # The layer is created with name='rbm' below, so look it up by that name.
            self.rbm = self.digit_classification_model.get_layer('rbm')
        else:
            # Design the model.
            input_image = Input(shape=(self.IMAGE_SIZE,))
            # Rescale raw pixel values to [0, 1] inside the model itself.
            x = Lambda(lambda t: t / 255)(input_image)

            # RBM layer.
            self.rbm = RBM(self.conf['rbm_hps'], self.nn_arch['output_dim'], name='rbm')
            x = self.rbm(x)

            # Softmax layer.
            output = Dense(10, activation='softmax')(x)

            # Create a model.
            self.digit_classification_model = Model(inputs=[input_image], outputs=[output])
            opt = optimizers.Adam(lr=self.hps['lr'],
                                  beta_1=self.hps['beta_1'],
                                  beta_2=self.hps['beta_2'],
                                  decay=self.hps['decay'])
            self.digit_classification_model.compile(optimizer=opt, loss='categorical_crossentropy')
            self.digit_classification_model.summary()

    def train(self):
        """Train."""
        # Load training data as raw 0-255 pixels; the model's Lambda layer
        # handles rescaling, so the loader must not divide by 255 again.
        V, gt = self._load_training_data()

        # Semi-supervised learning.
        # Unsupervised learning: train the RBM on the same [0, 1] view of the
        # data that the Lambda layer gives the rest of the network.
        print('Train the RBM model.')
        self.rbm.fit(V / 255)

        # Supervised learning.
        print('Train the NN model.')
        self.digit_classification_model.fit(V, gt,
                                            batch_size=self.hps['batch_size'],
                                            epochs=self.hps['epochs'],
                                            verbose=1)

        print('Save the model.')
        self.digit_classification_model.save(self.MODEL_PATH)

    def _load_training_data(self):
        """Load training data."""
        train_df = pd.read_csv('train.csv')
        V = []
        gt = []

        for i in range(train_df.shape[0]):
            V.append(train_df.iloc[i, 1:].values)
            t_gt = np.zeros(shape=(10,))
            t_gt[train_df.iloc[i, 0]] = 1.
            gt.append(t_gt)

        V = np.asarray(V, dtype=np.float32)
        gt = np.asarray(gt, dtype=np.float32)
        return V, gt

    def test(self):
        """Test."""
        # Load test data.
        V = self._load_test_data()

        # Predict digits.
        res = self.digit_classification_model.predict(V, verbose=1)

        # Record results into a file.
        with open('solution.csv', 'w') as f:
            f.write('ImageId,Label\n')
            for i, v in enumerate(res):
                f.write(str(i + 1) + ',' + str(np.argmax(v)) + '\n')

    def _load_test_data(self):
        """Load test data."""
        test_df = pd.read_csv('test.csv')
        V = []
        for i in range(test_df.shape[0]):
            V.append(test_df.iloc[i, :].values)
        V = np.asarray(V, dtype=np.float32)
        return V
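# Hedged usage sketch for MNISTClassifier. The keys mirror what __init__
# reads, but every value below is an illustrative assumption.
conf = {
    'model_loading': False,
    'hps': {'lr': 0.001, 'beta_1': 0.9, 'beta_2': 0.999, 'decay': 0.0,
            'batch_size': 128, 'epochs': 10},
    'nn_arch': {'output_dim': 128},
    'rbm_hps': {},  # fill in whatever the custom RBM layer expects
}
classifier = MNISTClassifier(conf)
classifier.train()  # expects train.csv in the working directory
classifier.test()   # writes solution.csv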