def model():
    passage_input = layers.Input(shape=(units,), dtype='int16')
    passage_embd = layers.Embedding(MAX_WORD_INDEX + 1, 100,
                                    # weights=[embedding_matrix],
                                    input_length=units,
                                    mask_zero=True)(passage_input)
    # passage_posi = PositionEmbedding(
    #     input_dim=MAX_WORD_INDEX + 1,  # the maximum absolute value of positions
    #     output_dim=100,                # the dimension of embeddings
    #     mask_zero=False,  # the index that presents padding (because `0` will be used in relative positioning)
    #     input_shape=(None,),
    #     name='Pos-Embd')(passage_input)
    # passage = layers.Add()([passage_embd, passage_posi])
    passage = passage_embd

    p_encoder = layers.Bidirectional(
        layers.LSTM(int(tag_num / 2), return_sequences=True))(passage)
    p_encoder = layers.Bidirectional(
        layers.LSTM(int(tag_num / 2), return_sequences=True))(p_encoder)
    p_encoder = layers.LSTM(tag_num, return_sequences=True)(p_encoder)
    p_encoder = layers.LSTM(tag_num, return_sequences=True)(p_encoder)
    # p_encoder = passage
    # p_encoder = SeqSelfAttention(attention_activation='sigmoid')(p_encoder)
    # p_encoder = multi_head(2, 1000, tag_num, p_encoder)

    crf = CRF(tag_num, sparse_target=True)
    p_encoder = crf(p_encoder)

    # a_decoder = Attention(1, 4)([p_encoder, q_encoder, alt_encoder])
    # a_decoder = layers.Flatten()(a_decoder)
    # alternatives_input = layers.Flatten()(alternatives_input)
    # a_decoder = layers.Concatenate()([a_decoder, alternatives_input])
    # a_decoder = layers.GlobalMaxPooling1D()(a_decoder)
    output = p_encoder

    rc_model = models.Model(inputs=passage_input, outputs=output)
    opti = optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    rc_model.compile(optimizer=opti,
                     loss=crf.loss_function,
                     metrics=[crf.accuracy])
    rc_model.summary()
    return rc_model
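# --- Hypothetical usage sketch (not from the original source) ---
# model() reads the module-level globals `units`, `tag_num`, and
# `MAX_WORD_INDEX`, and expects `CRF` to be keras_contrib.layers.CRF;
# the values below are assumptions for illustration only.
if __name__ == '__main__':
    units, tag_num, MAX_WORD_INDEX = 200, 8, 50000  # assumed example values
    rc_model = model()
    # sparse_target=True means labels shaped (batch, units, 1), e.g.:
    # rc_model.fit(x_train, y_train[..., None], batch_size=32, epochs=5)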
def _build_basic_network(self, word_outputs):
    """Creates the basic network architecture,
    transforming word embeddings to intermediate outputs"""
    if self.word_dropout > 0.0:
        lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
    else:
        lstm_outputs = word_outputs
    for j in range(self.word_lstm_layers - 1):
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[j], return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
    lstm_outputs = kl.Bidirectional(
        kl.LSTM(self.word_lstm_units[-1], return_sequences=True,
                dropout=self.lstm_dropout))(lstm_outputs)
    pre_outputs = kl.TimeDistributed(
        kl.Dense(len(self.tags), activation="softmax",
                 activity_regularizer=self.regularizer),
        name="p")(lstm_outputs)
    return pre_outputs, lstm_outputs
def __call__(self, inputs):
    x = self._merge_inputs(inputs)
    shape = getattr(x, '_keras_shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)
    # Keras 1-style regularization (WeightRegularizer / W_regularizer);
    # the Keras 2 equivalent uses kr.L1L2 and kernel_regularizer
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(256, W_regularizer=w_reg))(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)
def build_model_old(nodes, seq_length, dropout=0):
    model = models.Sequential()
    model.add(layers.Embedding(21, 10, input_length=seq_length))
    model.add(layers.Bidirectional(
        layers.LSTM(nodes, return_sequences=True, dropout=dropout,
                    recurrent_dropout=0.2)))
    model.add(layers.Bidirectional(
        layers.LSTM(nodes, dropout=dropout, recurrent_dropout=0.2)))
    model.add(layers.Dense(nodes))
    model.add(layers.LeakyReLU(alpha=0.01))
    model.add(layers.Dense(2, activation='softmax'))
    # a two-unit softmax output pairs with categorical, not binary, crossentropy
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    return model
def fit_BidirectionalLSTM(features, labels, embedding_vector_length,
                          vocab_size, cell_units, epochs):
    '''
    Fits a Bidirectional LSTM using Keras and evaluates test-set metrics
    '''
    model_name = 'BidirectionalLSTM'
    msg = (f'Fitting {model_name} with:\n'
           f'\t Vocab Size: {vocab_size}\n'
           f'\t Embedding Vector Len: {embedding_vector_length}\n'
           f'\t Cell Units: {cell_units}\n'
           f'\t Epochs: {epochs}\n'
           f'\t Model File: {model_name}')
    logger.debug(msg)
    datasets = split_sets(features, labels)

    # initialize model; assumes split_sets() provides 'train_features'/'train_labels'
    model = Sequential()
    model.add(layers.Embedding(vocab_size, embedding_vector_length,
                               input_length=len(datasets['train_features'][0])))
    model.add(layers.Bidirectional(
        layers.LSTM(cell_units * 2, return_sequences=True)))
    model.add(layers.Bidirectional(layers.LSTM(cell_units)))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[keras.metrics.BinaryAccuracy(),
                           keras.metrics.Precision(),
                           keras.metrics.Recall(),
                           keras.metrics.AUC()])

    # fit on the training split, then persist the trained model
    model.fit(datasets['train_features'], datasets['train_labels'],
              epochs=epochs, batch_size=128)
    model.save(model_name + '.mod')

    logger.debug('Evaluate on test data')
    results = model.evaluate(datasets['test_features'],
                             datasets['test_labels'],
                             batch_size=128)
    logger.debug('Test Set metrics: %s', results)
    return model
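# --- Hypothetical usage sketch (assumed names, not from the original) ---
# `features` would be padded token-index sequences and `labels` binary
# targets, with `split_sets` and `logger` defined elsewhere in the module:
# model = fit_BidirectionalLSTM(features, labels,
#                               embedding_vector_length=64,
#                               vocab_size=20000, cell_units=32, epochs=3)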
def BiLSTM(train, test):
    # LSTM input shape: [samples, timesteps, features]
    input_tensor = Input(shape=(n_input, 1))
    x = layers.Bidirectional(
        layers.LSTM(32, return_sequences=True))(input_tensor)
    x = layers.LeakyReLU()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Bidirectional(
        layers.LSTM(16, activation='relu', dropout=0.2,
                    recurrent_dropout=0.2))(x)
    x = layers.Dense(64)(x)
    x = layers.LeakyReLU()(x)
    output_tensor = layers.Dense(1)(x)
    model = Model(input_tensor, output_tensor)
    model.summary()
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    history = model.fit_generator(train,
                                  steps_per_epoch=1,
                                  epochs=5,
                                  validation_data=test,
                                  verbose=1)
    return history
def stack_layers(prev, param: Tuple[str, int, float, float, str]):
    """
    :param prev: incoming Keras layer
    :param param: [layer name, steps, input dropout, recurrent dropout,
                   bidirectional merge mode ('' to disable)]
    """
    name, steps, indrop, recdrop, bidir = param
    layer_ = layer(steps,
                   dropout=indrop,
                   recurrent_dropout=recdrop,
                   return_sequences=True,
                   stateful=stateful)
    # a truthy `bidir` string is used as the Bidirectional merge mode
    return (layers.Bidirectional(layer_, merge_mode=bidir) if bidir else layer_)(prev)
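# --- Hypothetical usage sketch (assumptions, not from the original) ---
# assumes a module-level `layer` factory such as layers.LSTM and a
# `stateful` flag, neither of which is defined in this snippet:
# x = stack_layers(inputs, ('lstm', 128, 0.2, 0.2, 'concat'))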
def get_word2vec_nn(input_shape, num_classes):
    model = keras.models.Sequential()
    model.add(layers.convolutional.Conv1D(filters=500,
                                          kernel_size=3,
                                          padding='same',
                                          activation='relu',
                                          input_shape=input_shape))
    model.add(layers.convolutional.MaxPooling1D(pool_size=2))
    model.add(layers.Dropout(0.2))
    model.add(layers.Bidirectional(layers.LSTM(100, dropout=0.2)))
    model.add(layers.Dense(num_classes,
                           activation='softmax',
                           kernel_regularizer=regularizers.l2(0.01),
                           activity_regularizer=regularizers.l1(0.01)))
    optimizer = optimizers.RMSprop()
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    print(model.summary())
    return model
def _build_bidirectional(layer_description):
    layer_description = re.sub('bidirectional', '', layer_description)
    next_layer = None
    for layer_name in LAYER_BUILDERS:
        if layer_name in layer_description:
            next_layer = LAYER_BUILDERS[layer_name](layer_description)
            break
    if next_layer is None:
        raise LayerNotFoundError(
            f'Layer not found. Please include in the description one of the '
            f'following layers:\n{LAYER_BUILDERS}')
    return layers.Bidirectional(next_layer)
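# --- Hypothetical usage sketch (assumptions, not from the original) ---
# assumes LAYER_BUILDERS maps names like 'lstm' to builders that parse the
# remaining description string into a layer:
# wrapped = _build_bidirectional('bidirectional lstm 64')
# # -> layers.Bidirectional(layers.LSTM(64, ...))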
def base_embed_lstm_net(vocabulary_size):
    model = Sequential()
    model.add(layers.Embedding(vocabulary_size, 128))  # 0.95
    # model.add(layers.LSTM(64))  # 0.958
    # model.add(layers.Bidirectional(layers.LSTM(64)))  # 0.9817
    model.add(layers.Bidirectional(
        layers.LSTM(64, dropout=0.1, recurrent_dropout=0.5,
                    return_sequences=True)))
    model.add(layers.Bidirectional(
        layers.LSTM(64, dropout=0.1, recurrent_dropout=0.5)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model
def get_model(shape, class_num):
    model = models.Sequential()
    model.add(layers.Masking(mask_value=0, input_shape=(shape[0], shape[1])))
    model.add(layers.Bidirectional(layers.LSTM(256, return_sequences=False),
                                   input_shape=(shape[0], shape[1])))
    model.add(layers.Dense(class_num, activation='softmax'))
    optimizer = Adam(1e-3)
    # optimizer = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer,
                  'categorical_crossentropy',
                  metrics=['accuracy', acc_top3])
    return model
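# --- Hypothetical usage sketch (assumed shapes/metric, not from the original) ---
# assumes `acc_top3` is a custom top-3 accuracy metric defined elsewhere and
# that inputs are zero-padded (timesteps, features) sequences:
# model = get_model(shape=(100, 40), class_num=10)
# model.fit(x_train, y_train_onehot, batch_size=64, epochs=10)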
def create_truncated_model(trained_model, vocab_size, embedding_dim):
    model = Sequential()
    model.add(layers.Embedding(vocab_size, embedding_dim, input_length=20))
    model.add(layers.Bidirectional(
        layers.LSTM(64, activation='tanh', return_sequences=True)))
    model.add(layers.Flatten())
    # copy weights layer-by-layer from the trained model
    for i, layer in enumerate(model.layers):
        layer.set_weights(trained_model.layers[i].get_weights())
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def __call__(self, inputs):
    x = self._merge_inputs(inputs)
    shape = getattr(x, 'shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)
def model_make(maxlen, chars, wordsize, infer=False):
    seq = lay.Input(shape=(maxlen,), dtype='int32')
    embed = lay.Embedding(len(chars) + 1, wordsize,
                          input_length=maxlen, mask_zero=True)(seq)
    bilstm = lay.Bidirectional(lay.LSTM(bicell, return_sequences=True),
                               merge_mode='sum')(embed)
    output = lay.TimeDistributed(lay.Dense(5, activation='softmax'))(bilstm)
    resultmodel = mod.Model(inputs=seq, outputs=output)
    if not infer:
        # use crossentropy with the Adam optimizer
        resultmodel.compile(loss='categorical_crossentropy',
                            optimizer='adam',
                            metrics=['accuracy'])
    return resultmodel
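# --- Hypothetical usage sketch (assumptions, not from the original) ---
# model_make reads a module-level `bicell` unit count that it never defines:
# bicell = 128
# tagger = model_make(maxlen=80, chars=charset, wordsize=64)
# tagger.fit(x_char_ids, y_onehot_tags, batch_size=64, epochs=5)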
def encoder(inputs):
    # define first recurrent layers
    rnn_vl = L.Bidirectional(RNN(16), name='VL_bidirectional_RNN')(inputs[0])
    rnn_vh = L.Bidirectional(RNN(16), name='VH_bidirectional_RNN')(inputs[1])
    # first dense layer of encoder
    dense_1_vl = L.Dense(32, activation='relu',
                         name='VL_encoder_dense_1')(rnn_vl)
    dense_1_vh = L.Dense(32, activation='relu',
                         name='VH_encoder_dense_1')(rnn_vh)
    # merge dense layers (L.concatenate replaces the removed Keras 1
    # L.merge(..., mode='concat') API)
    merge_layer = L.concatenate([dense_1_vl, dense_1_vh], name='merge_layer')
    # add another layer to combine features from VL and VH
    dense_1 = L.Dense(32, activation='relu',
                      name='merged_encoder_dense_1')(merge_layer)
    # compress dense_1 output into a lower-dimensional latent vector
    bottleneck = L.Dense(latent_dim, name='bottleneck')(dense_1)
    # encoder_model = keras.Model([VL_input, VH_input], bottleneck, name='encoder')
    return bottleneck
def lstm(seq_len: int):
    # input_deepmoji = layers.Input(shape=(2304,), name="deepmoji_input")
    input_text = layers.Input(shape=(1,), dtype=tf.string, name="text_input")
    # embedding = layers.Embedding(168, 64)(input_text)
    embedding = layers.Lambda(ELMo, output_shape=(1024,))(input_text)
    spt_dropout_1 = layers.SpatialDropout1D(0.4)(embedding)
    lstm1 = layers.Bidirectional(
        layers.LSTM(350, kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.4))(spt_dropout_1)
    spt_dropout_2 = layers.SpatialDropout1D(0.3)(lstm1)
    lstm2 = layers.Bidirectional(
        layers.LSTM(350, kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_2)
    spt_dropout_3 = layers.SpatialDropout1D(0.2)(lstm2)
    lstm3 = layers.Bidirectional(
        layers.LSTM(300, kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_3)
    att = Attention()(lstm3)
    # merged = layers.Concatenate()([input_deepmoji, att])
    dense = layers.Dense(100, activation='relu')(att)
    pred = layers.Dense(2, activation='softmax', name="output")(dense)
    model = Model(inputs=input_text, outputs=pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()
    return model
def build_model(nodes, dropout, seq_length,
                weight_decay_lstm=0, weight_decay_dense=0):
    """Model with ELMo embeddings for amino acids."""
    inputs = layers.Input(shape=(seq_length, 1024))
    hidden = layers.Bidirectional(
        layers.LSTM(nodes,
                    input_shape=(seq_length, 1024),
                    return_sequences=True,
                    dropout=dropout,
                    recurrent_dropout=0.2,
                    kernel_regularizer=l2(weight_decay_lstm),
                    recurrent_regularizer=l2(weight_decay_lstm),
                    bias_regularizer=l2(weight_decay_lstm)))(inputs)
    hidden = layers.Bidirectional(
        layers.LSTM(nodes,
                    dropout=dropout,
                    recurrent_dropout=0.2,
                    kernel_regularizer=l2(weight_decay_lstm),
                    recurrent_regularizer=l2(weight_decay_lstm),
                    bias_regularizer=l2(weight_decay_lstm)))(hidden)
    hidden = layers.Dense(nodes,
                          kernel_regularizer=l2(weight_decay_dense),
                          bias_regularizer=l2(weight_decay_dense))(hidden)
    hidden = layers.LeakyReLU(alpha=0.01)(hidden)
    out = layers.Dense(2, activation='softmax',
                       kernel_regularizer=l2(weight_decay_dense),
                       bias_regularizer=l2(weight_decay_dense))(hidden)
    model = models.Model(inputs=inputs, outputs=out)
    # a two-unit softmax output pairs with categorical, not binary, crossentropy
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    return model
def train_bidirectional_gru(float_data):
    """
    Train the network with a bidirectional GRU
    :return:
    """
    model = Sequential()
    model.add(layers.Bidirectional(layers.GRU(32),
                                   input_shape=(None, float_data.shape[-1])))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=500,
                                  epochs=40,
                                  validation_data=val_gen,
                                  validation_steps=VAL_STEPS)
    plt_loss(history)
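# --- Hypothetical usage sketch (assumptions, not from the original) ---
# assumes the module-level names this snippet relies on: train_gen/val_gen
# generators, VAL_STEPS, and a plt_loss plotting helper:
# train_bidirectional_gru(float_data)  # float_data: (samples, features) array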
def load_model():
    input_tensor = Input(shape=(SequenceLength, IMSIZE[0], IMSIZE[1], 3))
    x = layers.ConvLSTM2D(32, kernel_size=(7, 7), padding='valid',
                          return_sequences=True)(input_tensor)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling3D(pool_size=(1, 2, 2))(x)
    x = layers.ConvLSTM2D(64, kernel_size=(5, 5), padding='valid',
                          return_sequences=True)(x)
    x = layers.MaxPooling3D(pool_size=(1, 2, 2))(x)
    x = layers.ConvLSTM2D(96, kernel_size=(3, 3), padding='valid',
                          return_sequences=True)(x)
    x = layers.Activation('relu')(x)
    x = layers.ConvLSTM2D(96, kernel_size=(3, 3), padding='valid',
                          return_sequences=True)(x)
    x = layers.Activation('relu')(x)
    x = layers.ConvLSTM2D(96, kernel_size=(3, 3), padding='valid',
                          return_sequences=True)(x)
    x = layers.MaxPooling3D(pool_size=(1, 2, 2))(x)
    x = layers.Dense(320)(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.5)(x)
    out_shape = x.get_shape().as_list()
    x = layers.Reshape((SequenceLength,
                        out_shape[2] * out_shape[3] * out_shape[4]))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True),
                             merge_mode='concat')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    output_tensor = layers.Dense(N_CLASSES, activation='softmax')(x)
    model = Model(input_tensor, output_tensor)
    model.compile(loss='categorical_crossentropy', optimizer='sgd',
                  metrics=['accuracy'])
    return model
def build_model(self, p):
    # token level
    inp_token = kl.Input(shape=(p['sent_len'],))
    embed_token = kl.Embedding(self.vocab_size, self.token_dim,
                               mask_zero=True, weights=[self.wei_token])
    embed_token.trainable = False
    embed_tout = embed_token(inp_token)

    inp_char = None
    if p['use_char']:
        # char level
        inp_char = kl.Input(shape=(p['sent_len'], p['word_len']))
        embed_char = kl.Embedding(self.char_size, self.char_dim,
                                  weights=[self.wei_char])
        embed_char.trainable = False
        embed_cout = embed_char(inp_char)

        # convolutional layer
        print('conv filters:', self.filters)
        conv_layer = kl.Conv2D(self.filters, (1, 3),
                               input_shape=(p['sent_len'], p['word_len'],
                                            self.char_dim),
                               use_bias=False, padding='same')
        conv_out = conv_layer(embed_cout)

        # max pooling
        pool_layer = kl.MaxPooling2D(pool_size=(1, p['word_len']))
        pool_out = pool_layer(conv_out)
        reshape_out = kl.Reshape((p['sent_len'], self.filters))(pool_out)

        # concatenation
        concat_out = kl.concatenate([embed_tout, reshape_out], axis=2)
    else:
        concat_out = embed_tout

    lstm_out = kl.Bidirectional(
        kl.LSTM(p['units'], activation=p['lstm_act'],
                return_sequences=True, dropout=0.4))(concat_out)
    dense_out = kl.TimeDistributed(kl.Dense(p['outputsize']))(lstm_out)

    # build model
    if p['use_char']:
        model_ner = km.Model(inputs=[inp_token, inp_char], outputs=dense_out)
    else:
        model_ner = km.Model(inputs=inp_token, outputs=dense_out)
    model_ner.compile(loss=p['lstm_loss'], optimizer=p['lstm_opt'])
    model_ner.summary()
    self.model = model_ner
def model_LSTMbaseline(embedding_matrix, max_sent_len, n_out):
    print(config.params_dict)
    # Take a sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len,), dtype='int32',
                                  name='sentence_input')
    word_embeddings = layers.Embedding(output_dim=embedding_matrix.shape[1],
                                       input_dim=embedding_matrix.shape[0],
                                       input_length=max_sent_len,
                                       weights=[embedding_matrix],
                                       mask_zero=True,
                                       trainable=False)(sentence_input)
    word_embeddings = layers.Dropout(config.Params.dropout1)(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(max_sent_len,), dtype='int8',
                                  name='entity_markers')
    pos_embeddings = layers.Embedding(output_dim=config.Params.position_emb,
                                      input_dim=POSITION_VOCAB_SIZE,
                                      input_length=max_sent_len,
                                      mask_zero=True,
                                      embeddings_regularizer=regularizers.l2(),
                                      trainable=True)(entity_markers)

    # Merge word and position embeddings and apply the specified number of RNN layers
    x = layers.concatenate([word_embeddings, pos_embeddings])
    for i in range(config.Params.rnn1_layers - 1):
        lstm_layer = layers.LSTM(config.Params.units1, return_sequences=True)
        if config.Params.bidirectional:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = lstm_layer(x)
    lstm_layer = layers.LSTM(config.Params.units1, return_sequences=False)
    if config.Params.bidirectional:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_vector = lstm_layer(x)

    # Apply softmax
    sentence_vector = layers.Dropout(config.Params.dropout1)(sentence_vector)
    main_output = layers.Dense(n_out, activation="softmax",
                               name='main_output')(sentence_vector)

    model = models.Model(inputs=[sentence_input, entity_markers],
                         outputs=[main_output])
    model.compile(optimizer=config.Params.optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def GRU3_solo(rd_input, kernels, conv_window_len, maxpooling_len, stride,
              BN=True, DropoutRate=0.2):
    initializer = 'glorot_uniform'
    conv1 = layers.Conv1D(kernels[0], 1, strides=stride, padding='same',
                          kernel_initializer=initializer)(rd_input)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(conv1)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(output)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(output)
    output = layers.TimeDistributed(
        layers.Dense(8, activation="softmax"))(output)
    model = models.Model(rd_input, output)
    return model
def UNet_GRU3(rd_input, kernels, conv_window_len, maxpooling_len, stride,
              BN=True, DropoutRate=0.2):
    unet_module_output = UNet_module(rd_input, kernels, conv_window_len,
                                     maxpooling_len, stride, BN, DropoutRate)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(unet_module_output)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(output)
    output = layers.Bidirectional(
        layers.CuDNNGRU(32, return_sequences=True))(output)
    output = layers.TimeDistributed(
        layers.Dense(8, activation="softmax"))(output)
    model = models.Model(rd_input, output)
    return model
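# --- Hypothetical usage sketch for the two CuDNNGRU builders above ---
# (assumed shapes; CuDNNGRU requires a GPU, and GRU3_solo ignores the
# conv_window_len/maxpooling_len arguments):
# rd_input = layers.Input(shape=(1000, 1))
# model = GRU3_solo(rd_input, kernels=[64], conv_window_len=None,
#                   maxpooling_len=None, stride=1)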
def addRecurrentLayers(self, merged_input):
    # Add LSTMs
    shared_layer = merged_input
    logging.info("LSTM-Size: %s" % str(self.params['LSTM-Size']))
    for count, size in enumerate(self.params['LSTM-Size']):
        if isinstance(self.params['dropout'], (list, tuple)):
            shared_layer = layers.Bidirectional(
                layers.LSTM(size, return_sequences=True,
                            dropout=self.params['dropout'][0],
                            recurrent_dropout=self.params['dropout'][1]),
                name='shared_varLSTM_' + str(count))(shared_layer)
        else:
            # Naive dropout
            shared_layer = layers.Bidirectional(
                layers.LSTM(size, return_sequences=True),
                name='shared_LSTM_' + str(count))(shared_layer)
            if self.params['dropout'] > 0.0:
                layer_name = ('shared_dropout_' + str(self.params['dropout'])
                              + "_" + str(count))
                shared_layer = layers.TimeDistributed(
                    layers.Dropout(self.params['dropout']),
                    name=layer_name)(shared_layer)
    return shared_layer
def ref_crnn(input_shape, n_class, model_size_info):
    cprint('**** CRNN ****', 'green')
    assert len(model_size_info) == 9
    cnn_info = model_size_info[:5]
    rnn_info = model_size_info[5:8]
    fc_unit = model_size_info[8]
    init = initializers.glorot_normal()

    # MODEL
    model = Sequential()
    model.add(Conv2D(cnn_info[0],
                     kernel_size=(cnn_info[1], cnn_info[2]),
                     strides=(cnn_info[3], cnn_info[4]),
                     activation='relu',
                     input_shape=input_shape,
                     kernel_initializer=init,
                     padding='valid'))
    model.add(layers.TimeDistributed(Flatten()))
    for i in range(rnn_info[0] - 1):
        if rnn_info[2] == 0:
            model.add(layers.Bidirectional(
                layers.LSTM(rnn_info[1], return_sequences=True)))
        elif rnn_info[2] == 1:
            model.add(layers.Bidirectional(
                layers.GRU(rnn_info[1], return_sequences=True)))
        else:
            raise ValueError('wrong type name')
    if rnn_info[2] == 0:
        model.add(layers.Bidirectional(layers.LSTM(rnn_info[1])))
    elif rnn_info[2] == 1:
        model.add(layers.Bidirectional(layers.GRU(rnn_info[1])))
    else:
        raise ValueError('wrong type name')
    model.add(Dense(fc_unit, activation='relu',
                    kernel_initializer=init, bias_initializer='zeros'))
    model.add(Dense(n_class, activation='softmax',
                    kernel_initializer=init, bias_initializer='zeros'))
    return model
def mount_basic_model(vb_size, emb_dim, *, num_classes=3, act='relu'):
    """
    :param vb_size: vocabulary size
    :param emb_dim: embedding dimension
    :param num_classes: number of output classes
    :param act: activation of the intermediate dense layer
    :return: an uncompiled Sequential model
    """
    return Sequential([
        layers.Embedding(vb_size, emb_dim),
        layers.Bidirectional(layers.LSTM(emb_dim, recurrent_dropout=0.2)),
        layers.Dense(emb_dim, activation=act),
        layers.Dense(num_classes, activation='softmax'),
    ])
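# --- Hypothetical usage sketch (assumed values, not from the original) ---
# the returned model is uncompiled, so compile before fitting:
# model = mount_basic_model(vb_size=20000, emb_dim=128)
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])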
def get_bidir_gru_model():
    model = Sequential()
    model.add(layers.Bidirectional(layers.GRU(32),
                                   input_shape=(None, float_data.shape[-1])))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=500,
                                  epochs=20,
                                  validation_data=val_gen,
                                  validation_steps=val_steps)
    return history
def model_2(self):
    word_input = layers.Input(shape=(self.max_words,))
    word_embeds = layers.Embedding(input_dim=self.vocab_size + 1,
                                   output_dim=self.embedding_dim)(word_input)
    lstm = layers.Bidirectional(
        layers.LSTM(units=128, return_sequences=True))(word_embeds)
    print('lstm shape', lstm)
    atten = Attention(self.max_words)(lstm)
    print('atten', atten.shape)
    output = layers.Dense(self.class_num, activation='softmax')(atten)
    model = models.Model(inputs=word_input, outputs=output)
    print(model.summary())
    return model
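# --- Hypothetical usage sketch (assumptions, not from the original) ---
# assumes the owning class defines max_words, vocab_size, embedding_dim and
# class_num, and that Attention collapses the time axis to a single vector:
# model = some_classifier.model_2()  # `some_classifier` is a placeholder
# model.compile(optimizer='adam', loss='categorical_crossentropy')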
def GenerateBLSTMTimeDC():
    # The same pre-processing is applied to different DNN structures
    [inp_shape, out_shape, inp, convoutput] = preprocess()

    def easyreshape(x):
        xR = K.reshape(x, shape=[-1, 100,
                                 np.prod(convoutput._keras_shape[2::])])
        return xR

    convoutputR = layers.Lambda(easyreshape, name='reshape2')(convoutput)

    SIZE_RLAYERS = 256
    # Regularization parameters
    DROPOUT = 0.5    # Feed forward dropout
    RDROPOUT = 0.2   # Recurrent dropout
    L2R = 1e-6       # L2 regularization factor

    # # dimension reduction in each frame
    # simpleModel = models.Sequential(name='dense_layer')
    # # simpleModel.add(layers.Dropout(0.5, input_shape=(convoutputR._keras_shape[-1],)))
    # # simpleModel.add(layers.Dense(256, activation='relu', kernel_regularizer=l2(L2R), bias_regularizer=l2(L2R)))
    # simpleModel.add(layers.Dense(256, activation='relu',
    #                              kernel_regularizer=l2(L2R),
    #                              bias_regularizer=l2(L2R),
    #                              input_shape=(convoutputR._keras_shape[-1],)))
    # simpleModel.add(layers.BatchNormalization())
    # x = layers.TimeDistributed(simpleModel, name='Dense')(convoutputR)

    x = convoutputR
    for i in range(2):  # two stacked BiLSTMs
        x = layers.Bidirectional(
            layers.LSTM(SIZE_RLAYERS, return_sequences=True,
                        kernel_regularizer=l2(L2R),
                        recurrent_regularizer=l2(L2R),
                        bias_regularizer=l2(L2R),
                        dropout=DROPOUT,
                        recurrent_dropout=RDROPOUT))(x)

    EMBEDDINGS_DIM = 40
    cluster_o = layers.TimeDistributed(
        layers.Dense(out_shape[-1] * EMBEDDINGS_DIM, activation='tanh',
                     kernel_regularizer=l2(L2R), bias_regularizer=l2(L2R)),
        name='cluster_o')(x)

    train_model = models.Model(inputs=[inp], outputs=[cluster_o])
    return train_model
def prepare_model_architecture(self):
    input_, emb_layer = self.prepare_input_layers()
    hidden = emb_layer
    if self.architecture_params['hidden_layers_list']:
        output = build_layers(hidden,
                              self.architecture_params['hidden_layers_list'])
    else:
        hidden = layers.Bidirectional(layers.LSTM(
            units=self.architecture_params['hidden_units'],
            return_sequences=True))(hidden)
        hidden = layers.GlobalMaxPooling1D()(hidden)
        output = layers.Dense(
            units=self.architecture_params['output_units'],
            activation=self.architecture_params['output_activation'])(hidden)
    return self.create_model(input_, output)