def build_model(seq_len: int, word_embedding_dim: int, vocab_size: int,
                hidden_state_dim: int, learning_rate: float):
    sequence_input = Input(shape=(seq_len,), dtype='int32')
    embeddings = Embedding(vocab_size, word_embedding_dim,
                           input_length=seq_len)(sequence_input)
    # First BiLSTM: only the sequence output is needed here, so return_state is omitted;
    # the states used for attention are taken from the second BiLSTM below.
    lstm = Bidirectional(
        LSTM(hidden_state_dim, return_sequences=True,
             dropout=0.5, recurrent_dropout=0.4))(embeddings)
    lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional(
        LSTM(hidden_state_dim, return_sequences=True, return_state=True,
             dropout=0.5, recurrent_dropout=0.4))(lstm)
    state_h = Add()([forward_h, backward_h])

    attention = Attention(hidden_state_dim)
    context_vector, attention_weights = attention(lstm, state_h)

    dense = Dense(100, activation='relu')(context_vector)
    dropout = Dropout(rate=0.3)(dense)
    output = Dense(1, activation='sigmoid')(dropout)

    model = Model(inputs=sequence_input, outputs=output, name="TweetsModel")
    print(model.summary())
    model.compile(optimizer=Nadam(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
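# A minimal usage sketch for build_model; the hyperparameter values below are
# illustrative only (not from the source), and the custom Attention layer used above
# is assumed to be defined elsewhere in the project.
def demo_build_model_usage():
    model = build_model(seq_len=50, word_embedding_dim=300, vocab_size=20000,
                        hidden_state_dim=128, learning_rate=1e-3)
    # model.fit(padded_token_ids, binary_labels, epochs=5, batch_size=64)
    return model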
def create_network(network_input, n_vocab):
    print(network_input.shape[0])
    print(network_input.shape[1])
    print(network_input.shape[2])
    print(n_vocab)

    model = Sequential()
    model.add(
        Bidirectional(LSTM(lstm_size,
                           return_sequences=True,
                           recurrent_dropout=r_dropout),
                      input_shape=(network_input.shape[1],
                                   network_input.shape[2])))
    model.add(Dropout(dropout))
    model.add(
        Bidirectional(
            LSTM(lstm_size,
                 return_sequences=False,
                 recurrent_dropout=r_dropout)))
    model.add(Dropout(dropout))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))

    optimizer = optimizers.RMSprop()
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    if weights_to_load != "":
        model.load_weights(weights_to_load)

    return model
def get_bidirectional_cudnn_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
    """
    cuDNN-backed versions, should be much faster
    :param pre_embeddings:
    :param use_lstm: utilize LSTM or GRU unit
    :return: the model
    """
    # The embedding part could try multiple channels, as in the original paper
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # word-vector dimensionality
                                weights=[pre_embeddings],  # pre-trained word vectors
                                input_length=self.maxlen,  # maximum length of each sentence
                                trainable=False)           # whether to update the word vectors during training

    input = Input((self.maxlen,))
    embedding = embedding_layer(input)
    if use_lstm:
        x = Bidirectional(CuDNNLSTM(RNN_DIM))(embedding)  # LSTM
    else:
        x = Bidirectional(CuDNNGRU(RNN_DIM))(embedding)   # GRU

    if dp_rate > 0:  # add a dropout layer
        x = Dropout(dp_rate)(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model
def train(self, x_train, x_test, y_train, y_test, num_classes,
          seq_length=20, emb_dim=100, dropouts=(0.4, 0.4), cells=100,
          bch_siz=50, epoch=45):
    # set up and train the neural net
    from tensorflow.python.keras.models import Model
    from tensorflow.python.keras.layers import Bidirectional, Dense, Dropout, Input, LSTM

    inp = Input(shape=(seq_length, emb_dim))
    out = Bidirectional(LSTM(cells, return_sequences=True))(inp)
    out = Dropout(dropouts[0])(out)
    out = Bidirectional(LSTM(cells, return_sequences=False))(out)
    out = Dropout(dropouts[1])(out)
    out = Dense(num_classes, activation='softmax')(out)

    model = Model(inp, out)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['accuracy'])
    # print(model.summary())
    model.fit(x_train, y_train,
              batch_size=bch_siz, epochs=epoch, verbose=2,
              validation_data=(x_test, y_test))
    return model
def train(self):
    batch_size = 64
    units = 100

    embedding_matrix = np.zeros((self.vocab_size, 100))
    for word, index in self.tk.word_index.items():
        embedding_vector = self.word2vec.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

    self.model = Sequential()
    self.model.add(
        Embedding(self.vocab_size, units,
                  weights=[embedding_matrix], trainable=False))
    self.model.add(
        Bidirectional(LSTM(units, return_sequences=True, dropout=0.2)))
    self.model.add(Bidirectional(LSTM(units, dropout=0.2)))
    self.model.add(Dense(self.output_size, activation='sigmoid'))
    print(self.model.summary())

    self.model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['acc'])
    history = self.model.fit(self.X_train, self.y_train,
                             epochs=100, batch_size=batch_size, verbose=1)
def get_bidirectional_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False):
    """
    follow the common model-construction steps shown in the Keras manual
    :param pre_embeddings:
    :param dp_rate: dropout rate
    :param use_lstm: utilize LSTM or GRU unit
    :return: the model
    """
    # The embedding part could try multiple channels, as in the original paper
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # word-vector dimensionality
                                weights=[pre_embeddings],  # pre-trained word vectors
                                input_length=self.maxlen,  # maximum length of each sentence
                                trainable=False)           # whether to update the word vectors during training

    model = Sequential()
    model.add(embedding_layer)
    if use_lstm:
        model.add(Bidirectional(LSTM(RNN_DIM, recurrent_dropout=dp_rate)))
    else:
        model.add(Bidirectional(GRU(RNN_DIM, recurrent_dropout=dp_rate)))
    # model.add(Dropout(dp_rate))
    model.add(Dense(self.class_num, activation=self.last_activation))
    return model
def bi_directional_RNN(HIDDEN_SIZE, loss, optimizer, num_x, num_y, activation='tanh'):
    '''Builds a bidirectional RNN model.

    args:
        HIDDEN_SIZE (int): number of hidden units
        loss (keras.loss): loss function
        optimizer (tensorflow.python.keras.optimizers): optimizer
        num_x (int): dimensionality of the input data
        num_y (int): dimensionality of the output data
        activation (string): name of the activation function
    '''
    model = Sequential()
    model.add(
        Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True),
                      input_shape=(None, num_x)))
    model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
    model.add(Dense(num_y, activation=activation))
    model.compile(loss=loss, optimizer=optimizer)
    print(model.summary())
    return model
def create_rnn_model(self):
    """ """
    seq_input = Input(shape=(self.dense_input_len, 1))
    seq_output = Input(shape=(self.dense_input_len, 1))
    # norm_seq_input = BatchNormalization(name='Dense_BN_trainable')(seq_input)
    rnn_out = Bidirectional(
        LSTM(self.rnn_units[0], return_sequences=True,
             activation='relu'))(seq_input)
    rnn_out = Bidirectional(
        LSTM(self.rnn_units[1], return_sequences=True,
             activation='relu'))(rnn_out)
    seq_pred = TimeDistributed(Dense(self.hidden_dim[0],
                                     activation='relu'))(rnn_out)
    seq_pred = TimeDistributed(Dense(1, activation='relu'))(seq_pred)
    # seq_pred = Dense(1, activation='relu')(rnn_out)
    seq_pred = Reshape((self.dense_input_len,))(seq_pred)
    seq_input_reshape = Reshape((self.dense_input_len,))(seq_input)

    model = Model(seq_input, seq_pred)
    # Next-step prediction loss: the prediction at time t is matched against the
    # input at time t + 1.
    loss = K.mean(
        mean_squared_error(seq_input_reshape[:, 1:], seq_pred[:, :-1]))
    model.add_loss(loss)
    # def _mean_squared_error(y_true, y_pred):
    #     return K.mean(K.square(y_pred - y_true))
    model.compile(optimizer='adam', loss=None)  # _mean_squared_error
    return model
def demo_create_encoder(latent_dim, cat_dim, window_size, input_dim):
    input_layer = Input(shape=(window_size, input_dim))

    code = TimeDistributed(Dense(64, activation='linear'))(input_layer)
    code = Bidirectional(LSTM(128, return_sequences=True))(code)
    code = BatchNormalization()(code)
    code = ELU()(code)
    code = Bidirectional(LSTM(64))(code)
    code = BatchNormalization()(code)
    code = ELU()(code)

    cat = Dense(64)(code)
    cat = BatchNormalization()(cat)
    cat = PReLU()(cat)
    cat = Dense(cat_dim, activation='softmax')(cat)

    latent_repr = Dense(64)(code)
    latent_repr = BatchNormalization()(latent_repr)
    latent_repr = PReLU()(latent_repr)
    latent_repr = Dense(latent_dim, activation='linear')(latent_repr)

    decode = Concatenate()([latent_repr, cat])
    decode = RepeatVector(window_size)(decode)
    decode = Bidirectional(LSTM(64, return_sequences=True))(decode)
    decode = ELU()(decode)
    decode = Bidirectional(LSTM(128, return_sequences=True))(decode)
    decode = ELU()(decode)
    decode = TimeDistributed(Dense(64))(decode)
    decode = ELU()(decode)
    decode = TimeDistributed(Dense(input_dim, activation='linear'))(decode)

    error = Subtract()([input_layer, decode])

    return Model(input_layer, [decode, latent_repr, cat, error])
def create_kaggle_model(fingerprint_input, model_settings, is_training):
    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    mel_bins = 80
    input_shape = [input_time_size, mel_bins, 1]
    fingerprint_4d = tf.reshape(fingerprint_input, [-1] + input_shape)

    conv_filters = 32
    x = Conv2D(filters=conv_filters,
               kernel_size=[5, 20],
               strides=[2, 8],
               padding='same',
               use_bias=False,
               input_shape=input_shape)(fingerprint_4d)
    x = tf.layers.BatchNormalization(scale=False)(x)
    x = Activation('relu')(x)
    # print(x.get_shape().as_list())

    x = Reshape((49, 320))(x)
    rnn_size = 256
    x = Bidirectional(GRU(rnn_size, return_sequences=True, unroll=True))(x)
    x = Bidirectional(GRU(rnn_size, return_sequences=True, unroll=True))(x)
    x = Dense(rnn_size, activation='relu')(x)
    x = Flatten()(x)

    label_count = model_settings['label_count']
    final_fc = Dense(label_count)(x)
    if is_training:
        return final_fc, dropout_prob
    else:
        return final_fc
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
    """ Defining a NMT model """

    # Define an input sequence and process it.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps - 1, fr_vsize),
                               name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(shape=(fr_timesteps - 1, fr_vsize), name='decoder_inputs')

    # Encoder GRU
    encoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True,
                                    return_state=True, name='encoder_gru'),
                                name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using `encoder_states` as initial state.
    decoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True,
                                    return_state=True, name='decoder_gru'),
                                name='bidirectional_decoder')
    decoder_out, decoder_fwd_state, decoder_back_state = decoder_gru(
        decoder_inputs, initial_state=[encoder_fwd_state, encoder_back_state])

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention input and decoder GRU output
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer
    dense = Dense(fr_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()

    """ Inference model """
    batch_size = 1

    """ Encoder (Inference) model """
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize),
                               name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])

    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize), name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, 2 * hidden_size),
                               name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_back_init')

    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder_gru(
        decoder_inf_inputs, initial_state=[decoder_init_fwd_state, decoder_init_back_state])
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(inputs=[encoder_inf_states, decoder_init_fwd_state,
                                  decoder_init_back_state, decoder_inf_inputs],
                          outputs=[decoder_inf_pred, attn_inf_states,
                                   decoder_inf_fwd_state, decoder_inf_back_state])

    return full_model, encoder_model, decoder_model
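# A hedged usage sketch for define_nmt; the sizes below are illustrative only, and the
# AttentionLayer referenced above is assumed to come from the same project.
def demo_define_nmt_usage():
    full_model, encoder_model, decoder_model = define_nmt(
        hidden_size=96, batch_size=64,
        en_timesteps=20, en_vsize=30000,
        fr_timesteps=20, fr_vsize=30000)
    # Teacher forcing: the decoder input is the one-hot target sequence without its last
    # step, and the training target is the same sequence without its first step, e.g.
    # full_model.fit([en_onehot, fr_onehot[:, :-1, :]], fr_onehot[:, 1:, :], epochs=5)
    return full_model, encoder_model, decoder_model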
def train_model_2BLSTM_variableSequenceLength(path, subjectID, modelType, MLtechnique,
                                              features, labels, dw, batch_size,
                                              patience, LSTMunits=30):
    """
    FUNCTION NAME: train_model_2BLSTM_variableSequenceLength

    This function trains a bidirectional LSTM model with 2 hidden layers when input
    sequences have a different length from one sample to the other. In the first step,
    input sequences are arranged into a tensor of the same length using zero padding.
    When the data is ready, the bidirectional LSTM is trained.

    INPUT:
    ------
        -> path:        full path where to store the trained model
        -> subjectID:   integer indicating the ID of the subject being analyzed
        -> modelType:   type of model to train
        -> MLtechnique: technique to use to train the model
        -> features:    matrix of features to train the model
        -> labels:      matrix of labels to train the model
        -> dw:          factor used when downsampling the available data
        -> batch_size:  value for the batch_size parameter
        -> patience:    value for the patience parameter
        -> LSTMunits:   number of units of the LSTM

    OUTPUT:
    -------
    """
    epochs = 200
    verbose = 1

    if dw == 1:
        modelName = path + 'Model_Subject' + str(subjectID) + '_' + MLtechnique + \
            '_LSTMunits' + str(LSTMunits) + '_BatchSize' + str(batch_size) + \
            '_Patience' + str(patience) + '_' + modelType
    else:
        modelName = path + 'Model_Subject' + str(subjectID) + '_DW' + str(dw) + '_' + MLtechnique + \
            '_LSTMunits' + str(LSTMunits) + '_BatchSize' + str(batch_size) + \
            '_Patience' + str(patience) + '_' + modelType

    # Convert data matrices to tensors
    T_features, T_labels = DE.dataMatrices2tensors(features, labels, modelType)

    # Define the bidirectional LSTM
    model = Sequential([
        Masking(mask_value=0., input_shape=(None, DE.get_3DtensorDimensions(T_features)[2])),
        Bidirectional(LSTM(LSTMunits, activation='tanh', return_sequences=True)),
        Bidirectional(LSTM(int(LSTMunits / 2), activation='tanh', return_sequences=True)),
        TimeDistributed(Dense(1, activation='linear'))
    ])

    model.compile(optimizer=Adam(), loss=loss_CCC)

    earlyStop = EarlyStopping(monitor='loss', patience=patience)
    callbacks_list = [earlyStop]

    # Train the model
    model.fit(T_features, T_labels, batch_size=batch_size, epochs=epochs,
              verbose=verbose, callbacks=callbacks_list, validation_split=0)

    print('-> Saving model ..')
    # Save model
    model.save(modelName + '.h5')
    print('<- Model saved')
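# loss_CCC is referenced above but not defined in this snippet. A common Keras
# formulation of a concordance-correlation-coefficient loss looks roughly like the
# following; this is a sketch and not necessarily the original project's version.
def loss_CCC_sketch(y_true, y_pred):
    import tensorflow.keras.backend as K
    true_mean = K.mean(y_true)
    pred_mean = K.mean(y_pred)
    covariance = K.mean((y_true - true_mean) * (y_pred - pred_mean))
    ccc = (2.0 * covariance) / (K.var(y_true) + K.var(y_pred) +
                                K.square(true_mean - pred_mean) + K.epsilon())
    return 1.0 - ccc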
def deep_rnnblocks(inputdim, inputshape):
    if inputdim < 2:
        return (Bidirectional(GRU(10, return_sequences=True),
                              input_shape=inputshape, name='input'),
                Bidirectional(GRU(20, return_sequences=False)))
    elif inputdim < 4:
        return (Bidirectional(GRU(15, return_sequences=True),
                              input_shape=inputshape, name='input'),
                Bidirectional(GRU(30, return_sequences=False)))
    elif inputdim < 6:
        return (Bidirectional(GRU(20, return_sequences=True),
                              input_shape=inputshape, name='input'),
                Bidirectional(GRU(40, return_sequences=False)))
    else:
        return (Bidirectional(GRU(30, return_sequences=True),
                              input_shape=inputshape, name='input'),
                Bidirectional(GRU(60, return_sequences=False)))
def build_model(time_steps, num_classes, inputdim):
    model = Sequential()
    model.add(Bidirectional(GRU(10, return_sequences=True),
                            input_shape=(time_steps, inputdim)))
    model.add(Bidirectional(GRU(20, return_sequences=False)))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
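# Quick sanity check for the build_model function above, using random data; all shapes
# and sizes here are illustrative, not taken from the source.
def demo_build_model_sanity_check():
    import numpy as np
    model = build_model(time_steps=100, num_classes=5, inputdim=12)
    x = np.random.rand(8, 100, 12)
    y = np.eye(5)[np.random.randint(0, 5, size=8)]  # one-hot labels for categorical_crossentropy
    model.fit(x, y, epochs=1, batch_size=4, verbose=0)
    return model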
def build_model_1(embedding_matrix, one_hot_shape):
    words = Input(shape=(MAX_LEN,))
    x = Embedding(*embedding_matrix.shape,
                  weights=[embedding_matrix], trainable=False)(words)
    x = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='concat')(x)
    x = SpatialDropout1D(rate=0.3)(x)
    # x = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='ave')(x)
    # x = SpatialDropout1D(rate=0.3)(x)
    # x = GlobalAveragePooling1D()(x)  # this layer average each output from the Bidirectional layer
    x = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])

    summary = Input(shape=(MAX_LEN,))
    x_aux = Embedding(*embedding_matrix.shape,
                      weights=[embedding_matrix], trainable=False)(summary)
    x_aux = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='concat')(x_aux)
    x_aux = SpatialDropout1D(rate=0.3)(x_aux)
    # x_aux = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='ave')(x_aux)
    # x_aux = SpatialDropout1D(rate=0.3)(x_aux)
    # x_aux = GlobalAveragePooling1D()(x_aux)
    x_aux = concatenate([
        GlobalMaxPooling1D()(x_aux),
        GlobalAveragePooling1D()(x_aux),
    ])

    one_hot = Input(shape=(one_hot_shape,))
    hidden = concatenate([x, x_aux, one_hot])

    hidden = Dense(400, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(400, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(100, activation='relu')(hidden)

    result = Dense(1, activation='linear')(hidden)

    model = Model(inputs=[words, summary, one_hot], outputs=[result])
    # adam = keras.optimizers.Adam(lr=0.0001, clipnorm=1.0, clipvalue=0.5)
    model.compile(loss='mse', optimizer='adam')

    return model
def bidirectional_lstm(inputs):
    with tf.variable_scope('bidirection_gru', reuse=tf.AUTO_REUSE):
        gru = GRU(units=DEFINES.num_units, return_sequences=True)
        gru2 = GRU(units=DEFINES.num_units * 2, return_sequences=True)
        bidirectional = Bidirectional(gru)
        bidirectional2 = Bidirectional(gru2)
        output = bidirectional(inputs)
        output = bidirectional2(output)
        return output
def _train_LSTM_1(self, X_train, y_train, epochs=5, batch_size=64,
                  learning_rate=0.001, reg=0.01):
    """
    Trains LSTM
    - X_train: Input sequence
    - y_train: Target sequence
    - epochs
    - batch_size
    - learning_rate: Adam optimizer's learning rate
    - reg: Regularization

    Returns:
    - history: Scalar loss
    """
    flatten_y = [category for sublist in y_train for category in sublist]
    class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(flatten_y), flatten_y)

    optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model = models.Sequential()
    model.add(Embedding(self.max_word_count, 64))
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Bidirectional(LSTM(32)))
    model.add(Dense(64, kernel_regularizer=regularizers.l2(reg), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(8, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optim,
                  metrics=[BinaryAccuracy()])

    # Keras expects a dict mapping class label -> weight, built from the computed weights.
    history = model.fit(X_train, y_train,
                        class_weight=dict(zip(np.unique(flatten_y), class_weights)),
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.25,
                        verbose=self.verbose,
                        callbacks=[
                            EarlyStopping(monitor='val_loss',
                                          patience=3,
                                          min_delta=0.0001)
                        ])

    self.model = model
    self.history = history.history
def biLSTM_baseline(embedding_matrix, MAX_LEN, num_words, EMBEDDING_DIM,
                    LSTM_units, LSTM_dropout):
    input_dimen = Input(shape=(MAX_LEN,))
    model = Embedding(input_dim=num_words,
                      output_dim=EMBEDDING_DIM,
                      input_length=MAX_LEN,
                      embeddings_initializer=Constant(embedding_matrix),
                      trainable=False)(input_dimen)
    model = Bidirectional(LSTM(units=LSTM_units, return_sequences=True,
                               recurrent_dropout=LSTM_dropout,
                               dropout=LSTM_dropout))(model)
    model = Bidirectional(LSTM(units=LSTM_units, return_sequences=True,
                               recurrent_dropout=LSTM_dropout,
                               dropout=LSTM_dropout))(model)
    out = TimeDistributed(Dense(1, activation='sigmoid'))(model)
    model = Model(input_dimen, out)
    return model
def create_2_BLSTM_layers(vocab_input_data_size, output_bn_vocab_size,
                          output_dom_vocab_size, output_lex_vocab_size,
                          embedding_size, hidden_size, input_dropout, lstm_dropout):
    '''
    Method used to create a double BLSTM model
    :param vocab_input_data_size: the size of the input vocab
    :param output_bn_vocab_size: the size of the output bn vocab
    :param output_dom_vocab_size: the size of the output dom vocab
    :param output_lex_vocab_size: the size of the output lex vocab
    :param embedding_size: the size of the Keras Embedding
    :param hidden_size: the hidden size of the LSTM layer
    :param input_dropout: how much dropout to apply after the input layer
    :param lstm_dropout: how much dropout to apply in the LSTM layer
    :return: a tensorflow.keras Model
    '''
    print("Creating 2 BLSTM multitask")
    input_data = Input(shape=(None,))
    x1 = Embedding(vocab_input_data_size, embedding_size, mask_zero=True)(input_data)
    dropout = Dropout(input_dropout, name="Dropout")(x1)
    layer1_bidirectional = Bidirectional(
        LSTM(hidden_size, return_sequences=True,
             recurrent_dropout=lstm_dropout, dropout=lstm_dropout))(dropout)
    layer2_bidirectional = Bidirectional(
        LSTM(hidden_size, return_sequences=True,
             recurrent_dropout=lstm_dropout, dropout=lstm_dropout))(layer1_bidirectional)
    bn_output = Dense(output_bn_vocab_size, activation='softmax',
                      name='bn_output')(layer2_bidirectional)
    dom_output = Dense(output_dom_vocab_size, activation='softmax',
                       name='dom_output')(layer2_bidirectional)
    lex_output = Dense(output_lex_vocab_size, activation='softmax',
                       name='lex_output')(layer2_bidirectional)
    model = Model(inputs=input_data, outputs=[bn_output, dom_output, lex_output])
    return model
def model2():
    inp2 = Input(shape=(TIMESERIES_LENGTH, 3))
    x2 = Bidirectional(LSTM(30, return_sequences=True,
                            input_shape=(TIMESERIES_LENGTH, 3)),
                       merge_mode='concat')(inp2)
    bn = BatchNormalization()(x2)
    x3 = Dropout(0.2)(bn)
    x4 = Bidirectional(LSTM(30))(x3)
    x5 = Dropout(0.2)(x4)
    x6 = Flatten()(x5)
    # x6 = Dense(3, activation='softmax')(x5)
    model = Model(inputs=[inp2], outputs=x6)
    return model
def __init__(self):
    self.MAX_SEQUENCE_LENGTH = 20
    self.STEP = 3
    self.ITERATION = 500
    self.tokenizer, self.index_word, self.embedding_matrix, self.text_words, self.X, self.y = \
        read_dataset(maxlen=self.MAX_SEQUENCE_LENGTH, step=self.STEP)

    if os.path.exists('saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('saved_model.h5')
    else:
        print('Build model...')
        inputs = Input(shape=(self.MAX_SEQUENCE_LENGTH,))
        x = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                      output_dim=EMBEDDING_DIM,
                      input_length=self.MAX_SEQUENCE_LENGTH,
                      weights=[self.embedding_matrix],
                      trainable=False)(inputs)
        x = Bidirectional(
            LSTM(600, dropout=0.2, recurrent_dropout=0.1,
                 return_sequences=True))(x)
        x = LSTM(600, dropout=0.2, recurrent_dropout=0.1)(x)
        # You can comment out the two lines above and use the line below instead,
        # to simplify the model and speed up training:
        # x = Bidirectional(LSTM(600, dropout=0.2, recurrent_dropout=0.1))(x)
        x = Dense(len(self.tokenizer.word_index) + 1)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs, predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        # plot_model(model, to_file='model.png')
        self.model = model
def _build_embeddings(weights, use_bidirectional=False):
    # The first 'branch' of the model embeds words.
    word_emb_input = Input((None,), name='word_input')
    mask_word = Masking(mask_value=pad_tag)(word_emb_input)
    word_emb_output = Embedding(n_words, dim_word, weights=[weights],
                                trainable=False)(mask_word)

    # The second 'branch' of the model embeds characters.
    # Note: the end-to-end paper claims to have applied a dropout layer on character
    # embeddings before inputting to a CNN, in addition to before both layers of the BLSTM.
    char_emb_input = Input((None, None), name='char_input')
    # Reshape: the input is (sentences, words, characters). For characters we only want
    # to operate over (words, characters), so collapse the sentence dimension into the batch.
    char_emb_output = Lambda(
        lambda x: tf.keras.backend.reshape(x, (-1, tf.keras.backend.shape(x)[-1])))(char_emb_input)
    mask_char = Masking(mask_value=pad_tag)(char_emb_output)
    char_emb_output = Embedding(n_char, dim_char)(mask_char)
    char_emb_output = Dropout(dropout)(char_emb_output)

    # Construct LSTM layers. Option to use one Bidirectional layer, or one forward and
    # one backward LSTM layer. Empirical results appear better with two LSTM layers,
    # hence that is the default.
    if use_bidirectional:
        char_emb_output = Bidirectional(
            LSTM(hidden_size_char, return_sequences=False))(char_emb_output)
    else:
        fw_LSTM = LSTM(hidden_size_char, return_sequences=False)(char_emb_output)
        bw_LSTM = LSTM(hidden_size_char, return_sequences=False,
                       go_backwards=True)(char_emb_output)
        char_emb_output = concatenate([fw_LSTM, bw_LSTM])

    # Use dropout to prevent overfitting (as a regularizer)
    char_emb_output = Dropout(dropout)(char_emb_output)
    # Reshape back to (batch, words, 2 * hidden_size_char)
    char_emb_output = Lambda(
        lambda x, z: tf.keras.backend.reshape(x, (-1, tf.shape(z)[1], 2 * hidden_size_char)),
        arguments={"z": word_emb_input})(char_emb_output)

    return word_emb_input, word_emb_output, char_emb_input, char_emb_output
def bd_model(input_shape, output_sequence_length, english_vocab_size, spanish_vocab_size):
    """
    Build and train a bidirectional RNN model on x and y
    :param input_shape: Tuple of input shape
    :param output_sequence_length: Length of output sequence
    :param english_vocab_size: Number of unique English words in the dataset
    :param spanish_vocab_size: Number of unique Spanish words in the dataset
    :return: Keras model built, but not trained
    """
    # TODO: Implement
    # Hyperparameters
    # learning_rate = 0.003

    # TODO: Build the layers
    model = Sequential()
    model.add(Bidirectional(GRU(128, return_sequences=True),
                            input_shape=input_shape[1:]))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    # model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(spanish_vocab_size, activation='softmax')))

    # Compile model
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])
    return model
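# A hedged usage sketch for bd_model; the x and y tensors are hypothetical placeholders.
# With a TimeDistributed softmax output and sparse_categorical_crossentropy, y should
# contain integer label ids of shape (batch, output_sequence_length, 1).
def demo_bd_model_usage(x, y, english_vocab_size, spanish_vocab_size):
    rnn_model = bd_model(x.shape, y.shape[1], english_vocab_size, spanish_vocab_size)
    rnn_model.fit(x, y, batch_size=1024, epochs=10, validation_split=0.2)
    return rnn_model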
def Create_CNN(self):
    """ """
    inp = Input(shape=(self.max_len,))
    embedding = Embedding(self.max_token, self.embedding_dim,
                          weights=[self.embedding_weight],
                          trainable=not self.fix_wv_model)
    x = embedding(inp)
    if self.emb_dropout > 0:
        x = SpatialDropout1D(self.emb_dropout)(x)
    # if self.char_split:
    #     # First conv layer
    #     x = Conv1D(filters=128, kernel_size=3, strides=2, padding="same")(x)

    cnn_list = []
    rnn_list = []
    for filter_size in self.filter_size:
        if filter_size > 0:
            conc = self.ConvBlock(x, filter_size)
            cnn_list.append(conc)
    for rnn_unit in self.context_vector_dim:
        if rnn_unit > 0:
            rnn_maps = Bidirectional(
                GRU(rnn_unit, return_sequences=True,
                    dropout=self.rnn_input_dropout,
                    recurrent_dropout=self.rnn_state_dropout))(x)
            conc = self.pooling_blend(rnn_maps)
            rnn_list.append(conc)

    conc_list = cnn_list + rnn_list
    if len(conc_list) == 1:
        conc = Lambda(lambda x: x, name='RCNN_CONC')(conc_list)
    else:
        conc = Concatenate(name='RCNN_CONC')(conc_list)

    # conc = self.pooling_blend(x)
    if self.separate_label_layer:
        for i in range(self.num_classes):
            full_connect = self.full_connect_layer(conc)
            proba = Dense(1, activation="sigmoid")(full_connect)
            if i == 0:
                outp = proba
            else:
                outp = concatenate([outp, proba], axis=1)
    else:
        if self.hidden_dim[0] > 0:
            full_connect = self.full_connect_layer(conc)
        else:
            full_connect = conc
        # full_conv_0 = self.act_blend(full_conv_pre_act_0)
        # full_conv_pre_act_1 = Dense(self.hidden_dim[1])(full_conv_0)
        # full_conv_1 = self.act_blend(full_conv_pre_act_1)
        # flat = Flatten()(conc)
        outp = Dense(6, activation="sigmoid")(full_connect)

    model = Model(inputs=inp, outputs=outp)
    # print(model.summary())
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model
def _cnn_lstm_model(input_length, num_classes, num_features, embedding_matrix,
                    embedding_dim, filters_num=512, filter_sizes=None, dropout_rate=0.5):
    if filter_sizes is None:
        filter_sizes = [5]
    op_units, op_activation = num_classes, 'softmax'

    model = Sequential()
    model.add(
        Embedding(input_dim=num_features,
                  output_dim=embedding_dim,
                  input_length=input_length,
                  weights=[embedding_matrix],
                  trainable=False))
    model.add(
        Bidirectional(LSTM(units=int(embedding_dim / 2), return_sequences=True),
                      input_shape=(-1, embedding_dim)))
    model.add(Flatten())
    model.add(Dropout(rate=dropout_rate))
    model.add(Dense(units=op_units, activation=op_activation))

    loss = 'sparse_categorical_crossentropy'
    optimizer = Adam()
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    model.summary()
    return model
def _cnn_bilstm_attention_dropout(self, name: str) -> Model:
    """https://qiita.com/fufufukakaka/items/4f9d42a4300392691bf3
    """
    _inputs = Input(shape=(self.maxlen,), name='input')
    l_embed = Embedding(input_dim=self.input_dim,
                        output_dim=self.embed_dim,
                        input_length=self.maxlen,
                        name='embedding')(_inputs)
    l_drop1 = Dropout(0.2, name='input_dropout')(l_embed)
    l_cov1 = Conv1D(filters=self.conv_filters,
                    kernel_size=self.conv_kernel_size,
                    padding='same',
                    activation='relu')(l_drop1)
    l_pool1 = MaxPool1D(pool_size=self.conv_pool_size)(l_cov1)
    l_bilstm1 = Bidirectional(
        LSTM(units=self.units, dropout=0.2, recurrent_dropout=0.2,
             return_sequences=True, name='bilstm_dropout'))(l_pool1)
    l_flat = Flatten()(self.__attention_3d_block(l_bilstm1, l_pool1.shape[1].value))
    l_drop2 = Dropout(0.5, name='hidden_dropout')(l_flat)
    _preds = Dense(self.classes, activation='sigmoid', name='fc1')(l_drop2)
    return Model(inputs=_inputs, outputs=_preds, name=name)
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=-1)(inputs)
    x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                      merge_mode='concat')(inputs)
    # activation_1 = Activation('tanh')(lstm_1)
    x = SpatialDropout1D(0.1)(x)
    x = Attention(8, 16)([x, x, x])
    x1 = GlobalMaxPool1D()(x)
    x2 = GlobalAvgPool1D()(x)
    x = Concatenate(axis=-1)([x1, x2])
    x = Dense(units=128, activation='elu')(x)
    x = Dense(units=64, activation='elu')(x)
    x = Dropout(rate=0.4)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def CRNN(input_shape):
    Input_Tr = Input(input_shape, dtype='float', name='Input_Tr')

    conv_layer1 = Conv2D(32, kernel_size=3, strides=1, padding='SAME')(Input_Tr)
    batch_layer1 = BatchNormalization(axis=-1)(conv_layer1)
    conv_layer1_out = Activation('relu')(batch_layer1)
    pooling_layer1 = MaxPooling2D((1, 4))(conv_layer1_out)
    dropout_layer1 = Dropout(0.5)(pooling_layer1)

    conv_layer2 = Conv2D(64, kernel_size=3, strides=1, padding='SAME')(dropout_layer1)
    batch_layer2 = BatchNormalization(axis=-1)(conv_layer2)
    conv_layer2_out = Activation('relu')(batch_layer2)
    pooling_layer2 = MaxPooling2D((1, 4))(conv_layer2_out)
    dropout_layer2 = Dropout(0.5)(pooling_layer2)
    print(dropout_layer2.shape)

    reshape_layer3 = Reshape((600, 64 * int(round(n_mel / 4 / 4))))(dropout_layer2)
    print(reshape_layer3.shape)

    bidir_layer3 = Bidirectional(
        GRU(64, return_sequences=True, activation='tanh'))(reshape_layer3)
    output = TimeDistributed(Dense(1, activation='sigmoid'))(bidir_layer3)

    model = Model(inputs=[Input_Tr], outputs=[output])
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1', return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def model(embedding_size, n_a):
    # The functional API needs explicit inputs, just like any function.
    X = Input(batch_shape=(batch_size, None, embedding_size))
    a1 = Bidirectional(CuDNNLSTM(units=n_a, return_sequences=True))(X)
    a2 = Dense(16, activation="tanh")(a1)
    yhat = Dense(1, activation="sigmoid")(a2)
    model = Model(inputs=X, outputs=yhat)
    return model
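# Illustrative usage of model(); batch_size is a module-level global in the original
# snippet, and the input is expected to be pre-computed word embeddings of shape
# (batch_size, timesteps, embedding_size). The values below are assumptions, not
# taken from the source.
def demo_model_usage():
    clf = model(embedding_size=300, n_a=64)
    clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # clf.fit(embedded_sequences, labels, batch_size=batch_size, epochs=3)
    return clf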