def good_models(emb_len, optimizer, regularizer):
    model = Sequential()
    model.add(Bidirectional(LSTM(10, dropout=0.5, recurrent_dropout=0.5,
                                 return_sequences=True),
                            input_shape=(20, emb_len), merge_mode='concat'))
    if regularizer:
        model.add(SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            attention_activation='sigmoid',
            use_attention_bias=True,
            kernel_regularizer=keras.regularizers.l2(1e-6),
            bias_regularizer=keras.regularizers.l1(1e-6),
            attention_regularizer_weight=1e-6,
            name='Attention'))
    else:
        model.add(SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            attention_activation=None,
            kernel_regularizer=keras.regularizers.l2(1e-6),
            use_attention_bias=False,
            name='Attention'))
    model.add(Bidirectional(LSTM(5, dropout=0.5, recurrent_dropout=0.5),
                            merge_mode='concat'))
    model.add(Dense(2, activation='softmax',
                    kernel_regularizer=regularizers.l2(0.01),
                    activity_regularizer=regularizers.l1(0.01)))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model
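# A minimal usage sketch for good_models (an illustration, not part of the
# original source): emb_len=300 and optimizer='adam' are assumed values, and
# the input batch shape (batch, 20, emb_len) matches the input_shape above.
import numpy as np

demo_model = good_models(emb_len=300, optimizer='adam', regularizer=True)
x_demo = np.random.rand(4, 20, 300)                   # (batch, timesteps, emb_len)
y_demo = np.eye(2)[np.random.randint(0, 2, size=4)]   # one-hot binary labels
demo_model.train_on_batch(x_demo, y_demo)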
def multi_bidirection_lstm_with_attention(max_len=77, emb_dim=32,
                                          max_vocab_len=128,
                                          W_reg=regularizers.l2(1e-4)):
    # Input
    main_input = Input(shape=(max_len,), dtype='int32', name='main_input')
    # Embedding layer (Keras 2 API: `embeddings_regularizer` replaces the old
    # `W_regularizer`, and Embedding no longer takes a `dropout` argument, so
    # the dropout is applied with SpatialDropout1D instead)
    emb = Embedding(input_dim=max_vocab_len, output_dim=emb_dim,
                    input_length=max_len,
                    embeddings_regularizer=W_reg)(main_input)
    emb = SpatialDropout1D(0.2)(emb)
    # Bi-directional LSTM layers with self-attention in between
    lstm = Bidirectional(LSTM(units=128, return_sequences=True,
                              dropout=0.2, recurrent_dropout=0.2))(emb)
    lstm = Dropout(0.2)(lstm)
    att = SeqSelfAttention(attention_activation='relu')(lstm)
    s_lstm = Bidirectional(LSTM(units=64, return_sequences=True,
                                dropout=0.2, recurrent_dropout=0.2))(att)
    s_lstm = Dropout(0.2)(s_lstm)
    s_att = SeqSelfAttention(attention_activation='relu')(s_lstm)
    s_att = Flatten()(s_att)
    hidden1 = Dense(4736)(s_att)
    hidden1 = ELU()(hidden1)
    hidden1 = BatchNormalization()(hidden1)  # Keras 2 dropped the `mode` argument
    hidden1 = Dropout(0.5)(hidden1)
    # Output layer (last fully connected layer)
    output = Dense(21, activation='softmax', name='output')(hidden1)
    # Compile model and define optimizer
    model = Model(inputs=[main_input], outputs=[output])
    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.CategoricalAccuracy(),
                           preprocess.fmeasure, preprocess.recall,
                           preprocess.precision])
    return model
def load_rlm_lstm_model():
    rlm_lstm_model_path = f'{rlm_base_models_path}/lstm_all_data.h5'
    if not path.exists(rlm_lstm_model_path):
        # download the model, showing a progress bar while consuming the stream
        rlm_lstm_model_url = f'{base_url}/lstm_all_data.h5'
        rlm_lstm_model_request = requests.get(rlm_lstm_model_url)
        with tqdm.wrapattr(open(os.devnull, "wb"), "write", miniters=1,
                           desc=rlm_lstm_model_url.split('/')[-1],
                           total=int(rlm_lstm_model_request.headers.get(
                               'content-length', 0))) as fout:
            for chunk in rlm_lstm_model_request.iter_content(chunk_size=4096):
                fout.write(chunk)
        with open(rlm_lstm_model_path, 'wb') as rlm_lstm_file:
            rlm_lstm_file.write(rlm_lstm_model_request.content)
    # load_model expects a file path (or h5py.File), not an open file object
    rlm_lstm_model = load_model(
        rlm_lstm_model_path,
        custom_objects=SeqSelfAttention.get_custom_objects())
    rlm_lstm_model._make_predict_function()
    return rlm_lstm_model
def local_context_learning(input_length, input_dim, output_dim, hidden_dim,
                           filters_num, kernel_val, learning_rate, drop_rate):
    basic_input = Input(shape=(input_length, input_dim))
    label_input = Input(shape=(1,))
    weighted_input = adding_weight(input_length, input_dim)([basic_input, label_input])
    # recurrent branch with self-attention
    rnn_output = GRU(units=hidden_dim, return_sequences=True)(weighted_input)
    rnn_att = SeqSelfAttention(attention_activation='sigmoid')(rnn_output)
    # convolutional branch, projected to the same width, with self-attention
    cnn_output = Conv1D(filters=filters_num, kernel_size=kernel_val,
                        padding="same")(weighted_input)
    cnn_output_reformat = Dense(hidden_dim)(cnn_output)
    cnn_att = SeqSelfAttention(attention_activation='sigmoid')(cnn_output_reformat)
    # fuse both branches as values for a multi-head attention lookup
    new_value = Concatenate(axis=1)([rnn_att, cnn_att])
    new_keys = Lambda(lambda x: ones_like(x))(new_value)
    new_result = MultiHeadAttention(head_num=2)([weighted_input, new_keys, new_value])
    result = Flatten()(new_result)
    result_fix = Dropout(rate=drop_rate)(result)
    output = Dense(output_dim)(result_fix)
    fixed_output = Activation(activation='sigmoid')(output)
    model = Model([basic_input, label_input], fixed_output)
    ada = Adam(lr=learning_rate)
    model.compile(optimizer=ada, loss='categorical_crossentropy')
    return model
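# local_context_learning assumes an external `adding_weight` layer factory. A
# minimal sketch of one plausible implementation follows; this is an
# assumption for illustration, not the original layer. It modulates each
# timestep of the basic input by a learned weight and the scalar label input.
from keras import backend as K
from keras.layers import Layer


class AddingWeightSketch(Layer):
    """Hypothetical stand-in for adding_weight(input_length, input_dim)."""

    def __init__(self, input_length, input_dim, **kwargs):
        super(AddingWeightSketch, self).__init__(**kwargs)
        self.input_length = input_length
        self.input_dim = input_dim

    def build(self, input_shape):
        # one learnable weight per timestep
        self.w = self.add_weight(name='timestep_weight',
                                 shape=(self.input_length, 1),
                                 initializer='ones', trainable=True)
        super(AddingWeightSketch, self).build(input_shape)

    def call(self, inputs):
        basic, label = inputs                 # (batch, T, D), (batch, 1)
        label = K.expand_dims(label, axis=1)  # (batch, 1, 1)
        return basic * self.w * label         # broadcast over timesteps

    def compute_output_shape(self, input_shape):
        return input_shape[0]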
def keras_self_att_multi(X, Y, X_t, Y_t, emb_len, optimizer, activation, regularizer):
    model = Sequential()
    model.add(Bidirectional(LSTM(10, dropout=0.5, recurrent_dropout=0.5,
                                 return_sequences=True),
                            input_shape=(20, emb_len), merge_mode='concat'))
    if regularizer:
        model.add(SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            attention_activation='sigmoid',
            use_attention_bias=True,
            kernel_regularizer=keras.regularizers.l2(1e-6),
            bias_regularizer=keras.regularizers.l1(1e-6),
            attention_regularizer_weight=1e-6,
            name='Attention'))
    else:
        model.add(SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            attention_activation=None,
            kernel_regularizer=keras.regularizers.l2(1e-6),
            use_attention_bias=False,
            name='Attention'))
    model.add(Bidirectional(LSTM(5, dropout=0.5, recurrent_dropout=0.5),
                            merge_mode='concat'))
    model.add(Dense(2, activation=activation,
                    kernel_regularizer=regularizers.l2(0.01),
                    activity_regularizer=regularizers.l1(0.01)))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    # X and X_t are assumed to be torch tensors, hence the .cpu().numpy() calls
    history = model.fit(x=X.cpu().numpy(), y=Y,
                        validation_data=(X_t.cpu().numpy(), Y_t),
                        batch_size=64, epochs=100, shuffle=True,
                        callbacks=[EarlyStopping(monitor='val_loss', min_delta=0,
                                                 patience=20, verbose=0,
                                                 mode='auto', baseline=None,
                                                 restore_best_weights=True)])
    # 'val_acc' is the history key in older Keras; newer tf.keras uses 'val_accuracy'
    return max(history.history['val_acc'])
def __init__(self, num_lstm_layers, LSTM_hidden_state_dim, num_time_frames,
             num_freq_bins, num_genres, lyrics_embedding_dimension,
             use_attention):
    self.num_frames = num_time_frames
    self.num_freq_bins = num_freq_bins
    self.chroma_input = Input(shape=(self.num_frames, num_freq_bins))
    self.mfcc_input = Input(shape=(self.num_frames, num_freq_bins))
    self.lyrics_embedding_input = Input(shape=(lyrics_embedding_dimension,))
    self.use_attention = use_attention
    # chroma LSTM and attention; stacked LSTMs need return_sequences=True
    # (return_state=True returns [output, h, c], which breaks both stacking
    # and SeqSelfAttention)
    chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                               return_sequences=True)(self.chroma_input)
    for _ in range(num_lstm_layers - 2):
        chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                                   return_sequences=True)(chroma)
    chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                               return_sequences=self.use_attention)(chroma)
    if self.use_attention:
        chroma = SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            name='AttentionChroma')(chroma)
        chroma = Flatten()(chroma)  # attention output is 3D; flatten before concatenation
    # MFCC LSTM and attention
    mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                             return_sequences=True)(self.mfcc_input)
    for _ in range(num_lstm_layers - 2):
        mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                                 return_sequences=True)(mfcc)
    mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                             return_sequences=self.use_attention)(mfcc)
    if self.use_attention:
        mfcc = SeqSelfAttention(
            attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
            name='AttentionMFCC')(mfcc)
        mfcc = Flatten()(mfcc)
    # concatenating the mfcc, chroma, and lyrics-embedding features
    x = concatenate([mfcc, chroma, self.lyrics_embedding_input])
    # putting the features through a couple of fully connected layers
    x = Dense(2048, activation='relu')(x)
    x = Dense(2048, activation='relu')(x)
    # the embedding vector for each song
    latent_embedding = Dense(50, activation='relu', name='embedding')(x)
    genre = Dense(num_genres, activation='softmax')(latent_embedding)
    self.net = Model(
        inputs=[self.chroma_input, self.mfcc_input, self.lyrics_embedding_input],
        outputs=genre)
    self.embedding = Model(self.net.input,
                           outputs=self.net.get_layer('embedding').output)
def Build_Model_Attention_RNN_GRU_Text(word_index, embeddings_index, nclasses=1,
                                       MAX_SEQUENCE_LENGTH=1380,
                                       EMBEDDING_DIM=1380, dropout=0.5):
    model = Sequential()
    hidden_layer = 2
    gru_node = 32
    embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index stay all-zeros
            if len(embedding_matrix[i]) != len(embedding_vector):
                print("could not broadcast input array from shape",
                      str(len(embedding_matrix[i])), "into shape",
                      str(len(embedding_vector)),
                      " Please make sure your EMBEDDING_DIM is equal to the"
                      " dimension of the embedding_vector file (GloVe)")
                exit(1)
            embedding_matrix[i] = embedding_vector
    model.add(Embedding(len(word_index) + 1, EMBEDDING_DIM,
                        weights=[embedding_matrix],
                        input_length=MAX_SEQUENCE_LENGTH,
                        trainable=True))
    model.add(Dropout(0.25))
    # stacked GRU blocks, each followed by self-attention
    for _ in range(hidden_layer):
        model.add(GRU(gru_node, return_sequences=True, recurrent_dropout=0.2))
        model.add(SeqSelfAttention(attention_activation='sigmoid'))
        model.add(Dropout(dropout))
    # final GRU returns a 2D tensor, so no attention can follow it
    model.add(GRU(gru_node, recurrent_dropout=0.2))
    model.add(Dropout(dropout))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nclasses, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
def test_save_load_with_loss(self):
    attention = SeqSelfAttention(return_attention=True,
                                 attention_width=7,
                                 attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
                                 kernel_regularizer=keras.regularizers.l2(1e-4),
                                 bias_regularizer=keras.regularizers.l1(1e-4),
                                 attention_regularizer_weight=1e-3,
                                 name='Attention')
    _, _, token_dict = self.get_input_data()
    model = self.get_model(attention, token_dict)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_self_att_test_sl_with_loss_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=SeqSelfAttention.get_custom_objects())
    model.summary()
    self.assertTrue(model is not None)
def __init__(self, num_lstm_layers, LSTM_hidden_state_dim, num_frames,
             num_freq_bins, num_genres, lyrics_embedding_dimension,
             use_attention, lambda1=0.0, lambda2=0.0):
    self.num_frames = num_frames
    self.num_freq_bins = num_freq_bins
    self.chroma_input = Input(shape=(num_frames, num_freq_bins))
    self.mfcc_input = Input(shape=(num_frames, num_freq_bins))
    self.lyrics_embedding_input = Input(shape=(lyrics_embedding_dimension,))
    self.use_attention = use_attention
    # chroma LSTM and attention
    chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                               return_sequences=True)(self.chroma_input)
    for _ in range(num_lstm_layers - 2):
        chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                                   return_sequences=True)(chroma)
    chroma = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                               return_sequences=self.use_attention)(chroma)
    if self.use_attention:
        chroma = SeqSelfAttention(attention_activation='sigmoid',
                                  attention_width=15,
                                  name='AttentionChroma')(chroma)
        chroma = Dense(10)(chroma)
        chroma = Flatten()(chroma)  # attention output is 3D; flatten before concatenation
    # MFCC LSTM and attention
    mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                             return_sequences=True)(self.mfcc_input)
    for _ in range(num_lstm_layers - 2):
        mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                                 return_sequences=True)(mfcc)
    mfcc = keras.layers.LSTM(units=LSTM_hidden_state_dim,
                             return_sequences=self.use_attention)(mfcc)
    if self.use_attention:
        mfcc = SeqSelfAttention(attention_activation='sigmoid',
                                attention_width=15,
                                name='AttentionMFCC')(mfcc)
        mfcc = Dense(10)(mfcc)
        mfcc = Flatten()(mfcc)
    # concatenating the mfcc, chroma, and lyrics-embedding features
    x = concatenate([mfcc, chroma, self.lyrics_embedding_input])
    # putting the features through a couple of fully connected layers
    x = Dense(128, activation='relu',
              kernel_regularizer=l1_l2(l1=lambda1, l2=lambda2),
              bias_regularizer=l1_l2(l1=lambda1, l2=lambda2))(x)
    x = Dense(128, activation='relu',
              kernel_regularizer=l1_l2(l1=lambda1, l2=lambda2),
              bias_regularizer=l1_l2(l1=lambda1, l2=lambda2))(x)
    latent_embedding = Dense(50, activation='relu', name='embedding',
                             kernel_regularizer=l1_l2(l1=lambda1, l2=lambda2),
                             bias_regularizer=l1_l2(l1=lambda1, l2=lambda2))(x)
    genre = Dense(num_genres, activation='softmax',
                  kernel_regularizer=l1_l2(l1=lambda1, l2=lambda2),
                  bias_regularizer=l1_l2(l1=lambda1, l2=lambda2))(latent_embedding)
    self.net = Model(inputs=[self.chroma_input, self.mfcc_input,
                             self.lyrics_embedding_input],
                     outputs=genre)
    self.embedding = Model(self.net.input,
                           outputs=self.net.get_layer('embedding').output)
def create_model(train_x):
    model = Sequential()
    model.add(CuDNNLSTM(32, input_shape=train_x.shape[1:], return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(CuDNNLSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    # keep the sequence dimension so SeqSelfAttention receives 3D input
    model.add(CuDNNLSTM(32, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Flatten())
    model.add(Dense(32, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation="softmax"))
    return model
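# Usage sketch for create_model (illustrative only): CuDNNLSTM requires a
# GPU build of TensorFlow 1.x / standalone Keras. The returned model is
# uncompiled, so a compile step with assumed loss and optimizer is shown.
import numpy as np

train_x_demo = np.random.rand(16, 60, 8).astype('float32')  # (samples, timesteps, features)
demo = create_model(train_x_demo)
demo.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
             metrics=['accuracy'])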
def test_return_attention(self):
    attention = SeqSelfAttention(return_attention=True,
                                 kernel_regularizer=keras.regularizers.l2(1e-4),
                                 bias_regularizer=keras.regularizers.l1(1e-4),
                                 name='Attention')
    self.check_mask_shape(attention)
def SARNNKerasCPU(embeddingMatrix=None, embed_size=400, max_features=20000, maxlen=100):
    inp = Input(shape=(maxlen,))
    # only pass pretrained weights when an embedding matrix is supplied
    if embeddingMatrix is not None:
        x = Embedding(input_dim=max_features, output_dim=embed_size,
                      weights=[embeddingMatrix])(inp)
    else:
        x = Embedding(input_dim=max_features, output_dim=embed_size)(inp)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqSelfAttention(
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_regularizer_weight=1e-4,
    )(x)
    x = Dropout(0.5)(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqWeightedAttention()(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', f1])
    return model
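# A short training sketch for SARNNKerasCPU, assuming a random embedding
# matrix and toy data; the `f1` metric referenced in compile() must already
# be defined in the module, as the function expects.
import numpy as np

emb_demo = np.random.rand(20000, 400).astype('float32')
sarnn = SARNNKerasCPU(embeddingMatrix=emb_demo)
x_demo = np.random.randint(0, 20000, size=(8, 100))
y_demo = np.random.randint(0, 2, size=(8, 1))
sarnn.fit(x_demo, y_demo, epochs=1, batch_size=4)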
def build_model(self, config):
    self.input = Input(shape=(self.max_sequence_length,), name="word_input")
    self.word_emb = self.embedding_layer()(self.input)
    # return_sequences=True so SeqSelfAttention receives a 3D tensor
    self.lstm_l = Bidirectional(LSTM(units=config.units,
                                     dropout=config.lstm_Dropout,
                                     return_sequences=True))(self.word_emb)
    self.self_attention = SeqSelfAttention(attention_activation='sigmoid')(self.lstm_l)
    self.self_attention_flatten = Flatten()(self.self_attention)
    self.aux_input = Input(shape=(3,), name="aux_input")
    self.concat = concatenate([self.self_attention_flatten, self.aux_input])
    self.hidden_2 = Dense(config.Dense, activation="relu")(self.concat)
    self.hidden_2 = Dropout(config.Dropout_1)(self.hidden_2)
    self.hidden_3 = Dense(config.Dense_1, activation="relu")(self.hidden_2)
    self.hidden_3 = Dropout(config.Dropout_2)(self.hidden_3)
    # softmax pairs with the categorical cross-entropy loss below
    self.output_layer = Dense(3, activation="softmax", name="output")(self.hidden_3)
    self.model = Model(inputs=[self.input, self.aux_input],
                       outputs=[self.output_layer], name="cnn")
    self.model.compile(optimizer="adam", loss="categorical_crossentropy",
                       metrics=["accuracy", f1])
def createModel(vocabSize, srcLength=500, sumLength=100, wordEmbDim=128,
                contextVecLen=128):
    # source-text input
    inputs = Input(shape=(srcLength,))
    emb = Embedding(vocabSize, wordEmbDim, mask_zero=True)(inputs)
    encLSTM = Bidirectional(LSTM(units=contextVecLen, return_sequences=True))(emb)
    att = SeqSelfAttention(attention_width=10,
                           attention_activation="sigmoid",
                           attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
                           kernel_regularizer=l2(1e-4),
                           bias_regularizer=l1(1e-4),
                           attention_regularizer_weight=1e-4,
                           name="Attn")(encLSTM)
    # transpose, condense the time axis from srcLength to sumLength, transpose back
    trnsp1 = Lambda(permute)(att)
    condense = Dense(sumLength)(trnsp1)
    trnsp2 = Lambda(permute)(condense)
    # decoder output
    decLSTM = LSTM(units=contextVecLen, return_sequences=True)(trnsp2)
    dense = TimeDistributed(Dense(vocabSize, activation='relu'))(decLSTM)
    sftmx = TimeDistributed(Dense(vocabSize, activation='softmax'))(dense)
    # encoder + decoder
    model = Model(inputs=inputs, outputs=sftmx)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
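# createModel assumes a module-level `permute` helper used inside the Lambda
# layers to swap the time and feature axes, so the Dense layer can condense
# srcLength timesteps down to sumLength. A plausible sketch of that helper
# (an assumption, not the original code):
from keras import backend as K

def permute(x):
    # (batch, timesteps, features) -> (batch, features, timesteps)
    return K.permute_dimensions(x, (0, 2, 1))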
def create_network_add_weights(network_input, n_vocab, wd):
    """Create the structure of the neural network and load pretrained weights."""
    model = Sequential()
    # input_shape goes on the first layer, which is the Bidirectional wrapper,
    # not the inner LSTM; the shape is (n_time_steps, n_features)
    model.add(Bidirectional(LSTM(512, return_sequences=True),
                            input_shape=(network_input.shape[1],
                                         network_input.shape[2])))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(Flatten())  # collapse the sequence axis before the dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    wd = wd + '-weights.hdf5'
    print(wd)
    # load the weights into each node
    model.load_weights(wd)
    return model
def load_model_attention(model_path):
    """
    Load a pretrained recurrent network with an attention mechanism.

    Parameters
    ----------
    model_path : str
        Path prefix of the pretrained attention model (without extension).

    Returns
    -------
    Pretrained, compiled model with an attention mechanism.
    """
    from keras_self_attention import SeqSelfAttention
    import keras
    with open(model_path + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = keras.models.model_from_json(
        loaded_model_json,
        custom_objects=SeqSelfAttention.get_custom_objects())
    # load weights into the new model
    model.load_weights(model_path + '.h5')
    print("trained model loaded")
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
def createModel(vocab_size, tag_size, max_len, emb_matrix=None):
    inputs = Input(shape=(max_len,))
    if emb_matrix is None:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          input_length=max_len)(inputs)
    else:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          weights=[emb_matrix],
                          input_length=max_len,
                          trainable=False)(inputs)
    model = Dropout(0.1)(model)
    model = Bidirectional(LSTM(units=param.LSTM_UNITS, return_sequences=True,
                               recurrent_dropout=0.1))(model)
    model = SeqSelfAttention(attention_activation='sigmoid')(model)
    model = TimeDistributed(Dense(tag_size, activation="softmax"))(model)
    crf = CRF(tag_size, sparse_target=False)
    out = crf(model)
    model = Model(inputs, out)
    model.compile(optimizer="adam", loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
def createModel(embedding_matrix):
    sequence_input = Input(shape=(101, 84), name='sequence_input')
    sequence = Convolution1D(filters=128, kernel_size=3,
                             padding='same')(sequence_input)
    sequence = BatchNormalization(axis=-1)(sequence)
    sequence = Activation('swish')(sequence)
    profile_input = Input(shape=(101,), name='profile_input')
    embedding = Embedding(input_dim=embedding_matrix.shape[0],
                          output_dim=embedding_matrix.shape[1],
                          weights=[embedding_matrix],
                          trainable=False)(profile_input)
    profile = Convolution1D(filters=128, kernel_size=3, padding='same')(embedding)
    profile = BatchNormalization(axis=-1)(profile)
    profile = Activation('swish')(profile)
    mergeInput = Concatenate(axis=-1)([sequence, profile])
    overallResult = MultiScale(mergeInput)
    overallResult = AveragePooling1D(pool_size=5)(overallResult)
    overallResult = Dropout(0.3)(overallResult)
    overallResult = Bidirectional(GRU(120, return_sequences=True))(overallResult)
    overallResult = SeqSelfAttention(attention_activation='sigmoid',
                                     name='Attention')(overallResult)
    overallResult = Flatten()(overallResult)
    overallResult = Dense(101, activation='swish')(overallResult)
    ss_output = Dense(2, activation='softmax', name='ss_output')(overallResult)
    return Model(inputs=[sequence_input, profile_input], outputs=[ss_output])
def build_duration_policy_network(self):
    advantages = Input(shape=[1])
    packets_input = Input(shape=(None, 1))
    durations_input = Input(shape=(None, 1))
    merged = concatenate([packets_input, durations_input])
    lstm1 = LSTM(self.lstm_units, return_sequences=True)(merged)
    attention = SeqSelfAttention(attention_activation='sigmoid')(lstm1)
    lstm2 = LSTM(self.lstm_units)(attention)
    hidden_dense = Dense(self.dense_units, activation='relu')(lstm2)
    # two outputs: the mean and the log standard deviation of the duration
    duration_output = Dense(2, activation='relu', name="d_output")(hidden_dense)

    # negative log-likelihood of a Gaussian, weighted by the advantages
    def duration_loss(y_true, y_pred):
        n_dims = int(int(y_pred.shape[1]) / 2)
        mu = y_pred[:, 0:n_dims]
        logsigma = y_pred[:, n_dims:]
        mse = -0.5 * K.sum(K.square((y_true - mu) / K.exp(logsigma)), axis=1)
        sigma_trace = -K.sum(logsigma, axis=1)
        log2pi = -0.5 * n_dims * np.log(2 * np.pi)
        log_likelihood = mse + sigma_trace + log2pi
        return K.mean(-log_likelihood * advantages)

    duration_policy = Model(inputs=[packets_input, durations_input, advantages],
                            outputs=duration_output)
    duration_policy.compile(optimizer=Adam(lr=self.lr), loss=duration_loss)
    duration_predictor = Model(inputs=[packets_input, durations_input],
                               outputs=duration_output)
    return duration_policy, duration_predictor
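# A quick NumPy sanity check of the Gaussian negative log-likelihood used in
# duration_loss above, with illustrative values: for y=1.0, mu=0.5, and
# logsigma=0 (unit variance), the NLL is about 1.044.
import numpy as np

def gaussian_nll(y_true, mu, logsigma):
    mse = -0.5 * np.sum(np.square((y_true - mu) / np.exp(logsigma)), axis=1)
    sigma_trace = -np.sum(logsigma, axis=1)
    log2pi = -0.5 * y_true.shape[1] * np.log(2 * np.pi)
    return -(mse + sigma_trace + log2pi)

print(gaussian_nll(np.array([[1.0]]), np.array([[0.5]]), np.array([[0.0]])))  # ~[1.044]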
def pooled_attention_model(embedding_matrix, cell_size=256):
    # the embedding matrix is required: its shape defines the embedding layer
    vocab_size, embedding_dim = embedding_matrix.shape
    # Input layer
    x_input = Input(shape=(CLIP_LENGTH,))
    # Embedding layer
    embed = Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                      input_length=CLIP_LENGTH, weights=[embedding_matrix],
                      trainable=True)(x_input)
    drop = SpatialDropout1D(0.3)(embed)
    bi_rnn = Bidirectional(GRU(cell_size, return_sequences=True))(drop)
    att = SeqSelfAttention(attention_activation="sigmoid")(bi_rnn)
    # pool the attended sequence with both average and max pooling
    pool = concatenate([GlobalAveragePooling1D()(att),
                        GlobalMaxPooling1D()(att)])
    y_pred = Dense(1, activation="sigmoid")(pool)
    # Compile model
    model = Model(inputs=x_input, outputs=y_pred)
    opt = keras.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999,
                                epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])
    return model
def load_and_evaluate_rnn(args):
    complete_path = constants.SAVED_RNN_MODELS_DIR + args.model_file
    print("EVALUATING MODEL %s..." % args.model_file)
    rnn = load_model(complete_path,
                     custom_objects=SeqSelfAttention.get_custom_objects())
    _, _, x_val, _, y_val, _ = get_rnn_data(args)
    predict_validation(rnn, x_val, y_val)
def build_word_encoder(self):
    """Build the word encoder.

    The model starts with an Input tensor, passes through an Embedding
    layer, a Bidirectional GRU, a self-attention layer, and a
    TimeDistributed Dense layer, and ends with word-level Attention.

    Returns:
        Model, a model that maps sent_input to word attention.
    """
    sent_input = Input(shape=(self.max_sent_length,), dtype='float32')
    embedded_sent = Embedding(self.embedding_matrix.shape[0],
                              self.embedding_matrix.shape[1],
                              weights=[self.embedding_matrix],
                              input_length=self.max_sent_length,
                              trainable=True)(sent_input)
    # for Bidirectional, divide the target dimension by 2
    encoded_sent = Bidirectional(GRU(int(self.word_embed_dim / 2),
                                     return_sequences=True))(embedded_sent)
    selfattention_word = SeqSelfAttention(attention_activation='sigmoid',
                                          attention_width=5)(encoded_sent)
    input_merged_1 = Concatenate()([embedded_sent, selfattention_word])
    # TODO: check if the Dense inside TimeDistributed is still needed
    dense_sent = TimeDistributed(Dense(DENSE_SIZE))(input_merged_1)
    word_att = Attention(name='word_attention')(dense_sent)
    return Model(sent_input, word_att)
def build_model_arc(self):
    """Build the model architecture."""
    output_dim = len(self.pre_processor.label2idx)
    config = self.hyper_parameters
    embed_model = self.embedding.embed_model
    layer_blstm = L.Bidirectional(L.LSTM(**config['layer_blstm']),
                                  name='layer_blstm')
    layer_self_attention = SeqSelfAttention(**config['layer_self_attention'],
                                            name='layer_self_attention')
    layer_dropout = L.Dropout(**config['layer_dropout'],
                              name='layer_dropout')
    layer_time_distributed = L.TimeDistributed(
        L.Dense(output_dim, **config['layer_time_distributed']),
        name='layer_time_distributed')
    layer_activation = L.Activation(**config['layer_activation'])

    tensor = layer_blstm(embed_model.output)
    tensor = layer_self_attention(tensor)
    tensor = layer_dropout(tensor)
    tensor = layer_time_distributed(tensor)
    output_tensor = layer_activation(tensor)

    self.tf_model = keras.Model(embed_model.inputs, output_tensor)
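# build_model_arc pulls per-layer kwargs from self.hyper_parameters. A
# plausible shape for that dict, inferred from the keys used above (the
# concrete values here are assumptions for illustration):
hyper_parameters_demo = {
    'layer_blstm': {'units': 128, 'return_sequences': True},
    'layer_self_attention': {'attention_activation': 'sigmoid'},
    'layer_dropout': {'rate': 0.5},
    'layer_time_distributed': {'activation': 'linear'},
    'layer_activation': {'activation': 'softmax'},
}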
def test_same_as_brute(self):
    batch_size, sentence_len, feature_dim, units = 2, 3, 5, 7
    test_x = numpy.random.rand(batch_size, sentence_len, feature_dim)
    seed = random.randint(0, 1000)

    self._reset_seed(seed)
    inp = keras.layers.Input((sentence_len, feature_dim))
    att = SeqSelfAttention(units=units,
                           kernel_initializer='glorot_normal',
                           bias_initializer='zeros')
    out = att(inp)
    model = keras.models.Model(inp, out)
    predict_1 = model.predict(test_x)
    self.assertEqual((batch_size, sentence_len, feature_dim), predict_1.shape)

    self._reset_seed(seed)
    inp = keras.layers.Input((sentence_len, feature_dim))
    att = SelfAttentionBrute(units=units)
    out = att(inp)
    model = keras.models.Model(inp, out)
    predict_2 = model.predict(test_x)
    self.assertEqual((batch_size, sentence_len, feature_dim), predict_2.shape)

    self.assertTrue(numpy.allclose(predict_1, predict_2))
def model_attention(CL, input_shape):
    X_in = Input(input_shape)
    X_att = SeqSelfAttention(attention_width=CL,
                             attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
                             attention_activation=None)(X_in)
    return Model(X_in, X_att)
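# Usage sketch for model_attention (illustrative shapes): multiplicative
# self-attention preserves the input shape, so the output matches the input.
import numpy as np

att_demo = model_attention(CL=5, input_shape=(10, 16))
x_demo = np.random.rand(2, 10, 16).astype('float32')
print(att_demo.predict(x_demo).shape)  # (2, 10, 16)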
def multi_head(n_head, at_units, width, pre_layer, activation='sigmoid'):
    # apply n_head independent self-attention layers to the same input and
    # concatenate their outputs along the feature axis
    layers_list = []
    for _ in range(n_head):
        self_at = SeqSelfAttention(units=at_units, attention_width=width,
                                   attention_activation=activation)(pre_layer)
        layers_list.append(self_at)
    return layers.Concatenate()(layers_list)
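# Usage sketch for multi_head, assuming a 3D tensor such as the output of a
# recurrent layer; each head preserves the input shape, so two heads
# concatenated along the feature axis double its width.
from keras import layers, models

inp_demo = layers.Input(shape=(20, 32))
heads = multi_head(n_head=2, at_units=16, width=5, pre_layer=inp_demo)
demo_mh = models.Model(inp_demo, heads)
demo_mh.summary()  # final feature axis is 64 = 2 heads x 32 features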
def createModel(self, text):
    self.embeddings_index = {}
    with open(os.path.join(GLOVE_DIR, 'glove.840B.300d.txt'),
              encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # GloVe 840B tokens may contain spaces, so take everything but
            # the last 300 fields as the word
            word = ''.join(values[:-300])
            coefs = np.asarray(values[-300:], dtype='float32')
            self.embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(self.embeddings_index))
    tokenizer = Tokenizer(num_words=self.MAX_NB_WORDS, lower=False)
    tokenizer.fit_on_texts(text)
    self.word_index = tokenizer.word_index
    pickle.dump(self.word_index, open("../Models/DeId/word_index.pkl", 'wb'))
    self.embedding_matrix = np.zeros((len(self.word_index) + 1,
                                      self.EMBEDDING_DIM))
    print(self.embedding_matrix.shape)
    for word, i in self.word_index.items():
        embedding_vector = self.embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index stay all-zeros
            self.embedding_matrix[i] = embedding_vector
    self.embedding_layer = Embedding(len(self.word_index) + 1,
                                     self.EMBEDDING_DIM,
                                     weights=[self.embedding_matrix],
                                     input_length=70,
                                     trainable=True)
    self.model = Sequential()
    self.model.add(self.embedding_layer)
    # merge_mode options for Bidirectional: {'sum', 'mul', 'concat', 'ave', None}
    self.model.add(Bidirectional(LSTM(150, dropout=0.3, recurrent_dropout=0.6,
                                      return_sequences=True)))
    self.model.add(Bidirectional(LSTM(60, dropout=0.2, recurrent_dropout=0.5,
                                      return_sequences=True)))
    self.model.add(SeqSelfAttention(attention_activation='sigmoid',
                                    attention_width=12))
    # a per-token dense layer, as suggested by neuralNer
    self.model.add(TimeDistributed(Dense(9, activation='softmax')))
    self.model.compile(loss="categorical_crossentropy", optimizer='rmsprop',
                       metrics=['accuracy'])
    self.model.summary()
def build_sent_encoder(self):
    """Build the sentence encoder.

    Encodes each sentence with the time-distributed word encoder, then
    applies a Bidirectional GRU, self-attention, and a TimeDistributed
    Dense layer before the sentence-level Attention.

    Returns:
        Model, a model that maps text_input to sentence attention weights.
    """
    text_input = Input(shape=(self.max_sent_num, self.max_sent_length,))
    # encode sentences into a single vector per sentence
    self.model_word = self.build_word_encoder()
    # time-distribute the word model to accept text input
    sent_encoder = TimeDistributed(self.model_word)(text_input)
    # for Bidirectional, divide the target dimension by 2
    encoded_text = Bidirectional(GRU(int(self.sent_embed_dim / 2),
                                     return_sequences=True))(sent_encoder)
    selfattention_sent = SeqSelfAttention(attention_activation='sigmoid',
                                          attention_width=5)(encoded_text)
    input_merged_2 = Concatenate()([sent_encoder, selfattention_sent])
    dense_text = TimeDistributed(Dense(DENSE_SIZE))(input_merged_2)
    doc_att = Attention(name='sent_attention')(dense_text)
    return Model(text_input, doc_att)
def multitask_attention_model(output_size, pos_vocab_size, lex_vocab_size,
                              config_params, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512
    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]
    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)
    # renamed from `in_mask` to avoid shadowing the BERT attention mask above
    candidate_mask = Input(shape=(None, output_size), batch_size=batch_size,
                           name='Candidate_Synsets_Mask')
    bert_inputs.append(candidate_mask)
    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True),
                           merge_mode='sum')(bert_output)
    attention = SeqSelfAttention(units=128, attention_activation='sigmoid',
                                 name='Attention')(bilstm)
    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, candidate_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)
    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)
    model = Model(inputs=bert_inputs,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_BiLSTM_ATT_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])
    visualize_plot_mdl(visualize, plot, model)
    return model
def test_save_load(self):
    _, _, token_dict = self.get_input_data()
    model = self.get_model(SeqSelfAttention(name='Attention'), token_dict)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_self_att_test_save_load_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects={'SeqSelfAttention': SeqSelfAttention})
    model.summary()
    self.assertTrue(model is not None)