def _test_save_load(self, attention):
    inputs = keras.layers.Input(shape=(None,), name='Input')
    embd = keras.layers.Embedding(input_dim=3,
                                  output_dim=5,
                                  mask_zero=True,
                                  name='Embedding')(inputs)
    lstm = keras.layers.Bidirectional(keras.layers.LSTM(units=7, return_sequences=True),
                                      name='Bi-LSTM')(embd)
    if attention.return_attention:
        layer, weights = attention(lstm)
    else:
        layer = attention(lstm)
    dense = keras.layers.Dense(units=2, activation='softmax', name='Softmax')(layer)
    loss = {'Softmax': 'sparse_categorical_crossentropy'}
    if attention.return_attention:
        outputs = [dense, weights]
        loss[attention.name] = 'mse'
    else:
        outputs = dense
    model = keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss=loss)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_weighted_att_test_sl_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=Attention.get_custom_objects())
    model.summary(line_length=100)
    if attention.return_attention:
        self.assertEqual(2, len(model.outputs))
    else:
        self.assertEqual(1, len(model.outputs))
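# A hedged usage sketch for the helper above. Its `self` parameter and
# assertions suggest it belongs on a unittest.TestCase; the class and method
# names below are illustrative assumptions, not part of the original code.
import unittest

from keras_self_attention import SeqWeightedAttention as Attention


class SeqWeightedAttentionSaveLoadTest(unittest.TestCase):

    def test_save_load_with_attention_weights(self):
        _test_save_load(self, Attention(return_attention=True, name='Attention'))

    def test_save_load_without_attention_weights(self):
        _test_save_load(self, Attention(return_attention=False, name='Attention'))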
def lstm(x_dim) -> Model:
    model = Sequential()
    model.add(InputLayer(input_shape=(x_dim, 1)))
    model.add(LSTM(256, return_sequences=True))
    model.add(SeqWeightedAttention())
    return model
def SARNNKerasCPU(embeddingMatrix=None, embed_size=400, max_features=20000, maxlen=100):
    inp = Input(shape=(maxlen, ))
    x = Embedding(input_dim=max_features,
                  output_dim=embed_size,
                  weights=[embeddingMatrix])(inp)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqSelfAttention(
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_regularizer_weight=1e-4,
    )(x)
    x = Dropout(0.5)(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqWeightedAttention()(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1])
    return model
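# A minimal, hedged usage sketch for SARNNKerasCPU. It assumes the module's own
# f1 metric is defined; the random embedding matrix and dummy batch below are
# illustrative only.
import numpy as np

max_features, embed_size, maxlen = 20000, 400, 100
embedding_matrix = np.random.rand(max_features, embed_size).astype('float32')
model = SARNNKerasCPU(embeddingMatrix=embedding_matrix,
                      embed_size=embed_size,
                      max_features=max_features,
                      maxlen=maxlen)
X = np.random.randint(0, max_features, size=(32, maxlen))  # dummy token-id batch
y = np.random.randint(0, 2, size=(32, 1))                  # dummy binary labels
model.fit(X, y, batch_size=8, epochs=1)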
def HARNN(embeddingMatrix=None,
          embed_size=400,
          max_features=20000,
          max_nb_sent=3,
          max_sent_len=40,
          use_fasttext=False,
          trainable=True,
          use_additive_emb=False):
    if use_fasttext:
        sent_inp = Input(shape=(max_sent_len, embed_size))
        embed = sent_inp
    else:
        sent_inp = Input(shape=(max_sent_len, ))
        embed = Embedding(input_dim=max_features,
                          output_dim=embed_size,
                          weights=[embeddingMatrix],
                          trainable=trainable)(sent_inp)
    if use_additive_emb:
        embed = AdditiveLayer()(embed)
        embed = Dropout(0.5)(embed)

    # Word-level encoder: BiLSTM + weighted attention pooling per sentence
    word_lstm = Bidirectional(CuDNNLSTM(128, return_sequences=True))(embed)
    word_att = SeqWeightedAttention()(word_lstm)
    word_att = Dropout(0.5)(word_att)
    sent_encoder = Model(sent_inp, word_att)
    plot_model(sent_encoder,
               to_file='{}.png'.format("HARNN1"),
               show_shapes=True,
               show_layer_names=True)

    # Sentence-level encoder over the per-sentence vectors
    doc_input = Input(shape=(max_nb_sent, max_sent_len))
    doc_encoder = TimeDistributed(sent_encoder)(doc_input)
    sent_lstm = Bidirectional(CuDNNLSTM(128, return_sequences=True))(doc_encoder)
    sent_att = SeqWeightedAttention()(sent_lstm)
    sent_att = Dropout(0.5)(sent_att)
    preds = Dense(1, activation="sigmoid")(sent_att)

    model = Model(inputs=doc_input, outputs=preds)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1])
    return model
def new_architecture(x_len, vocab_size, layers, bi, att, cells):
    inputs = keras.layers.Input(shape=(
        x_len,
        vocab_size,
    ), name='Input')
    prev = inputs
    for i in range(layers):
        ret_seq = True
        if i == layers - 1 and not att:
            ret_seq = False
        this_layer = keras.layers.LSTM(cells,
                                       dropout=0.4,
                                       name='LSTM_%s' % i,
                                       return_sequences=ret_seq)
        if bi:
            this_layer = Bidirectional(this_layer, name='bi_%s' % i)
        prev = this_layer(prev)
    if att:
        attention = SeqWeightedAttention(return_attention=False, name='Attention')
        prev = attention(prev)
    dense = keras.layers.Dense(vocab_size, activation='softmax', name="dense_outputs")(prev)
    model = keras.Model(inputs=inputs, outputs=[dense])
    # lstm = Bidirectional(
    #     keras.layers.LSTM(
    #         cells,
    #         dropout=0.4,
    #         name="LSTM",
    #         return_sequences=True),
    #     name="bi1"
    # )(inputs)
    # lstm2 = Bidirectional(
    #     keras.layers.LSTM(
    #         cells,
    #         dropout=0.4,
    #         name="LSTM2",
    #         return_sequences=True),
    #     name="bi2"
    # )(lstm)
    return model
def build_model_attention():
    model = Sequential()
    model.add(
        Bidirectional(
            LSTM(units=128, dropout=0.5, recurrent_dropout=0.5, return_sequences=True)))
    # model.add(Bidirectional(LSTM(128, recurrent_dropout=0.5)))
    model.add(SeqWeightedAttention())
    model.add(Dense(1, activation=sigmoid))
    model.compile(loss=losses.binary_crossentropy,
                  optimizer=Adam(1e-5),
                  metrics=['accuracy'])
    return model
def get_model(args):
    model = None
    modelname = args.model_id
    # workaround for getting vis for attention
    if modelname == "model_folk100k_melody_2lstm32_attention":
        # (100000, 64, 58)
        model = att_model(32, False, 2, True)
    elif modelname == "model_folk100k_melody_bi3lstm64_attention":
        model = att_model(64, True, 3, True)
    else:
        json_model = open(os.path.join(modelname, "model.json"), "r").read()
        model = keras.models.model_from_json(
            json_model,
            custom_objects=SeqWeightedAttention.get_custom_objects())
    model.load_weights(os.path.join(modelname, "model.h5"))
    print(model.summary(line_length=100))
    return model
def att_model(cells, bi, layers, att):
    """ hardcoded model for vis. attention """
    # cells = 64
    vocab_size = 58
    # bi = True
    # layers = 3
    # att = True
    inputs = keras.layers.Input(shape=(
        63,
        58,
    ), name='Input')
    prev = inputs
    for i in range(layers):
        ret_seq = True
        if i == layers - 1 and not att:
            ret_seq = False
        this_layer = keras.layers.LSTM(cells,
                                       dropout=0.4,
                                       name='LSTM_%s' % i,
                                       return_sequences=ret_seq)
        if bi:
            this_layer = keras.layers.Bidirectional(this_layer, name='bi_%s' % i)
        prev = this_layer(prev)
    attention = SeqWeightedAttention(return_attention=True, name='Attention')
    attention_layer, attention = attention(prev)
    dense = keras.layers.Dense(vocab_size,
                               activation='softmax',
                               name="dense_outputs")(attention_layer)
    outputs = [dense, attention]
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss={
                      'dense_outputs': 'categorical_crossentropy',
                      'Attention': attention_loss(1e-4)
                  })
    return model
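# `attention_loss` is referenced above but not defined in this listing. One
# plausible, purely hypothetical form is a factory returning a loss that
# penalises the squared attention weights (scaled by `weight`), so the
# 'Attention' output can be trained against a dummy all-zeros target.
import keras.backend as K


def attention_loss(weight):
    def loss(y_true, y_pred):
        # y_true is a dummy target; only the predicted attention weights
        # are regularised here.
        return weight * K.mean(K.square(y_pred))
    return loss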
def SARNNKeras(embeddingMatrix=None,
               embed_size=400,
               max_features=20000,
               maxlen=100,
               rnn_type=CuDNNLSTM,
               use_fasttext=False,
               trainable=True,
               use_additive_emb=False):
    if use_fasttext:
        inp = Input(shape=(maxlen, embed_size))
        x = inp
    else:
        inp = Input(shape=(maxlen, ))
        x = Embedding(input_dim=max_features,
                      output_dim=embed_size,
                      weights=[embeddingMatrix],
                      trainable=trainable)(inp)
    if use_additive_emb:
        x = AdditiveLayer()(x)
        x = Dropout(0.5)(x)

    x = Bidirectional(rnn_type(128, return_sequences=True))(x)
    x = SeqSelfAttention(
        # attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_regularizer_weight=1e-4,
    )(x)
    # x = LayerNormalization()(x)
    x = Dropout(0.5)(x)
    x = Bidirectional(rnn_type(128, return_sequences=True))(x)
    x = SeqWeightedAttention()(x)
    # x = LayerNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1])
    return model
def get_model(args, dshape):
    model_dir = get_model_dir(args)
    model = None
    loss = 'categorical_crossentropy'
    optimizer = keras.optimizers.Adam(lr=0.005)
    if args.new:
        print('generating NEW model...')
        model = new_architecture(dshape[1] - 1, dshape[2],
                                 args.layers, args.bi, args.att, args.cells)
        # copy arch to folder
        shutil.copy('architecture.py', model_dir)
        model_json = model.to_json()
        model_json_path = os.path.join(model_dir, "model.json")
        print('storing model json in %s' % model_json_path)
        with open(model_json_path, "w") as json_file:
            json_file.write(model_json)
        # delete epoch counters
        delete_epoch_counters(model_dir)
        model.compile(loss=loss, optimizer=optimizer)
    else:
        print('using existing model...')
        model_json_path = os.path.join(model_dir, "model.json")
        model = keras.models.model_from_json(
            open(model_json_path, "r").read(),
            custom_objects=SeqWeightedAttention.get_custom_objects())
        model_weights_path = os.path.join(model_dir, "model.h5")
        print('loading existing weights from %s...' % model_weights_path)
        model.load_weights(model_weights_path)
        model.compile(loss=loss, optimizer=optimizer)
    print(model.summary())
    return model, model_dir
def build_model(embeddings_size):
    # Inputs
    q1_embeddings_input = Input(shape=(
        None,
        embeddings_size,
    ), name='q1_word_embeddings')
    q2_embeddings_input = Input(shape=(
        None,
        embeddings_size,
    ), name='q2_word_embeddings')

    # RNN
    word_lstm1 = Bidirectional(
        ONLSTM(units=256,
               chunk_size=8,
               dropout=args.dropout_rate,
               return_sequences=True,
               kernel_initializer='glorot_normal'))
    q1_word_lstm1 = word_lstm1(q1_embeddings_input)
    q2_word_lstm1 = word_lstm1(q2_embeddings_input)

    word_lstm2 = Bidirectional(
        ONLSTM(units=256,
               chunk_size=8,
               dropout=args.dropout_rate,
               return_sequences=True,
               kernel_initializer='glorot_normal'))
    q1_word_lstm2 = word_lstm2(q1_word_lstm1)
    q2_word_lstm2 = word_lstm2(q2_word_lstm1)

    word_attention = SeqWeightedAttention()
    q1_word_attention = word_attention(q1_word_lstm2)
    q2_word_attention = word_attention(q2_word_lstm2)

    # Concatenate
    subtract = Subtract()([q1_word_attention, q2_word_attention])
    multiply_subtract = Multiply()([subtract, subtract])

    # Fully Connected
    dense1 = Dropout(args.dropout_rate)(
        Dense(units=1024, activation='relu',
              kernel_initializer='glorot_normal')(multiply_subtract))
    dense2 = Dropout(args.dropout_rate)(
        Dense(units=512, activation='relu',
              kernel_initializer='glorot_normal')(dense1))
    dense3 = Dropout(args.dropout_rate)(
        Dense(units=256, activation='relu',
              kernel_initializer='glorot_normal')(dense2))
    dense4 = Dropout(args.dropout_rate)(
        Dense(units=128, activation='relu',
              kernel_initializer='glorot_normal')(dense3))

    # Predict
    output = Dense(units=1, activation='sigmoid',
                   kernel_initializer='glorot_normal')(dense4)

    model = Model([q1_embeddings_input, q2_embeddings_input], output)
    model.compile(optimizer=Adam(lr=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy', f1])
    model.summary()
    return model
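# Hedged usage sketch for the two-question model above. The batch size, token
# count and embedding size are illustrative; `args.dropout_rate`, the ONLSTM
# layer and the `f1` metric are assumed to come from the surrounding module.
import numpy as np

embeddings_size = 300
model = build_model(embeddings_size)
q1 = np.random.rand(8, 25, embeddings_size).astype('float32')  # 8 question pairs, 25 tokens each
q2 = np.random.rand(8, 25, embeddings_size).astype('float32')
labels = np.random.randint(0, 2, size=(8, 1))
model.fit([q1, q2], labels, epochs=1, batch_size=8)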
for category in categories:
    x = []
    for sentence in brown.sents(categories=[category]):
        _tokens = tokenizer.texts_to_sequences(sentence[:10])
        if len(_tokens) < 10:
            _tokens = _tokens + [[0]] * (10 - len(_tokens))
        x.append(np.array(_tokens))
        if len(x) == 3:
            X.append(np.array(x))
            x = []
            Y.append(to_categorical(categories.index(category), num_classes=3))

print(np.array(X).shape)

input = Input(shape=(3, 10, 1))
layer = TimeDistributed(lstm(10))(input)
layer = LSTM(256, return_sequences=True)(layer)
layer = SeqWeightedAttention()(layer)
layer = Dense(3)(layer)
layer = Activation('softmax')(layer)

model = Model(input, layer)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['categorical_accuracy'])
model.fit(np.array(X), np.array(Y), epochs=10)
def create_model_template(layers: int,
                          units,
                          shape,
                          use_attention_first=False,
                          use_attention_middle=False,
                          lr=3e-4,
                          optimizer='Adam',
                          dropout=0.2,
                          dropout_last_only=False):
    model_metrics = [
        metrics.BinaryAccuracy(name='acc'),
        metrics.Precision(name='precision'),
        metrics.Recall(name='recall'),
        metrics.AUC(name='auc')
    ]
    model = Sequential()
    if not isinstance(units, list):
        units = [units] * layers
    elif len(units) < layers:
        units = [units[0]] * layers
    model.add(
        Bidirectional(LSTM(units[0],
                           return_sequences=layers > 1 or use_attention_first),
                      input_shape=shape))
    # model.add(Bidirectional(tfa.rnn.cell.LayerNormLSTMCell(units[0], return_sequences=layers > 1), input_shape=shape))
    if use_attention_first:
        if layers > 1:
            model.add(SeqSelfAttention())
        else:
            model.add(SeqWeightedAttention())
    for i in range(1, layers):
        if use_attention_middle:
            model.add(SeqSelfAttention())
        if dropout_last_only is False:
            model.add(Dropout(dropout))
        model.add(
            Bidirectional(
                LSTM(units[i],
                     return_sequences=layers > i + 1 or use_attention_middle)))
    if use_attention_middle:
        model.add(SeqWeightedAttention())
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))

    # Resolve the optimizer name into an optimizer instance.
    if optimizer == 'SGD':
        optimizer = SGD(lr=lr)
    elif optimizer == 'RMSprop':
        optimizer = RMSprop(lr=lr)
    elif optimizer == 'Adadelta':
        optimizer = Adadelta(lr=lr)
    elif optimizer == 'Adagrad':
        optimizer = Adagrad(lr=lr)
    elif optimizer == 'Nadam':
        optimizer = Nadam(lr=lr)
    elif optimizer == 'Adamax':
        optimizer = Adamax(lr=lr)
    else:
        optimizer = Adam(lr=lr)

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=model_metrics)
    return model
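# Hedged usage sketch for create_model_template; the window length, feature
# count and dummy training data below are illustrative assumptions only.
import numpy as np

timesteps, features = 50, 8
model = create_model_template(layers=2,
                              units=[64, 32],
                              shape=(timesteps, features),
                              use_attention_middle=True,
                              optimizer='RMSprop',
                              lr=1e-3)
X = np.random.rand(16, timesteps, features).astype('float32')
y = np.random.randint(0, 2, size=(16, 1))
model.fit(X, y, epochs=1, batch_size=4)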
    :param text_list:
    :param token_dict:
    :return:
    """
    X1 = []
    X2 = []
    tokenizer = Tokenizer(token_dict)
    for line in text_list:
        x1, x2 = tokenizer.encode(first=line)
        X1.append(x1)
        X2.append(x2)
    X1 = sequence.pad_sequences(X1, maxlen=maxlen, padding='post', truncating='post')
    X2 = sequence.pad_sequences(X2, maxlen=maxlen, padding='post', truncating='post')
    return [X1, X2]


if __name__ == "__main__":
    maxlen = 100
    text_list = ["TW 0:02 / 41:54 Mind Your Language Season 3 Episode 2 Who Loves Ya Baby? | Funny TV Show (GM)",
                 "I have a dream"]
    token_dict = get_token_dict(dict_path)
    [X1, X2] = get_encode(text_list, token_dict)
    print(X1)
    wordvec = build_bert_model(X1, X2)
    print(wordvec)
    # with open("test_keras_bert2.yml", "r") as f:
    #     yaml_string = yaml.load(f)
    # model = keras.models.load_model(yaml_string, custom_objects=SeqSelfAttention.get_custom_objects())
    print("loading weights")
    model = keras.models.load_model("test_keras_bert4.h5",
                                    custom_objects=SeqWeightedAttention.get_custom_objects())
    result = model.predict(wordvec)
    print(result)
    del model