def createModel(vocab_size, tag_size, max_len, emb_matrix=None):
    input = Input(shape=(max_len, ))
    if emb_matrix is None:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          input_length=max_len)(input)
    else:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          weights=[emb_matrix],
                          input_length=max_len,
                          trainable=False)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=param.LSTM_UNITS,
             return_sequences=True,
             recurrent_dropout=0.1))(model)
    model = SeqSelfAttention(attention_activation='sigmoid')(model)
    model = TimeDistributed(Dense(tag_size, activation="softmax"))(model)
    crf = CRF(tag_size, sparse_target=False)
    out = crf(model)
    model = Model(input, out)
    model.compile(optimizer="adam",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
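# Hedged usage sketch (not part of the original source): createModel above assumes a
# `param` module (EMBEDDING_DIMENSION, LSTM_UNITS), keras_contrib's CRF and
# keras_self_attention's SeqSelfAttention. Because the CRF is built with
# sparse_target=False, fit() needs one-hot tag labels per timestep; the shapes and
# names below (MAX_LEN, N_WORDS, N_TAGS) are illustrative assumptions.
import numpy as np
from keras.utils import to_categorical

MAX_LEN, N_WORDS, N_TAGS = 50, 1000, 5
X_dummy = np.random.randint(1, N_WORDS, size=(8, MAX_LEN))
y_dummy = to_categorical(np.random.randint(0, N_TAGS, size=(8, MAX_LEN)),
                         num_classes=N_TAGS)
model = createModel(vocab_size=N_WORDS, tag_size=N_TAGS, max_len=MAX_LEN)
model.fit(X_dummy, y_dummy, batch_size=4, epochs=1, verbose=0)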
def train(self):
    input = Input(shape=(self.max_len, ))
    model = Embedding(input_dim=self.num_word + 1,
                      output_dim=20,
                      input_length=self.max_len,
                      mask_zero=True)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(
        model)  # relu dense layer before the CRF
    crf = CRF(self.num_tag)
    out = crf(model)
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    self.pad_process()
    train_X, test_X, train_y, test_y = self.train_test_split()
    history = model.fit(train_X,
                        np.array(train_y),
                        batch_size=32,
                        epochs=1,
                        validation_split=0.1,
                        verbose=1)
    self.make_plot(history)
    return model
def models(self, input_length, vocab_size, embedding_dim, conv_layer_info,
           connected_layer_info, batch_size, epochs):
    model_input = Input(shape=(input_length, ), dtype='int64')
    model = Embedding(vocab_size, embedding_dim,
                      input_length=input_length)(model_input)
    for i in range(len(conv_layer_info)):
        model = Convolution1D(filters=conv_layer_info[i][0],
                              kernel_size=conv_layer_info[i][1],
                              padding="valid",
                              activation="relu",
                              strides=1)(model)
        if len(conv_layer_info[i]) > 2:
            model = MaxPooling1D(pool_size=conv_layer_info[i][2])(model)
    model = Flatten()(model)
    for i in range(len(connected_layer_info)):
        model = Dense(connected_layer_info[i][0], activation="relu")(model)
        model = Dropout(connected_layer_info[i][1])(model)
    model_output = Dense(self.num_of_classes, activation="softmax")(model)
    model = Model(inputs=model_input, outputs=model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    print("Started Training : ")
    model.fit(self.x_train,
              self.y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(self.x_test, self.y_test),
              verbose=2)
    print("Training Completed")
    self.model = model
def get_model():
    """Definition of the CNN model used."""
    inp = Input(shape=(MAX_TEXT_LENGTH,))
    # NOTE: the Embedding input_dim should be the vocabulary size; here the code reuses
    # MAX_TEXT_LENGTH, which only works if the two happen to coincide.
    model = Embedding(MAX_TEXT_LENGTH, EMBED_SIZE)(inp)
    model = Conv1D(filters=64, kernel_size=7, padding='same',
                   activation='relu')(model)
    model = MaxPooling1D(pool_size=3)(model)
    model = BatchNormalization(axis=1)(model)
    model = Dropout(0.25)(model)
    model = Conv1D(filters=128, kernel_size=5, padding='same',
                   activation='relu')(model)
    model = MaxPooling1D(pool_size=5)(model)
    model = BatchNormalization(axis=1)(model)
    model = Dropout(0.3)(model)
    model = Flatten()(model)
    model = Dense(1024, activation="relu")(model)
    model = Dense(10, activation="softmax")(model)
    model = Model(inputs=inp, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    model.summary()
    return model
def train(X_tr, y_tr):
    output_dim = 50
    word_input = Input(shape=(MAXLEN, ))
    model = Embedding(input_dim=VOCAB_SIZE,
                      output_dim=output_dim,
                      input_length=MAXLEN)(word_input)
    model = SpatialDropout1D(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(len(tags), activation="softmax"))(model)
    model = Model(word_input, out)
    model.compile(optimizer="rmsprop",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    checkpointer = ModelCheckpoint(filepath='NER_model.h5',
                                   verbose=0,
                                   mode='auto',
                                   save_best_only=True,
                                   monitor='val_loss')
    history = model.fit(X_tr,
                        y_tr.reshape(*y_tr.shape, 1),
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        shuffle=True,
                        validation_split=VALIDATION_SPLIT,
                        verbose=1,
                        callbacks=[checkpointer])
    return history
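# Hedged preparation sketch (assumed, not from the original pipeline): train() above uses
# sparse_categorical_crossentropy, so y_tr stays as integer tag indices (the trailing axis
# is added inside fit()); X_tr is padded to MAXLEN. sentences, word2idx and tag2idx are
# illustrative names for whatever preprocessing the surrounding code provides.
from keras.preprocessing.sequence import pad_sequences

X_tr = pad_sequences([[word2idx[w] for w, t in s] for s in sentences],
                     maxlen=MAXLEN, padding="post", value=0)
y_tr = pad_sequences([[tag2idx[t] for w, t in s] for s in sentences],
                     maxlen=MAXLEN, padding="post", value=tag2idx["O"])
history = train(X_tr, y_tr)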
def train():
    input = Input(shape=(max_len, ))
    model = Embedding(input_dim=n_words, output_dim=50,
                      input_length=max_len)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(n_tags, activation='softmax'))(
        model)  # softmax output layer
    model = Model(input, out)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # checkpoint
    # filepath = "../result/bilstm-weights-{epoch:02d}-{val_acc:.2f}.hdf5"
    # checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # history = model.fit(X_train, np.array(y_train), batch_size=32, epochs=5, validation_split=0.1, verbose=1, callbacks=[checkpoint])
    history = model.fit(X_train,
                        np.array(y_train),
                        batch_size=32,
                        epochs=5,
                        validation_split=0.1,
                        verbose=1)
    # save the model
    model.save(filepath="../result/bi-lstm.h5")
    hist = pd.DataFrame(history.history)
    plt.figure(figsize=(12, 12))
    plt.plot(hist["acc"])
    plt.plot(hist["val_acc"])
    plt.show()
def create_model(train=True):
    if train:
        (train_x, train_y, train_max_len, train_length), \
            (test_x, test_y, test_max_len, test_length), \
            (vocab, maxlen, chunk_tags, embedding_weights) = process_data.load_lstm_data()
    else:
        with open('model/chars-config.pkl', 'rb') as inp:
            (vocab, chunk_tags, embedding_weights) = pickle.load(inp)
    # NOTE: train_max_len is only bound on the training path; when train=False the
    # sequence length has to be provided from the saved config as well.
    input = Input(shape=(train_max_len, ))
    model = Embedding(len(vocab) + 1, EMBED_DIM, mask_zero=True)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=200, return_sequences=True, recurrent_dropout=0.1))(model)
    model = Dropout(0.7)(model)
    out = TimeDistributed(Dense(len(chunk_tags) + 1,
                                activation="softmax"))(model)
    model = Model(input, out)
    model.summary()
    model.compile('adam',
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    if train:
        return model, (train_x, train_y, train_max_len), \
            (test_x, test_y, test_max_len), (vocab, chunk_tags)
    else:
        return model, (vocab, chunk_tags)
def createModel(vocab_size, tag_size, max_len, emb_matrix=None):
    input = Input(shape=(max_len, ))
    if emb_matrix is None:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          input_length=max_len)(input)
    else:
        model = Embedding(input_dim=vocab_size,
                          output_dim=param.EMBEDDING_DIMENSION,
                          weights=[emb_matrix],
                          input_length=max_len,
                          trainable=False)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=param.LSTM_UNITS,
             return_sequences=True,
             recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(tag_size, activation="softmax"))(
        model)  # softmax output layer
    model = Model(input, out)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
def run(X_train, Y_train, X_val, Y_val, embedding_matrix, vocab_size,
        maxlen=40, emb_dim=300, neg_ratio=0, hidden_dim=300, drop=0.2,
        r_drop=0.1):
    ## build model
    input = Input(shape=(maxlen, ))
    model = Embedding(vocab_size, emb_dim, weights=[embedding_matrix],
                      input_length=maxlen, trainable=False)(input)
    model = Dropout(drop)(model)
    model = Bidirectional(
        LSTM(hidden_dim, return_sequences=True,
             recurrent_dropout=r_drop))(model)
    model = Dropout(drop)(model)
    out = TimeDistributed(Dense(1, activation='sigmoid'))(model)
    model = Model(input, out)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    earlyStop = [EarlyStopping(monitor='val_loss', patience=1)]
    history = model.fit(X_train,
                        Y_train,
                        batch_size=64,
                        epochs=10,
                        validation_data=(X_val, Y_val),
                        callbacks=earlyStop)
    pred = model.predict(X_val)
    Y_pred = np.squeeze(pred)
    test = [[1 if y >= threshold else 0 for y in x] for x in Y_pred]
    test_arr = np.asarray(test)
    test_arr = np.reshape(test_arr, (-1))
    target = np.reshape(Y_val, (-1))
    print(
        metrics.precision_recall_fscore_support(target,
                                                test_arr,
                                                average=None,
                                                labels=[0, 1]))
    # Y_pred_ = [[1 if y >= threshold else 0 for y in x] for x in Y_pred]
    Y_val_ = np.squeeze(Y_val)
    print("Evaluate: dev seg exact")
    pred_out_dir = out_dir + 'seg_' + str(neg_ratio) + 'neg'
    gold_dir = '../../data/val_segs/' + 'seg_' + str(neg_ratio) + 'neg'
    p, r, f = seg_exact_match(test, Y_val_, pred_out_dir, gold_dir)
    return model, history, p, r, f
def build(self, hp):
    # Model definition
    inpt = Input(shape=(MAX_LEN, ))  # MAX_LEN, VECT_SIZE
    # input_dim: size of the vocabulary, i.e. maximum integer index + 1
    # output_dim: dimension of the dense embedding
    # input_shape: 2D tensor with shape (batch_size, input_length)
    # doc_vocab: vocabulary size (number of words) of the train dataset
    model = Embedding(
        doc_vocab,  # n_words + 2 (PAD & UNK)
        output_dim=100,
        input_length=MAX_LEN,
        weights=[embedding_matrix],  # use GloVe vectors as initial weights
        mask_zero=True,
        trainable=True,
        activity_regularizer=l1(0.0000001))(inpt)  # name='word_embedding'

    # hp.Choice('activity_regularizer_1', values=[0.0, 0.00001, 0.000001, 0.0000001])
    # NOTE: this hyperparameter is registered with the tuner but its value is not
    # applied to any layer below.
    hp.Choice('activity_regularizer_2',
              values=[0.0, 0.0000001, 0.00000001, 0.000000001])

    # recurrent_dropout=0.1 (10% chance of dropping the connections that simulate LSTM memory cells)
    # units = 100 / 0.55 = 182 neurons (to account for 0.55 dropout)
    model = Bidirectional(
        LSTM(units=100,
             return_sequences=True,
             activity_regularizer=l1(0.000000001),
             recurrent_constraint=max_norm(2)))(model)  # input_shape=(1, MAX_LEN, VECT_SIZE)
    model = Dropout(hp.Choice('dropout', values=[0.0, 0.3, 0.5]))(model)
    # model = TimeDistributed(Dense(number_labels, activation="relu"))(model)  # a dense layer as suggested by neuralNer
    model = Dense(number_labels, activation=None)(
        model)  # activation='linear' (they are the same)
    crf = CRF()  # CRF layer { SHOULD I SET -> number_labels+1 (+1 -> PAD) }
    out = crf(model)  # output
    model = Model(inputs=inpt, outputs=out)

    # set learning rate
    # lr_rate = InverseTimeDecay(initial_learning_rate=0.05, decay_rate=4, decay_steps=steps_per_epoch)
    # lr_rate = ExponentialDecay(initial_learning_rate=0.01, decay_rate=0.5, decay_steps=10000)
    # set optimizer (decay=learning_rate / epochs; CASE 1: decay=0.01, CASE 2: decay=0.1/5)
    opt = SGD(learning_rate=0.0, momentum=0.9,
              clipvalue=5.0)  # clipvalue (gradient clipping): clip the gradient to [-5, 5]
    # opt = SGD(learning_rate=0.01, decay=0.01/steps_per_epoch, momentum=0.9, clipvalue=10.0)
    # opt = SGD(learning_rate=lr_rate, clipvalue=3.0, clipnorm=2.0, momentum=0.9)

    # compile Bi-LSTM-CRF
    model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.accuracy])  # , f1score()
    # model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])
    self.initial_lrate = hp.Choice('learning_rate', [0.05, 0.01])
    return model
def train():
    input = Input(shape=(max_len, ))
    model = Embedding(input_dim=n_words + 1,
                      output_dim=100,
                      input_length=max_len,
                      mask_zero=True)(input)  # 100-dim embedding
    model = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(model)  # variational biLSTM
    model = TimeDistributed(Dense(50, activation="relu"))(
        model)  # a dense layer as suggested by neuralNer
    crf = CRF(n_tags)  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()
    history = model.fit(x_train,
                        np.array(y_train),
                        batch_size=64,
                        epochs=5,
                        validation_split=0.1,
                        verbose=1)
    save_load_utils.save_all_weights(model, filepath="models/bilstm-crf.h5")
    hist = pd.DataFrame(history.history)
    print(hist)
    plt.figure(figsize=(12, 12))
    plt.plot(hist["crf_viterbi_accuracy"])
    plt.plot(hist["val_crf_viterbi_accuracy"])
    plt.show()
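# Hedged companion sketch (assumption, not in the original source): keras_contrib CRF models
# are commonly restored by rebuilding the identical architecture and loading the weights
# saved by save_load_utils above; max_len, n_words and n_tags must match the training run.
def load_trained_model():
    input = Input(shape=(max_len, ))
    model = Embedding(input_dim=n_words + 1, output_dim=100,
                      input_length=max_len, mask_zero=True)(input)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(model)
    crf = CRF(n_tags)
    out = crf(model)
    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss=crf.loss_function,
                  metrics=[crf.accuracy])
    save_load_utils.load_all_weights(model, "models/bilstm-crf.h5")
    return model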
def create_model(vocabulary_size, num_classes, max_length, units=100,
                 dense_neurons=16, embedding_vector_length=300):
    # NOTE: in this variant the units, dense_neurons and num_classes parameters are not
    # used; the LSTM size and the 3-class softmax output are hard-coded below.
    input = Input(shape=(max_length, ))
    model = Embedding(input_dim=vocabulary_size + 1,
                      output_dim=embedding_vector_length,
                      input_length=max_length)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    # model = TimeDistributed(Dense(dense_neurons, activation='relu'))(model)
    # model = Dense(num_classes, activation="softmax")(model)
    out = TimeDistributed(Dense(3, activation="softmax"))(model)  # softmax output layer
    # crf = CRF(3, name="output")
    # out = crf(model)
    model = Model(input, out)
    # model.compile(optimizer="adam", loss=crf.loss_function, metrics=[crf.accuracy], sample_weight_mode="temporal")
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'],
                  sample_weight_mode="temporal")
    return model
def define_model(self):
    """Define the Bi-LSTM/CRF model"""
    input_layer = Input(shape=(self.MAX_SEQ_LEN, ))
    model = Embedding(
        input_dim=self.n_words,  # actual n_words + 2 (PAD & UNK)
        output_dim=self.EMBEDDING,  # default: 300-dim embedding
        input_length=self.MAX_SEQ_LEN)(input_layer)
    model = Bidirectional(
        LSTM(units=self.LSTM_HIDDEN_UNITS,
             return_sequences=True,
             recurrent_dropout=0.1))(model)  # variational biLSTM
    model = TimeDistributed(Dense(self.LSTM_DENSE_DIM, activation="relu"))(
        model)  # a dense layer as suggested by neuralNer
    crf = CRF(self.n_tags)  # CRF layer, actual n_tags + 1 (PAD)
    output_layer = crf(model)  # output
    model = Model(input_layer, output_layer)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()
    return model
def base_lstm_user(vocabulary_size: int, embedding_size: int,
                   history_size: int, max_seq_length: int,
                   embedding_matrix: np.array, y_dictionary: dict) -> Model:
    lstm_output_shape = 256
    input = Input(shape=(max_seq_length, ), name='main_input')
    history = Input(shape=(history_size, ), name='history_input')
    model = Embedding(input_dim=vocabulary_size,
                      output_dim=embedding_size,
                      weights=[embedding_matrix],
                      input_length=max_seq_length,
                      trainable=True,
                      embeddings_regularizer=regularizers.l2(0.000001))(input)
    model = Dropout(0.4)(model)
    model = Bidirectional(LSTM(lstm_output_shape,
                               return_sequences=False))(model)
    h_model = history
    for i in range(2):
        h_model = Dense(256,
                        activation='tanh',
                        kernel_regularizer=regularizers.l2(0.00001))(h_model)
    model = Concatenate()([model, h_model])
    model = Dense(len(y_dictionary), activation='softmax')(model)
    model = Model([input, history], model)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
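# Hedged training sketch (illustrative, not from the original source): base_lstm_user above
# is a two-input model, so fit() must receive the padded token sequences and the history
# features as a list in the same order as the Input layers; every variable name below is an
# assumption about the surrounding pipeline.
model = base_lstm_user(vocabulary_size, embedding_size, history_size,
                       max_seq_length, embedding_matrix, y_dictionary)
model.fit([X_seq_train, X_hist_train], y_train_onehot,
          validation_data=([X_seq_val, X_hist_val], y_val_onehot),
          batch_size=32, epochs=10)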
def create_model(number_of_words_total, number_of_tags, len_max,
                 embedding_size, lstm_units, dropout, recurrent_dropout):
    # Input
    input_layer = Input(shape=(len_max, ))
    # Embedding layer
    model = Embedding(input_dim=number_of_words_total,
                      output_dim=embedding_size,
                      input_length=len_max)(input_layer)
    # Bi-LSTM layer
    model = Bidirectional(
        LSTM(units=lstm_units,
             return_sequences=True,
             dropout=dropout,
             recurrent_dropout=recurrent_dropout,
             kernel_initializer=keras.initializers.he_normal()))(model)
    # TimeDistributed layer
    model = TimeDistributed(Dense(number_of_tags, activation="relu"))(model)
    # CRF layer
    crf = CRF(number_of_tags)
    # Output
    output_layer = crf(model)
    model = Model(input_layer, output_layer)
    # Optimiser
    adam = Adam(lr=0.0005, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=adam,
                  loss=crf.loss_function,
                  metrics=[crf.accuracy, 'accuracy'])
    model.summary()
    return model
def cross_validate(self, X, y):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    input = Input(shape=(self.max_len, ))
    model = Embedding(input_dim=self.n_words,
                      output_dim=50,
                      input_length=self.max_len)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(self.n_labels, activation="softmax"))(
        model)  # softmax output layer
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    history = model.fit(X_tr,
                        np.array(y_tr),
                        batch_size=32,
                        epochs=1,
                        validation_split=0.1,
                        verbose=1)
    p = model.predict(np.array([X_te[10]]))
    p = np.argmax(p, axis=-1)
    for w, pred in zip(X_te[10], p[0]):
        if self.words[w] != 'PADGARBAGE':
            print("{:15}: {}".format(self.words[w], self.labels[pred]))
def fit(self, X, y):
    input = Input(shape=(self.max_len, ))
    model = Embedding(input_dim=self.n_words + 1,
                      output_dim=20,
                      input_length=self.max_len,
                      mask_zero=True)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(
        model)  # relu dense layer before the CRF
    crf = CRF(self.n_labels)  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)
    model.compile(optimizer="adam",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    self.model = model
    history = self.model.fit(X,
                             np.array(y),
                             batch_size=32,
                             epochs=20,
                             validation_split=0.1,
                             verbose=1)
def get_model(units, dout, emb, mtl, n_out):
    """
    Build the model.
    :param units: number of Conv1D filters
    :param dout: dropout rate
    :param emb: output size of the embedding layer
    :param mtl: input size (maximum text length)
    :param n_out: number of classes for the output dense layer
    :return: model
    """
    inp = Input(shape=(mtl, ))
    # NOTE: the Embedding input_dim is set to the input length here; it would normally
    # be the vocabulary size.
    model = Embedding(mtl, emb)(inp)
    model = Dropout(dout)(model)
    model = Conv1D(filters=units,
                   kernel_size=emb,
                   padding='same',
                   activation='relu')(model)
    model = MaxPooling1D(pool_size=2)(model)
    model = Flatten()(model)
    model = Dense(n_out, activation="softmax")(model)
    model = Model(inputs=inp, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
def lstm_crf(x, y, vocab_size, n_tags, batch_size, epochs):
    output_dim = 30
    hid_size = 50
    dense1 = 50
    seq_len = x.shape[1]
    input_ = Input(shape=(seq_len, ))
    model = Embedding(input_dim=vocab_size,
                      output_dim=output_dim,
                      input_length=seq_len,
                      mask_zero=True)(input_)
    model = Bidirectional(
        LSTM(units=hid_size, return_sequences=True,
             recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(dense1, activation='relu'))(model)
    crf = CRF(n_tags, learn_mode='marginal')
    out = crf(model)  # prob
    model = Model(inputs=input_, outputs=out)
    model.compile(optimizer='rmsprop', loss=crf_loss, metrics=[crf.accuracy])
    model.summary()
    history = model.fit(x,
                        np.array(y),
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=0.1,
                        verbose=1)
    return model, history
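# Hedged inference sketch (illustrative only): with learn_mode='marginal' the CRF above emits
# per-timestep tag probabilities, so predictions can be decoded with a plain argmax; idx2tag
# is an assumed index-to-label lookup built during preprocessing.
import numpy as np

def predict_tags(model, x_batch, idx2tag):
    probs = model.predict(x_batch)       # shape (batch, seq_len, n_tags)
    tag_ids = np.argmax(probs, axis=-1)  # most probable tag per token
    return [[idx2tag[int(t)] for t in sent] for sent in tag_ids]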
def trainLSTM():
    # fit an LSTM network with an embedding layer
    from keras.models import Model, Input
    from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
    input = Input(shape=(max_len, ))
    model = Embedding(input_dim=n_words, output_dim=50,
                      input_length=max_len)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    history = model.fit(X_tr,
                        np.array(y_tr),
                        batch_size=32,
                        epochs=5,
                        validation_split=0.1,
                        verbose=1)
    hist = pd.DataFrame(history.history)
    plt.figure(figsize=(12, 12))
    plt.plot(hist["acc"])
    plt.plot(hist["val_acc"])
    plt.show()
    return model
def createArchitecture(parameters):
    optimizer = 0
    if parameters["optimizer"] == 'rmsprop':
        optimizer = optimizers.rmsprop(lr=parameters["learning_rate"],
                                       epsilon=parameters["epsilon"])
    elif parameters["optimizer"] == 'adam':
        optimizer = optimizers.adam(lr=parameters["learning_rate"],
                                    epsilon=parameters["epsilon"])
    elif parameters["optimizer"] == 'nadam':
        optimizer = optimizers.nadam(lr=parameters["learning_rate"],
                                     epsilon=parameters["epsilon"])
    elif parameters["optimizer"] == 'sgd':
        optimizer = optimizers.sgd(lr=parameters["learning_rate"])
    # else:
    #     optimizer = parameters["optimizer"]

    if parameters["use_embedding_layer"]:
        input = Input(shape=(parameters["max_seq_len"], ))
        model = Embedding(input_dim=parameters["one_hot_vector_len"],
                          output_dim=parameters["embedding_layer_output"],
                          input_length=parameters["max_seq_len"])(input)
        if parameters["embedding_dropout"] > 0:
            model = Dropout(rate=parameters["embedding_dropout"])(model)
    else:
        input = Input(shape=(parameters["max_seq_len"],
                             parameters["one_hot_vector_len"]))
        model = input

    if parameters["bi_lstm1_units"] > 0:
        model = Bidirectional(
            CuDNNLSTM(units=parameters["bi_lstm1_units"],
                      return_sequences=True))(model)
    if parameters["bi_lstm2_units"] > 0:
        model = Bidirectional(
            CuDNNLSTM(units=parameters["bi_lstm2_units"],
                      return_sequences=True))(model)
    if parameters["bi_lstm3_units"] > 0:
        model = Bidirectional(
            CuDNNLSTM(units=parameters["bi_lstm3_units"],
                      return_sequences=True))(model)

    if parameters["use_crf_layer"]:
        crf = CRF(parameters["num_tags"], learn_mode="marginal")
        out = crf(model)  # output
        model = Model(input, out)
        model.compile(optimizer=optimizer,
                      loss=losses.crf_loss,
                      metrics=[metrics.crf_accuracy, avg_proximity_metric()])
    else:
        out = TimeDistributed(
            Dense(parameters["num_tags"], activation="softmax"))(model)
        model = Model(input, out)
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=["accuracy", avg_proximity_metric()])
    model.summary()
    return model
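# Hedged example configuration (assumed values, not from the original source): a parameters
# dict of the shape createArchitecture above expects, covering every key it reads.
example_parameters = {
    "optimizer": "adam",
    "learning_rate": 0.001,
    "epsilon": 1e-8,
    "use_embedding_layer": True,
    "max_seq_len": 100,
    "one_hot_vector_len": 5000,
    "embedding_layer_output": 100,
    "embedding_dropout": 0.1,
    "bi_lstm1_units": 128,
    "bi_lstm2_units": 64,
    "bi_lstm3_units": 0,
    "use_crf_layer": True,
    "num_tags": 10,
}
model = createArchitecture(example_parameters)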
def BUILD_MODEL(X, MAX, n_words, n_tags, embedding_matrix):
    input_word = Input(shape=(MAX, ))
    model = Embedding(input_dim=n_words,
                      input_length=X.shape[1],
                      output_dim=embedding_matrix.shape[1],
                      weights=[embedding_matrix],
                      trainable=False)(input_word)
    model = Bidirectional(
        LSTM(64, return_sequences=True, dropout=0.2,
             recurrent_dropout=0.2))(model)
    model = TimeDistributed(Dense(32, activation='relu'))(model)
    crf = CRF(n_tags)  # CRF layer
    out = crf(model)  # output
    model = Model(input_word, out)
    model.summary()
    model.compile(optimizer='adam',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy, 'accuracy'])
    return model
def buildmodel(cls, max_len: int, n_words: int, n_tags: int):
    # define the LSTM network with an embedding layer
    input = Input(shape=(max_len,))
    model = Embedding(input_dim=n_words, output_dim=50,
                      input_length=max_len)(input)
    model = Dropout(0.1)(model)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(n_tags, activation="softmax"))(
        model)  # softmax output layer
    # use these definitions to compile and train the model
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
def createModel(MAX_LEN, n_words, n_tags, embedding_matrix,
                lstm_cells=LSTM_CELLS, trainable=False, lstm_layers=1,
                bi_direc=False, activation="softmax", optimizer="rmsprop",
                loss="categorical_crossentropy"):
    inputt = Input(shape=(MAX_LEN, ))
    if not trainable:
        model = Embedding(input_dim=n_words,
                          output_dim=embedding_matrix.shape[1],
                          weights=[embedding_matrix],
                          trainable=False,
                          input_length=MAX_LEN)(inputt)
    else:
        # the trainable embedding is applied to the same input layer
        model = Embedding(input_dim=n_words,
                          output_dim=embedding_matrix.shape[1],
                          weights=[embedding_matrix],
                          trainable=True,
                          input_length=MAX_LEN)(inputt)
    model = Dropout(0.1)(model)
    # optionally stack additional LSTM layers before the final one
    if lstm_layers > 1:
        for i in range(lstm_layers - 1):
            model = LSTM(units=lstm_cells,
                         return_sequences=True,
                         recurrent_dropout=0.1)(model)
    if bi_direc:
        model = Bidirectional(
            LSTM(units=lstm_cells,
                 return_sequences=True,
                 recurrent_dropout=0.1))(model)  # variational biLSTM
    else:
        model = LSTM(units=lstm_cells,
                     return_sequences=True,
                     recurrent_dropout=0.1)(model)
    out = TimeDistributed(Dense(n_tags, activation=activation))(
        model)  # softmax output layer
    model = Model(inputt, out)
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
    return model
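# Hedged helper sketch (assumption, not part of the original code): createModel above expects
# an embedding_matrix whose rows are aligned with the word indices. A typical construction
# from pretrained GloVe-style vectors looks like this; glove_path and word2idx are
# illustrative names.
import numpy as np

def build_embedding_matrix(glove_path, word2idx, emb_dim):
    vectors = {}
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            vectors[parts[0]] = np.asarray(parts[1:], dtype="float32")
    matrix = np.zeros((len(word2idx), emb_dim))  # row i holds the vector for word index i
    for word, idx in word2idx.items():
        if word in vectors:
            matrix[idx] = vectors[word]
    return matrix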
def buildmodel(cls, max_len: int, n_words: int, n_tags: int):
    input = Input(shape=(max_len,))
    model = Embedding(input_dim=n_words + 1,
                      output_dim=20,  # TODO: should this be 20 here and 50 in LSTM?
                      input_length=max_len,
                      mask_zero=True)(input)  # 20-dim embedding
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(model)
    crf = kcCRF(n_tags)
    out = crf(model)
    model = Model(input, out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model
def model_stuff():
    global model
    word_embedding_size = 100
    input = Input(shape=(maxlen,))
    model = Embedding(input_dim=n_words,
                      output_dim=word_embedding_size,
                      weights=[glove_embedding_matrix()],
                      input_length=maxlen,
                      trainable=False)(input)
    model = Bidirectional(
        LSTM(units=word_embedding_size,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5,
             kernel_initializer=k.initializers.he_normal()))(model)
    model = Bidirectional(
        LSTM(units=word_embedding_size * 2,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5,
             kernel_initializer=k.initializers.he_normal()))(model)
    model = TimeDistributed(Dense(n_tags, activation="relu"))(model)
    crf = CRF(n_tags)
    out = crf(model)
    model = Model(input, out)
    # adam = k.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer='adam',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy, 'accuracy'])
    model.summary()
def cnn_non_static(self):
    x_train, y_train, x_test, y_test, vocabolary_dict = data_load_and_preproccess(
        self.word_size, self.sequence_length)
    embedding_weights = word_to_vector(np.vstack((x_train, x_test)),
                                       vocabolary_dict, self.embedding_dim,
                                       self.min_word_count,
                                       self.context_window_size)
    embedding_weights = np.array(
        [value for value in embedding_weights.values()])
    model_input = Input(shape=(self.sequence_length, ))
    model = Embedding(len(vocabolary_dict),
                      self.embedding_dim,
                      weights=[embedding_weights],
                      input_length=self.sequence_length,
                      name="embedding")(model_input)
    model = Dropout(self.drop_prob[0])(model)
    multi_cnn_channel = []
    for kernal in self.kernal_size:
        conv_channel = Convolution1D(filters=self.filters,
                                     kernel_size=kernal,
                                     padding="valid",
                                     activation="relu",
                                     strides=1)(model)
        conv_channel = MaxPooling1D(pool_size=2)(conv_channel)
        conv_channel = Flatten()(conv_channel)
        multi_cnn_channel.append(conv_channel)
    model = Concatenate()(multi_cnn_channel)
    model = Dropout(self.drop_prob[1])(model)
    for dimension in self.hidden_dims:
        model = Dense(dimension, activation="relu")(model)
    model_output = Dense(1, activation="sigmoid")(model)
    model = Model(model_input, model_output)
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    print("Started Training : ")
    model.fit(x_train,
              y_train,
              batch_size=self.batch_size,
              epochs=self.epochs,
              validation_data=(x_test, y_test),
              verbose=2)
    print("Training Completed")
    self.model = model
def existing_model(self, embedding_matrix, output_neurons):
    inp = Input(shape=(self.maxlength, ))
    model = Embedding(input_dim=embedding_matrix.shape[0],
                      output_dim=embedding_matrix.shape[1],
                      input_length=self.maxlength,
                      weights=[embedding_matrix],
                      trainable=False)(inp)
    lstm = Bidirectional(LSTM(50, return_sequences=False),
                         merge_mode='concat')(model)
    outputs = Dense(output_neurons, activation='sigmoid',
                    trainable=True)(lstm)
    model = Model(inp, outputs)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def create(self, train_file_path: str, output_summary: bool = False):
    if not os.path.exists(train_file_path):
        raise FileNotFoundError
    with open(train_file_path, 'r') as train_file:
        self.max_words_count_in_sentence = pd.read_csv(train_file).groupby(
            "article_id").size().max()
    input_layer = Input(shape=(self.max_words_count_in_sentence, ))
    word_embedding_size = 150
    model = Embedding(
        input_dim=len(self.lang.vocab),
        output_dim=word_embedding_size,
        input_length=self.max_words_count_in_sentence)(input_layer)
    model = Bidirectional(
        LSTM(units=word_embedding_size,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5,
             kernel_initializer=keras.initializers.he_normal()))(model)
    model = LSTM(units=word_embedding_size * 2,
                 return_sequences=True,
                 dropout=0.5,
                 recurrent_dropout=0.5,
                 kernel_initializer=keras.initializers.he_normal())(model)
    model = TimeDistributed(Dense(len(self._tags),
                                  activation="relu"))(model)
    crf = CRF(len(self._tags))
    model = Model(input_layer, crf(model))
    model.compile(optimizer=keras.optimizers.Adam(lr=0.0005,
                                                  beta_1=0.9,
                                                  beta_2=0.999),
                  loss=crf.loss_function,
                  metrics=[crf.accuracy, 'accuracy'])
    if output_summary:
        model.summary()
    self._model = model
def run(X_train, Y_train, X_val, Y_val, embedding_matrix, vocab_size,
        maxlen=40, emb_dim=300, neg_ratio=0, hidden_dim=300, drop=0.2,
        r_drop=0.1):
    ## build model
    # input = Input(shape=(maxlen,))
    # model = Embedding(vocab_size, emb_dim, weights=[embedding_matrix], input_length=maxlen, trainable=False)(input)
    # model = Dropout(drop)(model)
    # model = Bidirectional(LSTM(hidden_dim, return_sequences=True, recurrent_dropout=r_drop))(model)
    # model = Dropout(drop)(model)
    # out = TimeDistributed(Dense(1, activation='sigmoid'))(model)
    input = Input(shape=(maxlen,))
    model = Embedding(vocab_size, emb_dim, weights=[embedding_matrix],
                      input_length=maxlen, trainable=False)(input)
    model = Bidirectional(
        LSTM(hidden_dim, return_sequences=True,
             recurrent_dropout=r_drop))(model)
    model = TimeDistributed(Dense(hidden_dim // 4, activation='relu'))(model)
    model = TimeDistributed(Dropout(drop))(model)
    ## use CRF instead of Dense
    crf = CRF(2)
    out = crf(model)
    model = Model(input, out)
    Y_train_2 = keras.utils.to_categorical(Y_train)
    Y_val_2 = keras.utils.to_categorical(Y_val)
    model.compile(optimizer='adam',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    earlyStop = [EarlyStopping(monitor='val_loss', patience=1)]
    history = model.fit(X_train,
                        Y_train_2,
                        batch_size=64,
                        epochs=10,
                        validation_data=(X_val, Y_val_2),
                        callbacks=earlyStop)
    preds = model.predict(X_val)
    test = [[np.argmax(y) for y in x] for x in preds]
    test_arr = np.asarray(test)
    test_arr = np.reshape(test_arr, (-1))
    print(metrics.precision_recall_fscore_support(np.reshape(Y_val, (-1)),
                                                  test_arr,
                                                  average=None,
                                                  labels=[0, 1]))
    # Y_pred_ = [[1 if y >= threshold else 0 for y in x] for x in Y_pred]
    Y_val_ = np.squeeze(Y_val)
    print("Evaluate: dev seg exact")
    pred_out_dir = out_dir + 'seg_' + str(neg_ratio) + 'neg'
    gold_dir = '../../data/val_segs/' + 'seg_' + str(neg_ratio) + 'neg'
    p, r, f = seg_exact_match(test, Y_val_, pred_out_dir, gold_dir)
    return model, history, p, r, f