def TrainLstmCrf(data_name, model_name):
    n_classes = 4
    max_len = 75
    batch_size = 128
    epoch = 100
    tags = ['S', 'B', 'I', 'E']

    sentences, words = get_sents(datasets=data_name)
    print(len(sentences), len(words))
    word2idx = {w: i + 1 for i, w in enumerate(words)}  # index 0 is reserved for padding
    tag2idx = {t: i for i, t in enumerate(tags)}
    vocab_size = len(words)

    X = [[word2idx[w[0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len, sequences=X, padding="post", value=0)  # pad with the reserved index 0 so no real word is aliased
    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=tag2idx["E"])
    y = [to_categorical(i, num_classes=n_classes) for i in y]

    # Split the data
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    print(len(X_tr), len(y_tr), len(X_te), len(y_te))
    s = np.asarray([max_len] * batch_size, dtype='int32')  # every padded sequence has length max_len

    # Build the model
    word_ids = Input(batch_shape=(batch_size, max_len), dtype='int32')
    sequence_lengths = Input(batch_shape=[batch_size, 1], dtype='int32')
    print(sequence_lengths)
    word_embeddings = Embedding(vocab_size + 1, n_classes)(word_ids)  # +1 because word indices run from 1 to vocab_size
    blstm = Bidirectional(LSTM(units=50, return_sequences=True))(word_embeddings)
    dense_out = TimeDistributed(Dense(4, activation='tanh'))(blstm)
    crf = CrfModel()
    pred = crf(inputs=[dense_out, sequence_lengths])
    model = Model(inputs=[word_ids, sequence_lengths], outputs=[pred])
    print("word_ids:{}".format(word_ids))
    print("sequence_lengths:{}".format(sequence_lengths))
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=['accuracy'])
    print(model.summary())

    # Train on full-size mini-batches; each batch is fitted for `epoch` epochs
    k = 0
    for batch_x, batch_y in minibatches(X_tr, y_tr, batch_size=batch_size):
        model.fit([batch_x, s], np.array(batch_y), epochs=epoch, batch_size=batch_size)
        k += 1
        if k % 50 == 0:
            model.save("./models/{}_{}".format(k, model_name))
            print("saved")

    # Save the final model
    model.save(model_name)
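# TrainLstmCrf relies on a `minibatches` helper that is not shown here.
# A minimal sketch of what it presumably does - yielding successive
# (batch_x, batch_y) slices of fixed size. This is an assumption; the real
# helper may shuffle the data or handle partial batches differently.
def minibatches(X, y, batch_size):
    # Yield consecutive full batches only: the model above is built with a
    # fixed batch_shape, so a trailing partial batch would not fit.
    for start in range(0, len(X) - batch_size + 1, batch_size):
        yield X[start:start + batch_size], y[start:start + batch_size]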
def new_model(image_size=299, video_length=40, cnn_trainable=False):
    inputs = Input(shape=(video_length, image_size, image_size, 3))

    # pooling='avg' collapses each frame's feature maps to a single 2048-d
    # vector, so TimeDistributed produces a 3-D tensor the LSTM can consume
    cnn = inception_v3.InceptionV3(include_top=False, weights='imagenet', pooling='avg')
    cnn.trainable = cnn_trainable  # freeze or unfreeze the CNN backbone itself

    model = TimeDistributed(cnn)(inputs)
    model = LSTM(512)(model)
    model = Dropout(0.5)(model)
    model = Dense(1, activation='sigmoid')(model)  # sigmoid, not softmax, for a single binary unit

    model = Model(inputs=inputs, outputs=model)
    adam = keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    model.summary()
    return model
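# A minimal smoke test for new_model, assuming the imports above are in
# scope. The random tensor stands in for a real batch of two 40-frame clips;
# shapes and values here are illustrative only.
import numpy as np

if __name__ == '__main__':
    m = new_model(image_size=299, video_length=40)
    dummy_videos = np.random.rand(2, 40, 299, 299, 3).astype('float32')
    preds = m.predict(dummy_videos)  # shape (2, 1): one probability per clip
    print(preds.shape)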
def temporal_convs_linear(n_nodes, conv_len, n_classes, n_feat, max_len,
                          causal=False, loss='categorical_crossentropy',
                          optimizer='adam', return_param_str=False):
    """ Used in paper:
    Segmental Spatiotemporal CNNs for Fine-grained Action Segmentation
    Lea et al. ECCV 2016

    Note: Spatial dropout was not used in the original paper.
    It tends to improve performance a little.
    """
    inputs = Input(shape=(max_len, n_feat))
    model = inputs
    if causal:
        # left-pad so each frame's convolution only sees past frames
        model = ZeroPadding1D((conv_len // 2, 0))(model)

    model = Convolution1D(n_nodes, conv_len, padding='same', activation='relu')(model)

    if causal:
        # trim the extra frames introduced by the causal padding
        model = Cropping1D((0, conv_len // 2))(model)

    model = SpatialDropout1D(0.3)(model)
    model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)

    model = Model(inputs=inputs, outputs=model)
    model.compile(loss=loss, optimizer=optimizer, sample_weight_mode="temporal")

    if return_param_str:
        param_str = "tConv_C{}".format(conv_len)
        if causal:
            param_str += "_causal"
        return model, param_str
    else:
        return model
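# A quick shape check for temporal_convs_linear, assuming 64-frame sequences
# of 128-d features; all numbers here are illustrative. The causal variant
# should yield the same (max_len, n_classes) output as the acausal one,
# while only conditioning each frame on the past.
model, name = temporal_convs_linear(n_nodes=96, conv_len=25, n_classes=10,
                                    n_feat=128, max_len=64, causal=True,
                                    return_param_str=True)
print(name)                # tConv_C25_causal
print(model.output_shape)  # (None, 64, 10)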
def seq2seq():
    n_classes = len(LABELS)

    converse_input = Input(shape=(None, SENTENCE_ENCODING_DIM))
    # length_input = Input(shape=(None, 1))
    # word_input = Input(shape=(None, WORD_EMBEDDING_DIM))
    time_input = Input(shape=(None, 1))

    converse = Masking(mask_value=-1.)(converse_input)
    converse = Dropout(0.2)(converse)
    converse = Bidirectional(LSTM(1024, return_sequences=True))(converse)
    converse = Bidirectional(LSTM(1024, return_sequences=True))(converse)
    converse = Dropout(0.3)(converse)

    # lengths = Masking(mask_value=-1)(length_input)
    # words = Masking(mask_value=-1.)(word_input)
    # words = Dropout(0.2)(words)

    model = concatenate([converse, time_input], axis=-1)
    # print("merged output shape", model.output_shape)
    model = TimeDistributed(Dense(1024, activation='relu'))(model)
    model = Dropout(0.3)(model)
    model = TimeDistributed(Dense(512, activation='relu'))(model)
    model = Dropout(0.3)(model)
    # predictions = TimeDistributed(Dense(n_classes, activation='softmax'))(model)
    crf = CRF(n_classes, sparse_target=True)
    predictions = crf(model)

    model = Model(inputs=[converse_input, time_input], outputs=predictions)
    model.summary()
    # model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    return model
def NN_model(self, word2idx, char2idx, max_len, max_len_char, n_tags):
    n_words = len(word2idx)
    n_chars = len(char2idx)

    # word-level input and embedding
    word_in = Input(shape=(max_len,))
    emb_word = Embedding(input_dim=n_words, output_dim=30,
                         input_length=max_len, mask_zero=True)(word_in)

    # character-level input: one character sequence per token
    char_in = Input(shape=(max_len, max_len_char))
    emb_char = TimeDistributed(
        Embedding(input_dim=n_chars, output_dim=10,
                  input_length=max_len_char, mask_zero=True))(char_in)
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False, recurrent_dropout=0.3))(emb_char)

    # concatenate word and character representations
    x = concatenate([emb_word, char_enc])
    x = SpatialDropout1D(0.3)(x)
    main_lstm = Bidirectional(
        LSTM(units=30, return_sequences=True, recurrent_dropout=0.4))(x)
    model = TimeDistributed(Dense(35, activation='relu'))(main_lstm)

    crf = CRF(n_tags, learn_mode='marginal')
    out = crf(model)  # marginal probabilities per tag

    model = Model([word_in, char_in], out)
    # from keras.utils.vis_utils import plot_model
    # plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    model.compile(optimizer='rmsprop', loss=crf_loss, metrics=[crf.accuracy])
    # model.summary()
    return model
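# A sketch of how the two inputs to NN_model are typically prepared,
# assuming `sentences` is a list of token lists and unknown words/characters
# fall back to index 0 (the mask index); these conventions are assumptions,
# not taken from the surrounding code.
import numpy as np

X_word = np.zeros((len(sentences), max_len), dtype='int32')
X_char = np.zeros((len(sentences), max_len, max_len_char), dtype='int32')
for i, sent in enumerate(sentences):
    for j, word in enumerate(sent[:max_len]):
        X_word[i, j] = word2idx.get(word, 0)
        for k, ch in enumerate(word[:max_len_char]):
            X_char[i, j, k] = char2idx.get(ch, 0)
# model.fit([X_word, X_char], y, batch_size=32, epochs=5)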
main_lstm = Bidirectional(LSTM(units=50, return_sequences=True,
                               recurrent_dropout=0.6))(x)  # try dropout 0.1?
model = TimeDistributed(Dense(50, activation="relu"))(main_lstm)
crf = CRF(n_tags + 1)  # CRF layer, n_tags + 1 (PAD)
out = crf(model)  # output
# out = Lambda(lambda x: K.reshape(x, (-1, 5)))(out)
model = Model([word_in, char_in], out)

# set optimizer
# rmsprop = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=1e-5)
adam = optimizers.Adam(lr=0.01, epsilon=None, decay=1e-1)
model.compile(optimizer=adam, loss=crf.loss_function, metrics=[crf.accuracy])  # use the CRF loss
model.summary()
# sample_weight_mode="temporal"

# 90/10 split of publications into train and validation
tr_pubs = pub_ids[:int(len(pub_ids) * 0.9)]
val_pubs = pub_ids[int(len(pub_ids) * 0.9):]
train = subdata_getter(tr_pubs, data)
validation = subdata_getter(val_pubs, data)
tr_generator = DataGenerator(tr_pubs, train)
val_generator = DataGenerator(val_pubs, validation)
history = NBatchLogger()
                               recurrent_dropout=0.4))(x)

# attention
attention_sum = keras.layers.multiply([main_lstm, alpha_in])
attention_conc = concatenate([attention_sum, main_lstm])
model = TimeDistributed(Dense(75, activation="tanh"))(attention_conc)
model = TimeDistributed(Dense(50, activation="tanh"))(model)

# CRF layers
crf = CRF(n_tags + 1)  # CRF layer, n_tags + 1 (PAD)
out = crf(model)  # output
model = Model([word_in, char_in, alpha_in], out)

# set optimizer
rmsprop = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=1e-3)
model.compile(optimizer=rmsprop, loss=crf.loss_function, metrics=[crf.accuracy])
model.summary()

# DataGenerator and fit into Model
####################################################################################################################################
tr_pubs = pub_ids[:int(len(pub_ids) * 0.9)]
val_pubs = pub_ids[int(len(pub_ids) * 0.9):]
train = subdata_getter(tr_pubs, data)
validation = subdata_getter(val_pubs, data)
tr_generator = DataGenerator(tr_pubs, train)
val_generator = DataGenerator(val_pubs, validation)
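# DataGenerator is not defined in this section. A minimal sketch of a
# keras.utils.Sequence implementation that the two snippets above could use,
# assuming each publication id maps to pre-padded (inputs, labels) pairs
# where `inputs` holds one array per model input (e.g. words, chars, alpha).
# All names and the data layout are assumptions.
import numpy as np
from keras.utils import Sequence

class DataGenerator(Sequence):
    def __init__(self, pub_ids, data, batch_size=32):
        self.pub_ids = pub_ids
        self.data = data  # dict: pub_id -> (inputs, labels)
        self.batch_size = batch_size

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.pub_ids) / self.batch_size))

    def __getitem__(self, idx):
        ids = self.pub_ids[idx * self.batch_size:(idx + 1) * self.batch_size]
        n_inputs = len(self.data[ids[0]][0])
        # stack each of the model's inputs across the batch
        X = [np.array([self.data[i][0][k] for i in ids]) for k in range(n_inputs)]
        y = np.array([self.data[i][1] for i in ids])
        return X, y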
def test_exist(self, glove, test_data, test_labels):
    # get word embeddings
    utils = wordUtils.Utils()

    if glove:
        # use GloVe vectors
        self.words_list, self.embedding_matrix = utils.load_glove()
    else:
        self.words_list, self.embedding_matrix = utils.load_word2vec()
    unword_n = len(self.words_list)

    # get the test corpus
    cr = corpusreader.CorpusReader(test_data, test_labels)
    corpus = cr.trainseqs

    # map each token to its row in the embedding matrix
    for idx in range(len(corpus)):
        words = corpus[idx]['tokens']
        words_id = []
        for i in words:
            try:
                # the index of the word in the embedding matrix
                index = self.words_list.index(i)
            except ValueError:
                # the all-zero embedding row marks an unknown word
                index = unword_n
            words_id.append(index)
        corpus[idx]['embs'] = words_id

    # build the model
    input = Input(shape=(None,))
    el = Embedding(len(self.words_list) + 1, 200, weights=[self.embedding_matrix],
                   trainable=False)(input)
    bl1 = Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm1")(el)
    bl2 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm2")(bl1)
    bl3 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm3")(bl2)
    model = TimeDistributed(Dense(50, activation="relu"))(bl3)  # a dense layer as suggested by neuralNer
    crf = CRF(self.lab_len)  # CRF layer
    out = crf(model)  # output

    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()
    save_load_utils.load_all_weights(model, 'word_models/words_glove_multiLSTM31.h5')

    for doc in corpus:
        doc_arr = doc['embs']
        p = model.predict(np.array([doc_arr]))
        p = np.argmax(p, axis=-1)

        position = 0
        offsets = defaultdict(list)
        counter = 0

        # check if there are any mutations identified
        # tag scheme: {'O': 0, 'B-E': 1, 'I-E': 2, 'E-E': 3, 'S-E': 4}
        B = False
        last = 0
        for idx in p[0]:
            if idx == 1 and last == 1:
                # consecutive B tags start a new entity
                counter = counter + 1
                offsets[counter].append(position)
                B = True
            elif idx == 1:
                B = True
                offsets[counter].append(position)
                last = 1
            elif idx == 2 and B:
                offsets[counter].append(position)
                last = 2
            elif idx == 3 and B:
                # an E tag closes the current entity
                offsets[counter].append(position)
                last = 3
                B = False
                counter = counter + 1
            elif idx == 4:
                # an S tag is a single-token entity
                offsets[counter].append(position)
                counter = counter + 1
                last = 4
            else:
                B = False
            position = position + 1

        # open file to write the detected entities
        textid = str(doc['textid'])
        abstract = open("words-silver/" + textid + ".a1", 'w')
        for i in offsets:
            word = offsets.get(i)
            size = len(word)
            if size == 1:
                s = word[0]  # just one token; a singleton entity
                abstract.write(str(doc['tokstart'][s]) + "\t")
                abstract.write(str(doc['tokend'][s]) + "\t")
                abstract.write(str(doc['tokens'][s]) + "\n")
            elif size > 1:
                s = word[0]   # start token of the entity
                e = word[-1]  # end token of the entity
                abstract.write(str(doc['tokstart'][s]) + "\t")
                abstract.write(str(doc['tokend'][e]) + "\t")
                token = ""
                for c in word:
                    token = token + doc['tokens'][c]
                abstract.write(str(token) + "\n")
def create_model(X_train, y_train, X_test, y_test, look_back, num_features):
    # CNN Model
    cnn = Sequential()
    ks1_first = 10
    ks1_second = 2
    ks2_first = 2
    ks2_second = 10
    cnn.add(Conv2D(filters=2,
                   kernel_size=(ks1_first, ks1_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.240))
    for _ in range(1):
        cnn.add(Conv2D(filters=8,
                       kernel_size=(ks2_first, ks2_second),
                       padding='same',
                       kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout(0.434))
    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(CuDNNLSTM(3, return_sequences=True, kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.622))
    for _ in range(0):  # disabled: zero extra recurrent blocks
        rnn.add(CuDNNLSTM(32, kernel_initializer='TruncatedNormal', return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout(0.612))
    rnn.add(CuDNNLSTM(4, kernel_initializer='TruncatedNormal', return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout(0.281))

    # DNN Model
    dnn = Sequential()
    for _ in range(4):
        dnn.add(Dense(128, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.006))
    for _ in range(4):
        dnn.add(Dense(16, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.08))
    for _ in range(4):
        dnn.add(Dense(256, kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout(0.171))
    dnn.add(Dense(512, kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout(0.257))
    dnn.add(Dense(1))

    # Putting it all together
    main_input = Input(shape=(X_train.shape[1], X_train.shape[2]))  # data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2], 1))(main_input)
    model = TimeDistributed(cnn)(reshaped_to_smaller_images)  # apply the CNN to each of the 24 time steps produced by the Reshape
    model = rnn(model)  # feed the time-distributed CNN features into the RNN
    model = dnn(model)  # add dense layers

    # create the model, specify input and output
    model = Model(inputs=main_input, outputs=model)
    model.compile(loss='mse', metrics=['mape'], optimizer='adam')

    early_stopping_monitor = EarlyStopping(patience=50000)  # not used for now, which is why patience is so high

    bs = 256
    epoch_size = 14
    schedule = SGDRScheduler(min_lr=4.6e-6,   # 1e-5
                             max_lr=4.8e-2,   # 1e-2
                             steps_per_epoch=np.ceil(epoch_size / bs),
                             lr_decay=0.9,
                             cycle_length=5,  # 5
                             mult_factor=1.5)

    checkpoint1 = ModelCheckpoint("models\\timedist.val_loss.hdf5", monitor='val_loss',
                                  verbose=1, save_best_only=True, mode='min')
    checkpoint2 = ModelCheckpoint("models\\timedist.val_mape.hdf5", monitor='val_mape',
                                  verbose=1, save_best_only=True, mode='min')
    # checkpoint4/5 track training metrics but are not passed to fit() below
    checkpoint4 = ModelCheckpoint("models\\timedist.train_loss.hdf5", monitor='loss',
                                  verbose=1, save_best_only=True, mode='min')
    checkpoint5 = ModelCheckpoint("models\\timedist.train_mape.hdf5", monitor='mape',
                                  verbose=1, save_best_only=True, mode='min')

    result = model.fit(X_train, y_train,
                       batch_size=bs,
                       epochs=4000,  # should take roughly 24h
                       verbose=1,
                       validation_split=0.2,
                       callbacks=[schedule, checkpoint1, checkpoint2])
    pd.DataFrame(result.history).to_csv('models\\timedist_fit_history.csv')

    # get the lowest validation loss across the training epochs
    validation_loss = np.amin(result.history['val_loss'])
    print('validation loss of epoch:', validation_loss)
    return model
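# SGDRScheduler is referenced above but not defined in this section. A
# minimal sketch of a compatible callback, assuming it implements
# cosine-annealed warm restarts (Loshchilov & Hutter, SGDR); the actual
# implementation used here may differ in detail.
import numpy as np
from keras import backend as K
from keras.callbacks import Callback

class SGDRScheduler(Callback):
    def __init__(self, min_lr, max_lr, steps_per_epoch,
                 lr_decay=1.0, cycle_length=10, mult_factor=2.0):
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.steps_per_epoch = steps_per_epoch
        self.lr_decay = lr_decay          # shrink max_lr after each restart
        self.cycle_length = cycle_length  # epochs per cycle
        self.mult_factor = mult_factor    # lengthen each successive cycle
        self.batch_since_restart = 0

    def clr(self):
        # cosine annealing from max_lr down to min_lr within the cycle
        fraction = self.batch_since_restart / (self.steps_per_epoch * self.cycle_length)
        return self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + np.cos(fraction * np.pi))

    def on_train_begin(self, logs=None):
        K.set_value(self.model.optimizer.lr, self.max_lr)

    def on_batch_end(self, batch, logs=None):
        self.batch_since_restart += 1
        if self.batch_since_restart >= self.steps_per_epoch * self.cycle_length:
            # warm restart: reset the schedule and stretch the next cycle
            self.batch_since_restart = 0
            self.cycle_length = int(np.ceil(self.cycle_length * self.mult_factor))
            self.max_lr *= self.lr_decay
        K.set_value(self.model.optimizer.lr, self.clr())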
### TensorFlow session:
# sess = tf.Session()
# K.set_session(sess)
# init_op = tf.global_variables_initializer()
# sess.run(init_op)

###### COMPILE: ######
# Set GLOBAL VARIABLES which go into all sorts of possible functions:
max_shift_number_global = 3

# COMPILE MODEL:
# from keras.optimizers import Adam, SGD, RMSprop, Adagrad, Nadam, TFOptimizer
metrics_list = ['mse']
conv_model_time_distributed.compile(optimizer='adam',
                                    loss=custom_loss_function,
                                    metrics=metrics_list)

# Fit Model:
flag_method_of_fitting = 3

# Callbacks:
filepath_string = 'deep_speckles_weights.{epoch:02d}_{val_loss:.2f}.hdf5'
model_checkpoint_function = ModelCheckpoint(filepath_string,
                                            period=1,
                                            monitor='val_loss',
                                            verbose=0,
                                            save_best_only=False,
                                            save_weights_only=False,
                                            mode='auto')
custom_callback_function = custom_callback()
def create_model(X_train, y_train, X_test, y_test):
    # CNN Model
    cnn = Sequential()
    ks1_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks1_second = {{choice([2, 3, 4, 5, 8, 10])}}
    ks2_first = {{choice([2, 3, 4, 5, 8, 10])}}
    ks2_second = {{choice([2, 3, 4, 5, 8, 10])}}
    cnn.add(Conv2D(filters=({{choice([1, 2, 3, 4, 5, 8])}}),
                   kernel_size=(ks1_first, ks1_second),
                   padding='same',
                   kernel_initializer='TruncatedNormal'))
    cnn.add(BatchNormalization())
    cnn.add(LeakyReLU())
    cnn.add(Dropout({{uniform(0, 1)}}))
    for _ in range({{choice([0, 1, 2, 3])}}):
        cnn.add(Conv2D(filters=({{choice([4, 8])}}),
                       kernel_size=(ks2_first, ks2_second),
                       padding='same',
                       kernel_initializer='TruncatedNormal'))
        cnn.add(BatchNormalization())
        cnn.add(LeakyReLU())
        cnn.add(Dropout({{uniform(0, 1)}}))
    cnn.add(Flatten())

    # RNN Model
    rnn = Sequential()
    rnn.add(CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                      return_sequences=True,
                      kernel_initializer='TruncatedNormal'))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))
    for _ in range({{choice([0, 1, 2, 3, 4, 8])}}):
        rnn.add(CuDNNLSTM({{choice([4, 8])}},
                          kernel_initializer='TruncatedNormal',
                          return_sequences=True))
        rnn.add(BatchNormalization())
        rnn.add(LeakyReLU())
        rnn.add(Dropout({{uniform(0, 1)}}))
    rnn.add(CuDNNLSTM({{choice([1, 2, 3, 4, 5, 6, 7, 8])}},
                      kernel_initializer='TruncatedNormal',
                      return_sequences=False))
    rnn.add(BatchNormalization())
    rnn.add(LeakyReLU())
    rnn.add(Dropout({{uniform(0, 1)}}))

    # DNN Model
    dnn = Sequential()
    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                      kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))
    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                      kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))
    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                      kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))
    for _ in range({{choice([0, 1, 2, 3, 4, 8, 16, 32])}}):
        dnn.add(Dense({{choice([4, 8, 16, 32, 64, 128, 256, 512])}},
                      kernel_initializer='TruncatedNormal'))
        dnn.add(BatchNormalization())
        dnn.add(LeakyReLU())
        dnn.add(Dropout({{uniform(0, 1)}}))
    dnn.add(Dense({{choice([8, 16, 32, 64, 128, 256, 512, 1024])}},
                  kernel_initializer='TruncatedNormal'))
    dnn.add(BatchNormalization())
    dnn.add(LeakyReLU())
    dnn.add(Dropout({{uniform(0, 1)}}))
    dnn.add(Dense(1))

    # Putting it all together
    main_input = Input(shape=(X_train.shape[1], X_train.shape[2]))  # data has been reshaped to (800, 5, 120, 60, 1)
    reshaped_to_smaller_images = Reshape(target_shape=(24, 5, X_train.shape[2], 1))(main_input)
    model = TimeDistributed(cnn)(reshaped_to_smaller_images)  # apply the CNN to each of the 24 time steps produced by the Reshape
    model = rnn(model)  # feed the time-distributed CNN features into the RNN
    model = dnn(model)  # add dense layers

    # create the model, specify input and output
    model = Model(inputs=main_input, outputs=model)
    model.compile(loss='mse', metrics=['mape'], optimizer='nadam')

    early_stopping_monitor = EarlyStopping(patience=15)  # stop a trial early if val_loss stalls for 15 epochs

    bs = {{choice([32, 64, 128, 256])}}
    epoch_size = 1
    if bs == 32:
        epoch_size = 109
    elif bs == 64:
        epoch_size = 56
    elif bs == 128:
        epoch_size = 28
    elif bs == 256:
        epoch_size = 14
    # bs = 256
    # epoch_size = 14
    schedule = SGDRScheduler(min_lr={{uniform(1e-8, 1e-5)}},  # 1e-5
                             max_lr={{uniform(1e-3, 1e-1)}},  # 1e-2
                             steps_per_epoch=np.ceil(epoch_size / bs),
                             lr_decay=0.9,
                             cycle_length=5,  # 5
                             mult_factor=1.5)

    result = model.fit(X_train, y_train,
                       batch_size=bs,
                       epochs=100,
                       verbose=1,
                       validation_split=0.2,
                       callbacks=[early_stopping_monitor, schedule])

    # get the lowest validation loss across the training epochs
    hist = pd.DataFrame(result.history['val_loss']).fillna(9999)  # replace NaN with 9999
    val_loss = np.amin(hist.values)
    print('Best validation loss of epoch:', val_loss)
    K.clear_session()  # clear the TensorFlow session (free up RAM)
    return {'loss': val_loss, 'status': STATUS_OK}  # not returning the model, to save RAM
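# The {{choice(...)}} and {{uniform(...)}} placeholders above are hyperas
# template syntax, expanded at optimization time. A sketch of the driver
# call that would consume this create_model, assuming a `data` function that
# returns X_train, y_train, X_test, y_test; max_evals is illustrative.
from hyperas import optim
from hyperopt import Trials, tpe

best_run, best_model = optim.minimize(model=create_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=50,
                                      trials=Trials())
print(best_run)  # the winning hyperparameter assignment
# create_model does not return a 'model' key, so only the hyperparameter
# record is useful here, not best_model.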
def model_with_padding(self, DICT, n_char):
    # get sequences and labels separately, with BIO tags converted to numbers
    sequences, labels = self.get_seq(DICT)
    # sequences = sequences[:100]
    # labels = labels[:100]

    # X = pad_sequences(sequences, maxlen=self.w_arit_mean, padding='post', truncating='post')
    # y_pad = pad_sequences(labels, maxlen=self.w_arit_mean, padding='post', truncating='post')
    X = pad_sequences(sequences, maxlen=self.maxSeqLength, padding='post')
    y_pad = pad_sequences(labels, maxlen=self.maxSeqLength, padding='post')
    y = [to_categorical(i, num_classes=self.lab_len) for i in y_pad]

    # early stopping and best epoch
    # early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=0, mode='auto')
    # filepath = "max-seq.h5"
    # checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='max')
    # callbacks_list = [checkpoint, early_stop]

    # Set up the keras model
    input = Input(shape=(self.maxSeqLength,))
    el = Embedding(n_char + 1, 200, name="embed")(input)
    bl1 = Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm1")(el)
    bl2 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm2")(bl1)
    bl3 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm3")(bl2)
    model = TimeDistributed(Dense(self.lab_len, activation="relu"))(bl3)
    crf = CRF(self.lab_len)  # CRF layer
    out = crf(model)  # output

    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()

    # train with 32, 147, 245 or 735
    history = model.fit(X, np.array(y), batch_size=32, epochs=self.epochsN,
                        validation_split=0.0, verbose=1)

    # save the weights of all epochs
    save_load_utils.save_all_weights(model, 'max_seq_%s_32b.h5' % self.epochsN)
def model_no_padding(self, DICT, n_char):
    # convert BIO tags to numbers
    self.convert_tags()

    ''' check if bion contains 'B' and 'I'
    for i in self.train_data:
        print(i['bion'])
    '''

    # map each character of the corpus to its dictionary index
    for i in range(len(self.train_data)):
        corp = self.train_data[i]['corpus']
        corp_num = []
        for c in corp:
            corp_num.append(DICT.get(c))
        self.train_data[i]['corpus'] = corp_num

    # bucket the training sequences and their labels by length
    train_l_d = {}
    train_l_labels = {}
    for seq in self.train_data:
        # corpus
        l = len(seq['corpus'])
        if l not in train_l_d:
            train_l_d[l] = []
        train_l_d[l].append(seq['corpus'])

        # labels
        l1 = len(seq['bion'])
        if l1 not in train_l_labels:
            train_l_labels[l1] = []
        train_l_labels[l1].append(seq['bion'])

    '''
    for i in range(len(train_l_d[110])):
        print(len(train_l_d[110][i]) == len(train_l_labels[110][i]))
    print("\n\n")
    for i in range(len(train_l_d[31])):
        print(len(train_l_d[31][i]) == len(train_l_labels[31][i]))
    print("\n\n")
    for i in range(len(train_l_d[103])):
        print(len(train_l_d[103][i]) == len(train_l_labels[103][i]))
    print("\n\n")
    exit()
    '''

    sizes = list(train_l_d.keys())

    # Set up the keras model
    il = Input(shape=(None,), dtype='int32')
    el = Embedding(n_char + 1, 200, name="embed")(il)
    bl1 = Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm1")(el)
    bl2 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm2")(bl1)
    bl3 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                        merge_mode="concat", name="lstm3")(bl2)
    model = TimeDistributed(Dense(self.num_labs, activation="relu"))(bl3)
    crf = CRF(self.num_labs)  # CRF layer
    out = crf(model)  # output

    model = Model(il, out)
    model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
    model.summary()

    f_best = -1
    f_index = -1

    # OK, start actually training
    for epoch in range(self.epochsN):
        print("Epoch", epoch, "start at", datetime.now())
        # Train in batches of different sizes - randomize the order of sizes
        # Except for the first few epochs
        if epoch > 2:
            random.shuffle(sizes)
        for size in sizes:
            batch = train_l_d[size]
            labs = train_l_labels[size]

            tx = np.array([seq for seq in batch])
            y = [seq for seq in labs]
            ty = [to_categorical(i, num_classes=self.num_labs) for i in y]

            # This trains in mini-batches
            model.fit(tx, np.array(ty), verbose=0, epochs=1)
        print("Trained at", datetime.now())

        # save all epochs
        save_load_utils.save_all_weights(model, 'mini-batch-results/epoch_%s.h5' % epoch)

        # test the results
        self.test_minibatch(DICT, model)
        f = self.eval()
        if f > f_best:
            f_best = f
            f_index = epoch

    # Pick the best model, and save it with a useful name
    print("Choosing the best epoch")
    shutil.copyfile("mini-batch-results/epoch_%s.h5" % f_index,
                    "minibatch_%s.h5" % f_index)