def fit_and_evaluate(model: Model,
                     model_filename: str,
                     t_x,
                     val_x,
                     t_y,
                     val_y,
                     epochs=20,
                     batch_size=128) -> History:
    """Fit ``model`` on the training data and log its validation score.

    :param model: compiled Keras model to train.
    :param model_filename: forwarded to ``get_callbacks`` to build the
        callback list used during training.
    :param t_x: training inputs.
    :param val_x: validation inputs.
    :param t_y: training targets.
    :param val_y: validation targets.
    :param epochs: number of training epochs.
    :param batch_size: mini-batch size.
    :return: the ``History`` object returned by ``model.fit``.
    """
    results = model.fit(
        t_x,
        t_y,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=get_callbacks(model_filename),
        verbose=1,
        # Keras documents validation_data as a tuple (x_val, y_val); a list
        # can be interpreted as a multi-input `x` without targets.
        validation_data=(val_x, val_y),
    )
    logging.info("Score against validation set: %s",
                 model.evaluate(val_x, val_y))
    return results
def vgg16(train_data, train_labels, val_data, val_labels):
    """Train a 5-class classifier head on top of a frozen VGG16 base.

    :param train_data: training images; the base network is built with
        ``input_shape=(224, 224, 3)``.
    :param train_labels: training targets for ``categorical_crossentropy``.
    :param val_data: validation images.
    :param val_labels: validation targets.
    :return: the trained Keras model (previously the model was discarded
        once training finished, so callers could not use it).
    """
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)

    # Use the VGG16 bundled with Keras.
    # weights: the checkpoint used to initialise the model.
    # include_top: whether to keep the densely connected classifier on top.
    # By default that classifier corresponds to the 1000 ImageNet classes;
    # set it to False to attach a custom classifier instead.
    model_vgg16 = VGG16(weights='imagenet',
                        include_top=False,
                        input_shape=(224, 224, 3))

    # Freeze the weights of the base network.
    for layer in model_vgg16.layers:
        layer.trainable = False

    # Flatten the feature maps into a 1-D vector.
    model = layers.Flatten(name='flatten')(model_vgg16.output)

    # Fully connected layers; BatchNormalization re-normalises the previous
    # layer's activations per batch (mean close to 0, std close to 1).
    model = layers.Dense(64, activation='relu')(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(32, activation='relu')(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(16, activation='relu')(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(5, activation='softmax')(model)

    model = Model(inputs=model_vgg16.input, outputs=model, name='vgg16')
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.fit(train_data,
              train_labels,
              batch_size=32,
              epochs=50,
              validation_data=(val_data, val_labels))
    return model
def fit_model_on_fold(self, compiled_model: Model, curr_fold_indices,
                      train_sequences, test_sequences):
    """
    Fit a compiled (but not yet fitted) model on one cross-validation fold.

    :param compiled_model: compiled Keras model to train.
    :param curr_fold_indices: pair ``(train_indices, val_indices)``.
    :param train_sequences: array indexed by the fold indices to obtain
        the model inputs.
    :param test_sequences: not used by this method.
    :return: ``(val_roc_auc_score, val_df)`` where ``val_df`` holds the
        validation predictions, indexed by the validation indices and with
        ``self.target_cols`` as columns.
    """
    fit_idx, holdout_idx = curr_fold_indices

    x_train = train_sequences[fit_idx]
    y_train = self.raw_train_df[self.target_cols].iloc[fit_idx].values
    x_val = train_sequences[holdout_idx]
    y_val = self.raw_train_df[self.target_cols].iloc[holdout_idx].values

    # Training and prediction both run inside one dedicated session.
    with tf.Session() as session:
        K.set_session(session)
        for init_op in (tf.global_variables_initializer(),
                        tf.tables_initializer()):
            session.run(init_op)

        compiled_model.fit(x_train,
                           y_train,
                           batch_size=self.batch_size,
                           epochs=self.epochs,
                           validation_data=(x_val, y_val))
        val_pred = compiled_model.predict(x_val,
                                          batch_size=self.batch_size,
                                          verbose=0)
        val_roc_auc_score = roc_auc_score(y_val, val_pred)

    print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

    val_df = pd.DataFrame(val_pred, index=holdout_idx)
    val_df.columns = self.target_cols
    return val_roc_auc_score, val_df
def mlp(x, y, activation="relu", nodes_per_layer=100, num_layers=5,
        epochs=1000):
    """Fit a fully connected regressor on ``(x, y)`` using full-batch Adam.

    The network takes a single scalar feature, always has at least one
    hidden layer, and ends in one linear output unit. It is trained with
    mean squared error and a batch size equal to ``x.shape[0]``.

    :return: ``(model, fitted)`` where ``fitted`` is the model's prediction
        on the training inputs ``x``.
    """
    full_batch = x.shape[0]

    inputs = Input(shape=(1, ))
    hidden = inputs
    # At least one hidden layer is built, even when num_layers < 1.
    for _ in range(max(num_layers, 1)):
        hidden = Dense(nodes_per_layer, activation=activation)(hidden)
    predictions = Dense(1, activation='linear')(hidden)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x, y, batch_size=full_batch, epochs=epochs, verbose=0)

    fitted = model.predict(x, batch_size=full_batch, verbose=0)
    return model, fitted
def model1(x_train, y_train, x_test, y_test):
    """Train a single-convolution scoring network, plot and save it.

    The network applies one ``Conv2D`` with a (1, 3) kernel to inputs of
    shape (100, 3, 1), flattens, applies dropout and projects to a single
    bias-free output. It is trained with the project loss ``myLoss``; the
    train/validation loss curves are then plotted and the model is saved to
    a file whose name embeds the final losses rounded to 4 decimals.

    :param x_train: training inputs.
    :param y_train: training targets.
    :param x_test: inputs used as validation data.
    :param y_test: targets used as validation data.
    """
    # (head, relation, tail) triples.
    # NOTE(review): the original comment said "200-dimensional", but the
    # declared input shape is (100, 3, 1) -- confirm which is intended.
    inputs = keras.Input(shape=(100, 3, 1, ))
    cnn1 = Conv2D(filters=50,
                  kernel_initializer=keras.initializers.TruncatedNormal(
                      mean=0.0, stddev=0.05, seed=None),
                  kernel_size=(1, 3),
                  padding='valid',
                  strides=1,
                  activation='relu')(inputs)
    flat = Flatten()(cnn1)
    drop = Dropout(0.2)(flat)
    out1 = Dense(units=1, use_bias=False)(drop)

    # Named `net` so the local no longer shadows this function's own name.
    net = Model(inputs, out1)
    net.compile(loss=myLoss, optimizer=Adam(6e-6))
    net.summary()

    history = net.fit(x_train,
                      y_train,
                      batch_size=30,
                      epochs=200,
                      validation_data=(x_test, y_test))

    # Plot the training history.
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='valid')
    pyplot.legend()
    pyplot.show()

    net.save('../data/modelFile/originalConvKB_onlyType13_%s_%s.h5' %
             (round(history.history['loss'][-1], 4),
              round(history.history['val_loss'][-1], 4)))
def AE_train(encoding_dim, x_train, epochs_num):
    """Train a dense autoencoder on 29-feature rows and save its encoder.

    The encoder narrows 29 -> 24 -> 16 -> 8 -> ``encoding_dim`` and the
    decoder mirrors it back to 29 units with a ``tanh`` output. Training
    uses Adam with MSE loss and a step-decay learning-rate schedule. The
    loss curve is plotted and the encoder is written to
    ``encoder_model.h5``.

    :param encoding_dim: width of the bottleneck layer.
    :param x_train: training data, used as both input and target.
    :param epochs_num: number of training epochs.
    """
    # Encoder.
    input_data = Input(shape=[29])
    encoded = input_data
    for width in (24, 16, 8):
        encoded = Dense(width, activation='relu')(encoded)
    encoder_output = Dense(encoding_dim)(encoded)

    # Decoder.
    decoded = encoder_output
    for width in (8, 16, 24):
        decoded = Dense(width, activation='relu')(decoded)
    decoded = Dense(29, activation='tanh')(decoded)

    autoencoder = Model(inputs=input_data, outputs=decoded)
    encoder = Model(inputs=input_data, outputs=encoder_output)
    autoencoder.compile(optimizer='adam', loss='mse')

    def step_decay(epoch):
        # Halve the learning rate every 10 epochs, starting from 0.01.
        initial_lrate = 0.01
        drop = 0.5
        epochs_drop = 10.0
        exponent = math.floor((1 + epoch) / epochs_drop)
        return initial_lrate * math.pow(drop, exponent)

    history = autoencoder.fit(x_train,
                              x_train,
                              epochs=epochs_num,
                              batch_size=256,
                              callbacks=[LearningRateScheduler(step_decay)])

    plt.title('Loss')
    plt.plot(range(1, epochs_num + 1),
             history.history['loss'],
             'blue',
             label='loss')
    plt.legend()
    plt.show()

    encoder.save("encoder_model.h5")
def forward(self, X_train, X_test, y_train, y_test):
    """Build, train and return the conditional VAE; save its decoder.

    The encoder and decoder come from ``self.encode`` / ``self.decode``.
    They are joined into one model that maps ``[image, label]`` to a
    reconstruction, trained with ``self.vae_loss`` and logged to
    TensorBoard under ``self.logs_dir``. The decoder is saved to
    ``self.args.save_model + '.h5'``.

    :param X_train: training images, used as both input and target.
    :param X_test: validation images.
    :param y_train: training labels; ``y_train.shape[1]`` sets the width of
        the label input.
    :param y_test: validation labels.
    :return: ``(cvae, cvae_hist)``, the trained model and its fit history.
    """
    y_shape = y_train.shape[1]

    X = Input(shape=(self.image_size, self.image_size, 1), name='input')
    label = Input(shape=(y_shape,), name='label')

    encoder, shape = self.encode(X, label)
    encoder.summary()

    z_inputs = Input(shape=(self.n_dim,), name='latent_input')
    decoder = self.decode(z_inputs, label, shape)
    decoder.summary()

    # The third encoder output is what gets fed to the decoder.
    z_output = encoder([X, label])[2]
    outputs = decoder([z_output, label])

    cvae = Model([X, label], outputs, name='cvae')
    cvae.compile(optimizer=Adam(lr=self.learning_rate,
                                decay=self.decay_rate,
                                epsilon=1e-08),
                 loss=self.vae_loss)
    cvae.summary()

    tensorboard = TensorBoard(log_dir="{}/{}".format(self.logs_dir,time()))
    cvae_hist = cvae.fit([X_train, y_train],
                         X_train,
                         verbose=1,
                         batch_size=self.batch_size,
                         epochs=self.epochs,
                         validation_data=([X_test, y_test], X_test),
                         callbacks=[tensorboard],
                         shuffle=True)

    decoder.save(self.args.save_model + '.h5')
    return cvae, cvae_hist
def fit(self,hidden_nodes,activation="sigmoid"):
    """Train a one-hidden-layer autoencoder and record timing and MSE.

    Builds ``self.encoder`` (input -> hidden code) and ``self.decoder``
    (hidden code -> reconstruction, reusing the trained output layer),
    trains the autoencoder on ``self.X_train`` with ``self.X_val`` as
    validation data, then appends the elapsed time to ``self.timeLst`` and
    the per-epoch training MSE to ``self.train_mse``.

    :param hidden_nodes: number of units in the hidden (code) layer.
    :param activation: activation of the hidden layer.
    """
    start = time.time()

    def _layer_options():
        # Fresh initializer/regularizer objects for each Dense layer.
        return dict(
            kernel_initializer=keras.initializers.RandomNormal(mean=0.0,
                                                               stddev=0.05),
            bias_initializer=keras.initializers.Zeros(),
            kernel_regularizer=keras.regularizers.l1(self.Lambda),
        )

    # Input placeholder, encoded representation and lossy reconstruction.
    input_img = Input(shape=(self.D,))
    encoded = Dense(hidden_nodes, activation=activation,
                    **_layer_options())(input_img)
    decoded = Dense(self.D, activation='sigmoid',
                    **_layer_options())(encoded)

    # Encoder maps an input to its code; autoencoder maps an input to its
    # reconstruction.
    self.encoder = Model(inputs=input_img, outputs=encoded)
    autoencoder = Model(inputs=input_img, outputs=decoded)

    # Stand-alone decoder that reuses the autoencoder's last layer.
    encoded_input = Input(shape=(hidden_nodes,))
    decoder_layer = autoencoder.layers[-1]
    self.decoder = Model(inputs=encoded_input,
                         outputs=decoder_layer(encoded_input))

    optimizer = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay = 0,
                                     nesterov=False)
    autoencoder.compile(optimizer=optimizer,
                        loss="mse",
                        metrics=["accuracy","mean_squared_error"])

    info = autoencoder.fit(self.X_train,
                           self.X_train,  # the input is also the target
                           epochs=100,
                           batch_size=50,
                           shuffle=True,
                           validation_data=(self.X_val, self.X_val))

    self.timeLst.append(time.time()-start)
    self.train_mse.append(info.history["mean_squared_error"])
def main():
    """Train, save, reload and inspect small MNIST CNNs built with Keras.

    The flow is controlled by names defined outside this function:

    * ``using_seq_model``: build, train, evaluate and save the network
      with the Sequential API.
    * ``using_fun_model``: same network with the functional API.
    * ``reload_model``: load the model from ``path_model``, plot
      predictions and misclassified examples, then visualise the
      convolution weights and the second conv layer's output.
    * ``reading_seq_model`` / ``reading_fun_model``: which layer indexing
      to use on the reloaded model (the two APIs number layers
      differently).

    It also relies on ``img_size_flat``, ``img_shape_full``,
    ``num_classes`` and the ``plot_*`` helpers from the enclosing module.
    """
    from tensorflow.examples.tutorials.mnist import input_data
    data = input_data.read_data_sets("data/MNIST/", one_hot=True)

    print("Size of:")
    print("- Training-set:\t\t{}".format(len(data.train.labels)))
    # Report the number of test examples, not the label array itself.
    print("- Test-set:\t\t{}".format(len(data.test.labels)))

    # Integer class of every test example, derived from the one-hot labels.
    data.test.cls = np.argmax(data.test.labels, axis=1)

    # Plot the first test images with their true classes.
    images = data.test.images[0:9]
    cls_true = data.test.cls[0:9]
    plot_images(images=images, cls_true=cls_true)

    if using_seq_model:
        model = Sequential()

        # The input-shape must be a tuple containing the image-size.
        model.add(InputLayer(input_shape=(img_size_flat, )))

        # The input is a flattened array with 784 elements, but the
        # convolutional layers expect images with shape (28, 28, 1).
        model.add(Reshape(img_shape_full))

        # First convolutional layer with ReLU-activation and max-pooling.
        model.add(
            Conv2D(kernel_size=5,
                   strides=1,
                   filters=16,
                   padding='same',
                   activation='relu',
                   name='layer_conv1'))
        model.add(MaxPooling2D(pool_size=2, strides=2))

        # Second convolutional layer with ReLU-activation and max-pooling.
        model.add(
            Conv2D(kernel_size=5,
                   strides=1,
                   filters=36,
                   padding='same',
                   activation='relu',
                   name='layer_conv2'))
        model.add(MaxPooling2D(pool_size=2, strides=2))

        # Flatten the 4-rank output of the convolutional layers to 2-rank
        # so it can be fed to a fully-connected / dense layer.
        model.add(Flatten())

        # First fully-connected / dense layer with ReLU-activation.
        model.add(Dense(128, activation='relu'))

        # Last dense layer with softmax-activation for classification.
        model.add(Dense(num_classes, activation='softmax'))

        from tensorflow.keras.optimizers import Adam
        optimizer = Adam(lr=1e-3)
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        model.fit(x=data.train.images,
                  y=data.train.labels,
                  epochs=1,
                  batch_size=128)

        result = model.evaluate(x=data.test.images, y=data.test.labels)
        print('')
        for name, value in zip(model.metrics_names, result):
            print(name, value)
        print("{0}: {1:.2%}".format(model.metrics_names[1], result[1]))

        # `save_model` requires h5py
        model.save(path_model)
        del model

    if using_fun_model:
        # The input-shape must be a tuple containing the image-size.
        inputs = Input(shape=(img_size_flat, ))

        # Variable used for building the Neural Network.
        net = inputs

        # The input is an image as a flattened array with 784 elements,
        # but the convolutional layers expect shape (28, 28, 1).
        net = Reshape(img_shape_full)(net)

        # First convolutional layer with ReLU-activation and max-pooling.
        net = Conv2D(kernel_size=5,
                     strides=1,
                     filters=16,
                     padding='same',
                     activation='relu',
                     name='layer_conv1')(net)
        net = MaxPooling2D(pool_size=2, strides=2)(net)

        # Second convolutional layer with ReLU-activation and max-pooling.
        net = Conv2D(kernel_size=5,
                     strides=1,
                     filters=36,
                     padding='same',
                     activation='relu',
                     name='layer_conv2')(net)
        net = MaxPooling2D(pool_size=2, strides=2)(net)

        # Flatten the output of the conv-layer from 4-dim to 2-dim.
        net = Flatten()(net)

        # First fully-connected / dense layer with ReLU-activation.
        net = Dense(128, activation='relu')(net)

        # Last dense layer with softmax-activation for classification.
        net = Dense(num_classes, activation='softmax')(net)

        # Output of the Neural Network.
        outputs = net

        from tensorflow.python.keras.models import Model
        model2 = Model(inputs=inputs, outputs=outputs)
        model2.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
        model2.fit(x=data.train.images,
                   y=data.train.labels,
                   epochs=1,
                   batch_size=128)

        result = model2.evaluate(x=data.test.images, y=data.test.labels)
        print('')
        for name, value in zip(model2.metrics_names, result):
            print(name, value)
        print("{0}: {1:.2%}".format(model2.metrics_names[1], result[1]))

        # `save_model` requires h5py
        model2.save(path_model)

    if reload_model:
        from tensorflow.python.keras.models import load_model
        model3 = load_model(path_model)

        images = data.test.images[0:9]
        # Use integer classes here, as in the first plot_images call above;
        # the one-hot label rows are not class numbers.
        cls_true = data.test.cls[0:9]
        y_pred = model3.predict(x=images)
        cls_pred = np.argmax(y_pred, axis=1)
        plot_images(images=images, cls_true=cls_true, cls_pred=cls_pred)

        y_pred = model3.predict(x=data.test.images)
        cls_pred = np.argmax(y_pred, axis=1)
        cls_true = data.test.cls
        correct = (cls_true == cls_pred)
        plot_example_errors(data, cls_pred=cls_pred, correct=correct)

        model3.summary()

        # Attention: the functional and sequential models number their
        # layers differently, so the conv layers sit at different indices.
        if reading_seq_model:
            layer_input = model3.layers[0]
            layer_conv1 = model3.layers[1]
            print(layer_conv1)
            layer_conv2 = model3.layers[3]
        elif reading_fun_model:
            layer_input = model3.layers[0]
            layer_conv1 = model3.layers[2]
            print(layer_conv1)
            layer_conv2 = model3.layers[4]
        else:
            # Without this the code below fails on an unbound local.
            raise ValueError(
                "either reading_seq_model or reading_fun_model must be set "
                "to inspect the reloaded model")

        weights_conv1 = layer_conv1.get_weights()[0]
        print(weights_conv1.shape)
        plot_conv_weights(weights=weights_conv1, input_channel=0)

        weights_conv2 = layer_conv2.get_weights()[0]
        plot_conv_weights(weights=weights_conv2, input_channel=0)

        image1 = data.test.images[0]
        plot_image(image1)

        # Sub-model exposing the second conv layer's output for one image.
        from tensorflow.keras.models import Model
        output_conv2 = Model(inputs=layer_input.input,
                             outputs=layer_conv2.output)
        layer_output2 = output_conv2.predict(np.array([image1]))
        plot_conv_output(values=layer_output2)
class ConvMnist:
    """Small convolutional MNIST digit classifier working on 3-channel images."""

    def __init__(self, filename=None):
        '''
        Load a trained model file (optional).
        '''
        self.model = None
        if filename is None:
            return
        print('load model: ', filename)
        self.model = load_model(filename)
        self.model.summary()

    @staticmethod
    def _gray_to_scaled_rgb(gray):
        '''
        Copy a (N, H, W) grayscale stack into three identical channels
        as float64 and scale the values by 1/255.
        '''
        rgb = np.stack([gray, gray, gray], axis=-1).astype(np.float64)
        return rgb / 255.

    def train(self):
        '''
        Train the model on MNIST and print its score on the test set.
        '''
        # Load the MNIST training and test data.
        (x_train_org, y_train), (x_test_org, y_test) = mnist.load_data()

        # Pre-processing
        # X: replicate the grayscale image into 3 channels (N x 28 x 28 x 3)
        #    and normalise the values to 0-1.0
        # Y: one-hot encode (N x 1 -> N x 10)
        x_train = self._gray_to_scaled_rgb(x_train_org)
        x_test = self._gray_to_scaled_rgb(x_test_org)
        y_train = to_categorical(y_train, 10)
        y_test = to_categorical(y_test, 10)

        # Build the convolution model.
        image_in = Input(shape=(28, 28, 3))
        conv1 = Conv2D(filters=8,
                       kernel_size=(3, 3),
                       strides=(1, 1),
                       padding='same',
                       activation='relu')(image_in)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = Conv2D(filters=4,
                       kernel_size=(3, 3),
                       strides=(1, 1),
                       padding='same',
                       activation='relu')(pool1)
        dropout1 = Dropout(0.2)(conv2)
        flatten1 = Flatten()(dropout1)
        class_probs = Dense(units=10, activation='softmax')(flatten1)

        self.model = Model(inputs=[image_in], outputs=[class_probs])
        self.model.summary()
        self.model.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        # Train the convolution model (TensorBoard callback not enabled).
        self.model.fit(
            x_train,
            y_train,
            batch_size=128,
            epochs=10,
            validation_split=0.2,
        )

        # Evaluate the trained model on the test data.
        score = self.model.evaluate(x_test, y_test, verbose=0)
        print("test data score: ", score)

    def save_trained_model(self, filename):
        '''
        Save the trained model to a file (h5).
        '''
        self.model.save(filename)

    def predict(self, input_image):
        '''
        Classify one colour input image (28x28x3 ndarray) as a digit (0-9).
        ret: result, score
             (-1, -1) when the image does not have shape (28, 28, 3)
        '''
        if input_image.shape != (28, 28, 3):
            return -1, -1

        # Add the batch axis and apply the same 0-1 scaling as in training.
        batch = input_image[np.newaxis, ...] / 255.
        scores = self.model.predict(batch)[0]
        best = np.argmax(scores)
        return best, scores[best]
# One-hot encode the labels and scale the pixel values to [0, 1].
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
X_train = X_train.astype("float") / 255.0
X_test = X_test.astype("float") / 255.0

# Convolutional base: VGG16 pre-trained on ImageNet, without its classifier.
model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(image_size, image_size, 3))

# Classifier head stacked on top of the base's output.
top_model = Sequential([
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
])
model = Model(inputs=model.input, outputs=top_model(model.output))

# Freeze the first 15 layers; the remaining ones are fine-tuned.
for frozen_layer in model.layers[:15]:
    frozen_layer.trainable = False

opt = Adam(lr=0.0001)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=32, epochs=17)

score = model.evaluate(X_test, y_test, batch_size=32)
model.save('./vgg16_transfer.h5')
class BuildModel():
    """Stacked-RNN forecaster trained on the last column of a TSV file.

    Windows of ``TIMESTEP`` values are used as input and the following
    ``TIMESTEP + 1`` values as target. ``build_model`` either continues
    training a previously saved model or builds and trains a new one.
    """

    def __init__(self):
        """Set defaults and read the raw data file (tab-separated)."""
        self.TIMESTEP = 20
        self.DATA_DIM = 1
        self.model = None
        self.input = None
        self.output = None
        self.x_train = None
        self.y_train = None
        self.history = None
        self.filename = r"../../../data/all_part"
        self.original_data = pd.read_csv(self.filename, sep="\t")

    def __get_data(self):
        """Build min-max scaled sliding windows and the model input tensor."""
        step = self.TIMESTEP
        data = self.original_data.iloc[:, -1].values

        x = []
        y = []
        for i in range(len(data) - step * 2 - 1):
            x.append(data[i:i + step])
            # The target is the next step + 1 values after the input window.
            y.append(data[i + step:i + step * 2 + 1])

        # Inputs and targets are each scaled by their own min / max.
        x = np.asarray(x, dtype=np.float32)
        x = (x - x.min()) / (x.max() - x.min())
        y = np.asarray(y, dtype=np.float32)
        y = (y - y.min()) / (y.max() - y.min())

        self.x_train = x.reshape([x.shape[0], x.shape[1], 1])
        self.y_train = y.reshape([y.shape[0], step + 1])
        self.input = Input(shape=(step, self.DATA_DIM), name="input_tensor")

    def __built_multi_cell_Layer(self):
        """
        Stack four MinimalRNNCell layers (batch-normalised on axis 1 in
        between) followed by a ReLU Dense layer of TIMESTEP + 1 units.

        :return: None; the output tensor is stored in ``self.output``.
        """
        o1 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(self.input)
        o1 = BatchNormalization(1)(o1)
        o2 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(o1)
        o2 = BatchNormalization(1)(o2)
        o3 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(o2)
        o3 = BatchNormalization(1)(o3)
        o4 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=False)(o3)
        o5 = Dense(self.TIMESTEP + 1, activation="relu")(o4)
        self.output = o5

    def build_model(self, if_load_old_model=False):
        """Prepare data and graph, then train and save a model.

        Nothing is trained when ``self.model`` is already set. Errors from
        loading, training or saving propagate unchanged (they used to be
        replaced by a bare ``BaseException`` that hid the cause).

        :param if_load_old_model: when true, load
            ``./model_tensorboard_3.h5`` and continue training it;
            otherwise build and train a new model with a 20% validation
            split.
        """
        if self.input is None:
            self.__get_data()
        if self.output is None:
            self.__built_multi_cell_Layer()
        if self.model is not None:
            return

        if if_load_old_model:
            self.model = load_model(
                "./model_tensorboard_3.h5",
                custom_objects={'MinimalRNNCell': MinimalRNNCell})
            print("train prepare model.......")
            history = self.model.fit(self.x_train,
                                     self.y_train,
                                     batch_size=20,
                                     epochs=1000,
                                     verbose=1,
                                     callbacks=[TensorBoard('./log3')])
            self.history = history.history
            self.model.save("./model_tensorboard_4.h5")
            self._write_val_loss_to_csv('./val_loss_4.csv',
                                        'mean_absolute_error')
        else:
            print("train new model .......")
            print(self.input.shape, self.output.shape)
            self.model = Model(inputs=self.input, outputs=self.output)
            self.model.compile("adam", loss="mae", metrics=["mae"])
            # summary() prints by itself and returns None.
            self.model.summary()
            print(self.x_train.shape, self.y_train.shape)
            history = self.model.fit(self.x_train,
                                     self.y_train,
                                     batch_size=50,
                                     epochs=1000,
                                     verbose=1,
                                     validation_split=0.2,
                                     callbacks=[TensorBoard()])
            self.history = history.history
            self.model.save("./model_tensorboard_1.h5")
            # Uses the default key 'val_loss', which the validation split
            # above records in the history.
            self._write_val_loss_to_csv('./val_loss_2.csv')

    def _write_val_loss_to_csv(self, file_name, keys='val_loss'):
        """Append one recorded history series to a CSV file.

        :param file_name: path of the CSV file, opened in append mode.
        :param keys: key of the series in ``self.history``; defaults to
            ``'val_loss'``.
        """
        series = np.asarray(self.history[keys], dtype=np.float32)
        pd.DataFrame(series).to_csv(file_name, mode='a', header=False)
return_state=True) dec_output, _ = dec_gru(dec_one_hot, initial_state=enc_state) else: dec_gru = GRU(units=dec_size, return_sequences=True, return_state=True) dec_output, _ = dec_gru(dec_one_hot, initial_state=enc_state) dec_dense = Dense(spa_vocab_size, activation='softmax') pred = dec_dense(dec_output) # compile and fit model = Model(inputs=[enc_inp, dec_inp], outputs=pred) model.compile(optimizer=Adam(0.005), loss='categorical_crossentropy', metrics=['accuracy']) model.fit([enc_sequence_inps, dec_sequence_inps], dec_sequence_outputs, batch_size=128, epochs=100) # save model model.save('s2s.hd5') ################################################################################ ################################################################################ ################################################################################ # retrieve model model = load_model('s2s.hd5') encoder_inputs = model.input[0] # input_1 _, encoder_states = model.layers[4].output # gru_1 encoder_model = Model(encoder_inputs, encoder_states)
class JointBertModel1(NLUModel):
    """Intent classifier: a (Al)BERT hub layer followed by a softmax head.

    NOTE(review): ``prepare_valid_positions`` and ``predict_slots_intent``
    look like leftovers from a joint slot/intent model. This class never
    sets ``self.slots_num`` and its network has a single (intent) output,
    so they are kept unchanged but probably cannot work here -- confirm
    before relying on them.
    """

    def __init__(self,
                 intents_num,
                 bert_hub_path,
                 num_bert_fine_tune_layers=10,
                 is_bert=True):
        """Store the hyper-parameters, then build and compile the model.

        :param intents_num: number of intent classes (softmax width).
        :param bert_hub_path: hub handle/path given to ``hub.KerasLayer``.
        :param num_bert_fine_tune_layers: stored and saved with the
            parameters; not used when building the network.
        :param is_bert: selects the hub layer name, ``'BertLayer'`` or
            ``'AlbertLayer'``.
        """
        self.intents_num = intents_num
        self.bert_hub_path = bert_hub_path
        self.num_bert_fine_tune_layers = num_bert_fine_tune_layers
        self.is_bert = is_bert

        self.model_params = {
            'intents_num': intents_num,
            'bert_hub_path': bert_hub_path,
            'num_bert_fine_tune_layers': num_bert_fine_tune_layers,
            'is_bert': is_bert
        }

        self.build_model()
        self.compile_model()

    def compile_model(self):
        """Compile the model with Adam and sparse categorical cross-entropy."""
        # Instead of using `categorical_crossentropy`, we use
        # `sparse_categorical_crossentropy`, which expects integer targets.
        optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
        losses = {
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        loss_weights = {'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer,
                           loss=losses,
                           loss_weights=loss_weights,
                           metrics=metrics)
        self.model.summary()

    def build_model(self):
        """Create ``self.model``: three int32 inputs -> hub layer -> intents."""
        in_id = Input(shape=(None, ), name='input_word_ids', dtype=tf.int32)
        in_mask = Input(shape=(None, ), name='input_mask', dtype=tf.int32)
        in_segment = Input(shape=(None, ),
                           name='input_type_ids',
                           dtype=tf.int32)
        bert_inputs = [in_id, in_mask, in_segment]
        inputs = bert_inputs

        name = 'BertLayer' if self.is_bert else 'AlbertLayer'
        # Only the pooled output feeds the intent head; the sequence
        # output is not used.
        bert_pooled_output, bert_sequence_output = hub.KerasLayer(
            self.bert_hub_path, trainable=True, name=name)(bert_inputs)

        intents_fc = Dense(self.intents_num,
                           activation='softmax',
                           name='intent_classifier')(bert_pooled_output)

        self.model = Model(inputs=inputs, outputs=intents_fc)

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        Train the model.

        X: batch of [input_ids, input_mask, segment_ids, valid_positions];
           only the first three entries are passed to the model.
        Returns the Keras ``History`` object of the training run.
        """
        X = (X[0], X[1], X[2])
        if validation_data is not None:
            print("INSIDE")
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2]), Y_val)

        history = self.model.fit(X,
                                 Y,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size)
        return history

    def prepare_valid_positions(self, in_valid_positions):
        """Tile the valid-position mask along a new last axis.

        NOTE(review): reads ``self.slots_num``, which this class does not
        set in the visible code.
        """
        in_valid_positions = np.expand_dims(in_valid_positions, axis=2)
        in_valid_positions = np.tile(in_valid_positions,
                                     (1, 1, self.slots_num))
        return in_valid_positions

    def predict_slots_intent(self,
                             x,
                             slots_vectorizer,
                             intent_vectorizer,
                             remove_start_end=True,
                             include_intent_prob=False):
        """Decode slot tags and intents from the predictions for ``x``.

        NOTE(review): expects ``self.predict`` to return a pair
        ``(y_slots, y_intent)``; the network built by this class has a
        single output -- confirm this method is still usable.

        :return: ``(slots, intents)``; with ``include_intent_prob`` each
            intent entry is a ``(label, probability)`` pair, the
            probability rounded to 4 decimals.
        """
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions))
        y_slots, y_intent = self.predict(x)
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            # Drop the first and last tag of every sequence.
            slots = [tags[1:-1] for tags in slots]

        if not include_intent_prob:
            intents = np.array([
                intent_vectorizer.inverse_transform([np.argmax(i)])[0]
                for i in y_intent
            ])
        else:
            intents = np.array([
                (intent_vectorizer.inverse_transform([np.argmax(i)])[0],
                 round(float(np.max(i)), 4)) for i in y_intent
            ])
        return slots, intents

    def save(self, model_path):
        """Write ``params.json`` and the model file into ``model_path``."""
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file)
        self.model.save(os.path.join(model_path, 'joint_bert_model.h5'))

    @staticmethod
    def load(load_folder_path):
        """Rebuild a model from a folder written by ``save``.

        Declared static so it works both as ``JointBertModel1.load(path)``
        and on an instance. It builds a ``JointBertModel1``, whose
        constructor matches the saved parameters, then loads the weights.
        """
        with open(os.path.join(load_folder_path, 'params.json'),
                  'r') as json_file:
            model_params = json.load(json_file)

        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']

        new_model = JointBertModel1(intents_num, bert_hub_path,
                                    num_bert_fine_tune_layers, is_bert)
        new_model.model.load_weights(
            os.path.join(load_folder_path, 'joint_bert_model.h5'))
        return new_model
class Pmodel:
    """Regression model that takes the feature vector plus a position index.

    Every feature row is expanded into ``numel - 1`` rows, one per position
    index ``1 .. numel - 1`` appended as the last column, and the network
    is trained against the flattened label matrix.
    """

    def __init__(self, fl, mode, hparams):
        """
        Builds and compiles the Keras model selected by ``mode``.

        :param fl: feature/label container. Attributes read here:
            ``features_c_dim``, ``labels`` (its second dimension sets
            ``numel``), ``normalise_labels`` and ``labels_scaler``.
        :param mode: one of 'ann', 'ann2', 'ann3', 'conv1', 'conv2', 'lstm'.
        :param hparams: Dict of hyperparameters. Keys read here depend on the
            mode: 'reg_l1', 'reg_l2', 'activation', 'pre', 'filters'.
        :raises ValueError: if ``mode`` is not one of the supported values.
        """
        # Assuming that each task has only 1 dimensional output
        self.features_dim = fl.features_c_dim + 1  # 1 for the positional argument
        self.labels_dim = 1
        self.numel = fl.labels.shape[1] + 1
        self.hparams = hparams
        self.mode = mode
        self.normalise_labels = fl.normalise_labels
        self.labels_scaler = fl.labels_scaler
        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        def dense(units, name, activation=None):
            # Regularised Dense layer; activation=None means "use the
            # configured hidden activation". A fresh regulariser per layer.
            if activation is None:
                activation = hparams['activation']
            return Dense(units=units,
                         activation=activation,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                         name=name)

        def conv(n_filters):
            # Width-3, stride-1, same-padded ReLU convolution.
            return Conv1D(filters=n_filters,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          activation='relu')

        # Selection of model
        if mode == 'ann':
            model = ann(self.features_dim, self.labels_dim, self.hparams)
            x = model(features_in)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'ann2':
            model_1 = ann(self.features_dim, 50, self.hparams)
            x = model_1(features_in)
            model_end = ann(50, 50, self.hparams)
            end = model_end(x)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            model_2 = ann(50, self.labels_dim - 1, self.hparams)
            x = model_2(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'ann3':
            x = dense(hparams['pre'], 'Pre_' + str(0))(features_in)
            x = dense(hparams['pre'], 'Pre_' + str(1))(x)
            x = dense(hparams['pre'], 'Pre_' + str(2))(x)
            x = dense(1, 'Pre_set_19', activation='linear')(x)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv1':
            x = dense(hparams['pre'], 'shared' + str(1))(features_in)
            x = dense(hparams['pre'], 'Pre_' + str(1))(x)
            x = dense(19, 'Pre_set_19')(x)
            x = Reshape(target_shape=(19, 1))(x)
            x = conv(hparams['filters'])(x)
            x = conv(hparams['filters'] * 2)(x)
            x = conv(hparams['filters'] * 4)(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(19, ))(x)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv2':
            x = dense(10, 'Shared_e_' + str(1))(features_in)
            x = dense(10, 'Shared_e_' + str(2))(x)
            end = dense(10, 'Dense_e_' + str(1))(x)
            end = dense(10, 'Dense_e_' + str(2))(end)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            x = dense(80, 'Pre_' + str(1))(x)
            x = Reshape(target_shape=(80, 1))(x)
            x = conv(8)(x)
            x = MaxPooling1D(pool_size=2)(x)
            x = conv(16)(x)
            x = MaxPooling1D(pool_size=2)(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(20, ))(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'lstm':
            x = dense(20, 'Shared_e_' + str(1))(features_in)
            x = dense(20, 'Shared_e_' + str(2))(x)
            end = dense(20, 'Dense_e_' + str(1))(x)
            end = dense(20, 'Dense_e_' + str(2))(end)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            x = dense(20, 'Pre_' + str(1))(x)
            x = dense(20, 'Pre_' + str(2))(x)
            x = RepeatVector(n=20)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = TimeDistributed(Dense(1))(x)
            x = Reshape(target_shape=(20, ))(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        else:
            # Fail with a clear message instead of an AttributeError on
            # self.model further down.
            raise ValueError('Unknown Pmodel mode: {!r}'.format(mode))

        optimizer = Adam(clipnorm=1)
        self.model.compile(optimizer=optimizer, loss='mean_squared_error')

    def _append_positions(self, features):
        """Repeat each row once per position index 1..numel-1.

        The position index is appended as the last column, rows of the same
        original sample staying adjacent and ordered by index.
        """
        features = np.asarray(features)
        positions = np.arange(1, self.numel)
        repeated = np.repeat(features, len(positions), axis=0)
        index_column = np.tile(positions, len(features))
        return np.column_stack((repeated, index_column))

    @staticmethod
    def _plot_history(history, plot_name):
        """Save a semilog plot of training/validation loss to ``plot_name``."""
        plt.semilogy(history.history['loss'], label='train')
        plt.semilogy(history.history['val_loss'], label='test')
        # Invisible artists: they only add the final values to the legend.
        plt.plot([], [],
                 ' ',
                 label='Final train: {:.3e}'.format(
                     history.history['loss'][-1]))
        plt.plot([], [],
                 ' ',
                 label='Final val: {:.3e}'.format(
                     history.history['val_loss'][-1]))
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(loc='upper right')
        plt.savefig(plot_name, bbox_inches='tight')
        plt.close()

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        """Fit the model, optionally plotting the loss and saving the model.

        :param fl: training container (``features_c_norm`` plus ``labels`` or
            ``labels_norm``, depending on ``self.normalise_labels``).
        :param i_fl: validation container with the same attributes; only read
            when ``plot_name`` is given.
        :param save_name: file name used when ``save_mode`` is true.
        :param save_dir: prefix concatenated with ``save_name``.
        :param save_mode: save the model after training when true.
        :param plot_name: when truthy, train with validation data and save
            the loss plot to this path.
        :return: ``(self.model, history)``.
        """
        labels_attr = 'labels_norm' if self.normalise_labels else 'labels'
        fit_kwargs = dict(epochs=self.hparams['epochs'],
                          batch_size=self.hparams['batch_size'],
                          verbose=self.hparams['verbose'])

        training_features = self._append_positions(fl.features_c_norm)
        # One scalar target per (sample, position) row.
        training_labels = getattr(fl, labels_attr).flatten()[:, None]

        if plot_name:
            val_features = self._append_positions(i_fl.features_c_norm)
            val_labels = getattr(i_fl, labels_attr).flatten()[:, None]
            history = self.model.fit(training_features,
                                     training_labels,
                                     validation_data=(val_features,
                                                      val_labels),
                                     **fit_kwargs)
            self._plot_history(history, plot_name)
        else:
            history = self.model.fit(training_features, training_labels,
                                     **fit_kwargs)

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)

        return self.model, history

    def eval(self, eval_fl):
        """Predict every (sample, position) pair and compute the MSE.

        All expanded rows are sent through a single ``predict`` call instead
        of one call per row.

        :param eval_fl: container with ``features_c_norm``, ``labels`` and,
            when labels are normalised, ``labels_norm``.
        :return: ``(predictions, mse, mse_norm)``; ``predictions`` has one row
            per sample and one column per position index.
        """
        eval_features = np.asarray(eval_fl.features_c_norm)
        n_expt = len(eval_features)
        n_pos = max(self.numel - 1, 0)

        raw = self.model.predict(self._append_positions(eval_features))
        if isinstance(raw, list):
            # Multi-output model: keep the whole row of the first output.
            per_row = np.asarray(raw[0])
        else:
            # Single output: keep the first element of every row.
            per_row = np.asarray(raw)[:, 0]
        predictions = per_row.reshape((n_expt, n_pos) + per_row.shape[1:])

        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
            mse = mean_squared_error(
                eval_fl.labels,
                self.labels_scaler.inverse_transform(predictions))
        else:
            mse = mean_squared_error(eval_fl.labels, predictions)
            mse_norm = mse
        return predictions, mse, mse_norm
# Directory iterators for training and testing: batches of 16 images resized
# to 150x150 with binary class labels. Only the training iterator is created
# with shuffle=True.
train_data_gen = train_image_generator.flow_from_directory(batch_size=16,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(150, 150),
                                                           class_mode='binary')
test_data_gen = test_image_generator.flow_from_directory(batch_size=16,
                                                         directory=test_dir,
                                                         target_size=(150, 150),
                                                         class_mode='binary')

# Train the model; the test iterator doubles as validation data.
history = new_model.fit(train_data_gen,
                        epochs=5,
                        validation_data=test_data_gen)
new_model.save("newVGG16")

# Final results report: per-epoch curves taken from the training history.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# x axis: one point per recorded epoch.
epochs = range(len(acc))

from matplotlib import pyplot as plt

plt.plot(epochs, acc, 'r', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='testing acc')
class JointBertModel(NLUModel):
    """Joint slot tagger and intent classifier built on ``BertLayer``.

    The Keras model has four inputs (ids, masks, segment ids and valid
    positions) and two outputs: ``slots_tagger`` and ``intent_classifier``.
    """

    def __init__(self,
                 slots_num,
                 intents_num,
                 sess,
                 num_bert_fine_tune_layers=12):
        """Store the sizes, build and compile the model, initialise variables.

        :param slots_num: number of slot classes.
        :param intents_num: number of intent classes.
        :param sess: session handed to ``initialize_vars``.
        :param num_bert_fine_tune_layers: forwarded to ``BertLayer``.
        """
        self.slots_num = slots_num
        self.intents_num = intents_num
        self.num_bert_fine_tune_layers = num_bert_fine_tune_layers

        # Parameters persisted by ``save`` and consumed by ``load``.
        self.model_params = dict(
            slots_num=slots_num,
            intents_num=intents_num,
            num_bert_fine_tune_layers=num_bert_fine_tune_layers)

        self.build_model()
        self.compile_model()
        self.initialize_vars(sess)

    def build_model(self):
        """Assemble ``self.model`` from the BERT layer and the two heads."""
        ids_in = Input(shape=(None, ), name='input_ids')
        masks_in = Input(shape=(None, ), name='input_masks')
        segments_in = Input(shape=(None, ), name='segment_ids')
        valid_positions_in = Input(shape=(None, self.slots_num),
                                   name='valid_positions')
        model_inputs = [ids_in, masks_in, segments_in, valid_positions_in]

        # Pooled and per-token outputs of the BERT layer.
        bert_layer = BertLayer(
            n_fine_tune_layer=self.num_bert_fine_tune_layers,
            name='BertLayer')
        pooled, sequence = bert_layer(model_inputs)

        # Intent head: dropout followed by a softmax over the intents.
        intent_hidden = Dropout(rate=0.1)(pooled)
        intent_probs = Dense(self.intents_num,
                             activation='softmax',
                             name='intent_classifier')(intent_hidden)

        # Slot head: per-token softmax, multiplied by the valid positions.
        slot_hidden = Dropout(rate=0.1)(sequence)
        slot_probs = TimeDistributed(
            Dense(self.slots_num, activation='softmax'))(slot_hidden)
        slot_probs = Multiply(name='slots_tagger')(
            [slot_probs, valid_positions_in])

        self.model = Model(inputs=model_inputs,
                           outputs=[slot_probs, intent_probs])

    def compile_model(self):
        """Compile ``self.model`` with Adam and print its summary."""
        adam = tf.keras.optimizers.Adam(lr=5e-5)

        # Integer targets, hence the sparse variant of the cross-entropy.
        sparse_ce = 'sparse_categorical_crossentropy'
        self.model.compile(
            optimizer=adam,
            loss={
                'slots_tagger': sparse_ce,
                'intent_classifier': sparse_ce
            },
            # Weight of each output's loss in the total loss.
            loss_weights={
                'slots_tagger': 3.0,
                'intent_classifier': 1.0
            },
            metrics={'intent_classifier': 'acc'})
        self.model.summary()

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """Train the model, then visualise four metrics of the history.

        :param X: indexable with four items; the fourth (valid positions) is
            expanded with ``prepare_valid_positions`` before training.
        :param Y: training targets.
        :param validation_data: optional ``(X_val, Y_val)`` pair prepared the
            same way as ``X``.
        :param epochs: forwarded to ``Model.fit``.
        :param batch_size: forwarded to ``Model.fit``.
        """

        def with_expanded_positions(inputs):
            return (inputs[0], inputs[1], inputs[2],
                    self.prepare_valid_positions(inputs[3]))

        X = with_expanded_positions(X)
        if validation_data is not None:
            val_inputs, val_targets = validation_data
            validation_data = (with_expanded_positions(val_inputs),
                               val_targets)

        history = self.model.fit(X,
                                 Y,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size)

        for metric_name in ('slots_tagger_loss', 'intent_classifier_loss',
                            'loss', 'intent_classifier_acc'):
            self.visualize_metric(history.history, metric_name)

    def prepare_valid_positions(self, in_valid_positions):
        """Add a third axis and repeat it ``self.slots_num`` times."""
        expanded = np.expand_dims(in_valid_positions, axis=2)
        return np.tile(expanded, (1, 1, self.slots_num))

    def predict_slots_intent(self,
                             x,
                             slots_vectorizer,
                             intent_vectorizer,
                             remove_start_end=True):
        """Predict and decode slots and intents.

        :param x: indexable with four items; the fourth holds the valid
            positions.
        :param slots_vectorizer: object whose ``inverse_transform`` decodes
            the slot predictions.
        :param intent_vectorizer: object whose ``inverse_transform`` decodes
            an intent index.
        :param remove_start_end: drop the first and last tag of every
            sequence when true.
        :return: ``(slots, intents)``.
        """
        valid_positions = x[3]
        model_inputs = (x[0], x[1], x[2],
                        self.prepare_valid_positions(valid_positions))
        y_slots, y_intent = self.predict(model_inputs)

        # Decode the slot tags with the slots vectorizer.
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            # Remove the first '[CLS]' and the last '[SEP]' tokens.
            slots = np.array([tags[1:-1] for tags in slots])

        # Decode the most probable intent of every row.
        decoded_intents = []
        for row in range(y_intent.shape[0]):
            best = np.argmax(y_intent[row])
            decoded_intents.append(
                intent_vectorizer.inverse_transform([best])[0])
        intents = np.array(decoded_intents)
        return slots, intents

    def initialize_vars(self, sess):
        """Run the local and global initialisers, then set the Keras session."""
        sess.run(tf.compat.v1.local_variables_initializer())
        sess.run(tf.compat.v1.global_variables_initializer())
        K.set_session(sess)

    def save(self, model_path):
        """Write ``params.json`` and ``joint_bert_model.h5`` into ``model_path``."""
        params_file = os.path.join(model_path, 'params.json')
        with open(params_file, 'w') as json_file:
            json.dump(self.model_params, json_file)
        self.model.save(os.path.join(model_path, 'joint_bert_model.h5'))

    def load(load_folder_path, sess):
        """Rebuild a model from a folder written by ``save``.

        Defined without ``self``: call it on the class.
        """
        params_file = os.path.join(load_folder_path, 'params.json')
        with open(params_file, 'r') as json_file:
            model_params = json.load(json_file)

        new_model = JointBertModel(model_params['slots_num'],
                                   model_params['intents_num'], sess,
                                   model_params['num_bert_fine_tune_layers'])
        weights_file = os.path.join(load_folder_path, 'joint_bert_model.h5')
        new_model.model.load_weights(weights_file)
        return new_model
# Bottleneck: flatten the encoder feature map into a 10-unit code.
x = Flatten()(x)
encoded = Dense(units=10)(x)

# Decoder: expand the code to 1152 = 3 * 3 * 128 values, reshape them to a
# 3x3x128 map and upsample with three stride-2 transposed convolutions.
y = Dense(units=1152, activation='relu')(encoded)
y = Reshape((3, 3, 128))(y)
y = Conv2DTranspose(filters=64,
                    kernel_size=(3, 3),
                    strides=(2, 2),
                    padding='valid',
                    name='decoder_deconv1',
                    activation='relu')(y)
y = Conv2DTranspose(filters=32,
                    kernel_size=(5, 5),
                    strides=(2, 2),
                    padding='same',
                    name='decoder_deconv2',
                    activation='relu')(y)
decoded_image = Conv2DTranspose(filters=1,
                                kernel_size=(5, 5),
                                strides=(2, 2),
                                padding='same',
                                name='decoder_deconv3',
                                activation='relu')(y)

CAE = Model(inputs=input_image, outputs=decoded_image, name='CAE')

# In[4]:

tb = TensorBoard(log_dir='logs', write_graph=True)
# The two flags are real booleans: the strings used before were only true
# because any non-empty string is truthy (so was 'False').
# NOTE(review): the checkpoint monitors 'acc' while the model is compiled
# with metrics=['accuracy']; the logged key depends on the Keras version —
# confirm the checkpoint actually sees this metric.
mc = ModelCheckpoint(filepath='models/top_weights.h5',
                     monitor='acc',
                     save_best_only=True,
                     save_weights_only=True,
                     verbose=1)
es = EarlyStopping(monitor='loss', patience=15, verbose=1)
rlr = ReduceLROnPlateau(monitor='loss')
callbacks = [tb, mc, es, rlr]

CAE.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# In[ ]:

# CAE.load_weights('models/top_weights.h5')
# CAE.save('CAE.h5')

# Autoencoder training: the input is also the target.
CAE.fit(X, X, epochs=1000, batch_size=256, callbacks=callbacks)
def main(batch_size=150,
         p_drop=0.4,
         latent_dim=2,
         cpl_fn='minvar',
         cpl_str=1e-3,
         n_epoch=500,
         run_iter=0,
         model_id='cnn',
         exp_name='MNIST'):
    """Train a two-arm coupled autoencoder and store weights and latents.

    Both arms read the same 28x28x1 input; each has its own encoder, latent
    layer ``ld_ae_<i>`` and decoder output ``ou_ae_<i>``. The weights are
    written to ``<result dir><fileid>-modelweights.h5`` and the latent codes
    of the training and validation sets, together with the labels, to
    ``<result dir><fileid>-summary.mat``.

    :param batch_size: training batch size.
    :param p_drop: dropout rate used in each encoder.
    :param latent_dim: number of units of the latent layers.
    :param cpl_fn: coupling loss, one of 'mse', 'mseBN', 'fullcov', 'minvar'.
        Every value except 'mse' puts a BatchNormalization layer on the
        latent code.
    :param cpl_str: loss weight of the two coupling terms.
    :param n_epoch: number of training epochs.
    :param run_iter: run index, only used in the output file name.
    :param model_id: prefix of the output file name.
    :param exp_name: passed to ``dataIO`` to select the dataset.
    :raises ValueError: if ``cpl_fn`` is not a supported value.
    """
    batchnorm_couplings = ('mseBN', 'fullcov', 'minvar')
    if cpl_fn != 'mse' and cpl_fn not in batchnorm_couplings:
        # Previously an unsupported value was only caught by a KeyError while
        # the graph was being built.
        raise ValueError('Unsupported cpl_fn: {!r}'.format(cpl_fn))

    fileid = model_id + \
        '_cf_' + cpl_fn + \
        '_cs_' + str(cpl_str) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    # Dots from the float parameters would look like file extensions.
    fileid = fileid.replace('.', '-')

    train_dat, train_lbl, val_dat, val_lbl, dir_pth = dataIO(exp_name=exp_name)

    #Architecture parameters ------------------------------
    n_arms = 2
    fc_dim = 49

    #Model definition -------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(28, 28, 1), name='in_ae')
    for i in range(n_arms):
        arm = str(i)

        # Encoder
        M['co1_ae_' + arm] = Conv2D(10, (3, 3),
                                    activation='relu',
                                    padding='same',
                                    name='co1_ae_' + arm)(M['in_ae'])
        M['mp1_ae_' + arm] = MaxPooling2D(
            (2, 2), padding='same',
            name='mp1_ae_' + arm)(M['co1_ae_' + arm])
        M['dr1_ae_' + arm] = Dropout(rate=p_drop, name='dr1_ae_' + arm)(
            M['mp1_ae_' + arm])
        M['fl1_ae_' + arm] = Flatten(name='fl1_ae_' + arm)(
            M['dr1_ae_' + arm])

        M['fc01_ae_' + arm] = Dense(fc_dim,
                                    activation='relu',
                                    name='fc01_ae_' + arm)(
                                        M['fl1_ae_' + arm])
        M['fc02_ae_' + arm] = Dense(fc_dim,
                                    activation='relu',
                                    name='fc02_ae_' + arm)(
                                        M['fc01_ae_' + arm])
        M['fc03_ae_' + arm] = Dense(fc_dim,
                                    activation='relu',
                                    name='fc03_ae_' + arm)(
                                        M['fc02_ae_' + arm])

        # Latent layer, named 'ld_ae_<i>' in both variants.
        if cpl_fn == 'mse':
            M['ld_ae_' + arm] = Dense(latent_dim,
                                      activation='linear',
                                      name='ld_ae_' + arm)(
                                          M['fc03_ae_' + arm])
        else:
            M['fc04_ae_' + arm] = Dense(latent_dim,
                                        activation='linear',
                                        name='fc04_ae_' + arm)(
                                            M['fc03_ae_' + arm])
            M['ld_ae_' + arm] = BatchNormalization(
                scale=False,
                center=False,
                epsilon=1e-10,
                momentum=0.99,
                name='ld_ae_' + arm)(M['fc04_ae_' + arm])

        # Decoder: fc_dim * 4 = 196 values reshaped to 14x14x1, upsampled
        # by a factor of two.
        M['fc05_ae_' + arm] = Dense(fc_dim,
                                    activation='relu',
                                    name='fc05_ae_' + arm)(
                                        M['ld_ae_' + arm])
        M['fc06_ae_' + arm] = Dense(fc_dim,
                                    activation='relu',
                                    name='fc06_ae_' + arm)(
                                        M['fc05_ae_' + arm])
        M['fc07_ae_' + arm] = Dense(fc_dim * 4,
                                    activation='relu',
                                    name='fc07_ae_' + arm)(
                                        M['fc06_ae_' + arm])

        M['re1_ae_' + arm] = Reshape(
            (14, 14, 1), name='re1_ae_' + arm)(M['fc07_ae_' + arm])
        M['us1_ae_' + arm] = UpSampling2D(
            (2, 2), name='us1_ae_' + arm)(M['re1_ae_' + arm])
        M['co2_ae_' + arm] = Conv2D(10, (3, 3),
                                    activation='relu',
                                    padding='same',
                                    name='co2_ae_' + arm)(
                                        M['us1_ae_' + arm])
        M['ou_ae_' + arm] = Conv2D(1, (3, 3),
                                   activation='sigmoid',
                                   padding='same',
                                   name='ou_ae_' + arm)(
                                       M['co2_ae_' + arm])

    # Outputs: reconstructions of every arm first, then the latent codes.
    cplAE = Model(inputs=M['in_ae'],
                  outputs=[M['ou_ae_' + str(i)] for i in range(n_arms)] +
                  [M['ld_ae_' + str(i)] for i in range(n_arms)])

    if cpl_fn in ('mse', 'mseBN'):
        cpl_fn_loss = mse
    elif cpl_fn == 'fullcov':
        cpl_fn_loss = fullcov
    else:  # 'minvar', guaranteed by the check at the top
        cpl_fn_loss = minvar

    #Create loss dictionary
    # NOTE(review): the loss helpers are called with model tensors here, so
    # they presumably return loss callables closed over those tensors —
    # confirm against their definitions.
    loss_dict = {
        'ou_ae_0': mse(M['in_ae'], M['ou_ae_0']),
        'ou_ae_1': mse(M['in_ae'], M['ou_ae_1']),
        'ld_ae_0': cpl_fn_loss(M['ld_ae_0'], M['ld_ae_1']),
        'ld_ae_1': cpl_fn_loss(M['ld_ae_1'], M['ld_ae_0'])
    }

    #Loss weights dictionary
    loss_wt_dict = {
        'ou_ae_0': 1.0,
        'ou_ae_1': 1.0,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str
    }

    #Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Data feed
    train_input_dict = {'in_ae': train_dat}
    val_input_dict = {'in_ae': val_dat}
    # The latent outputs only need placeholder targets of the right shape.
    # Zeros keep them deterministic and free of the NaN/inf bit patterns
    # that uninitialised memory may contain.
    train_output_dict = {
        'ou_ae_0': train_dat,
        'ou_ae_1': train_dat,
        'ld_ae_0': np.zeros((train_dat.shape[0], latent_dim)),
        'ld_ae_1': np.zeros((train_dat.shape[0], latent_dim))
    }
    val_output_dict = {
        'ou_ae_0': val_dat,
        'ou_ae_1': val_dat,
        'ld_ae_0': np.zeros((val_dat.shape[0], latent_dim)),
        'ld_ae_1': np.zeros((val_dat.shape[0], latent_dim))
    }

    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    #Train model
    cplAE.fit(train_input_dict,
              train_output_dict,
              validation_data=(val_input_dict, val_output_dict),
              batch_size=batch_size,
              initial_epoch=0,
              epochs=n_epoch,
              verbose=2,
              shuffle=True,
              callbacks=[log_cb])

    #Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    matsummary = {}
    #Trained model prediction
    for i in range(n_arms):
        # Sub-model from the shared input to this arm's latent layer.
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        matsummary['z_val_' + str(i)] = encoder.predict({'in_ae': val_dat})
        matsummary['z_train_' + str(i)] = encoder.predict(
            {'in_ae': train_dat})

    matsummary['train_lbl'] = train_lbl
    matsummary['val_lbl'] = val_lbl
    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', matsummary)
    return
# Shared trunk: two stacked LSTMs that both return full sequences, so the
# branch LSTMs below receive sequence input.
hidden1 = LSTM(32, return_sequences=True, name='firstLSTMLayer')(visible)
hidden2 = LSTM(16, name='secondLSTMLayer',return_sequences=True)(hidden1)
#left branch decides second agent action
hiddenLeft = LSTM(10, name='leftBranch')(hidden2)
agent2 = Dense(5,activation='softmax',name='agent2classifier')(hiddenLeft)
#right branch decides third agent action
hiddenRight = LSTM(10, name='rightBranch')(hidden2)
agent3 = Dense(5,activation='softmax',name='agent3classifier')(hiddenRight)

# One input, two 5-way softmax outputs; losses and metrics are keyed by the
# names of the output layers.
model = Model(inputs=visible,outputs=[agent2,agent3])

model.compile(optimizer='adam',
              loss={'agent2classifier': 'categorical_crossentropy',
                    'agent3classifier': 'categorical_crossentropy'},
              metrics={'agent2classifier': ['acc'],
                       'agent3classifier': ['acc']})
print(model.summary())

# Training data is fed in the given order (shuffle=False); targets are
# passed per output name.
history = model.fit(trainX,
                    y={'agent2classifier': trainY1,'agent3classifier':trainY2},
                    epochs=3000,
                    batch_size=5000,
                    verbose=2,
                    validation_data = (valX,
                                       {'agent2classifier': valY1,'agent3classifier':valY2}),shuffle=False)

model.save('Agent0ObsNetwork.keras')
#model = load_model("actionMultiClassNetwork.keras")
# history.history is a dict, so it is stored as a pickled object array and
# has to be read back with allow_pickle=True as well.
np.save("agent0obs_history.npy", history.history, allow_pickle=True)
    [shared_model(left_input), shared_model(right_input)])
# (The line above closes a statement that starts outside this fragment; it
# applies the same shared_model to both inputs.)

# Two-input model whose single output is the distance tensor.
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.SGD(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

batch_size = 1024 * 2
n_epoch = 50

# Wall-clock timing of the training run.
training_start_time = time()
malstm_trained = model.fit(
    [X_train['left'], X_train['right']],
    Y_train,
    batch_size=batch_size,
    epochs=n_epoch,
    validation_data=([X_validation['left'], X_validation['right']],
                     Y_validation))
training_end_time = time()
print("Training time finished.\n%d epochs in %12.2f" %
      (n_epoch, training_end_time - training_start_time))

model.save('../data/model.h5')

#========
# Accuracy curves, drawn in the upper of two stacked subplots.
# NOTE(review): the metric is registered as 'accuracy' but read back as
# 'acc'/'val_acc'; which key the history contains depends on the Keras
# version — confirm.
plt.subplot(211)
plt.plot(malstm_trained.history['acc'])
plt.plot(malstm_trained.history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
def main(
        batch_size=16,
        episode_length=16,
        filters=16,
        width=64,
        height=64,
        memory_size=32,
):
    """Build a Conv3D autoencoder around ``KanervaMemory`` and train it.

    The model is trained to reproduce its input on an endlessly repeated
    random tensor, with TensorBoard logging into a time-stamped directory.

    :param batch_size: number of episodes per batch.
    :param episode_length: number of frames per episode.
    :param filters: filters of every (transposed) convolution but the last.
    :param width: frame width.
    :param height: frame height.
    :param memory_size: forwarded to ``KanervaMemory``.
    """
    n_stages = 5  # strided convolutions on each side of the memory

    # Prevent TensorFlow from allocating all available GPU memory
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    tf.keras.backend.set_session(tf.Session(config=session_config))

    input_layer = Input([episode_length, width, height, 1])

    # Encoder: stride 1 along the episode axis, stride 2 along both spatial
    # axes.
    net = input_layer
    for _ in range(n_stages):
        net = Conv3D(filters=filters,
                     kernel_size=3,
                     strides=(1, 2, 2),
                     padding="same")(net)

    # Flatten every frame's feature map into one code vector.
    tmp_shape = net.shape.as_list()[1:]
    code_size = tmp_shape[1] * tmp_shape[2] * tmp_shape[3]

    net = Reshape([episode_length, code_size])(net)
    net = KanervaMemory(code_size=code_size, memory_size=memory_size)(net)
    net = Reshape(tmp_shape)(net)

    # Decoder: mirror of the encoder, closed by a 1x1x1 sigmoid layer.
    for _ in range(n_stages):
        net = Conv3DTranspose(filters=filters,
                              kernel_size=3,
                              strides=(1, 2, 2),
                              padding="same")(net)
    net = Conv3DTranspose(filters=1,
                          kernel_size=1,
                          strides=1,
                          padding="same",
                          activation="sigmoid")(net)
    output_layer = net

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile("adam", loss="mse", metrics=["mse"])
    model.summary()

    # A single random episode clipped to [0, 1], repeated forever and used
    # as both input and target.
    episode = tf.clip_by_value(
        tf.random.normal(shape=[episode_length, width, height, 1]), 0.0, 1.0)
    dataset = (tf.data.Dataset.from_tensors(episode)
               .repeat(-1)
               .map(lambda x: (x, x))
               .batch(batch_size))

    log_dir = "../logs/KanervaMachine/log_{}".format(int(time()))
    os.makedirs(log_dir)
    tensorboard = TensorBoard(log_dir=log_dir, update_freq="batch")

    model.fit(dataset,
              callbacks=[tensorboard],
              steps_per_epoch=500,
              epochs=100)
class Kmodel:
    """Keras regression model selected by ``mode`` with train/eval helpers."""

    def __init__(self, fl, mode, hparams):
        """
        Builds the Keras model selected by ``mode`` and compiles it with the
        loss named in ``hparams['loss']``.

        :param fl: feature/label container. Attributes read here:
            ``features_c_dim``, ``labels_dim``, ``normalise_labels``,
            ``labels_scaler`` and, in 'conv1' mode, ``label_type``.
        :param mode: one of 'ann', 'ann2', 'ann3', 'conv1', 'conv2', 'lstm'.
        :param hparams: Dict of hyperparameters. Keys read here:
            'learning_rate', 'loss' and, depending on the mode, 'reg_l1',
            'reg_l2', 'activation', 'pre', 'filters'.
        :raises ValueError: if ``mode`` is not one of the supported values.
        """
        self.features_dim = fl.features_c_dim
        self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
        self.hparams = hparams
        self.mode = mode
        self.normalise_labels = fl.normalise_labels
        self.labels_scaler = fl.labels_scaler
        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        def dense(units, name, activation=None):
            # Regularised Dense layer; activation=None means "use the
            # configured hidden activation". A fresh regulariser per layer.
            if activation is None:
                activation = hparams['activation']
            return Dense(units=units,
                         activation=activation,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                         name=name)

        def conv(n_filters):
            # Width-3, stride-1, same-padded ReLU convolution.
            return Conv1D(filters=n_filters,
                          kernel_size=3,
                          strides=1,
                          padding='same',
                          activation='relu')

        # Selection of model
        if mode == 'ann':
            model = ann(self.features_dim, self.labels_dim, self.hparams)
            x = model(features_in)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'ann2':
            model_1 = ann(self.features_dim, 50, self.hparams)
            x = model_1(features_in)
            model_end = ann(50, 50, self.hparams)
            end = model_end(x)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            model_2 = ann(50, self.labels_dim - 1, self.hparams)
            x = model_2(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'ann3':
            x = dense(hparams['pre'], 'Pre_' + str(0))(features_in)
            x = dense(hparams['pre'], 'Pre_' + str(1))(x)
            x = dense(hparams['pre'], 'Pre_' + str(2))(x)
            x = dense(self.labels_dim, 'Final', activation='linear')(x)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv1':
            if fl.label_type == 'gf20':
                final_dim = 20
            else:
                final_dim = 19
            x = dense(hparams['pre'], 'shared' + str(1))(features_in)
            x = dense(hparams['pre'], 'Pre_' + str(1))(x)
            # The layer keeps the name 'Pre_set_19' for every final_dim.
            x = dense(final_dim, 'Pre_set_19')(x)
            x = Reshape(target_shape=(final_dim, 1))(x)
            x = conv(hparams['filters'])(x)
            x = conv(hparams['filters'] * 2)(x)
            x = conv(hparams['filters'] * 4)(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(final_dim, ))(x)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv2':
            x = dense(10, 'Shared_e_' + str(1))(features_in)
            x = dense(10, 'Shared_e_' + str(2))(x)
            end = dense(10, 'Dense_e_' + str(1))(x)
            end = dense(10, 'Dense_e_' + str(2))(end)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            x = dense(80, 'Pre_' + str(1))(x)
            x = Reshape(target_shape=(80, 1))(x)
            x = conv(8)(x)
            x = MaxPooling1D(pool_size=2)(x)
            x = conv(16)(x)
            x = MaxPooling1D(pool_size=2)(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(20, ))(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'lstm':
            x = dense(20, 'Shared_e_' + str(1))(features_in)
            x = dense(20, 'Shared_e_' + str(2))(x)
            end = dense(20, 'Dense_e_' + str(1))(x)
            end = dense(20, 'Dense_e_' + str(2))(end)
            end_node = dense(1, 'output_layer', activation='linear')(end)

            x = dense(20, 'Pre_' + str(1))(x)
            x = dense(20, 'Pre_' + str(2))(x)
            x = RepeatVector(n=20)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = TimeDistributed(Dense(1))(x)
            x = Reshape(target_shape=(20, ))(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        else:
            # Fail with a clear message instead of leaving self.model unset.
            raise ValueError('Unknown Kmodel mode: {!r}'.format(mode))

        optimizer = Adam(learning_rate=hparams['learning_rate'], clipnorm=1)

        def weighted_mse(y_true, y_pred):
            # Alternative loss, currently not selected by any 'loss' value.
            loss_weights = np.sqrt(np.arange(1, 20))
            #loss_weights = np.arange(1, 20)
            return K.mean(K.square(y_pred - y_true) * loss_weights, axis=-1)

        def haitao_error(y_true, y_pred):
            # Mean absolute error relative to the magnitude of the last true
            # value of every row (clipped away from zero), in percent.
            diff = K.abs(
                (y_true - y_pred) /
                K.reshape(K.clip(K.abs(y_true[:, -1]), K.epsilon(), None),
                          (-1, 1)))
            return 100. * K.mean(diff, axis=-1)

        # NOTE(review): any other 'loss' value leaves the model uncompiled
        # without an error — confirm whether that is relied upon.
        if hparams['loss'] == 'mape':
            self.model.compile(optimizer=optimizer,
                               loss=MeanAbsolutePercentageError())
        elif hparams['loss'] == 'haitao':
            self.model.compile(optimizer=optimizer, loss=haitao_error)
        elif hparams['loss'] == 'mse':
            self.model.compile(optimizer=optimizer, loss='mean_squared_error')

    @staticmethod
    def _plot_history(history, plot_name):
        """Save a semilog plot of training/validation loss to ``plot_name``."""
        plt.semilogy(history.history['loss'], label='train')
        plt.semilogy(history.history['val_loss'], label='test')
        # Invisible artists: they only add the final values to the legend.
        plt.plot([], [],
                 ' ',
                 label='Final train: {:.3e}'.format(
                     history.history['loss'][-1]))
        plt.plot([], [],
                 ' ',
                 label='Final val: {:.3e}'.format(
                     history.history['val_loss'][-1]))
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(loc='upper right')
        plt.savefig(plot_name, bbox_inches='tight')
        plt.close()

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        """Fit the model, optionally plotting the loss and saving the model.

        :param fl: training container (``features_c_norm`` plus ``labels`` or
            ``labels_norm``, depending on ``self.normalise_labels``).
        :param i_fl: validation container with the same attributes; only read
            when ``plot_name`` is given.
        :param save_name: file name used when ``save_mode`` is true.
        :param save_dir: prefix concatenated with ``save_name``.
        :param save_mode: save the model after training when true.
        :param plot_name: when truthy, train with validation data and save
            the loss plot to this path.
        :return: ``(self.model, history)``.
        """
        labels_attr = 'labels_norm' if self.normalise_labels else 'labels'
        training_features = fl.features_c_norm
        training_labels = getattr(fl, labels_attr)
        fit_kwargs = dict(epochs=self.hparams['epochs'],
                          batch_size=self.hparams['batch_size'],
                          verbose=self.hparams['verbose'])

        if plot_name:
            # Validation data is only needed for the loss plot.
            val_features = i_fl.features_c_norm
            val_labels = getattr(i_fl, labels_attr)
            history = self.model.fit(training_features,
                                     training_labels,
                                     validation_data=(val_features,
                                                      val_labels),
                                     **fit_kwargs)
            self._plot_history(history, plot_name)
        else:
            history = self.model.fit(training_features, training_labels,
                                     **fit_kwargs)

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)

        return self.model, history

    def eval(self, eval_fl):
        """Predict ``eval_fl.features_c_norm`` and compute the MSE.

        :param eval_fl: container with ``features_c_norm``, ``labels`` and,
            when labels are normalised, ``labels_norm``.
        :return: ``(predictions, mse, mse_norm)``; with normalised labels
            ``mse`` is computed after ``labels_scaler.inverse_transform``,
            otherwise both values are the same.
        """
        features = eval_fl.features_c_norm
        predictions = self.model.predict(features)
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
            mse = mean_squared_error(
                eval_fl.labels,
                self.labels_scaler.inverse_transform(predictions))
        else:
            mse = mean_squared_error(eval_fl.labels, predictions)
            mse_norm = mse
        return predictions, mse, mse_norm
# conv_3 = MaxPooling2D(pool_size=2, strides=2)(conv_3) # # merged = concatenate([conv_2, conv_3], axis=1) # net = Flatten()(merged) # net = Dense(128, activation='relu')(net) # net = Dense(num_classes, activation='softmax')(net) # # outputs = net # Model Compilation model = Model(inputs=inputs, outputs=outputs) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) # Training model.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128) # Evaluation result = model.evaluate(x=data.test.images, y=data.test.labels) for name, value in zip(model.metrics_names, result): print(name, value) y_pred = model.predict(x=data.test.images) print(y_pred) cls_pred = np.argmax(y_pred, axis=1) def plot_example_errors(cls_pred): incorrect = (cls_pred != data.test.cls) images = data.test.images[incorrect]
class MTmodel:
    """Multi-task regression network wrapped around a Keras functional model.

    The network body is produced by one of two builders referenced from
    elsewhere in the project (`hps` or `cross_stitch`); this class only wires
    the input layer, compiles the model with an MSE loss, and offers
    train/evaluate helpers.
    """

    def __init__(self, fl, mode, hparams, labels_norm=True):
        """
        Build and compile the multi-task model.

        :param fl: Feature/label container. Only `fl.features_c_dim` (number
            of input feature nodes) and `fl.labels_dim` (number of tasks) are
            read here.
        :param mode: Which builder to use: 'hps' or 'cs' (cross_stitch).
        :param hparams: Dict of hyperparameters. This class reads
            'optimizer' here and 'epochs', 'batch_size', 'verbose' in
            train_model(); the whole dict is also forwarded to the builder.
        :param labels_norm: If True, train and evaluate against the
            normalised labels (`labels_norm`) instead of the raw ones.
        :raises ValueError: if `mode` is not one of the supported values.
        """
        # Fail early with a clear message; previously an unknown mode left
        # the output tensor unbound and crashed later with UnboundLocalError.
        if mode not in ('hps', 'cs'):
            raise ValueError(
                "Unknown mode {!r}; expected 'hps' or 'cs'".format(mode))

        self.features_dim = fl.features_c_dim
        # Assuming that each task has only 1 dimensional output
        self.labels_dim = [1 for _ in range(fl.labels_dim)]
        self.hparams = hparams
        self.labels_norm = labels_norm

        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        # Selection of model
        if mode == 'hps':
            hps_model = hps(self.features_dim, self.labels_dim, self.hparams)
            x = hps_model(features_in)
        else:
            cs_model = cross_stitch(self.features_dim, self.labels_dim,
                                    self.hparams)
            x = cs_model(features_in)

        self.model = Model(inputs=features_in, outputs=x)
        self.model.compile(optimizer=hparams['optimizer'],
                           loss='mean_squared_error')

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        """
        Fit the model on `fl` and optionally plot the loss / save the model.

        :param fl: Training container; `features_c_norm` and either
            `labels_norm` or `labels` are read.
        :param i_fl: Unused by this method; kept for interface compatibility.
        :param save_name: File name used when `save_mode` is True.
        :param save_dir: Directory prefix; it is concatenated with
            `save_name` as-is, so it must end with a path separator.
        :param save_mode: If True, save the fitted model.
        :param plot_name: If truthy, path where the training-loss curve is
            written.
        :return: The fitted Keras model.
        """
        training_features = fl.features_c_norm
        # Transposed so there is one label sequence per task
        # (presumably one per model output -- TODO confirm with the builders).
        if self.labels_norm:
            training_labels = fl.labels_norm.T.tolist()
        else:
            training_labels = fl.labels.T.tolist()

        # The fit call is identical whether or not a plot is requested.
        history = self.model.fit(training_features,
                                 training_labels,
                                 epochs=self.hparams['epochs'],
                                 batch_size=self.hparams['batch_size'],
                                 verbose=self.hparams['verbose'])

        if plot_name:
            # Summarise the training loss history
            plt.plot(history.history['loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train'], loc='upper left')
            plt.savefig(plot_name, bbox_inches='tight')
            plt.close()

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)
        return self.model

    @staticmethod
    def _stack_predictions(predictions):
        """Stack the per-task prediction arrays column-wise into one array."""
        return np.vstack([prediction.T for prediction in predictions]).T

    def eval(self, eval_fl):
        """
        Predict on `eval_fl` and compute mean squared errors.

        :param eval_fl: Evaluation container; `features_c_norm`, `labels` and,
            when labels are normalised, `labels_norm` and `labels_scaler`
            are read.
        :return: Tuple (predictions_actual, mse, mse_norm). With normalised
            labels, `predictions_actual` is the output of
            `labels_scaler.inverse_transform` and `mse_norm` is the error in
            normalised space; otherwise `predictions_actual` is a nested list
            and `mse_norm` equals `mse`.
        """
        features = eval_fl.features_c_norm
        stacked = self._stack_predictions(self.model.predict(features))

        if self.labels_norm:
            labels = eval_fl.labels_norm.tolist()
            labels_actual = eval_fl.labels.tolist()
            predictions = stacked.tolist()
            predictions_actual = eval_fl.labels_scaler.inverse_transform(
                predictions)
            # Calculating metrics
            mse = mean_squared_error(labels_actual, predictions_actual)
            mse_norm = mean_squared_error(labels, predictions)
        else:
            labels = eval_fl.labels.tolist()
            predictions_actual = stacked.tolist()
            mse = mean_squared_error(labels, predictions_actual)
            mse_norm = mse
        return predictions_actual, mse, mse_norm
def fit(self,
        learning_rate=1e-4,
        epochs=5,
        activation='relu',
        dropout=0,
        hidden_size=1024,
        nb_layers=1,
        include_class_weight=False,
        batch_size=20,
        save_model=False,
        verbose=True,
        fine_tuning=False,
        NB_IV3_LAYERS_TO_FREEZE=279,
        use_TPU=False,
        transfer_model='Inception',
        min_accuracy=None,
        extract_SavedModel=False):
    """
    Train a transfer-learning image classifier from TFRecord files.

    Reads 'train.tfrecord' and 'val.tfrecord', puts a dense classification
    head on a frozen ImageNet backbone, trains it, optionally fine-tunes the
    upper layers, and stores the last validation accuracy in `self.fitness`.

    :param learning_rate: Adam learning rate; fine-tuning uses a tenth of it.
    :param epochs: Number of epochs for each training phase.
    :param activation: Activation of the hidden dense layers.
    :param dropout: Dropout rate after each hidden layer (skipped when 0).
    :param hidden_size: Width of each hidden dense layer.
    :param nb_layers: Number of hidden dense layers.
    :param include_class_weight: If True, pass "balanced" class weights
        computed from the category names to `model.fit`.
    :param batch_size: Batch size; also used to derive the step counts.
    :param save_model: If True, save the model as an .h5 file under
        `parentdir + '/data/trained_models'`.
    :param verbose: Forwarded to `model.fit`.
    :param fine_tuning: If True (and training was not stopped), run a second
        training phase with the upper layers unfrozen.
    :param NB_IV3_LAYERS_TO_FREEZE: Index separating frozen from trainable
        layers during fine-tuning.
    :param use_TPU: If True, convert the model with the `tf.contrib.tpu`
        helpers and hand dataset factories (not datasets) to `fit`.
    :param transfer_model: 'Xception', 'Inception_Resnet', 'Resnet' or
        anything else for InceptionV3.
    :param min_accuracy: If given, add an EarlyStopping callback with this
        baseline on 'categorical_accuracy'.
    :param extract_SavedModel: If True, export a SavedModel to
        './image_classifier/1/'.
    """
    # Read the tfrecords data
    TRAIN_DATA = tf.data.TFRecordDataset(['train.tfrecord'])
    VAL_DATA = tf.data.TFRecordDataset(['val.tfrecord'])
    print('Read the TFrecords')

    if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']:
        target_size = (299, 299)
    else:
        target_size = (224, 224)

    # We expect the classes to be the name of the folders in the training set
    self.categories = os.listdir(TRAIN_DIR)

    # Helper functions to load tfrecords. Strongly inspired by
    # https://colab.research.google.com/github/GoogleCloudPlatform/training-data-analyst/blob/master/courses/fast-and-lean-data-science/07_Keras_Flowers_TPU_playground.ipynb#scrollTo=LtAVr-4CP1rp
    def read_tfrecord(example):
        """Decode one serialized example into (image tensor, one-hot label)."""
        schema = {
            "image": tf.FixedLenFeature((), tf.string),  # byte string
            "label": tf.FixedLenFeature((), tf.int64),
        }
        parsed = tf.parse_single_example(example, schema)
        pixels = tf.image.decode_jpeg(parsed['image'])
        # Convert image to floats in [0, 1] range
        pixels = tf.cast(pixels, tf.float32) / 255.0
        pixels = tf.image.resize_images(pixels,
                                        size=list(target_size),
                                        method=tf.image.ResizeMethod.BILINEAR)
        feature = tf.reshape(pixels, list(target_size) + [3])
        label = tf.cast(parsed['label'], tf.int32)
        target = tf.one_hot(label, len(self.categories))
        return feature, target

    def make_input_fn(records):
        """Return a zero-argument factory building the batched pipeline."""

        def input_fn():
            pipeline = records.map(read_tfrecord)
            pipeline = pipeline.cache()
            pipeline = pipeline.repeat()
            pipeline = pipeline.shuffle(1000)
            # drop_remainder needed on TPU
            pipeline = pipeline.batch(batch_size, drop_remainder=True)
            # Prefetch next batch while training
            # (-1: autotune prefetch buffer size)
            return pipeline.prefetch(-1)

        return input_fn

    training_input_fn = make_input_fn(TRAIN_DATA)
    validation_input_fn = make_input_fn(VAL_DATA)

    # Optionally stop training when no sufficient improvement in accuracy
    # has been achieved
    callback = None
    if min_accuracy is not None:
        callback = [
            EarlyStopping(monitor='categorical_accuracy',
                          baseline=min_accuracy)
        ]

    # Load the pretrained model, without the classification (top) layers
    if transfer_model == 'Xception':
        backbone_cls, backbone_shape = Xception, (299, 299, 3)
    elif transfer_model == 'Inception_Resnet':
        backbone_cls, backbone_shape = InceptionResNetV2, (299, 299, 3)
    elif transfer_model == 'Resnet':
        backbone_cls, backbone_shape = ResNet50, (224, 224, 3)
    else:
        backbone_cls, backbone_shape = InceptionV3, (299, 299, 3)
    base_model = backbone_cls(weights='imagenet',
                              include_top=False,
                              input_shape=backbone_shape)

    # Add the classification layers using Keras functional API
    head = GlobalAveragePooling2D()(base_model.output)
    for _ in range(nb_layers):
        # Hidden layer for classification
        head = Dense(hidden_size, activation=activation)(head)
        if dropout > 0:
            head = Dropout(rate=dropout)(head)
    # Output layer
    predictions = Dense(len(self.categories), activation='softmax')(head)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Set only the top layers as trainable (if we want to do fine-tuning,
    # we can train the base layers as a second step)
    for layer in base_model.layers:
        layer.trainable = False

    # Define the optimizer and the loss, and compile the model
    loss = 'categorical_crossentropy'
    if use_TPU:
        # If we want to try out the TPU, it looks like we currently need to
        # use tensorflow optimizers...see
        # https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked
        # ...and https://www.youtube.com/watch?v=jgNwywYcH4w
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        tpu_optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
        model.compile(optimizer=tpu_optimizer,
                      loss=loss,
                      metrics=['categorical_accuracy'])
        TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
        resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)
        model = tf.contrib.tpu.keras_to_tpu_model(
            model, strategy=tf.contrib.tpu.TPUDistributionStrategy(resolver))
        tf.logging.set_verbosity(tf.logging.INFO)
    else:
        optimizer = Adam(lr=learning_rate)
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=['categorical_accuracy'])

    # Optionally weight the classes given the imbalanced number of images
    class_weight = None
    if include_class_weight:
        from sklearn.utils.class_weight import compute_class_weight
        # NOTE(review): the weights are derived from the list of category
        # names, not from per-image labels -- confirm this is intended.
        cls_train = self.categories
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(cls_train),
                                            y=cls_train)

    def count_files(folder):
        """Count every file found below `folder`."""
        return sum([len(files) for _, _, files in os.walk(folder)])

    steps_per_epoch = int(
        count_files(parentdir + '/data/image_dataset/train') / batch_size)
    validation_steps = int(
        count_files(parentdir + '/data/image_dataset/val') / batch_size)

    def run_training(net):
        """Run one `fit` pass; the TPU model receives the dataset factories."""
        if use_TPU:
            train_input = training_input_fn
            val_input = validation_input_fn
        else:
            train_input = training_input_fn()
            val_input = validation_input_fn()
        return net.fit(train_input,
                       steps_per_epoch=steps_per_epoch,
                       epochs=epochs,
                       validation_data=val_input,
                       validation_steps=validation_steps,
                       verbose=verbose,
                       callbacks=callback,
                       class_weight=class_weight)

    # Fit the model
    history = run_training(model)

    # Fine-tune the model, if we wish so
    if fine_tuning and not model.stop_training:
        print('============')
        print('Begin fine-tuning')
        print('============')

        # Keep the first layers frozen and make the remaining ones trainable
        for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
            layer.trainable = False
        for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
            layer.trainable = True

        model.compile(optimizer=Adam(lr=learning_rate * 0.1),
                      loss=loss,
                      metrics=['categorical_accuracy'])

        # Fit the model again with the new trainable set
        history = run_training(model)

    # Keep the last validation accuracy as the fitness of this configuration
    self.fitness = history.history['val_categorical_accuracy'][-1]

    # Save the model
    if save_model:
        if not os.path.exists(parentdir + '/data/trained_models'):
            os.makedirs(parentdir + '/data/trained_models')
        model.save(parentdir + '/data/trained_models/trained_model.h5')
        print('Model saved!')

    # Save model in production format
    # NOTE(review): the nesting of this if/else was reconstructed from
    # flattened source -- confirm that `self.model` should be kept exactly
    # when no SavedModel is exported.
    if extract_SavedModel:
        export_path = "./image_classifier/1/"
        with K.get_session() as sess:
            tf.saved_model.simple_save(
                sess,
                export_path,
                inputs={'input_image': model.input},
                outputs={t.name: t
                         for t in model.outputs})
    else:
        self.model = model

    del history
    del model
class JointBertCRFModel(JointBertModel):
    """Joint intent classifier and slot tagger with a CRF slot layer.

    Builds on `JointBertModel` (defined elsewhere); this subclass replaces
    the model construction, compilation, training and persistence so that
    slots are decoded by a `CRFLayer` that also needs the sequence lengths.
    """

    def __init__(self,
                 slots_num,
                 intents_num,
                 bert_hub_path,
                 sess,
                 num_bert_fine_tune_layers=10,
                 is_bert=True,
                 is_crf=True,
                 learning_rate=5e-5):
        """Forward every argument unchanged to `JointBertModel.__init__`."""
        super(JointBertCRFModel,
              self).__init__(slots_num, intents_num, bert_hub_path, sess,
                             num_bert_fine_tune_layers, is_bert, is_crf,
                             learning_rate)

    def compile_model(self):
        """Compile with the CRF loss for slots and sparse CE for intents."""
        # Instead of using `categorical_crossentropy`, we use
        # `sparse_categorical_crossentropy`, which does expect integer
        # targets.
        optimizer = tf.keras.optimizers.Adam(lr=self.learning_rate)
        losses = {
            'slots_tagger': self.crf.loss,
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        # The slot loss is weighted three times as much as the intent loss.
        loss_weights = {'slots_tagger': 3.0, 'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer,
                           loss=losses,
                           loss_weights=loss_weights,
                           metrics=metrics)
        self.model.summary()

    def build_model(self):
        """Create `self.model` with outputs [slots_tagger, intent_classifier].

        The model inputs are, in order: input_ids, input_masks, segment_ids,
        valid_positions, sequence_lengths.
        """
        in_id = Input(shape=(None, ), name='input_ids')
        in_mask = Input(shape=(None, ), name='input_masks')
        in_segment = Input(shape=(None, ), name='segment_ids')
        in_valid_positions = Input(shape=(None, self.slots_num),
                                   name='valid_positions')
        # `shape` must be a tuple; the previous `(1)` was just the int 1.
        sequence_lengths = Input(shape=(1, ),
                                 dtype='int32',
                                 name='sequence_lengths')

        bert_inputs = [in_id, in_mask, in_segment, in_valid_positions]

        if self.is_bert:
            bert_pooled_output, bert_sequence_output = BertLayer(
                n_fine_tune_layers=self.num_bert_fine_tune_layers,
                bert_path=self.bert_hub_path,
                pooling='mean',
                name='BertLayer')(bert_inputs)
        else:
            bert_pooled_output, bert_sequence_output = AlbertLayer(
                fine_tune=self.num_bert_fine_tune_layers > 0,
                albert_path=self.bert_hub_path,
                pooling='mean',
                name='AlbertLayer')(bert_inputs)

        intents_fc = Dense(self.intents_num,
                           activation='softmax',
                           name='intent_classifier')(bert_pooled_output)

        # Kept on the instance because compile_model() needs `self.crf.loss`.
        self.crf = CRFLayer(name='slots_tagger')
        slots_output = self.crf(
            inputs=[bert_sequence_output, sequence_lengths])

        self.model = Model(inputs=bert_inputs + [sequence_lengths],
                           outputs=[slots_output, intents_fc])

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        Train the joint model.

        X: sequence of [input_ids, input_mask, segment_ids, valid_positions,
           sequence_lengths]; valid_positions is passed through
           `prepare_valid_positions` before training.
        Y: targets forwarded unchanged to Keras.
        validation_data: optional (X_val, Y_val) with the same layout as X/Y.
        """
        X = (X[0], X[1], X[2], self.prepare_valid_positions(X[3]), X[4])
        if validation_data is not None:
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2],
                                self.prepare_valid_positions(X_val[3]),
                                X_val[4]), Y_val)

        self.model.fit(X,
                       Y,
                       validation_data=validation_data,
                       epochs=epochs,
                       batch_size=batch_size)

    def predict_slots_intent(self,
                             x,
                             slots_vectorizer,
                             intent_vectorizer,
                             remove_start_end=True):
        """
        Predict slot tags and intents for a batch.

        x: same five-element layout as the `X` argument of fit().
        slots_vectorizer / intent_vectorizer: objects whose
            `inverse_transform` maps model outputs back to labels.
        remove_start_end: if True, drop the first and last tag of every
            sequence.
        Returns (slots, intents); intents is a numpy array.
        """
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions),
             x[4])

        y_slots, y_intent = self.predict(x)

        # The raw (unprepared) valid positions are what the vectorizer gets.
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            slots = [tags[1:-1] for tags in slots]

        intents = np.array([
            intent_vectorizer.inverse_transform([np.argmax(y_intent[i])])[0]
            for i in range(y_intent.shape[0])
        ])
        return slots, intents

    def save(self, model_path):
        """Write params.json and the model file into `model_path`."""
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            # json.dump writes to the file; json.dumps only accepts the
            # object positionally and raised TypeError here.
            json.dump(self.model_params, json_file, indent=2)
        self.model.save(os.path.join(model_path, 'joint_bert_crf_model.h5'))

    @staticmethod
    def load(load_folder_path, sess):
        """
        Rebuild a model from a folder written by save().

        load_folder_path: folder holding params.json and the .h5 weights.
        sess: session object forwarded to the constructor.
        Returns a new JointBertCRFModel with the stored weights loaded.
        """
        with open(os.path.join(load_folder_path, 'params.json'),
                  'r') as json_file:
            model_params = json.load(json_file)

        slots_num = model_params['slots_num']
        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']
        # Optional keys fall back to the constructor defaults.
        is_crf = model_params.get('is_crf', True)
        learning_rate = model_params.get('learning_rate', 5e-5)

        new_model = JointBertCRFModel(slots_num, intents_num, bert_hub_path,
                                      sess, num_bert_fine_tune_layers,
                                      is_bert, is_crf, learning_rate)
        new_model.model.load_weights(
            os.path.join(load_folder_path, 'joint_bert_crf_model.h5'))
        return new_model
def main(cvset=0,
         n_features=5000,
         batch_size=1000,
         p_drop=0.5,
         latent_dim=2,
         n_epoch=5000,
         run_iter=0,
         exp_name='nagent',
         model_id='nagent_model'):
    """
    Train a masked autoencoder and save its weights and latent summary.

    :param cvset: Cross-validation split passed to `dataIO` and recorded in
        the output file id.
    :param n_features: Number of input features requested from `dataIO`.
    :param batch_size: Training batch size.
    :param p_drop: Dropout rate applied to the input.
    :param latent_dim: Width of the bottleneck layer.
    :param n_epoch: Number of training epochs.
    :param run_iter: Run index, only used in the output file id.
    :param exp_name: Experiment name passed to `dataIO`.
    :param model_id: Prefix of the output file id.
    :return: None. Writes a CSV log, an .h5 weights file and a .mat summary
        to the directories returned by `dataIO`.
    """
    # Use the requested split; this was hard-coded to 0 although the file id
    # below already recorded `cvset`.
    train_dict, val_dict, full_dict, dir_pth = dataIO(cvset=cvset,
                                                      n_features=n_features,
                                                      exp_name=exp_name,
                                                      train_size=25000)

    # Architecture parameters ------------------------------
    input_dim = train_dict['X'].shape[1]
    print(input_dim)
    fc_dim = 50

    fileid = model_id + \
        '_cv_' + str(cvset) + \
        '_ng_' + str(n_features) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    # Dots (e.g. from p_drop) are replaced so the id is a clean file stem.
    fileid = fileid.replace('.', '-')
    print(fileid)

    n_agents = 1

    # Model definition -----------------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(input_dim, ), name='in_ae')
    M['mask_ae'] = Input(shape=(input_dim, ), name='mask_ae')
    for i in range(n_agents):
        sfx = str(i)
        M['dr_ae_' + sfx] = Dropout(p_drop, name='dr_ae_' + sfx)(M['in_ae'])
        M['fc01_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc01_ae_' + sfx)(M['dr_ae_' + sfx])
        M['fc02_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc02_ae_' + sfx)(M['fc01_ae_' + sfx])
        M['fc03_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc03_ae_' + sfx)(M['fc02_ae_' + sfx])
        M['fc04_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc04_ae_' + sfx)(M['fc03_ae_' + sfx])
        M['fc05_ae_' + sfx] = Dense(latent_dim,
                                    activation='linear',
                                    name='fc05_ae_' + sfx)(M['fc04_ae_' + sfx])
        # Latent layer: batch norm without learned scale/shift.
        M['ld_ae_' + sfx] = BatchNormalization(
            scale=False,
            center=False,
            epsilon=1e-10,
            momentum=0.,
            name='ld_ae_' + sfx)(M['fc05_ae_' + sfx])
        M['fc06_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc06_ae_' + sfx)(M['ld_ae_' + sfx])
        M['fc07_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc07_ae_' + sfx)(M['fc06_ae_' + sfx])
        # NOTE(review): the layer name 'fc08_ae_c' breaks the naming pattern;
        # left untouched because it is stored in saved weight files.
        M['fc08_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc08_ae_c' + sfx)(M['fc07_ae_' + sfx])
        M['fc09_ae_' + sfx] = Dense(fc_dim,
                                    activation='elu',
                                    name='fc09_ae_' + sfx)(M['fc08_ae_' + sfx])
        M['ou_ae_' + sfx] = Dense(input_dim,
                                  activation='linear',
                                  name='ou_ae_' + sfx)(M['fc09_ae_' + sfx])

    AE = Model(inputs=[M['in_ae'], M['mask_ae']],
               outputs=[M['ou_ae_' + str(i)] for i in range(n_agents)])

    def masked_mse(X, Y, mask):
        """Return a Keras loss that yields the mask-weighted MSE of X and Y."""
        loss_val = tf.reduce_mean(
            tf.multiply(tf.math.squared_difference(X, Y), mask))

        def masked_loss(y_true, y_pred):
            # The loss is computed from the graph tensors captured above, so
            # the arguments Keras passes in are deliberately ignored.
            return loss_val

        return masked_loss

    # Create loss dictionary: each agent is scored on its own output tensor
    # (previously every entry pointed at agent 0's output).
    loss_dict = {
        'ou_ae_' + str(i): masked_mse(M['in_ae'], M['ou_ae_' + str(i)],
                                      M['mask_ae'])
        for i in range(n_agents)
    }

    # Loss weights dictionary
    loss_wt_dict = {'ou_ae_' + str(i): 1.0 for i in range(n_agents)}

    # Add loss definitions to the model
    AE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    # Custom logging
    cb_obj = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    train_input_dict = {
        'in_ae': train_dict['X'],
        'mask_ae': train_dict['mask']
    }
    train_output_dict = {
        'ou_ae_' + str(i): train_dict['X']
        for i in range(n_agents)
    }
    val_input_dict = {'in_ae': val_dict['X'], 'mask_ae': val_dict['mask']}
    val_output_dict = {
        'ou_ae_' + str(i): val_dict['X']
        for i in range(n_agents)
    }

    # Model training
    start_time = timeit.default_timer()
    AE.fit(train_input_dict,
           train_output_dict,
           batch_size=batch_size,
           initial_epoch=0,
           epochs=n_epoch,
           validation_data=(val_input_dict, val_output_dict),
           verbose=2,
           callbacks=[cb_obj])
    elapsed = timeit.default_timer() - start_time

    print('-------------------------------')
    print('Training time:', elapsed)
    print('-------------------------------')

    # Save weights
    AE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    # Generate summaries
    summary = {}
    for i in range(n_agents):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        # NOTE(review): every agent writes to the same 'z' key, so only the
        # last agent's embedding is kept if n_agents is ever raised above 1.
        summary['z'] = encoder.predict(full_dict['X'])

    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', summary)
    return
def fit(self,
        learning_rate=1e-4,
        epochs=5,
        activation='relu',
        dropout=0,
        hidden_size=1024,
        nb_layers=1,
        include_class_weight=False,
        batch_size=20,
        save_model=False,
        verbose=True,
        fine_tuning=False,
        NB_IV3_LAYERS_TO_FREEZE=279,
        use_TPU=False,
        transfer_model='Inception',
        min_accuracy=None,
        extract_SavedModel=False):
    """
    Train a transfer-learning image classifier from image folders.

    Loads every image below `parentdir + '/data/image_dataset/{train,val}'`
    into memory (one sub-folder per class), puts a dense classification head
    on a frozen ImageNet backbone, trains it, optionally fine-tunes the upper
    layers, and stores the last validation accuracy in `self.fitness`.

    :param learning_rate: Adam learning rate; fine-tuning uses a tenth of it.
    :param epochs: Number of epochs for each training phase.
    :param activation: Activation of the hidden dense layers.
    :param dropout: Dropout rate after each hidden layer (skipped when 0).
    :param hidden_size: Width of each hidden dense layer.
    :param nb_layers: Number of hidden dense layers.
    :param include_class_weight: If True, pass "balanced" class weights
        computed from the category names to `model.fit`.
    :param batch_size: Batch size; also used to derive the step counts.
    :param save_model: If True, save the model as an .h5 file under
        `parentdir + '/data/trained_models'`.
    :param verbose: Forwarded to `model.fit`.
    :param fine_tuning: If True (and training was not stopped), run a second
        training phase with the upper layers unfrozen.
    :param NB_IV3_LAYERS_TO_FREEZE: Index separating frozen from trainable
        layers during fine-tuning.
    :param use_TPU: If True, convert the model with the `tf.contrib.tpu`
        helpers and hand dataset factories (not datasets) to `fit`.
    :param transfer_model: 'Xception', 'Inception_Resnet', 'Resnet' or
        anything else for InceptionV3.
    :param min_accuracy: If given, add an EarlyStopping callback with this
        baseline on 'acc'.
    :param extract_SavedModel: If True, export a SavedModel to
        './image_classifier/1/'.
    """
    if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']:
        target_size = (299, 299)
    else:
        target_size = (224, 224)

    # We expect the classes to be the name of the folders in the training set
    self.categories = os.listdir(TRAIN_DIR)

    # Helper functions to build tensors, inspired by
    # https://www.tensorflow.org/tutorials/load_data/images
    def prepare_image(img_path):
        """Load one image as a float32 array scaled to the 0-1 range."""
        # Close the file handle as soon as the resized copy exists.
        with Image.open(img_path) as raw:
            image = raw.resize(target_size, PIL.Image.BILINEAR).convert("RGB")
        # Convert the image into a numpy array
        image = img_to_array(image)
        # Rescale the pixels to a 0-1 range
        image = image.astype(np.float32) / 255
        return image

    def generate_tuples(img_folder):
        """Return (images, labels) for a folder with one sub-folder per class.

        The label of an image is the position of its class folder in
        `os.listdir(img_folder)`.
        """
        # NOTE(review): os.listdir order is not guaranteed, so the label
        # indices of train and val (and self.categories) are only consistent
        # if the OS lists all three folders in the same order -- confirm.
        classes = os.listdir(img_folder)
        classes_paths = [
            os.path.abspath(os.path.join(img_folder, name))
            for name in classes
        ]
        x = []
        y = []
        for label, class_path in enumerate(classes_paths):
            # For each class, get the list of pictures
            img_paths = [
                os.path.abspath(os.path.join(class_path, fname))
                for fname in os.listdir(class_path)
            ]
            for img_path in img_paths:
                x.append(prepare_image(img_path))
                y.append(label)
        return (np.array(x), np.array(y).astype(np.int32))

    # Get training data
    (x_train, y_train) = generate_tuples(parentdir +
                                         '/data/image_dataset/train')
    (x_val, y_val) = generate_tuples(parentdir + '/data/image_dataset/val')

    # Train input_function: see
    # https://colab.research.google.com/drive/1F8txK1JLXKtAkcvSRQz2o7NSTNoksuU2#scrollTo=abbwQQfH0td3
    def get_training_dataset(batch_size=batch_size):
        """Build the shuffled, repeated and batched training dataset."""
        # Convert the inputs to a Dataset.
        dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        # Shuffle, repeat, and batch the examples.
        dataset = dataset.shuffle(1000).repeat().batch(batch_size,
                                                       drop_remainder=True)
        return dataset

    def get_validation_dataset(batch_size=batch_size):
        """Build the shuffled, repeated and batched validation dataset."""
        # Convert the inputs to a Dataset.
        dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
        # Shuffle, repeat, and batch the examples.
        dataset = dataset.shuffle(1000).repeat().batch(batch_size,
                                                       drop_remainder=True)
        return dataset

    # Optionally stop training when no sufficient improvement in accuracy
    # has been achieved
    if min_accuracy is not None:
        callback = [EarlyStopping(monitor='acc', baseline=min_accuracy)]
    else:
        callback = None

    # Load the pretrained model, without the classification (top) layers
    if transfer_model == 'Xception':
        base_model = Xception(weights='imagenet',
                              include_top=False,
                              input_shape=(299, 299, 3))
    elif transfer_model == 'Inception_Resnet':
        base_model = InceptionResNetV2(weights='imagenet',
                                       include_top=False,
                                       input_shape=(299, 299, 3))
    elif transfer_model == 'Resnet':
        base_model = ResNet50(weights='imagenet',
                              include_top=False,
                              input_shape=(224, 224, 3))
    else:
        base_model = InceptionV3(weights='imagenet',
                                 include_top=False,
                                 input_shape=(299, 299, 3))

    # Add the classification layers using Keras functional API
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    for _ in range(nb_layers):
        # Hidden layer for classification
        x = Dense(hidden_size, activation=activation)(x)
        if dropout > 0:
            x = Dropout(rate=dropout)(x)
    # Output layer
    predictions = Dense(len(self.categories), activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Set only the top layers as trainable (if we want to do fine-tuning,
    # we can train the base layers as a second step)
    for layer in base_model.layers:
        layer.trainable = False

    # Define the optimizer and the loss, and compile the model.
    # Labels are integer class indices, hence the sparse loss.
    loss = 'sparse_categorical_crossentropy'
    if use_TPU:
        # If we want to try out the TPU, it looks like we currently need to
        # use tensorflow optimizers...see
        # https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked
        # ...and https://www.youtube.com/watch?v=jgNwywYcH4w
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # Use the same loss as the non-TPU and fine-tuning paths. The
        # previous `sparse_softmax_cross_entropy` name is not defined in this
        # method, and the output layer already applies softmax.
        model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])
        TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
        model = tf.contrib.tpu.keras_to_tpu_model(
            model,
            strategy=tf.contrib.tpu.TPUDistributionStrategy(
                tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)))
        tf.logging.set_verbosity(tf.logging.INFO)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])

    # Optionally weight the classes given the imbalanced number of images
    if include_class_weight:
        from sklearn.utils.class_weight import compute_class_weight
        # NOTE(review): the weights are derived from the list of category
        # names, not from the per-image labels in y_train -- confirm this is
        # intended.
        cls_train = self.categories
        class_weight = compute_class_weight(class_weight='balanced',
                                            classes=np.unique(cls_train),
                                            y=cls_train)
    else:
        class_weight = None

    steps_per_epoch = int(
        sum(
            len(files) for r, d, files in os.walk(
                parentdir + '/data/image_dataset/train')) / batch_size)
    validation_steps = int(
        sum(
            len(files) for r, d, files in os.walk(
                parentdir + '/data/image_dataset/val')) / batch_size)

    def run_training(net):
        """Run one `fit` pass; the TPU model receives the dataset factories."""
        if use_TPU:
            train_input = get_training_dataset
            val_input = get_validation_dataset
        else:
            train_input = get_training_dataset()
            val_input = get_validation_dataset()
        return net.fit(train_input,
                       steps_per_epoch=steps_per_epoch,
                       epochs=epochs,
                       validation_data=val_input,
                       validation_steps=validation_steps,
                       verbose=verbose,
                       callbacks=callback,
                       class_weight=class_weight)

    # Fit the model
    history = run_training(model)

    # Fine-tune the model, if we wish so
    if fine_tuning and not model.stop_training:
        print('============')
        print('Begin fine-tuning')
        print('============')

        # Keep the first layers frozen and make the remaining ones trainable
        for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
            layer.trainable = False
        for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
            layer.trainable = True

        model.compile(optimizer=tf.train.AdamOptimizer(
            learning_rate=learning_rate * 0.1),
                      loss=loss,
                      metrics=['acc'])

        # Fit the model again with the new trainable set
        history = run_training(model)

    # Keep the last validation accuracy as the fitness of this configuration.
    # The metric is registered as 'acc', so Keras logs it as 'val_acc'.
    self.fitness = history.history['val_acc'][-1]

    # Save the model
    if save_model:
        if not os.path.exists(parentdir + '/data/trained_models'):
            os.makedirs(parentdir + '/data/trained_models')
        model.save(parentdir + '/data/trained_models/trained_model.h5')
        print('Model saved!')

    # Save model in production format
    # NOTE(review): the nesting of this if/else was reconstructed from
    # flattened source -- confirm that `self.model` should be kept exactly
    # when no SavedModel is exported.
    if extract_SavedModel:
        export_path = "./image_classifier/1/"
        with K.get_session() as sess:
            tf.saved_model.simple_save(
                sess,
                export_path,
                inputs={'input_image': model.input},
                outputs={t.name: t
                         for t in model.outputs})
    else:
        self.model = model

    del history
    del model
def seq2seq_architecture(latent_size, vocabulary_size, embedding_matrix,
                         batch_size, epochs, train_article, train_summary,
                         train_target):
    """
    Build and train a two-layer stacked-LSTM seq2seq model, then derive the
    encoder and decoder models used for step-by-step inference.

    :param latent_size: Number of units in every LSTM layer.
    :param vocabulary_size: Vocabulary size; embeddings and the output layer
        use `vocabulary_size + 1` entries.
    :param embedding_matrix: Pre-trained embedding weights (300 columns),
        kept frozen during training.
    :param batch_size: Training batch size.
    :param epochs: Maximum number of training epochs (early stopping may end
        training sooner).
    :param train_article: Encoder input sequences.
    :param train_summary: Decoder input sequences.
    :param train_target: Decoder target sequences; a trailing axis is added
        before fitting.
    :return: Tuple (encoder_model, decoder_model) sharing the trained layers.
    """
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(vocabulary_size + 1,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Encoder-Word-Embedding')
    norm_encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')
    encoder_lstm_1 = LSTM(
        latent_size,
        name='Encoder-LSTM-1',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    # The sequence of the last layer is not returned because we want a single
    # vector that stores everything.
    encoder_lstm_2 = LSTM(
        latent_size,
        name='Encoder-LSTM-2',
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )

    e = encoder_embeddings(encoder_inputs)
    e = norm_encoder_embeddings(e)
    e, _, _ = encoder_lstm_1(e)
    # Only the final states of the second layer are passed to the decoder.
    _, e_state_h_2, e_state_c_2 = encoder_lstm_2(e)
    encoder_states = [e_state_h_2, e_state_c_2]

    encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(vocabulary_size + 1,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(
        latent_size,
        name='Decoder-LSTM-1',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    decoder_lstm_2 = LSTM(
        latent_size,
        name='Decoder-LSTM-2',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    decoder_dense = Dense(vocabulary_size + 1,
                          activation='softmax',
                          name="Final-Output-Dense")

    # Training graph: both decoder layers start from the encoder's final
    # states.
    d = decoder_embeddings(decoder_inputs)
    d = norm_decoder_embeddings(d)
    d, _, _ = decoder_lstm_1(d, initial_state=encoder_states)
    d, _, _ = decoder_lstm_2(d, initial_state=encoder_states)
    d = norm_decoder(d)
    decoder_outputs = decoder_dense(d)

    seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs],
                          outputs=decoder_outputs)
    seq2seq_model.compile(optimizer="adam",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    # Balanced weights over every token id occurring in the summaries.
    # Keyword arguments are used because recent scikit-learn releases no
    # longer accept `classes` and `y` positionally.
    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight('balanced',
                                                      classes=np.unique(classes),
                                                      y=classes)

    e_stopping = EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.1,
                                callbacks=[e_stopping],
                                class_weight=class_weights)

    # `with` guarantees the file is closed even if the write fails.
    with open("data/models/stacked_results.txt", "w", encoding="utf-8") as f:
        f.write("Stacked LSTM \n layers: 2 \n latent size: " +
                str(latent_size) + "\n vocab size: " + str(vocabulary_size) +
                "\n")

    history_dict = history.history
    plot_loss(history_dict)

    # inference: the same decoder layers are re-wired so that each LSTM layer
    # receives its own explicit initial state.
    decoder_initial_state_h1 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-H1')
    decoder_initial_state_c1 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-C1')
    decoder_initial_state_h2 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-H2')
    decoder_initial_state_c2 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-C2')

    i = decoder_embeddings(decoder_inputs)
    i = norm_decoder_embeddings(i)
    i, h1, c1 = decoder_lstm_1(
        i, initial_state=[decoder_initial_state_h1, decoder_initial_state_c1])
    i, h2, c2 = decoder_lstm_2(
        i, initial_state=[decoder_initial_state_h2, decoder_initial_state_c2])
    i = norm_decoder(i)
    decoder_output = decoder_dense(i)
    # Every layer keeps its own states, important at predicting
    decoder_states = [h1, c1, h2, c2]

    decoder_model = Model(inputs=[decoder_inputs] + [
        decoder_initial_state_h1, decoder_initial_state_c1,
        decoder_initial_state_h2, decoder_initial_state_c2
    ],
                          outputs=[decoder_output] + decoder_states)

    return encoder_model, decoder_model