Example #1
def fit_and_evaluate(model: Model, model_filename: str, t_x, val_x, t_y, val_y, epochs=20, batch_size=128) -> History:
    results = model.fit(
        t_x,
        t_y,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=get_callbacks(model_filename),
        verbose=1,
        validation_data=[val_x, val_y],
    )
    logging.info("Score against validation set: %s", model.evaluate(val_x, val_y))
    return results
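The get_callbacks helper used above is not shown in this example. A minimal sketch of what it might return, assuming a checkpoint-plus-early-stopping callback list (names and arguments here are assumptions, not the original implementation):

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def get_callbacks(model_filename: str):
    # Assumed implementation: save the best weights and stop when val_loss stalls.
    return [
        ModelCheckpoint(model_filename, monitor="val_loss", save_best_only=True),
        EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True),
    ]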
Example #2
def vgg16(train_data, train_labels, val_data, val_labels):
    # Use the VGG16 model that ships with Keras.
    # weights: the weight checkpoint used to initialise the model.
    # include_top: whether to include the densely connected classifier on top.
    # By default that classifier corresponds to the 1000 ImageNet classes.
    # Since we attach our own densely connected classifier, it is set to False.
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model_vgg16 = VGG16(weights='imagenet',
                        include_top=False,
                        input_shape=(224, 224, 3))
    # Inspect the model_vgg16 architecture
    # model_vgg16.summary()
    # Freeze the pretrained weights
    for layer in model_vgg16.layers:
        layer.trainable = False
    # Flatten the feature maps into a 1-D vector
    model = layers.Flatten(name='flatten')(model_vgg16.output)
    # Fully connected layer
    model = layers.Dense(64, activation='relu')(model)
    # Per batch, re-normalise the previous layer's activations so the output has mean ~0 and std ~1
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(32, activation='relu')(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(16, activation='relu')(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(0.5)(model)
    model = layers.Dense(5, activation='softmax')(model)
    model = Model(inputs=model_vgg16.input, outputs=model, name='vgg16')

    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.fit(train_data,
              train_labels,
              batch_size=32,
              epochs=50,
              validation_data=(val_data, val_labels))
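Note that vgg16() above trains the model but never returns it, so callers cannot reuse it afterwards. A hypothetical call site (the random arrays below are placeholders, not part of the original example):

import numpy as np
from tensorflow.keras.utils import to_categorical

num_classes = 5
train_data = np.random.rand(40, 224, 224, 3).astype("float32")
train_labels = to_categorical(np.random.randint(num_classes, size=40), num_classes)
val_data = np.random.rand(10, 224, 224, 3).astype("float32")
val_labels = to_categorical(np.random.randint(num_classes, size=10), num_classes)

vgg16(train_data, train_labels, val_data, val_labels)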
Example #3
    def fit_model_on_fold(self, compiled_model: Model, curr_fold_indices,
                          train_sequences, test_sequences):
        """
        trains compiled (but previously unfitted) model against given indices
        :param compiled_model:
        :param curr_fold_indices:
        :param train_sequences:
        :param test_sequences:
        :return:
        """
        train_indices, val_indices = curr_fold_indices
        x_train = train_sequences[train_indices]
        y_train = self.raw_train_df[
            self.target_cols].iloc[train_indices].values
        x_val = train_sequences[val_indices]
        y_val = self.raw_train_df[self.target_cols].iloc[val_indices].values

        with tf.Session() as session:
            K.set_session(session)
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())

            compiled_model.fit(x_train,
                               y_train,
                               batch_size=self.batch_size,
                               epochs=self.epochs,
                               validation_data=(x_val, y_val))

            val_pred = compiled_model.predict(x_val,
                                              batch_size=self.batch_size,
                                              verbose=0)
            val_roc_auc_score = roc_auc_score(y_val, val_pred)
            print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

            val_df = pd.DataFrame(val_pred, index=val_indices)
            val_df.columns = self.target_cols

        return val_roc_auc_score, val_df
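A minimal sketch (an assumption, not taken from the original class) of how the curr_fold_indices pairs consumed by fit_model_on_fold could be produced:

import numpy as np
from sklearn.model_selection import KFold

def make_fold_indices(n_samples, n_splits=5, seed=42):
    # Each element is a (train_indices, val_indices) pair, matching the
    # unpacking performed at the top of fit_model_on_fold().
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    return list(kf.split(np.arange(n_samples)))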
Example #4
def mlp(x,
        y,
        activation="relu",
        nodes_per_layer=100,
        num_layers=5,
        epochs=1000):

    inputs = Input(shape=(1, ))
    nn = Dense(nodes_per_layer, activation=activation)(inputs)

    for __ in range(num_layers - 1):
        nn = Dense(nodes_per_layer, activation=activation)(nn)

    predictions = Dense(1, activation='linear')(nn)

    model = Model(inputs=inputs, outputs=predictions)

    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x, y, batch_size=x.shape[0], epochs=epochs, verbose=0)

    fitted = model.predict(x, batch_size=x.shape[0], verbose=0)

    return model, fitted
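Hypothetical usage of mlp() above, fitting a 1-D regression on noisy sine data (the data here is a placeholder, not from the original example):

import numpy as np

x = np.linspace(-3.0, 3.0, 200).reshape(-1, 1)
y = np.sin(x) + 0.1 * np.random.randn(*x.shape)

model, fitted = mlp(x, y, nodes_per_layer=50, num_layers=3, epochs=200)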
Example #5
def model1(x_train, y_train, x_test, y_test):
    # 200-dimensional (head, relation, tail) triples
    inputs = keras.Input(shape=(
        100,
        3,
        1,
    ))
    cnn1 = Conv2D(filters=50,
                  kernel_initializer=keras.initializers.TruncatedNormal(
                      mean=0.0, stddev=0.05, seed=None),
                  kernel_size=(1, 3),
                  padding='valid',
                  strides=1,
                  activation='relu')(inputs)
    flat = Flatten()(cnn1)

    drop = Dropout(0.2)(flat)
    #out1 = Dense(units=1,use_bias=False,kernel_regularizer=keras.regularizers.l2(0.0005))(drop)
    out1 = Dense(units=1, use_bias=False)(drop)

    # net
    model1 = Model(inputs, out1)
    #model1.compile()
    #model1.summary()
    #ou1_output=model1.predict(x_train,) # shape is (-1, 1)
    model1.compile(loss=myLoss, optimizer=Adam(6e-6))
    model1.summary()
    loss_value = []
    history = model1.fit(x_train,
                         y_train,
                         batch_size=30,
                         epochs=200,
                         validation_data=(x_test, y_test))
    # plot history
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='valid')
    pyplot.legend()
    pyplot.show()

    model1.save('../data/modelFile/originalConvKB_onlyType13_%s_%s.h5' %
                (round(history.history['loss'][-1],
                       4), round(history.history['val_loss'][-1], 4)))
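The custom loss myLoss compiled above is not defined in this example. A placeholder that merely makes the snippet runnable (an assumption; the original ConvKB-style loss is likely different):

from tensorflow.keras import backend as K

def myLoss(y_true, y_pred):
    # Plain mean squared error expressed as a custom Keras loss.
    return K.mean(K.square(y_pred - y_true), axis=-1)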
Example #6
def AE_train(encoding_dim, x_train, epochs_num):
    # Encoder layers
    input_data = Input(shape=[29])
    encoded = Dense(24, activation='relu')(input_data)
    encoded = Dense(16, activation='relu')(encoded)
    encoded = Dense(8, activation='relu')(encoded)
    encoder_output = Dense(encoding_dim)(encoded)
    # Decoder layers
    decoded = Dense(8, activation='relu')(encoder_output)
    decoded = Dense(16, activation='relu')(decoded)
    decoded = Dense(24, activation='relu')(decoded)
    decoded = Dense(29, activation='tanh')(decoded)

    autoencoder = Model(inputs=input_data, outputs=decoded)
    encoder = Model(inputs=input_data, outputs=encoder_output)

    autoencoder.compile(optimizer='adam', loss='mse')

    def step_decay(epoch):
        initial_lrate = 0.01
        drop = 0.5
        epochs_drop = 10.0
        _lrate = initial_lrate * math.pow(
            drop, math.floor((1 + epoch) / epochs_drop))
        return _lrate

    lrate = LearningRateScheduler(step_decay)

    history = autoencoder.fit(x_train,
                              x_train,
                              epochs=epochs_num,
                              batch_size=256,
                              callbacks=[lrate])

    loss = history.history['loss']
    epochs = range(1, epochs_num + 1)
    plt.title('Loss')
    plt.plot(epochs, loss, 'blue', label='loss')
    plt.legend()
    plt.show()
    encoder.save("encoder_model.h5")
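A hypothetical call of AE_train() above; the random 29-feature matrix is a stand-in for whatever standardized data the original used:

import numpy as np

x_train = np.random.rand(1000, 29).astype("float32")
AE_train(encoding_dim=2, x_train=x_train, epochs_num=20)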
Example #7
    def forward(self, X_train, X_test, y_train, y_test):
        X_shape = X_train.shape[1]
        y_shape = y_train.shape[1]
        X = Input(shape=(self.image_size, self.image_size, 1), name='input')
        label = Input(shape=(y_shape,), name='label')
        
        encoder, shape = self.encode(X, label)
        encoder.summary()

        z_inputs = Input(shape=(self.n_dim,), name='latent_input')
        decoder = self.decode(z_inputs, label, shape)
        decoder.summary()

        z_output = encoder([X, label])[2]
        outputs = decoder([z_output, label])
        cvae = Model([X, label], outputs, name='cvae')
        cvae.compile(optimizer=Adam(lr=self.learning_rate, decay=self.decay_rate, epsilon=1e-08), loss=self.vae_loss)
        cvae.summary()
        tensorboard = TensorBoard(log_dir="{}/{}".format(self.logs_dir,time()))
        cvae_hist = cvae.fit([X_train, y_train], X_train, verbose=1, batch_size=self.batch_size, epochs=self.epochs,
                     validation_data=([X_test, y_test], X_test), callbacks=[tensorboard], shuffle=True)
        decoder.save(self.args.save_model + '.h5')
        return cvae, cvae_hist
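The vae_loss used when compiling the CVAE is not shown. A sketch of a method that could fill that role, assuming encode() stores self.z_mean and self.z_log_var for the latent Gaussian (that assumption is about this class, not taken from its code):

from tensorflow.keras import backend as K

def vae_loss(self, y_true, y_pred):
    # Per-sample reconstruction error plus KL divergence of the latent distribution.
    y_true_flat = K.batch_flatten(y_true)
    y_pred_flat = K.batch_flatten(y_pred)
    reconstruction = K.sum(K.binary_crossentropy(y_true_flat, y_pred_flat), axis=-1)
    kl = -0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
    return reconstruction + kl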
Example #8
    def fit(self,hidden_nodes,activation="sigmoid"):
        start = time.time()
        input_img = Input(shape=(self.D,)) # this is our input placeholder
        
        encoded = Dense(hidden_nodes, activation=activation,
                        kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.05),
                        bias_initializer=keras.initializers.Zeros(),
                        kernel_regularizer=keras.regularizers.l1(self.Lambda))(input_img) # "encoded" is the encoded representation of the input
        decoded = Dense(self.D, activation='sigmoid',
                        kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.05),
                        bias_initializer=keras.initializers.Zeros(),
                        kernel_regularizer=keras.regularizers.l1(self.Lambda))(encoded) # "decoded" is the lossy reconstruction of the input
        
        # encode
        self.encoder = Model(inputs=input_img, outputs=encoded) # encoder model: maps an input to its encoded representation
        autoencoder = Model(inputs=input_img, outputs=decoded) # this model maps an input to its reconstruction

        encoded_input = Input(shape=(hidden_nodes,)) # placeholder for encoded (32-dimensional) input
        decoder_layer = autoencoder.layers[-1] # retrieve the last layer of the autoencoder model
        self.decoder = Model(inputs=encoded_input, outputs=decoder_layer(encoded_input)) # create the decoder model
        optimizer = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay = 0, nesterov=False)
        # autoencoder.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy","mean_squared_error"])
        autoencoder.compile(optimizer=optimizer, loss="mse", metrics=["accuracy","mean_squared_error"])
        
        info = autoencoder.fit(self.X_train, self.X_train, # for training: x, y
                epochs=100,
                batch_size=50, # default: 32
                shuffle=True # shuffle each batch
                ,validation_data = (self.X_val, self.X_val) #no validation
                )
        
        # self.val_accs.append(info.history["val_acc"])
        # print(info.history["val_loss"])
        self.timeLst.append(time.time()-start)
        # self.val_mse.append(info.history["val_mean_squared_error"])
        self.train_mse.append(info.history["mean_squared_error"])
Example #9
def main():
    from tensorflow.examples.tutorials.mnist import input_data
    data = input_data.read_data_sets("data/MNIST/", one_hot=True)
    #    data_train = tfds.load(name="mnist", split="train")
    #    data_test = tfds.load(name="mnist", split="test")
    print("Size of:")
    print("- Training-set:\t\t{}".format(len(data.train.labels)))
    print("- Test-set:\t\t{}".format(data.test.labels))
    # Get the first images from the test-set.
    data.test.cls = np.array([label.argmax() for label in data.test.labels])

    #   images = data.x_test[0:9]
    images = data.test.images[0:9]
    #Get the true classes
    #   cls_true = data.y_test_cls[0:9]
    cls_true = data.test.cls[0:9]
    # Plot the images and labels using our helper-function above.
    plot_images(images=images, cls_true=cls_true)

    if using_seq_model:

        model = Sequential()
        # Add an input layer which is similar to a feed_dict in TensorFlow.
        # Note that the input-shape must be a tuple containing the image-size.
        model.add(InputLayer(input_shape=(img_size_flat, )))
        # The input is a flattened array with 784 elements,
        # but the convolutional layers expect images with shape (28, 28, 1)
        model.add(Reshape(img_shape_full))
        # x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
        # x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
        # y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
        # y_true_cls = tf.argmax(y_true, axis=1)

        # First convolutional layer with ReLU-activation and max-pooling.
        model.add(
            Conv2D(kernel_size=5,
                   strides=1,
                   filters=16,
                   padding='same',
                   activation='relu',
                   name='layer_conv1'))
        model.add(MaxPooling2D(pool_size=2, strides=2))
        # layer_conv1, weights_conv1 = new_conv_layer(input=x_image,
        #                num_input_channels=num_channels,
        #                filter_size=filter_size1,
        #                num_filters=num_filters1,
        #                use_pooling=True)
        # print (layer_conv1)

        # Second convolutional layer with ReLU-activation and max-pooling.
        model.add(
            Conv2D(kernel_size=5,
                   strides=1,
                   filters=36,
                   padding='same',
                   activation='relu',
                   name='layer_conv2'))
        model.add(MaxPooling2D(pool_size=2, strides=2))

        # layer_conv2, weights_conv2 = new_conv_layer(input=layer_conv1,
        #                num_input_channels=num_filters1,
        #                filter_size=filter_size2,
        #                num_filters=num_filters2,
        #                use_pooling=True)
        # print (layer_conv2)

        # Flatten the 4-rank output of the convolutional layers
        # to 2-rank that can be input to a fully-connected / dense layer.
        model.add(Flatten())

        # layer_flat, num_features = flatten_layer(layer_conv2)
        # print (layer_flat)
        # print (num_features)

        # First fully-connected / dense layer with ReLU-activation.
        model.add(Dense(128, activation='relu'))
        # layer_fc1 = new_fc_layer(input=layer_flat,
        #                      num_inputs=num_features,
        #                      num_outputs=fc_size,
        #                      use_relu=True)
        # print (layer_fc1)

        # Last fully-connected / dense layer with softmax-activation
        # for use in classification.
        model.add(Dense(num_classes, activation='softmax'))
        # layer_fc2 = new_fc_layer(input=layer_fc1,
        #                      num_inputs=fc_size,
        #                      num_outputs=num_classes,
        #                      use_relu=False)
        # print(layer_fc2)
        # y_pred = tf.nn.softmax(layer_fc2)
        # y_pred_cls = tf.argmax(y_pred, axis=1)
        # cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
        #                                                     labels=y_true)
        # cost = tf.reduce_mean(cross_entropy)
        from tensorflow.keras.optimizers import Adam

        optimizer = Adam(lr=1e-3)
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
        # correct_prediction = tf.equal(y_pred_cls, y_true_cls)
        # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # session = tf.Session()
        # session.run(tf.global_variables_initializer())
        model.fit(x=data.train.images,
                  y=data.train.labels,
                  epochs=1,
                  batch_size=128)
        result = model.evaluate(x=data.test.images, y=data.test.labels)
        print('')
        for name, value in zip(model.metrics_names, result):
            print(name, value)
        print("{0}: {1:.2%}".format(model.metrics_names[1], result[1]))

        # `save_model` requires h5py
        model.save(path_model)

        del model
    if using_fun_model:
        # Create an input layer which is similar to a feed_dict in TensorFlow.
        # Note that the input-shape must be a tuple containing the image-size.
        inputs = Input(shape=(img_size_flat, ))

        # Variable used for building the Neural Network.
        net = inputs

        # The input is an image as a flattened array with 784 elements.
        # But the convolutional layers expect images with shape (28, 28, 1)
        net = Reshape(img_shape_full)(net)

        # First convolutional layer with ReLU-activation and max-pooling.
        net = Conv2D(kernel_size=5,
                     strides=1,
                     filters=16,
                     padding='same',
                     activation='relu',
                     name='layer_conv1')(net)
        net = MaxPooling2D(pool_size=2, strides=2)(net)

        # Second convolutional layer with ReLU-activation and max-pooling.
        net = Conv2D(kernel_size=5,
                     strides=1,
                     filters=36,
                     padding='same',
                     activation='relu',
                     name='layer_conv2')(net)
        net = MaxPooling2D(pool_size=2, strides=2)(net)

        # Flatten the output of the conv-layer from 4-dim to 2-dim.
        net = Flatten()(net)

        # First fully-connected / dense layer with ReLU-activation.
        net = Dense(128, activation='relu')(net)

        # Last fully-connected / dense layer with softmax-activation
        # so it can be used for classification.
        net = Dense(num_classes, activation='softmax')(net)

        # Output of the Neural Network.
        outputs = net

        from tensorflow.python.keras.models import Model
        model2 = Model(inputs=inputs, outputs=outputs)
        model2.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
        model2.fit(x=data.train.images,
                   y=data.train.labels,
                   epochs=1,
                   batch_size=128)
        result = model2.evaluate(x=data.test.images, y=data.test.labels)
        print('')
        for name, value in zip(model2.metrics_names, result):
            print(name, value)
        print("{0}: {1:.2%}".format(model2.metrics_names[1], result[1]))

        # `save_model` requires h5py
        model2.save(path_model)

    if reload_model:

        from tensorflow.python.keras.models import load_model
        model3 = load_model(path_model)

        #images = data.x_test[0:9]
        images = data.test.images[0:9]
        #cls_true = data.y_test_cls[0:9]
        cls_true = data.test.labels[0:9]
        y_pred = model3.predict(x=images)
        cls_pred = np.argmax(y_pred, axis=1)
        plot_images(images=images, cls_true=cls_true, cls_pred=cls_pred)

        y_pred = model3.predict(x=data.test.images)
        cls_pred = np.argmax(y_pred, axis=1)
        cls_true = data.test.cls
        correct = (cls_true == cls_pred)
        plot_example_errors(data, cls_pred=cls_pred, correct=correct)

        model3.summary()
        #       Attention: the functional and sequential models are different in
        #       layers, for sequential ones:
        if reading_seq_model:
            layer_input = model3.layers[0]
            layer_conv1 = model3.layers[1]
            print(layer_conv1)
            layer_conv2 = model3.layers[3]
        elif reading_fun_model:
            layer_input = model3.layers[0]
            layer_conv1 = model3.layers[2]
            print(layer_conv1)
            layer_conv2 = model3.layers[4]
        weights_conv1 = layer_conv1.get_weights()[0]
        print(weights_conv1.shape)
        plot_conv_weights(weights=weights_conv1, input_channel=0)
        weights_conv2 = layer_conv2.get_weights()[0]
        plot_conv_weights(weights=weights_conv2, input_channel=0)
        image1 = data.test.images[0]
        plot_image(image1)

        # from tensorflow.keras import backend as K
        # output_conv1 = K.function(inputs=[layer_input.input],
        #                   outputs=[layer_conv1.output])
        # print(output_conv1)
        # print(output_conv1([[image1]]))
        # layer_output1 = output_conv1([[image1]])[0]
        # print(layer_output1.shape)
        # plot_conv_output(values=layer_output1)

        from tensorflow.keras.models import Model
        output_conv2 = Model(inputs=layer_input.input,
                             outputs=layer_conv2.output)
        layer_output2 = output_conv2.predict(np.array([image1]))
        print(layer_output2.shape)
        plot_conv_output(values=layer_output2)
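The plotting helpers used throughout this example (plot_images, plot_example_errors, plot_conv_weights, plot_conv_output, plot_image) are defined elsewhere. A minimal sketch of plot_images, assuming 28x28 greyscale images (the other helpers follow the same pattern):

import matplotlib.pyplot as plt

def plot_images(images, cls_true, cls_pred=None, img_shape=(28, 28)):
    fig, axes = plt.subplots(3, 3)
    for i, ax in enumerate(axes.flat):
        ax.imshow(images[i].reshape(img_shape), cmap="binary")
        if cls_pred is None:
            ax.set_xlabel("True: {}".format(cls_true[i]))
        else:
            ax.set_xlabel("True: {}, Pred: {}".format(cls_true[i], cls_pred[i]))
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()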
Example #10
class ConvMnist:
    def __init__(self, filename=None):
        '''
        Load a trained model file (optional)
		'''
        self.model = None
        if filename is not None:
            print('load model: ', filename)
            self.model = load_model(filename)
            self.model.summary()

    def train(self):
        '''
        Train the model
		'''
        # Load the MNIST training and test data
        (x_train_org, y_train), (x_test_org, y_test) = mnist.load_data()

        # Preprocess the training data
        # X: convert to a 60000x28x28x3 tensor and normalise the values to 0-1.0
        # Y: one-hot encode (60000x1 -> 60000x10)
        x_train = np.empty((x_train_org.shape[0], x_train_org.shape[1],
                            x_train_org.shape[2], 3))
        x_train[:, :, :, 0] = x_train_org
        x_train[:, :, :, 1] = x_train_org
        x_train[:, :, :, 2] = x_train_org
        x_test = np.empty(
            (x_test_org.shape[0], x_test_org.shape[1], x_test_org.shape[2], 3))
        x_test[:, :, :, 0] = x_test_org
        x_test[:, :, :, 1] = x_test_org
        x_test[:, :, :, 2] = x_test_org
        x_train = x_train / 255.
        x_test = x_test / 255.
        y_train = to_categorical(y_train, 10)
        y_test = to_categorical(y_test, 10)

        # TensorBoard setup for monitoring training (disabled)
        #		tsb = TensorBoard(log_dir='./logs')

        # Build the convolution model
        input = Input(shape=(28, 28, 3))
        conv1 = Conv2D(filters=8,
                       kernel_size=(3, 3),
                       strides=(1, 1),
                       padding='same',
                       activation='relu')(input)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = Conv2D(filters=4,
                       kernel_size=(3, 3),
                       strides=(1, 1),
                       padding='same',
                       activation='relu')(pool1)
        dropout1 = Dropout(0.2)(conv2)
        flatten1 = Flatten()(dropout1)
        output = Dense(units=10, activation='softmax')(flatten1)
        self.model = Model(inputs=[input], outputs=[output])

        self.model.summary()

        self.model.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        # Train the convolution model
        self.model.fit(
            x_train,
            y_train,
            batch_size=128,
            epochs=10,
            validation_split=0.2,
            #			callbacks=[tsb],
        )

        # Evaluate the trained model on the test data
        score = self.model.evaluate(x_test, y_test, verbose=0)
        print("test data score: ", score)

    def save_trained_model(self, filename):
        '''
        Save the trained model to a file (h5)
		'''
        self.model.save(filename)

    def predict(self, input_image):
        '''
        Classify one colour input image (a 28x28 ndarray) as a digit (0-9)
		ret: result, score
		'''
        if input_image.shape != (28, 28, 3):
            return -1, -1
        input_image = input_image.reshape(1, input_image.shape[0],
                                          input_image.shape[1], 3)
        input_image = input_image / 255.

        probs = self.model.predict(input_image)
        result = np.argmax(probs[0])
        return result, probs[0][result]
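Hypothetical usage of ConvMnist: train from scratch, save the model, then classify a single image (the sample image below is a placeholder):

import numpy as np

classifier = ConvMnist()
classifier.train()
classifier.save_trained_model("conv_mnist.h5")

sample = (np.random.rand(28, 28, 3) * 255).astype("float32")
digit, score = classifier.predict(sample)
print("predicted digit:", digit, "score:", score)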
Example #11
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
X_train = X_train.astype("float") / 255.0
X_test = X_test.astype("float") / 255.0

model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(image_size, image_size, 3))

top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(num_classes, activation="softmax"))

model = Model(inputs=model.input, outputs=top_model(model.output))

for layer in model.layers[:15]:
    layer.trainable = False

opt = Adam(lr=0.0001)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=32, epochs=17)

score = model.evaluate(X_test, y_test, batch_size=32)

model.save('./vgg16_transfer.h5')
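The arrays X_train/y_train/X_test/y_test and the image_size/num_classes constants used above are assumed to be prepared earlier. One way that preparation might look (an assumption, using CIFAR-10 as a stand-in dataset):

from tensorflow.keras.datasets import cifar10

image_size = 32   # CIFAR-10 images are 32x32x3
num_classes = 10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()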
Example #12
class BuildModel():
    def __init__(self):
        self.TIMESTEP = 20
        self.DATA_DIM = 1
        self.model = None
        self.input = None
        self.output = None
        self.x_train = None
        self.y_train = None
        self.history = None
        self.filename = r"../../../data/all_part"
        self.original_data = pd.read_csv(self.filename, sep="\t")

    def __get_data(self):
        x = []
        y = []
        data = self.original_data.iloc[:, -1].values
        for i in range(len(data) - self.TIMESTEP * 2 - 1):
            x.append(data[i:i + self.TIMESTEP])
            y.append(data[i + self.TIMESTEP:i + self.TIMESTEP * 2 + 1])
        x = np.asarray(x, dtype=np.float32)
        x = (x - x.min()) / (x.max() - x.min())
        y = np.asarray(y, dtype=np.float32)
        y = (y - y.min()) / (y.max() - y.min())
        self.x_train = x.reshape([x.shape[0], x.shape[1], 1])
        self.y_train = y.reshape([y.shape[0], 21])
        self.input = Input(shape=(20, 1), name="input_tensor")

    def __built_multi_cell_Layer(self):
        """
        :return: the output tensor.
        """
        o1 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(self.input)
        o1 = BatchNormalization(1)(o1)
        o2 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(o1)
        o2 = BatchNormalization(1)(o2)
        o3 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=True)(o2)
        o3 = BatchNormalization(1)(o3)
        o4 = RNN(MinimalRNNCell(32, "tanh"), return_sequences=False)(o3)
        o5 = Dense(21, activation="relu")(o4)
        self.output = o5

    def build_model(self, if_load_old_model=False):
        if self.input is None:
            self.__get_data()
        if self.output is None:
            self.__built_multi_cell_Layer()
        if self.model is None:
            try:
                if if_load_old_model:
                    self.model = load_model(
                        "./model_tensorboard_3.h5",
                        custom_objects={'MinimalRNNCell': MinimalRNNCell})
                    print("train prepare model.......")
                    history = self.model.fit(self.x_train,
                                             self.y_train,
                                             20,  # batch_size
                                             epochs=1000,
                                             verbose=1,
                                             callbacks=[TensorBoard('./log3')])
                    self.history = history.history
                    self.model.save("./model_tensorboard_4.h5")
                    self._write_val_loss_to_csv('./val_loss_4.csv',
                                                'mean_absolute_error')
                else:
                    if not isinstance(self.model, Sequential):
                        print("train new model .......")
                        print(self.input.shape, self.output.shape)
                        self.model = Model(inputs=self.input,
                                           outputs=self.output)
                        self.model.compile("adam", loss="mae", metrics=["mae"])
                        print(self.model.summary())
                        print(self.x_train.shape, self.y_train.shape)
                        history = self.model.fit(self.x_train,
                                                 self.y_train,
                                                 50,    # batch_size
                                                 1000,  # epochs
                                                 1,     # verbose
                                                 validation_split=0.2,
                                                 callbacks=[TensorBoard()])
                        self.history = history.history
                        self.model.save("./model_tensorboard_1.h5")
                        self._write_val_loss_to_csv('./val_loss_2.csv', 'val_loss')
            except Exception:
                raise

    def _write_val_loss_to_csv(self, file_name, keys):
        val_loss = self.history[keys]
        val_loss = np.asarray(val_loss, dtype=np.float32)
        df = pd.DataFrame(val_loss)
        df.to_csv(file_name, mode='a', header=False)
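MinimalRNNCell, referenced by the RNN layers above and by custom_objects when reloading, is not included in this snippet. A sketch modelled on the standard Keras custom-cell example (the activation argument is an assumption added to match the calls above):

from keras import activations, backend as K
from keras.layers import Layer

class MinimalRNNCell(Layer):
    def __init__(self, units, activation="tanh", **kwargs):
        self.units = units
        self.state_size = units
        self.activation = activations.get(activation)
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer="uniform", name="kernel")
        self.recurrent_kernel = self.add_weight(shape=(self.units, self.units),
                                                initializer="uniform",
                                                name="recurrent_kernel")
        self.built = True

    def call(self, inputs, states):
        # Single-gate recurrence: output = act(W x + U h_prev)
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = self.activation(h + K.dot(prev_output, self.recurrent_kernel))
        return output, [output]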
Example #13
                       return_state=True)
    dec_output, _ = dec_gru(dec_one_hot, initial_state=enc_state)
else:
    dec_gru = GRU(units=dec_size, return_sequences=True, return_state=True)
    dec_output, _ = dec_gru(dec_one_hot, initial_state=enc_state)

dec_dense = Dense(spa_vocab_size, activation='softmax')
pred = dec_dense(dec_output)

# compile and fit
model = Model(inputs=[enc_inp, dec_inp], outputs=pred)
model.compile(optimizer=Adam(0.005),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit([enc_sequence_inps, dec_sequence_inps],
          dec_sequence_outputs,
          batch_size=128,
          epochs=100)

# save model
model.save('s2s.hd5')

################################################################################
################################################################################
################################################################################
# retrieve model
model = load_model('s2s.hd5')

encoder_inputs = model.input[0]  # input_1
_, encoder_states = model.layers[4].output  # gru_1
encoder_model = Model(encoder_inputs, encoder_states)
Example #14
class JointBertModel1(NLUModel):
    def __init__(self,
                 intents_num,
                 bert_hub_path,
                 num_bert_fine_tune_layers=10,
                 is_bert=True):
        #self.slots_num = slots_num
        self.intents_num = intents_num
        self.bert_hub_path = bert_hub_path
        self.num_bert_fine_tune_layers = num_bert_fine_tune_layers
        self.is_bert = is_bert

        self.model_params = {
            'intents_num': intents_num,
            'bert_hub_path': bert_hub_path,
            'num_bert_fine_tune_layers': num_bert_fine_tune_layers,
            'is_bert': is_bert
        }

        self.build_model()
        self.compile_model()

    def compile_model(self):
        # Instead of using `categorical_crossentropy`,
        # we use `sparse_categorical_crossentropy`, which expects integer targets.

        optimizer = tf.keras.optimizers.Adam(lr=5e-5)  #0.001)

        losses = {
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        loss_weights = {'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer,
                           loss=losses,
                           loss_weights=loss_weights,
                           metrics=metrics)
        self.model.summary()

    def build_model(self):
        in_id = Input(shape=(None, ), name='input_word_ids', dtype=tf.int32)
        in_mask = Input(shape=(None, ), name='input_mask', dtype=tf.int32)
        in_segment = Input(shape=(None, ),
                           name='input_type_ids',
                           dtype=tf.int32)
        #in_valid_positions = Input(shape=(None, self.slots_num), name='valid_positions')
        bert_inputs = [in_id, in_mask, in_segment]
        inputs = bert_inputs

        if self.is_bert:
            name = 'BertLayer'
        else:
            name = 'AlbertLayer'
        bert_pooled_output, bert_sequence_output = hub.KerasLayer(
            self.bert_hub_path, trainable=True, name=name)(bert_inputs)

        intents_fc = Dense(self.intents_num,
                           activation='softmax',
                           name='intent_classifier')(bert_pooled_output)

        self.model = Model(inputs=inputs, outputs=intents_fc)

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        X: batch of [input_ids, input_mask, segment_ids, valid_positions]
        """
        X = (X[0], X[1], X[2])
        if validation_data is not None:
            print("INSIDE")
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2]), Y_val)

        history = self.model.fit(X,
                                 Y,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size)
        #self.visualize_metric(history.history, 'slots_tagger_loss')
        #self.visualize_metric(history.history, 'intent_classifier_loss')
        #self.visualize_metric(history.history, 'loss')
        #self.visualize_metric(history.history, 'intent_classifier_acc')

    def prepare_valid_positions(self, in_valid_positions):
        in_valid_positions = np.expand_dims(in_valid_positions, axis=2)
        in_valid_positions = np.tile(in_valid_positions,
                                     (1, 1, self.slots_num))
        return in_valid_positions

    def predict_slots_intent(self,
                             x,
                             slots_vectorizer,
                             intent_vectorizer,
                             remove_start_end=True,
                             include_intent_prob=False):
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions))
        y_slots, y_intent = self.predict(x)
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            slots = [x[1:-1] for x in slots]

        if not include_intent_prob:
            intents = np.array([
                intent_vectorizer.inverse_transform([np.argmax(i)])[0]
                for i in y_intent
            ])
        else:
            intents = np.array([
                (intent_vectorizer.inverse_transform([np.argmax(i)])[0],
                 round(float(np.max(i)), 4)) for i in y_intent
            ])
        return slots, intents

    def save(self, model_path):
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file)
        self.model.save(os.path.join(model_path, 'joint_bert_model.h5'))

    def load(load_folder_path):
        with open(os.path.join(load_folder_path, 'params.json'),
                  'r') as json_file:
            model_params = json.load(json_file)

        #slots_num = model_params['slots_num']
        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']

        new_model = JointBertModel(intents_num, bert_hub_path,
                                   num_bert_fine_tune_layers, is_bert)
        new_model.model.load_weights(
            os.path.join(load_folder_path, 'joint_bert_model.h5'))
        return new_model
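Hypothetical instantiation of JointBertModel1 above; the TF-Hub handle shown is only an example of the kind of URL bert_hub_path expects, not a value taken from the original code:

nlu_model = JointBertModel1(
    intents_num=7,
    bert_hub_path="https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
    num_bert_fine_tune_layers=10,
    is_bert=True)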
Example #15
class Pmodel:
    def __init__(self, fl, mode, hparams):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        # self.features_dim = fl.features_c_dim
        # self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
        self.features_dim = fl.features_c_dim + 1  # 1 for the positional argument
        self.labels_dim = 1
        self.numel = fl.labels.shape[1] + 1
        self.hparams = hparams
        self.mode = mode
        self.normalise_labels = fl.normalise_labels
        self.labels_scaler = fl.labels_scaler
        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        # Selection of model
        if mode == 'ann':
            model = ann(self.features_dim, self.labels_dim, self.hparams)
            x = model(features_in)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'ann2':
            model_1 = ann(self.features_dim, 50, self.hparams)
            x = model_1(features_in)
            model_end = ann(50, 50, self.hparams)
            end = model_end(x)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            model_2 = ann(50, self.labels_dim - 1, self.hparams)

            x = model_2(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'ann3':
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(0))(features_in)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(2))(x)
            # x = BatchNormalization()(x)
            x = Dense(units=1,
                      activation='linear',
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_set_19')(x)

            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv1':
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='shared' + str(1))(features_in)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            #x = BatchNormalization()(x)
            x = Dense(units=19,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_set_19')(x)
            #x = BatchNormalization()(x)

            x = Reshape(target_shape=(19, 1))(x)
            x = Conv1D(filters=hparams['filters'],
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            #x = BatchNormalization()(x)
            x = Conv1D(filters=hparams['filters'] * 2,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            x = Conv1D(filters=hparams['filters'] * 4,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            #x = Permute((2,1))(x)
            #x = GlobalAveragePooling1D()(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(19, ))(x)

            self.model = Model(inputs=features_in, outputs=x)

        elif mode == 'conv2':
            x = Dense(units=10,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(1))(features_in)
            x = Dense(units=10,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(2))(x)
            end = Dense(units=10,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(1))(x)
            end = Dense(units=10,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(2))(end)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            x = Dense(units=80,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Reshape(target_shape=(80, 1))(x)
            x = Conv1D(filters=8,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)

            x = MaxPooling1D(pool_size=2)(x)
            x = Conv1D(filters=16,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            x = MaxPooling1D(pool_size=2)(x)
            #x = Permute((2,1))(x)
            #x = GlobalAveragePooling1D()(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(20, ))(x)

            self.model = Model(inputs=features_in, outputs=[end_node, x])

        elif mode == 'lstm':
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(1))(features_in)
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(2))(x)
            end = Dense(units=20,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(1))(x)
            end = Dense(units=20,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(2))(end)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(2))(x)

            x = RepeatVector(n=20)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)

            x = TimeDistributed(Dense(1))(x)
            x = Reshape(target_shape=(20, ))(x)
            '''
            x = Permute((2,1))(x)
            x = GlobalAveragePooling1D()(x)
            '''
            self.model = Model(inputs=features_in, outputs=[end_node, x])

        optimizer = Adam(clipnorm=1)

        self.model.compile(optimizer=optimizer, loss='mean_squared_error')
        #self.model.summary()

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        # Training model
        training_features = fl.features_c_norm
        val_features = i_fl.features_c_norm

        if self.normalise_labels:
            training_labels = fl.labels_norm
            val_labels = i_fl.labels_norm
        else:
            training_labels = fl.labels
            val_labels = i_fl.labels

        p_features = []
        for features in training_features.tolist():
            for idx in list(range(1, self.numel)):
                p_features.append(features + [idx])

        training_features = np.array(p_features)

        training_labels = training_labels.flatten()[:, None]

        # Plotting
        if plot_name:
            p_features = []
            for features in val_features.tolist():
                for idx in list(range(1, self.numel)):
                    p_features.append(features + [idx])

            val_features = np.array(p_features)

            val_labels = val_labels.flatten()[:, None]

            history = self.model.fit(training_features,
                                     training_labels,
                                     validation_data=(val_features,
                                                      val_labels),
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])
            # Debugging check to see features and prediction
            # pprint.pprint(training_features)
            # pprint.pprint(self.model.predict(training_features))
            # pprint.pprint(training_labels)

            # summarize history for accuracy
            plt.semilogy(history.history['loss'], label='train')
            plt.semilogy(history.history['val_loss'], label='test')
            plt.plot([], [],
                     ' ',
                     label='Final train: {:.3e}'.format(
                         history.history['loss'][-1]))
            plt.plot([], [],
                     ' ',
                     label='Final val: {:.3e}'.format(
                         history.history['val_loss'][-1]))
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(loc='upper right')
            plt.savefig(plot_name, bbox_inches='tight')
            plt.close()
        else:
            history = self.model.fit(training_features,
                                     training_labels,
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)

        return self.model, history

    def eval(self, eval_fl):
        eval_features = eval_fl.features_c_norm

        predictions = []
        for features in eval_features.tolist():
            single_expt = []
            for idx in list(range(1, self.numel)):
                single_expt.append(
                    self.model.predict(np.array(features + [idx])[None,
                                                                  ...])[0][0])
            predictions.append(single_expt)

        predictions = np.array(predictions)

        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
            mse = mean_squared_error(
                eval_fl.labels,
                self.labels_scaler.inverse_transform(predictions))
        else:
            mse = mean_squared_error(eval_fl.labels, predictions)
            mse_norm = mse
        return predictions, mse, mse_norm
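The ann() builder called in the 'ann' and 'ann2' branches above is not shown. A minimal sketch consistent with how it is called, assuming hparams carries 'hidden_layers' and 'activation' as the class docstring suggests:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def ann(input_dim, output_dim, hparams):
    # Small feed-forward block: hidden layers as listed in hparams, linear output.
    model = Sequential()
    model.add(Dense(hparams['hidden_layers'][0],
                    activation=hparams['activation'], input_dim=input_dim))
    for nodes in hparams['hidden_layers'][1:]:
        model.add(Dense(nodes, activation=hparams['activation']))
    model.add(Dense(output_dim, activation='linear'))
    return model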
Example #16
train_data_gen = train_image_generator.flow_from_directory(batch_size=16,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(150,
                                                                        150),
                                                           class_mode='binary')

test_data_gen = test_image_generator.flow_from_directory(batch_size=16,
                                                         directory=test_dir,
                                                         target_size=(150,
                                                                      150),
                                                         class_mode='binary')

# Train the model
history = new_model.fit(train_data_gen,
                        epochs=5,
                        validation_data=test_data_gen)

new_model.save("newVGG16")

# Report the final results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

from matplotlib import pyplot as plt

plt.plot(epochs, acc, 'r', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='testing acc')
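The train_image_generator, test_image_generator, and new_model objects used in this example are assumed to be created earlier; a rough sketch of the generator setup (the rescaling and augmentation choices are assumptions):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_image_generator = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True)
test_image_generator = ImageDataGenerator(rescale=1. / 255)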
Example #17
class JointBertModel(NLUModel):
    def __init__(self,
                 slots_num,
                 intents_num,
                 sess,
                 num_bert_fine_tune_layers=12):
        self.slots_num = slots_num
        self.intents_num = intents_num
        self.num_bert_fine_tune_layers = num_bert_fine_tune_layers

        self.model_params = {
            'slots_num': slots_num,
            'intents_num': intents_num,
            'num_bert_fine_tune_layers': num_bert_fine_tune_layers
        }

        self.build_model()
        self.compile_model()

        self.initialize_vars(sess)

    def build_model(self):

        in_id = Input(shape=(None, ), name='input_ids')
        in_mask = Input(shape=(None, ), name='input_masks')
        in_segment = Input(shape=(None, ), name='segment_ids')
        in_valid_positions = Input(shape=(None, self.slots_num),
                                   name='valid_positions')
        bert_inputs = [in_id, in_mask, in_segment, in_valid_positions]

        # the output of trained Bert
        bert_pooled_output, bert_sequence_output = BertLayer(
            n_fine_tune_layer=self.num_bert_fine_tune_layers,
            name='BertLayer')(bert_inputs)

        # add the additional layer for intent classification and slot filling
        intents_drop = Dropout(rate=0.1)(bert_pooled_output)
        intents_fc = Dense(self.intents_num,
                           activation='softmax',
                           name='intent_classifier')(intents_drop)

        slots_drop = Dropout(rate=0.1)(bert_sequence_output)
        slots_output = TimeDistributed(
            Dense(self.slots_num, activation='softmax'))(slots_drop)
        slots_output = Multiply(name='slots_tagger')(
            [slots_output, in_valid_positions])

        self.model = Model(inputs=bert_inputs,
                           outputs=[slots_output, intents_fc])

    def compile_model(self):
        optimizer = tf.keras.optimizers.Adam(lr=5e-5)
        # if the targets are one-hot labels, using 'categorical_crossentropy'; while if targets are integers, using 'sparse_categorical_crossentropy'
        losses = {
            'slots_tagger': 'sparse_categorical_crossentropy',
            'intent_classifier': 'sparse_categorical_crossentropy'
        }
        ## loss_weights: to weight the loss contributions of different model outputs.
        loss_weights = {'slots_tagger': 3.0, 'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer,
                           loss=losses,
                           loss_weights=loss_weights,
                           metrics=metrics)
        self.model.summary()

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        X = (X[0], X[1], X[2], self.prepare_valid_positions(X[3]))
        if validation_data is not None:
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2],
                                self.prepare_valid_positions(X_val[3])), Y_val)

        history = self.model.fit(X,
                                 Y,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size)

        self.visualize_metric(history.history, 'slots_tagger_loss')
        self.visualize_metric(history.history, 'intent_classifier_loss')
        self.visualize_metric(history.history, 'loss')
        self.visualize_metric(history.history, 'intent_classifier_acc')

    def prepare_valid_positions(self, in_valid_positions):
        ## the input is 2-D in_valid_position
        in_valid_positions = np.expand_dims(
            in_valid_positions,
            axis=2)  ## expand the shape of the array to axis=2
        ## 3-D in_valid_position
        in_valid_positions = np.tile(in_valid_positions,
                                     (1, 1, self.slots_num))  ##
        return in_valid_positions

    def predict_slots_intent(self,
                             x,
                             slots_vectorizer,
                             intent_vectorizer,
                             remove_start_end=True):
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions))

        y_slots, y_intent = self.predict(x)

        ### get the real slot-tags using 'inverse_transform' of slots-vectorizer
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:  ## remove the first '[CLS]' and the last '[SEP]' tokens.
            slots = np.array([x[1:-1] for x in slots])

        ### get the real intents using 'inverse-transform' of intents-vectorizer
        intents = np.array([
            intent_vectorizer.inverse_transform([np.argmax(y_intent[i])])[0]
            for i in range(y_intent.shape[0])
        ])
        return slots, intents

    def initialize_vars(self, sess):
        sess.run(tf.compat.v1.local_variables_initializer())
        sess.run(tf.compat.v1.global_variables_initializer())
        K.set_session(sess)

    def save(self, model_path):
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file)
        self.model.save(os.path.join(model_path, 'joint_bert_model.h5'))

    def load(load_folder_path, sess):
        with open(os.path.join(load_folder_path, 'params.json'),
                  'r') as json_file:
            model_params = json.load(json_file)

        slots_num = model_params['slots_num']
        intents_num = model_params['intents_num']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']

        new_model = JointBertModel(slots_num, intents_num, sess,
                                   num_bert_fine_tune_layers)
        new_model.model.load_weights(
            os.path.join(load_folder_path, 'joint_bert_model.h5'))
        return new_model
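A standalone numpy demonstration of the reshaping performed by prepare_valid_positions above, assuming a batch of 2 sequences of length 4 and slots_num = 3 (the values themselves are arbitrary):

import numpy as np

valid_positions = np.array([[1, 1, 0, 0],
                            [1, 1, 1, 0]])            # shape (2, 4): one mask row per sequence
expanded = np.expand_dims(valid_positions, axis=2)    # shape (2, 4, 1)
tiled = np.tile(expanded, (1, 1, 3))                  # shape (2, 4, 3): mask repeated per slot class
print(tiled.shape)  # (2, 4, 3)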
x = Flatten()(x)
encoded = Dense(units=10)(x)

y = Dense(units=1152, activation='relu')(encoded)
y = Reshape((3, 3, 128))(y)
y = Conv2DTranspose(filters=64, kernel_size=(3, 3), strides=(2, 2), padding='valid', name='decoder_deconv1', activation='relu')(y)
y = Conv2DTranspose(filters=32, kernel_size=(5, 5), strides=(2, 2), padding='same', name='decoder_deconv2', activation='relu')(y)
decoded_image = Conv2DTranspose(filters=1, kernel_size=(5, 5), strides=(2, 2), padding='same', name='decoder_deconv3', activation='relu')(y)


CAE = Model(inputs=input_image, outputs=decoded_image, name='CAE')


# In[4]:


tb = TensorBoard(log_dir='logs', write_graph=True)
mc = ModelCheckpoint(filepath='models/top_weights.h5', monitor='acc', save_best_only=True, save_weights_only=True, verbose=1)
es = EarlyStopping(monitor='loss', patience=15, verbose=1)
rlr = ReduceLROnPlateau(monitor='loss')
callbacks = [tb, mc, es, rlr]
CAE.compile(optimizer='adam', loss='mse', metrics=['accuracy'])


# In[ ]:

# CAE.load_weights('models/top_weights.h5')
# CAE.save('CAE.h5')
CAE.fit(X, X, epochs=1000, batch_size=256, callbacks=callbacks)
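Once the autoencoder converges, the encoder half can be reused on its own. A minimal sketch, assuming the input_image and encoded tensors defined above (and the training array X) are still in scope:

encoder = Model(inputs=input_image, outputs=encoded, name='encoder')
codes = encoder.predict(X, batch_size=256)  # one 10-dimensional code per sample in X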

Example #19
0
def main(batch_size=150,
         p_drop=0.4,
         latent_dim=2,
         cpl_fn='minvar',
         cpl_str=1e-3,
         n_epoch=500,
         run_iter=0,
         model_id='cnn',
         exp_name='MNIST'):


    fileid = model_id + \
        '_cf_' + cpl_fn + \
        '_cs_' + str(cpl_str) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)

    fileid = fileid.replace('.', '-')
    train_dat, train_lbl, val_dat, val_lbl, dir_pth = dataIO(exp_name=exp_name)

    #Architecture parameters ------------------------------
    input_dim = train_dat.shape[1]
    n_arms = 2
    fc_dim = 49

    #Model definition -------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(28, 28, 1), name='in_ae')
    for i in range(n_arms):
        M['co1_ae_' + str(i)] = Conv2D(10, (3, 3),
                                       activation='relu',
                                       padding='same',
                                       name='co1_ae_' + str(i))(M['in_ae'])
        M['mp1_ae_' + str(i)] = MaxPooling2D(
            (2, 2), padding='same',
            name='mp1_ae_' + str(i))(M['co1_ae_' + str(i)])
        M['dr1_ae_' + str(i)] = Dropout(rate=p_drop, name='dr1_ae_' + str(i))(
            M['mp1_ae_' + str(i)])
        M['fl1_ae_' + str(i)] = Flatten(name='fl1_ae_' + str(i))(M['dr1_ae_' +
                                                                   str(i)])
        M['fc01_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc01_ae_' + str(i))(M['fl1_ae_' +
                                                                   str(i)])
        M['fc02_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc02_ae_' + str(i))(M['fc01_ae_' +
                                                                   str(i)])
        M['fc03_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc03_ae_' + str(i))(M['fc02_ae_' +
                                                                   str(i)])

        if cpl_fn in ['mse']:
            M['ld_ae_' + str(i)] = Dense(latent_dim,
                                         activation='linear',
                                         name='ld_ae_' + str(i))(M['fc03_ae_' +
                                                                   str(i)])
        elif cpl_fn in ['mseBN', 'fullcov', 'minvar']:
            M['fc04_ae_' + str(i)] = Dense(latent_dim,
                                           activation='linear',
                                           name='fc04_ae_' + str(i))(
                                               M['fc03_ae_' + str(i)])
            M['ld_ae_' + str(i)] = BatchNormalization(
                scale=False,
                center=False,
                epsilon=1e-10,
                momentum=0.99,
                name='ld_ae_' + str(i))(M['fc04_ae_' + str(i)])

        M['fc05_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc05_ae_' + str(i))(M['ld_ae_' +
                                                                   str(i)])
        M['fc06_ae_' + str(i)] = Dense(fc_dim,
                                       activation='relu',
                                       name='fc06_ae_' + str(i))(M['fc05_ae_' +
                                                                   str(i)])
        M['fc07_ae_' + str(i)] = Dense(fc_dim * 4,
                                       activation='relu',
                                       name='fc07_ae_' + str(i))(M['fc06_ae_' +
                                                                   str(i)])
        M['re1_ae_' + str(i)] = Reshape(
            (14, 14, 1), name='re1_ae_' + str(i))(M['fc07_ae_' + str(i)])
        M['us1_ae_' + str(i)] = UpSampling2D(
            (2, 2), name='us1_ae_' + str(i))(M['re1_ae_' + str(i)])
        M['co2_ae_' + str(i)] = Conv2D(10, (3, 3),
                                       activation='relu',
                                       padding='same',
                                       name='co2_ae_' + str(i))(M['us1_ae_' +
                                                                  str(i)])
        M['ou_ae_' + str(i)] = Conv2D(1, (3, 3),
                                      activation='sigmoid',
                                      padding='same',
                                      name='ou_ae_' + str(i))(M['co2_ae_' +
                                                                str(i)])

    cplAE = Model(inputs=M['in_ae'],
                  outputs=[M['ou_ae_' + str(i)] for i in range(n_arms)] +
                  [M['ld_ae_' + str(i)] for i in range(n_arms)])

    if cpl_fn in ['mse', 'mseBN']:
        cpl_fn_loss = mse
    elif cpl_fn == 'fullcov':
        cpl_fn_loss = fullcov
    elif cpl_fn == 'minvar':
        cpl_fn_loss = minvar

    assert cpl_fn in ('mse', 'mseBN', 'fullcov', 'minvar'), 'Unknown coupling function: ' + cpl_fn
    #Create loss dictionary
    loss_dict = {
        'ou_ae_0': mse(M['in_ae'], M['ou_ae_0']),
        'ou_ae_1': mse(M['in_ae'], M['ou_ae_1']),
        'ld_ae_0': cpl_fn_loss(M['ld_ae_0'], M['ld_ae_1']),
        'ld_ae_1': cpl_fn_loss(M['ld_ae_1'], M['ld_ae_0'])
    }

    #Loss weights dictionary
    loss_wt_dict = {
        'ou_ae_0': 1.0,
        'ou_ae_1': 1.0,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str
    }

    #Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Data feed
    train_input_dict = {'in_ae': train_dat}
    val_input_dict = {'in_ae': val_dat}
    train_output_dict = {
        'ou_ae_0': train_dat,
        'ou_ae_1': train_dat,
        'ld_ae_0': np.empty((train_dat.shape[0], latent_dim)),
        'ld_ae_1': np.empty((train_dat.shape[0], latent_dim))
    }
    val_output_dict = {
        'ou_ae_0': val_dat,
        'ou_ae_1': val_dat,
        'ld_ae_0': np.empty((val_dat.shape[0], latent_dim)),
        'ld_ae_1': np.empty((val_dat.shape[0], latent_dim))
    }

    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    #Train model
    cplAE.fit(train_input_dict,
              train_output_dict,
              validation_data=(val_input_dict, val_output_dict),
              batch_size=batch_size,
              initial_epoch=0,
              epochs=n_epoch,
              verbose=2,
              shuffle=True,
              callbacks=[log_cb])

    #Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    matsummary = {}
    #Trained model prediction
    for i in range(n_arms):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        matsummary['z_val_' + str(i)] = encoder.predict({'in_ae': val_dat})
        matsummary['z_train_' + str(i)] = encoder.predict({'in_ae': train_dat})
    matsummary['train_lbl'] = train_lbl
    matsummary['val_lbl'] = val_lbl
    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', matsummary)
    return
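Note that loss_dict above maps each output name to a symbolic tensor built from the graph rather than to a callable. An equivalent, more conventional pattern is a closure that ignores its dummy target (the np.empty arrays supplied for the latent outputs); a minimal sketch, assuming the Keras backend and a latent tensor z_other taken from the opposite arm:

from tensorflow.keras import backend as K

def coupling_loss(z_other):
    """Builds a Keras-style loss that pulls this arm's latent code towards z_other."""
    def loss(y_true, y_pred):
        # y_true is only a placeholder; the coupling depends solely on the
        # predicted latent and the other arm's latent.
        return K.mean(K.square(y_pred - z_other), axis=-1)
    return loss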
Example #20
0
hidden1 = LSTM(32, return_sequences=True, name='firstLSTMLayer')(visible)
hidden2 = LSTM(16, name='secondLSTMLayer', return_sequences=True)(hidden1)
# left branch decides second agent action
hiddenLeft = LSTM(10, name='leftBranch')(hidden2)
agent2 = Dense(5, activation='softmax', name='agent2classifier')(hiddenLeft)
# right branch decides third agent action
hiddenRight = LSTM(10, name='rightBranch')(hidden2)
agent3 = Dense(5, activation='softmax', name='agent3classifier')(hiddenRight)

model = Model(inputs=visible, outputs=[agent2, agent3])

model.compile(optimizer='adam',
              loss={'agent2classifier': 'categorical_crossentropy',
                    'agent3classifier': 'categorical_crossentropy'},
              metrics={'agent2classifier': ['acc'],
                        'agent3classifier': ['acc']})
print(model.summary())


history = model.fit(trainX,
                    y={'agent2classifier': trainY1, 'agent3classifier': trainY2},
                    epochs=3000,
                    batch_size=5000,
                    verbose=2,
                    validation_data=(valX, {'agent2classifier': valY1, 'agent3classifier': valY2}),
                    shuffle=False)

model.save('Agent0ObsNetwork.keras')


#model = load_model("actionMultiClassNetwork.keras")


np.save("agent0obs_history.npy", history.history, allow_pickle=True)
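To inspect the training curves later, the pickled history dictionary can be loaded back; a short sketch using the file written above:

import numpy as np

history = np.load("agent0obs_history.npy", allow_pickle=True).item()
print(sorted(history.keys()))          # per-output losses and accuracies, plus their val_ variants
print(history['val_loss'][-1])         # final combined validation loss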
Example #21
0
    [shared_model(left_input),
     shared_model(right_input)])
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.SGD(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

batch_size = 1024 * 2
n_epoch = 50
training_start_time = time()
malstm_trained = model.fit(
    [X_train['left'], X_train['right']],
    Y_train,
    batch_size=batch_size,
    epochs=n_epoch,
    validation_data=([X_validation['left'],
                      X_validation['right']], Y_validation))
training_end_time = time()
print("Training time finished.\n%d epochs in %12.2f" %
      (n_epoch, training_end_time - training_start_time))
model.save('../data/model.h5')

#========
plt.subplot(211)
plt.plot(malstm_trained.history['acc'])
plt.plot(malstm_trained.history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
Example #22
0
def main(
    batch_size=16,
    episode_length=16,
    filters=16,
    width=64,
    height=64,
    memory_size=32,
):
    # Prevent TensorFlow from allocating all available GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.keras.backend.set_session(tf.Session(config=config))

    input_layer = Input([episode_length, width, height, 1])
    layer = input_layer

    layer = Conv3D(filters=filters,
                   kernel_size=3,
                   strides=(1, 2, 2),
                   padding="same")(layer)
    layer = Conv3D(filters=filters,
                   kernel_size=3,
                   strides=(1, 2, 2),
                   padding="same")(layer)
    layer = Conv3D(filters=filters,
                   kernel_size=3,
                   strides=(1, 2, 2),
                   padding="same")(layer)
    layer = Conv3D(filters=filters,
                   kernel_size=3,
                   strides=(1, 2, 2),
                   padding="same")(layer)
    layer = Conv3D(filters=filters,
                   kernel_size=3,
                   strides=(1, 2, 2),
                   padding="same")(layer)

    tmp_shape = layer.shape.as_list()[1:]
    code_size = tmp_shape[1] * tmp_shape[2] * tmp_shape[3]

    layer = Reshape([episode_length, code_size])(layer)
    layer = KanervaMemory(code_size=code_size, memory_size=memory_size)(layer)
    layer = Reshape(tmp_shape)(layer)

    layer = Conv3DTranspose(filters=filters,
                            kernel_size=3,
                            strides=(1, 2, 2),
                            padding="same")(layer)
    layer = Conv3DTranspose(filters=filters,
                            kernel_size=3,
                            strides=(1, 2, 2),
                            padding="same")(layer)
    layer = Conv3DTranspose(filters=filters,
                            kernel_size=3,
                            strides=(1, 2, 2),
                            padding="same")(layer)
    layer = Conv3DTranspose(filters=filters,
                            kernel_size=3,
                            strides=(1, 2, 2),
                            padding="same")(layer)
    layer = Conv3DTranspose(filters=filters,
                            kernel_size=3,
                            strides=(1, 2, 2),
                            padding="same")(layer)
    layer = Conv3DTranspose(filters=1,
                            kernel_size=1,
                            strides=1,
                            padding="same",
                            activation="sigmoid")(layer)

    output_layer = layer

    model = Model(inputs=input_layer, outputs=output_layer)

    model.compile("adam", loss="mse", metrics=["mse"])
    model.summary()

    dataset_input_tensor = tf.random.normal(
        shape=[episode_length, width, height, 1])
    dataset_input_tensor = tf.clip_by_value(dataset_input_tensor, 0.0, 1.0)
    dataset = tf.data.Dataset.from_tensors(dataset_input_tensor)
    dataset = dataset.repeat(-1)
    dataset = dataset.map(lambda x: (x, x))
    dataset = dataset.batch(batch_size)

    log_dir = "../logs/KanervaMachine/log_{}".format(int(time()))
    os.makedirs(log_dir)
    tensorboard = TensorBoard(log_dir=log_dir, update_freq="batch")

    model.fit(dataset,
              callbacks=[tensorboard],
              steps_per_epoch=500,
              epochs=100)
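The pipeline above turns each episode into an (input, target) pair for reconstruction via dataset.map(lambda x: (x, x)). A tiny standalone illustration of the same idiom on toy data (written in TF 2 eager style purely for demonstration):

import tensorflow as tf

episodes = tf.random.uniform([4, 8, 8, 1])      # four toy "frames"
ds = tf.data.Dataset.from_tensor_slices(episodes)
ds = ds.map(lambda x: (x, x))                   # the model learns to reconstruct its input
ds = ds.batch(2)
for x_batch, y_batch in ds.take(1):
    print(x_batch.shape, y_batch.shape)         # (2, 8, 8, 1) (2, 8, 8, 1)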
Example #23
0
class Kmodel:
    def __init__(self, fl, mode, hparams):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        self.features_dim = fl.features_c_dim
        self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
        self.hparams = hparams
        self.mode = mode
        self.normalise_labels = fl.normalise_labels
        self.labels_scaler = fl.labels_scaler
        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        # Selection of model
        if mode == 'ann':
            model = ann(self.features_dim, self.labels_dim, self.hparams)
            x = model(features_in)
            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'ann2':
            model_1 = ann(self.features_dim, 50, self.hparams)
            x = model_1(features_in)
            model_end = ann(50, 50, self.hparams)
            end = model_end(x)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            model_2 = ann(50, self.labels_dim - 1, self.hparams)

            x = model_2(x)
            self.model = Model(inputs=features_in, outputs=[end_node, x])
        elif mode == 'ann3':
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(0))(features_in)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(2))(x)
            # x = BatchNormalization()(x)
            x = Dense(units=self.labels_dim,
                      activation='linear',
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Final')(x)

            self.model = Model(inputs=features_in, outputs=x)
        elif mode == 'conv1':
            if fl.label_type == 'gf20':
                final_dim = 20
            else:
                final_dim = 19
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='shared' + str(1))(features_in)
            x = Dense(units=hparams['pre'],
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            #x = BatchNormalization()(x)
            x = Dense(units=final_dim,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_set_19')(x)
            #x = BatchNormalization()(x)

            x = Reshape(target_shape=(final_dim, 1))(x)
            x = Conv1D(filters=hparams['filters'],
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            #x = BatchNormalization()(x)
            x = Conv1D(filters=hparams['filters'] * 2,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            x = Conv1D(filters=hparams['filters'] * 4,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            #x = Permute((2,1))(x)
            #x = GlobalAveragePooling1D()(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(final_dim, ))(x)

            self.model = Model(inputs=features_in, outputs=x)

        elif mode == 'conv2':
            x = Dense(units=10,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(1))(features_in)
            x = Dense(units=10,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(2))(x)
            end = Dense(units=10,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(1))(x)
            end = Dense(units=10,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(2))(end)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            x = Dense(units=80,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Reshape(target_shape=(80, 1))(x)
            x = Conv1D(filters=8,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)

            x = MaxPooling1D(pool_size=2)(x)
            x = Conv1D(filters=16,
                       kernel_size=3,
                       strides=1,
                       padding='same',
                       activation='relu')(x)
            x = MaxPooling1D(pool_size=2)(x)
            #x = Permute((2,1))(x)
            #x = GlobalAveragePooling1D()(x)
            x = TimeDistributed(Dense(1, activation='linear'))(x)
            x = Reshape(target_shape=(20, ))(x)

            self.model = Model(inputs=features_in, outputs=[end_node, x])

        elif mode == 'lstm':
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(1))(features_in)
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Shared_e_' + str(2))(x)
            end = Dense(units=20,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(1))(x)
            end = Dense(units=20,
                        activation=hparams['activation'],
                        kernel_regularizer=regularizers.l1_l2(
                            l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                        name='Dense_e_' + str(2))(end)
            end_node = Dense(units=1,
                             activation='linear',
                             kernel_regularizer=regularizers.l1_l2(
                                 l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                             name='output_layer')(end)

            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(1))(x)
            x = Dense(units=20,
                      activation=hparams['activation'],
                      kernel_regularizer=regularizers.l1_l2(
                          l1=hparams['reg_l1'], l2=hparams['reg_l2']),
                      name='Pre_' + str(2))(x)

            x = RepeatVector(n=20)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)
            x = LSTM(units=30, activation='relu', return_sequences=True)(x)

            x = TimeDistributed(Dense(1))(x)
            x = Reshape(target_shape=(20, ))(x)
            '''
            x = Permute((2,1))(x)
            x = GlobalAveragePooling1D()(x)
            '''
            self.model = Model(inputs=features_in, outputs=[end_node, x])

        optimizer = Adam(learning_rate=hparams['learning_rate'], clipnorm=1)

        def weighted_mse(y_true, y_pred):
            loss_weights = np.sqrt(np.arange(1, 20))
            #loss_weights = np.arange(1, 20)
            return K.mean(K.square(y_pred - y_true) * loss_weights, axis=-1)

        def haitao_error(y_true, y_pred):
            diff = K.abs(
                (y_true - y_pred) /
                K.reshape(K.clip(K.abs(y_true[:, -1]), K.epsilon(), None),
                          (-1, 1)))
            return 100. * K.mean(diff, axis=-1)

        if hparams['loss'] == 'mape':
            self.model.compile(optimizer=optimizer,
                               loss=MeanAbsolutePercentageError())
        elif hparams['loss'] == 'haitao':
            self.model.compile(optimizer=optimizer, loss=haitao_error)
        elif hparams['loss'] == 'mse':
            self.model.compile(optimizer=optimizer, loss='mean_squared_error')
        #self.model.summary()

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        # Training model
        training_features = fl.features_c_norm
        val_features = i_fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
            val_labels = i_fl.labels_norm
        else:
            training_labels = fl.labels
            val_labels = i_fl.labels

        # Plotting
        if plot_name:
            history = self.model.fit(training_features,
                                     training_labels,
                                     validation_data=(val_features,
                                                      val_labels),
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])
            # Debugging check to see features and prediction
            # pprint.pprint(training_features)
            # pprint.pprint(self.model.predict(training_features))
            # pprint.pprint(training_labels)

            # summarize training history (loss curves)
            plt.semilogy(history.history['loss'], label=['train'])
            plt.semilogy(history.history['val_loss'], label=['test'])
            plt.plot([], [],
                     ' ',
                     label='Final train: {:.3e}'.format(
                         history.history['loss'][-1]))
            plt.plot([], [],
                     ' ',
                     label='Final val: {:.3e}'.format(
                         history.history['val_loss'][-1]))
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(loc='upper right')
            plt.savefig(plot_name, bbox_inches='tight')
            plt.close()
        else:
            history = self.model.fit(training_features,
                                     training_labels,
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)

        return self.model, history

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        predictions = self.model.predict(features)
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
            mse = mean_squared_error(
                eval_fl.labels,
                self.labels_scaler.inverse_transform(predictions))
        else:
            mse = mean_squared_error(eval_fl.labels, predictions)
            mse_norm = mse
        return predictions, mse, mse_norm
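eval above reports both a normalised and an un-normalised MSE when the labels were scaled. A small self-contained illustration of how the two relate, using sklearn's StandardScaler as a stand-in for labels_scaler:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

labels = np.array([[10.0], [20.0], [30.0]])
scaler = StandardScaler().fit(labels)
labels_norm = scaler.transform(labels)

pred_norm = labels_norm + 0.1                     # pretend predictions in normalised space
mse_norm = mean_squared_error(labels_norm, pred_norm)
mse = mean_squared_error(labels, scaler.inverse_transform(pred_norm))
print(mse_norm, mse)                              # mse == mse_norm * scaler.scale_[0] ** 2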
Example #24
0
# conv_3 = MaxPooling2D(pool_size=2, strides=2)(conv_3)
#
# merged = concatenate([conv_2, conv_3], axis=1)
# net = Flatten()(merged)
# net = Dense(128, activation='relu')(net)
# net = Dense(num_classes, activation='softmax')(net)
#
# outputs = net

# Model Compilation
model = Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Training
model.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128)

# Evaluation
result = model.evaluate(x=data.test.images, y=data.test.labels)

for name, value in zip(model.metrics_names, result):
    print(name, value)

y_pred = model.predict(x=data.test.images)
print(y_pred)
cls_pred = np.argmax(y_pred, axis=1)


def plot_example_errors(cls_pred):
    incorrect = (cls_pred != data.test.cls)
    images = data.test.images[incorrect]
Example #25
0
class MTmodel:
    def __init__(self, fl, mode, hparams, labels_norm=True):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        self.features_dim = fl.features_c_dim
        self.labels_dim = [
            1 for _ in range(fl.labels_dim)
        ]  # Assuming that each task has only 1 dimensional output
        self.hparams = hparams
        self.labels_norm = labels_norm
        features_in = Input(shape=(self.features_dim, ),
                            name='main_features_c_input')

        # Selection of model
        if mode == 'hps':
            hps_model = hps(self.features_dim, self.labels_dim, self.hparams)
            x = hps_model(features_in)
        elif mode == 'cs':
            cs_model = cross_stitch(self.features_dim, self.labels_dim,
                                    self.hparams)
            x = cs_model(features_in)

        self.model = Model(inputs=features_in, outputs=x)
        self.model.compile(optimizer=hparams['optimizer'],
                           loss='mean_squared_error')

    def train_model(self,
                    fl,
                    i_fl,
                    save_name='mt.h5',
                    save_dir='./save/models/',
                    save_mode=False,
                    plot_name=None):
        # Training model
        training_features = fl.features_c_norm
        if self.labels_norm:
            training_labels = fl.labels_norm.T.tolist()
        else:
            training_labels = fl.labels.T.tolist()

        if plot_name:
            history = self.model.fit(training_features,
                                     training_labels,
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])
            # Debugging check to see features and prediction
            # pprint.pprint(training_features)
            # pprint.pprint(self.model.predict(training_features))
            # pprint.pprint(training_labels)
            # Saving Model
            # summarize training history (loss curve)
            plt.plot(history.history['loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train'], loc='upper left')
            plt.savefig(plot_name, bbox_inches='tight')
            plt.close()
        else:
            self.model.fit(training_features,
                           training_labels,
                           epochs=self.hparams['epochs'],
                           batch_size=self.hparams['batch_size'],
                           verbose=self.hparams['verbose'])

        if save_mode:
            self.model.save(save_dir + save_name)

        return self.model

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_norm:
            labels = eval_fl.labels_norm.tolist()
            labels_actual = eval_fl.labels.tolist()
            predictions = self.model.predict(features)
            predictions = [prediction.T for prediction in predictions]
            predictions = np.vstack(predictions).T
            predictions = predictions.tolist()
            predictions_actual = eval_fl.labels_scaler.inverse_transform(
                predictions)
            # Calculating metrics
            mse = mean_squared_error(labels_actual, predictions_actual)
            mse_norm = mean_squared_error(labels, predictions)
        else:
            labels = eval_fl.labels.tolist()
            predictions = self.model.predict(features)
            predictions = [prediction.T for prediction in predictions]
            predictions = np.vstack(predictions).T
            predictions_actual = predictions.tolist()
            mse = mean_squared_error(labels, predictions_actual)
            mse_norm = mse
        return predictions_actual, mse, mse_norm
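A multi-output Keras model returns one array per task head; eval above reassembles them into a (samples, tasks) matrix before scoring. A small numpy illustration of that reshaping with three hypothetical tasks and four samples:

import numpy as np

per_task = [np.arange(4).reshape(4, 1) + t for t in range(3)]   # three (4, 1) prediction arrays
stacked = np.vstack([p.T for p in per_task]).T                  # shape (4, 3): one column per task
print(stacked)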
Example #26
0
    def fit(self,
            learning_rate=1e-4,
            epochs=5,
            activation='relu',
            dropout=0,
            hidden_size=1024,
            nb_layers=1,
            include_class_weight=False,
            batch_size=20,
            save_model=False,
            verbose=True,
            fine_tuning=False,
            NB_IV3_LAYERS_TO_FREEZE=279,
            use_TPU=False,
            transfer_model='Inception',
            min_accuracy=None,
            extract_SavedModel=False):

        #read the tfrecords data
        TRAIN_DATA = tf.data.TFRecordDataset(['train.tfrecord'])
        VAL_DATA = tf.data.TFRecordDataset(['val.tfrecord'])
        print('Read the TFrecords')

        if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']:
            target_size = (299, 299)
        else:
            target_size = (224, 224)

        #We expect the classes to be the names of the folders in the training set
        self.categories = os.listdir(TRAIN_DIR)
        """
        helper functions to load tfrecords. Strongly inspired by
        https://colab.research.google.com/github/GoogleCloudPlatform/training-data-analyst/blob/master/courses/fast-and-lean-data-science/07_Keras_Flowers_TPU_playground.ipynb#scrollTo=LtAVr-4CP1rp
        """
        def read_tfrecord(example):
            features = {
                "image": tf.FixedLenFeature(
                    (), tf.string),  # tf.string means byte string
                "label": tf.FixedLenFeature((), tf.int64)
            }
            example = tf.parse_single_example(example, features)
            image = tf.image.decode_jpeg(example['image'])
            image = tf.cast(
                image,
                tf.float32) / 255.0  # convert image to floats in [0, 1] range
            image = tf.image.resize_images(
                image,
                size=[*target_size],
                method=tf.image.ResizeMethod.BILINEAR)
            feature = tf.reshape(image, [*target_size, 3])
            label = tf.cast(example['label'], tf.int32)  # int64 -> int32
            target = tf.one_hot(label, len(self.categories))
            return feature, target

        def get_training_dataset():
            dataset = TRAIN_DATA.map(read_tfrecord)
            dataset = dataset.cache()
            dataset = dataset.repeat()
            dataset = dataset.shuffle(1000)
            dataset = dataset.batch(
                batch_size,
                drop_remainder=True)  # drop_remainder needed on TPU
            dataset = dataset.prefetch(
                -1
            )  # prefetch next batch while training (-1: autotune prefetch buffer size)
            return dataset

        def get_validation_dataset():
            dataset = VAL_DATA.map(read_tfrecord)
            dataset = dataset.cache()
            dataset = dataset.repeat()
            dataset = dataset.shuffle(1000)
            dataset = dataset.batch(
                batch_size,
                drop_remainder=True)  # drop_remainder needed on TPU
            dataset = dataset.prefetch(
                -1
            )  # prefetch next batch while training (-1: autotune prefetch buffer size)
            return dataset

        #if we want to stop training when no sufficient improvement in accuracy has been achieved
        if min_accuracy is not None:
            callback = EarlyStopping(monitor='categorical_accuracy',
                                     baseline=min_accuracy)
            callback = [callback]
        else:
            callback = None

        #load the pretrained model, without the classification (top) layers
        if transfer_model == 'Xception':
            base_model = Xception(weights='imagenet',
                                  include_top=False,
                                  input_shape=(299, 299, 3))
        elif transfer_model == 'Inception_Resnet':
            base_model = InceptionResNetV2(weights='imagenet',
                                           include_top=False,
                                           input_shape=(299, 299, 3))
        elif transfer_model == 'Resnet':
            base_model = ResNet50(weights='imagenet',
                                  include_top=False,
                                  input_shape=(224, 224, 3))
        else:
            base_model = InceptionV3(weights='imagenet',
                                     include_top=False,
                                     input_shape=(299, 299, 3))

        #Add the classification layers using Keras functional API
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        for _ in range(nb_layers):
            x = Dense(hidden_size, activation=activation)(
                x)  #Hidden layer for classification
            if dropout > 0:
                x = Dropout(rate=dropout)(x)

        predictions = Dense(len(self.categories),
                            activation='softmax')(x)  #Output layer
        model = Model(inputs=base_model.input, outputs=predictions)

        #Set only the top layers as trainable (if we want to do fine-tuning,
        #we can train the base layers as a second step)
        for layer in base_model.layers:
            layer.trainable = False

        #Define the optimizer and the loss, and compile the model
        loss = 'categorical_crossentropy'
        if use_TPU:
            #if we want to try out the TPU, it looks like we currently need to use
            #tensorflow optimizers...see https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked
            #...and https://www.youtube.com/watch?v=jgNwywYcH4w
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            tpu_optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
            model.compile(optimizer=tpu_optimizer,
                          loss=loss,
                          metrics=['categorical_accuracy'])

            TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
            model = tf.contrib.tpu.keras_to_tpu_model(
                model,
                strategy=tf.contrib.tpu.TPUDistributionStrategy(
                    tf.contrib.cluster_resolver.TPUClusterResolver(
                        TPU_WORKER)))
            tf.logging.set_verbosity(tf.logging.INFO)

        else:
            optimizer = Adam(lr=learning_rate)
            model.compile(optimizer=optimizer,
                          loss=loss,
                          metrics=['categorical_accuracy'])

        #if we want to weight the classes given the imbalanced number of images
        if include_class_weight:
            from sklearn.utils.class_weight import compute_class_weight
            cls_train = self.categories
            class_weight = compute_class_weight(class_weight='balanced',
                                                classes=np.unique(cls_train),
                                                y=cls_train)
        else:
            class_weight = None

        steps_per_epoch = int(
            sum([
                len(files)
                for r, d, files in os.walk(parentdir +
                                           '/data/image_dataset/train')
            ]) / batch_size)
        validation_steps = int(
            sum([
                len(files)
                for r, d, files in os.walk(parentdir +
                                           '/data/image_dataset/val')
            ]) / batch_size)

        #Fit the model
        if use_TPU:
            history = model.fit(get_training_dataset,
                                steps_per_epoch=steps_per_epoch,
                                epochs=epochs,
                                validation_data=get_validation_dataset,
                                validation_steps=validation_steps,
                                verbose=verbose,
                                callbacks=callback,
                                class_weight=class_weight)
        else:
            history = model.fit(get_training_dataset(),
                                steps_per_epoch=steps_per_epoch,
                                epochs=epochs,
                                validation_data=get_validation_dataset(),
                                validation_steps=validation_steps,
                                verbose=verbose,
                                callbacks=callback,
                                class_weight=class_weight)

        #Fine-tune the model, if we wish so
        if fine_tuning and not model.stop_training:
            print('============')
            print('Begin fine-tuning')
            print('============')

            #freeze the first NB_IV3_LAYERS_TO_FREEZE layers and fine-tune the rest
            for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
                layer.trainable = False
            for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
                layer.trainable = True

            model.compile(optimizer=Adam(lr=learning_rate * 0.1),
                          loss=loss,
                          metrics=['categorical_accuracy'])

            #Fit the model
            if use_TPU:
                history = model.fit(get_training_dataset,
                                    steps_per_epoch=steps_per_epoch,
                                    epochs=epochs,
                                    validation_data=get_validation_dataset,
                                    validation_steps=validation_steps,
                                    verbose=verbose,
                                    callbacks=callback,
                                    class_weight=class_weight)
            else:
                history = model.fit(get_training_dataset(),
                                    steps_per_epoch=steps_per_epoch,
                                    epochs=epochs,
                                    validation_data=get_validation_dataset(),
                                    validation_steps=validation_steps,
                                    verbose=verbose,
                                    callbacks=callback,
                                    class_weight=class_weight)

        #Record the final validation accuracy as the model fitness
        self.fitness = history.history['val_categorical_accuracy'][-1]

        #Save the model
        if save_model:
            if not os.path.exists(parentdir + '/data/trained_models'):
                os.makedirs(parentdir + '/data/trained_models')
            model.save(parentdir + '/data/trained_models/trained_model.h5')
            print('Model saved!')

        #save model in production format
        if extract_SavedModel:
            export_path = "./image_classifier/1/"

            with K.get_session() as sess:
                tf.saved_model.simple_save(
                    sess,
                    export_path,
                    inputs={'input_image': model.input},
                    outputs={t.name: t
                             for t in model.outputs})

        else:
            self.model = model
            del history
            del model
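For reference, records compatible with read_tfrecord above carry an 'image' bytes feature (an encoded JPEG) and a 'label' int64 feature. A hedged writer sketch using the TF 1.x API to match the reader; the file name and example inputs are only illustrative:

import tensorflow as tf

def write_example(writer, jpeg_bytes, label):
    example = tf.train.Example(features=tf.train.Features(feature={
        "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[jpeg_bytes])),
        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
    }))
    writer.write(example.SerializeToString())

# with tf.python_io.TFRecordWriter('train.tfrecord') as writer:      # TF 1.x writer
#     write_example(writer, open('some_image.jpg', 'rb').read(), 3)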
Example #27
0
class JointBertCRFModel(JointBertModel):
    

    def __init__(self, slots_num, intents_num, bert_hub_path, sess, num_bert_fine_tune_layers=10,
                 is_bert=True, is_crf=True, learning_rate=5e-5):
        super(JointBertCRFModel, self).__init__(slots_num, intents_num, bert_hub_path, sess, 
             num_bert_fine_tune_layers, is_bert, is_crf, learning_rate)
        
        
    def compile_model(self):
        # Instead of using `categorical_crossentropy`,
        # we use `sparse_categorical_crossentropy`, which expects integer targets.
        
        optimizer = tf.keras.optimizers.Adam(lr=self.learning_rate)

        losses = {
            'slots_tagger': self.crf.loss,
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        loss_weights = {'slots_tagger': 3.0, 'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer, loss=losses, loss_weights=loss_weights, metrics=metrics)
        self.model.summary()
        

    def build_model(self):
        in_id = Input(shape=(None,), name='input_ids')
        in_mask = Input(shape=(None,), name='input_masks')
        in_segment = Input(shape=(None,), name='segment_ids')
        in_valid_positions = Input(shape=(None, self.slots_num), name='valid_positions')
        sequence_lengths = Input(shape=(1,), dtype='int32', name='sequence_lengths')
        
        bert_inputs = [in_id, in_mask, in_segment, in_valid_positions]
        
        if self.is_bert:
            bert_pooled_output, bert_sequence_output = BertLayer(
                n_fine_tune_layers=self.num_bert_fine_tune_layers,
                bert_path=self.bert_hub_path,
                pooling='mean', name='BertLayer')(bert_inputs)
        else:
            bert_pooled_output, bert_sequence_output = AlbertLayer(
                fine_tune=True if self.num_bert_fine_tune_layers > 0 else False,
                albert_path=self.bert_hub_path,
                pooling='mean', name='AlbertLayer')(bert_inputs)
        
        intents_fc = Dense(self.intents_num, activation='softmax', name='intent_classifier')(bert_pooled_output)
        
        self.crf = CRFLayer(name='slots_tagger')
        slots_output = self.crf(inputs=[bert_sequence_output, sequence_lengths])
        
        self.model = Model(inputs=bert_inputs + [sequence_lengths], outputs=[slots_output, intents_fc])

        
    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        X: batch of [input_ids, input_mask, segment_ids, valid_positions]
        """
        X = (X[0], X[1], X[2], self.prepare_valid_positions(X[3]), X[4])
        if validation_data is not None:
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2], 
                                self.prepare_valid_positions(X_val[3]), X_val[4]), Y_val)
        
        self.model.fit(X, Y, validation_data=validation_data, 
                                 epochs=epochs, batch_size=batch_size)

        
        
    def predict_slots_intent(self, x, slots_vectorizer, intent_vectorizer, remove_start_end=True):
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions), x[4])
        y_slots, y_intent = self.predict(x)
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            slots = [x[1:-1] for x in slots]
            
        intents = np.array([intent_vectorizer.inverse_transform([np.argmax(y_intent[i])])[0] for i in range(y_intent.shape[0])])
        return slots, intents
    

    def save(self, model_path):
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file, indent=2)
        self.model.save(os.path.join(model_path, 'joint_bert_crf_model.h5'))
        
        
    @staticmethod
    def load(load_folder_path, sess):
        with open(os.path.join(load_folder_path, 'params.json'), 'r') as json_file:
            model_params = json.load(json_file)
            
        slots_num = model_params['slots_num'] 
        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']
        if 'is_crf' in model_params:
            is_crf = model_params['is_crf']
        else:
            is_crf = True
        if 'learning_rate' in model_params:
            learning_rate = model_params['learning_rate']
        else:
            learning_rate = 5e-5
            
        new_model = JointBertCRFModel(slots_num, intents_num, bert_hub_path, sess, num_bert_fine_tune_layers, is_bert, is_crf, learning_rate)
        new_model.model.load_weights(os.path.join(load_folder_path,'joint_bert_crf_model.h5'))

        return new_model
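The CRF variant's fit and predict_slots_intent expect a fifth input, X[4], holding per-example sequence lengths. Assuming the attention mask in X[1] is 0/1-valued, a plausible way to derive it looks like this (illustrative only, not taken from the original code):

import numpy as np

input_mask = np.array([[1, 1, 1, 0, 0],
                       [1, 1, 1, 1, 1]])
sequence_lengths = input_mask.sum(axis=1).astype('int32').reshape(-1, 1)
print(sequence_lengths)   # [[3], [5]] -- matches the Input(shape=(1,)) defined in build_model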
Example #28
0
def main(cvset=0,
         n_features=5000,
         batch_size=1000,
         p_drop=0.5,
         latent_dim=2,
         n_epoch=5000,
         run_iter=0,
         exp_name='nagent',
         model_id='nagent_model'):
    train_dict, val_dict, full_dict, dir_pth = dataIO(cvset=0,
                                                      n_features=n_features,
                                                      exp_name=exp_name,
                                                      train_size=25000)

    #Architecture parameters ------------------------------
    input_dim = train_dict['X'].shape[1]
    print(input_dim)
    fc_dim = 50

    fileid = model_id + \
        '_cv_' + str(cvset) + \
        '_ng_' + str(n_features) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    fileid = fileid.replace('.', '-')
    print(fileid)

    n_agents = 1
    #Model definition -----------------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(input_dim, ), name='in_ae')
    M['mask_ae'] = Input(shape=(input_dim, ), name='mask_ae')
    for i in range(n_agents):

        M['dr_ae_' + str(i)] = Dropout(p_drop,
                                       name='dr_ae_' + str(i))(M['in_ae'])
        M['fc01_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc01_ae_' + str(i))(M['dr_ae_' +
                                                                   str(i)])
        M['fc02_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc02_ae_' + str(i))(M['fc01_ae_' +
                                                                   str(i)])
        M['fc03_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc03_ae_' + str(i))(M['fc02_ae_' +
                                                                   str(i)])
        M['fc04_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc04_ae_' + str(i))(M['fc03_ae_' +
                                                                   str(i)])
        M['fc05_ae_' + str(i)] = Dense(latent_dim,
                                       activation='linear',
                                       name='fc05_ae_' + str(i))(M['fc04_ae_' +
                                                                   str(i)])
        M['ld_ae_' + str(i)] = BatchNormalization(scale=False,
                                                  center=False,
                                                  epsilon=1e-10,
                                                  momentum=0.,
                                                  name='ld_ae_' + str(i))(
                                                      M['fc05_ae_' + str(i)])

        M['fc06_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc06_ae_' + str(i))(M['ld_ae_' +
                                                                   str(i)])
        M['fc07_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc07_ae_' + str(i))(M['fc06_ae_' +
                                                                   str(i)])
        M['fc08_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc08_ae_' + str(i))(
                                           M['fc07_ae_' + str(i)])
        M['fc09_ae_' + str(i)] = Dense(fc_dim,
                                       activation='elu',
                                       name='fc09_ae_' + str(i))(M['fc08_ae_' +
                                                                   str(i)])
        M['ou_ae_' + str(i)] = Dense(input_dim,
                                     activation='linear',
                                     name='ou_ae_' + str(i))(M['fc09_ae_' +
                                                               str(i)])

    AE = Model(inputs=[M['in_ae'], M['mask_ae']],
               outputs=[M['ou_ae_' + str(i)] for i in range(n_agents)])

    def masked_mse(X, Y, mask):
        loss_val = tf.reduce_mean(
            tf.multiply(tf.math.squared_difference(X, Y), mask))

        def masked_loss(y_true, y_pred):
            return loss_val

        return masked_loss

    #Create loss dictionary
    loss_dict = {
        'ou_ae_' + str(i): masked_mse(M['in_ae'], M['ou_ae_' + str(i)], M['mask_ae'])
        for i in range(n_agents)
    }

    #Loss weights dictionary
    loss_wt_dict = {'ou_ae_' + str(i): 1.0 for i in range(n_agents)}

    #Add loss definitions to the model
    AE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Custom logging
    cb_obj = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    train_input_dict = {
        'in_ae': train_dict['X'],
        'mask_ae': train_dict['mask']
    }
    train_output_dict = {
        'ou_ae_' + str(i): train_dict['X']
        for i in range(n_agents)
    }

    val_input_dict = {'in_ae': val_dict['X'], 'mask_ae': val_dict['mask']}
    val_output_dict = {
        'ou_ae_' + str(i): val_dict['X']
        for i in range(n_agents)
    }

    #Model training
    start_time = timeit.default_timer()
    AE.fit(train_input_dict,
           train_output_dict,
           batch_size=batch_size,
           initial_epoch=0,
           epochs=n_epoch,
           validation_data=(val_input_dict, val_output_dict),
           verbose=2,
           callbacks=[cb_obj])

    elapsed = timeit.default_timer() - start_time

    print('-------------------------------')
    print('Training time:', elapsed)
    print('-------------------------------')

    #Save weights
    AE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    #Generate summaries
    summary = {}
    for i in range(n_agents):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        summary['z'] = encoder.predict(full_dict['X'])

    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', summary)
    return
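A quick numeric check of what masked_mse above computes: squared errors at masked-out positions are zeroed before averaging over all entries.

import numpy as np

X = np.array([[1.0, 2.0, 3.0]])
Y = np.array([[1.5, 2.0, 0.0]])
mask = np.array([[1.0, 1.0, 0.0]])      # the last feature is ignored
print(np.mean((X - Y) ** 2 * mask))     # 0.0833...; only the first error contributes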
    def fit(self,
            learning_rate=1e-4,
            epochs=5,
            activation='relu',
            dropout=0,
            hidden_size=1024,
            nb_layers=1,
            include_class_weight=False,
            batch_size=20,
            save_model=False,
            verbose=True,
            fine_tuning=False,
            NB_IV3_LAYERS_TO_FREEZE=279,
            use_TPU=False,
            transfer_model='Inception',
            min_accuracy=None,
            extract_SavedModel=False):

        if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']:
            target_size = (299, 299)
        else:
            target_size = (224, 224)

        #We expect the classes to be the names of the folders in the training set
        self.categories = os.listdir(TRAIN_DIR)
        """
        helper functions to build tensors
        inspired by https://www.tensorflow.org/tutorials/load_data/images
        """
        def prepare_image(img_path):
            #resize the image and make sure it is RGB
            image = Image.open(img_path)
            image = image.resize(target_size,
                                 PIL.Image.BILINEAR).convert("RGB")
            #convert the image into a numpy array
            image = img_to_array(image)
            #rescale the pixels to a 0-1 range
            image = image.astype(np.float32) / 255
            return image

        def generate_tuples(img_folder):
            #build (x, y) by looping over every class folder and every image in it
            classes = os.listdir(img_folder)
            classes_paths = [
                os.path.abspath(os.path.join(img_folder, i)) for i in classes
            ]
            x = []
            y = []

            for i, j in enumerate(classes):
                #for all the classes, get the list of pictures
                img_paths = os.listdir(classes_paths[i])
                img_paths = [
                    os.path.abspath(os.path.join(classes_paths[i], x))
                    for x in img_paths
                ]

                for img_path in img_paths:
                    x.append(prepare_image(img_path))
                    y.append(i)

            return (np.array(x), np.array(y).astype(np.int32))

        #get training data
        (x_train,
         y_train) = generate_tuples(parentdir + '/data/image_dataset/train')
        (x_val, y_val) = generate_tuples(parentdir + '/data/image_dataset/val')

        #train input_function: see https://colab.research.google.com/drive/1F8txK1JLXKtAkcvSRQz2o7NSTNoksuU2#scrollTo=abbwQQfH0td3
        def get_training_dataset(batch_size=batch_size):
            # Convert the inputs to a Dataset.
            dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

            # Shuffle, repeat, and batch the examples.
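            # drop_remainder=True keeps every batch the same shape, which the
            # static-shape TPU path below requires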
            dataset = dataset.shuffle(1000).repeat().batch(batch_size,
                                                           drop_remainder=True)

            return dataset

        def get_validation_dataset(batch_size=batch_size):
            # Convert the inputs to a Dataset.
            dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))

            # Shuffle, repeat, and batch the examples.
            dataset = dataset.shuffle(1000).repeat().batch(batch_size,
                                                           drop_remainder=True)

            return dataset

        #stop training early if the required accuracy baseline is not reached
        if min_accuracy is not None:
            callback = EarlyStopping(monitor='acc', baseline=min_accuracy)
            callback = [callback]
        else:
            callback = None

        #load the pretrained model, without the classification (top) layers
        if transfer_model == 'Xception':
            base_model = Xception(weights='imagenet',
                                  include_top=False,
                                  input_shape=(299, 299, 3))
        elif transfer_model == 'Inception_Resnet':
            base_model = InceptionResNetV2(weights='imagenet',
                                           include_top=False,
                                           input_shape=(299, 299, 3))
        elif transfer_model == 'Resnet':
            base_model = ResNet50(weights='imagenet',
                                  include_top=False,
                                  input_shape=(224, 224, 3))
        else:
            base_model = InceptionV3(weights='imagenet',
                                     include_top=False,
                                     input_shape=(299, 299, 3))

        #Add the classification layers using Keras functional API
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        for _ in range(nb_layers):
            x = Dense(hidden_size, activation=activation)(
                x)  #Hidden layer for classification
            if dropout > 0:
                x = Dropout(rate=dropout)(x)

        predictions = Dense(len(self.categories),
                            activation='softmax')(x)  #Output layer
        model = Model(inputs=base_model.input, outputs=predictions)

        #Set only the top layers as trainable (if we want to do fine-tuning,
        #we can train the base layers as a second step)
        for layer in base_model.layers:
            layer.trainable = False

        #Define the optimizer and the loss, and compile the model
        loss = 'sparse_categorical_crossentropy'
        if use_TPU:
            #if we want to try out the TPU, it looks like we currently need to use
            #tensorflow optimizers...see https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked
            #...and https://www.youtube.com/watch?v=jgNwywYcH4w
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            model.compile(optimizer=optimizer,
                          loss=loss,
                          metrics=['acc'])

            TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
            model = tf.contrib.tpu.keras_to_tpu_model(
                model,
                strategy=tf.contrib.tpu.TPUDistributionStrategy(
                    tf.contrib.cluster_resolver.TPUClusterResolver(
                        TPU_WORKER)))
            tf.logging.set_verbosity(tf.logging.INFO)

        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])

        #if we want to weight the classes given the imbalanced number of images
        if include_class_weight:
            from sklearn.utils.class_weight import compute_class_weight
            #compute balanced weights from the training labels and convert them
            #to the dict format Keras expects
            class_weight = compute_class_weight(class_weight='balanced',
                                                classes=np.unique(y_train),
                                                y=y_train)
            class_weight = dict(enumerate(class_weight))
        else:
            class_weight = None

        steps_per_epoch = int(
            sum([
                len(files)
                for r, d, files in os.walk(parentdir +
                                           '/data/image_dataset/train')
            ]) / batch_size)
        validation_steps = int(
            sum([
                len(files)
                for r, d, files in os.walk(parentdir +
                                           '/data/image_dataset/val')
            ]) / batch_size)

        #Fit the model
        if use_TPU:
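            #the dataset builders are passed as callables (not called) so the
            #TPU-converted model can build its input pipeline itself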
            history = model.fit(get_training_dataset,
                                steps_per_epoch=steps_per_epoch,
                                epochs=epochs,
                                validation_data=get_validation_dataset,
                                validation_steps=validation_steps,
                                verbose=verbose,
                                callbacks=callback,
                                class_weight=class_weight)
        else:
            history = model.fit(get_training_dataset(),
                                steps_per_epoch=steps_per_epoch,
                                epochs=epochs,
                                validation_data=get_validation_dataset(),
                                validation_steps=validation_steps,
                                verbose=verbose,
                                callbacks=callback,
                                class_weight=class_weight)

        #Fine-tune the model, if we wish so
        if fine_tuning and not model.stop_training:
            print('============')
            print('Begin fine-tuning')
            print('============')

            #freeze the first NB_IV3_LAYERS_TO_FREEZE layers and unfreeze the rest
            for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
                layer.trainable = False
            for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
                layer.trainable = True

            model.compile(optimizer=tf.train.AdamOptimizer(
                learning_rate=learning_rate * 0.1),
                          loss=loss,
                          metrics=['acc'])

            #Fit the model
            if use_TPU:
                history = model.fit(get_training_dataset,
                                    steps_per_epoch=steps_per_epoch,
                                    epochs=epochs,
                                    validation_data=get_validation_dataset,
                                    validation_steps=validation_steps,
                                    verbose=verbose,
                                    callbacks=callback,
                                    class_weight=class_weight)
            else:
                history = model.fit(get_training_dataset(),
                                    steps_per_epoch=steps_per_epoch,
                                    epochs=epochs,
                                    validation_data=get_validation_dataset(),
                                    validation_steps=validation_steps,
                                    verbose=verbose,
                                    callbacks=callback,
                                    class_weight=class_weight)

        #Record the final validation accuracy as the fitness score
        self.fitness = history.history['val_acc'][-1]

        #Save the model
        if save_model:
            if not os.path.exists(parentdir + '/data/trained_models'):
                os.makedirs(parentdir + '/data/trained_models')
            model.save(parentdir + '/data/trained_models/trained_model.h5')
            print('Model saved!')

        #save model in production format
        if extract_SavedModel:
            export_path = "./image_classifier/1/"

            with K.get_session() as sess:
                tf.saved_model.simple_save(
                    sess,
                    export_path,
                    inputs={'input_image': model.input},
                    outputs={t.name: t
                             for t in model.outputs})

        else:
            self.model = model
            del history
            del model
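

# Compact, stand-alone sketch of the freeze-then-fine-tune recipe used in the
# fit() method above, written against the plain tf.keras API. The helper name,
# the hidden size and the freeze index are illustrative, not taken from the
# class above.
import tensorflow as tf


def two_phase_transfer_model(n_classes, freeze_until=279, lr=1e-4):
    base = tf.keras.applications.InceptionV3(weights='imagenet',
                                             include_top=False,
                                             input_shape=(299, 299, 3))
    x = tf.keras.layers.GlobalAveragePooling2D()(base.output)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    out = tf.keras.layers.Dense(n_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=base.input, outputs=out)

    #phase 1: train only the new classification head on the frozen base
    for layer in base.layers:
        layer.trainable = False
    model.compile(optimizer=tf.keras.optimizers.Adam(lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    #...call model.fit(...) here, then switch to phase 2...

    #phase 2: unfreeze the top of the network and recompile with a smaller
    #learning rate before fitting again
    for layer in model.layers[freeze_until:]:
        layer.trainable = True
    model.compile(optimizer=tf.keras.optimizers.Adam(lr * 0.1),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model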
def seq2seq_architecture(latent_size, vocabulary_size, embedding_matrix,
                         batch_size, epochs, train_article, train_summary,
                         train_target):
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(vocabulary_size + 1,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Encoder-Word-Embedding')
    norm_encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')
    encoder_lstm_1 = LSTM(
        latent_size,
        name='Encoder-LSTM-1',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    encoder_lstm_2 = LSTM(
        latent_size,
        name='Encoder-LSTM-2',
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    # the last encoder layer does not return sequences: its final states serve
    # as the fixed-size context vector

    e = encoder_embeddings(encoder_inputs)
    e = norm_encoder_embeddings(e)
    e, e_state_h_1, e_state_c_1 = encoder_lstm_1(e)
    e, e_state_h_2, e_state_c_2 = encoder_lstm_2(
        e)  # e: the encoded fixed-size vector at the heart of seq2seq
    encoder_states = [e_state_h_2, e_state_c_2]

    encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)
    # encoder_outputs = encoder_model(encoder_inputs)

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(vocabulary_size + 1,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(
        latent_size,
        name='Decoder-LSTM-1',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    decoder_lstm_2 = LSTM(
        latent_size,
        name='Decoder-LSTM-2',
        return_sequences=True,
        return_state=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    decoder_dense = Dense(vocabulary_size + 1,
                          activation='softmax',
                          name="Final-Output-Dense")

    d = decoder_embeddings(decoder_inputs)
    d = norm_decoder_embeddings(d)
    d, d_state_h_1, d_state_c_1 = decoder_lstm_1(d,
                                                 initial_state=encoder_states)
    d, d_state_h_2, d_state_c_2 = decoder_lstm_2(d,
                                                 initial_state=encoder_states)
    d = norm_decoder(d)
    decoder_outputs = decoder_dense(d)

    seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs],
                          outputs=decoder_outputs)
    seq2seq_model.compile(optimizer="adam",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(classes),
                                                      y=classes)

    e_stopping = EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.1,
                                callbacks=[e_stopping],
                                class_weight=class_weights)

    f = open("data/models/stacked_results.txt", "w", encoding="utf-8")
    f.write("Stacked LSTM \n layers: 2 \n latent size: " + str(latent_size) +
            "\n vocab size: " + str(vocabulary_size) + "\n")
    f.close()

    history_dict = history.history
    plot_loss(history_dict)

    # inference
    decoder_initial_state_h1 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-H1')
    decoder_initial_state_c1 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-C1')
    decoder_initial_state_h2 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-H2')
    decoder_initial_state_c2 = Input(shape=(latent_size, ),
                                     name='Decoder-Init-C2')

    i = decoder_embeddings(decoder_inputs)
    i = norm_decoder_embeddings(i)
    i, h1, c1 = decoder_lstm_1(
        i, initial_state=[decoder_initial_state_h1, decoder_initial_state_c1])
    i, h2, c2 = decoder_lstm_2(
        i, initial_state=[decoder_initial_state_h2, decoder_initial_state_c2])
    i = norm_decoder(i)
    decoder_output = decoder_dense(i)
    decoder_states = [
        h1, c1, h2, c2
    ]  # each decoder layer keeps its own states, which are needed at prediction time

    decoder_model = Model(inputs=[decoder_inputs] + [
        decoder_initial_state_h1, decoder_initial_state_c1,
        decoder_initial_state_h2, decoder_initial_state_c2
    ],
                          outputs=[decoder_output] + decoder_states)

    return encoder_model, decoder_model
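

# Hedged sketch of how the two inference models returned above can be chained
# for greedy decoding. start_token_id, end_token_id and max_len are assumptions
# about the tokenizer and are not defined in this file.
import numpy as np


def greedy_decode(encoder_model, decoder_model, article_seq, start_token_id,
                  end_token_id, max_len=50):
    # encode the article once; the encoder returns [state_h, state_c] of its
    # last LSTM layer
    states = encoder_model.predict(article_seq)
    # both decoder layers were seeded with the encoder states during training,
    # so the same pair initialises layer 1 and layer 2 here
    states = states + states

    target = np.array([[start_token_id]])
    decoded = []
    for _ in range(max_len):
        outputs = decoder_model.predict([target] + states)
        token_probs, states = outputs[0], list(outputs[1:])
        next_id = int(np.argmax(token_probs[0, -1, :]))
        if next_id == end_token_id:
            break
        decoded.append(next_id)
        target = np.array([[next_id]])
    return decoded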