Example #1
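# The snippet below omits its imports; a minimal set that makes it
# self-contained, assuming the standalone Keras 2 API that the lr= and
# K.image_data_format() calls suggest:
import functools
import keras
from keras import applications, metrics
from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import Flatten, Dense, Activation, Dropout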
def create_model(verbose=True):
    """Function which creates a good example model using transfer learning"""

    img_width, img_height = 250, 250

    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # maybe pop some layers with model.layers.pop()
    base_model = applications.VGG16(include_top=False, input_shape=input_shape)
    # base_model = applications.ResNet50(include_top=False, input_shape=input_shape)

    # freeze all layers except last two
    for layer in base_model.layers[:-2]:
        layer.trainable = False

    # model topology
    top_model = Sequential()
    top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    top_model.add(Dense(256))
    top_model.add(Activation('relu'))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(120))
    top_model.add(Activation('softmax'))

    model = Model(inputs=base_model.input,
                  outputs=top_model(base_model.output))

    if verbose:
        print(model.summary())

    # functional method example 1

    # x0 = Flatten()(top_model.output)
    # x1 = Dropout(0.2)(x0)
    # x2 = Dense(units=512, activation='relu')(x1)
    # x3 = Dropout(0.5)(x2)
    # x4 = Dense(units=120, activation='softmax')(x3)

    # functional method example 2
    # x0 = Conv2D(128,(2,2),activation='relu', input_shape=top_model.output_shape[1:])\
    #                                   (top_model.output)
    # x1 = MaxPooling2D(pool_size=(2,2))(x0)
    # x2 = Flatten()(x1)
    # #x3 = Dropout(0.2)(x2)
    # x4 = Dense(units=128, activation='relu')(x2)
    # x5 = Dropout(0.5)(x4)
    # x6 = Dense(units=120, activation='softmax')(x5)

    # update to keras 2 api
    # model = Model(input=base_model.input, output=x4)
    # print(model.summary())

    # create custom metric function
    top20_acc = functools.partial(
        keras.metrics.top_k_categorical_accuracy, k=20)
    top20_acc.__name__ = 'top20_acc'

    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(lr=0.001),
                  metrics=[metrics.categorical_accuracy, top20_acc])

    return model
Example #2
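# This ODE-system example relies on a few names defined earlier in the original
# source and not shown here: a physics-informed `custom_loss` that enforces the
# ODE residuals, and the analytical reference solutions `x(t)` and `y(t)` used
# for plotting. A minimal sketch of the remaining assumed setup (tf.keras assumed):
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

inputs = Input(shape=(1,))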
t = Dense(16, activation="tanh")(inputs)
t = Dense(8, activation="selu")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
out_1 = Dense(1, activation="selu")(t)

t = Dense(16, activation="tanh")(inputs)
t = Dense(8, activation="selu")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
out_2 = Dense(1, activation="selu")(t)

nn = Model(inputs=inputs, outputs=[out_1, out_2], name="ode_system")
nn.compile(optimizer="adam", loss=custom_loss)

t = np.random.choice(np.linspace(0, 2, 100), 20)
t = np.expand_dims(t, axis=1)

nn.fit(t, t, epochs=10000)

t = np.linspace(0, 2, 100)
nnx, nny = nn.predict(np.expand_dims(t, axis=1))
nnx = nnx.flatten()
nny = nny.flatten()
x_trial = -2.0 + t * nnx
y_trial = t * nny

plt.plot(t, x(t), color="black")
plt.plot(t, y(t), color="black")
#%%

model.summary()

#%%

# Compile the model

#%%

from tensorflow.keras.optimizers import RMSprop

# compile the model
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.001),
              metrics=['acc'])

#%%

# Train the model

#%%

from tensorflow.keras.callbacks import ModelCheckpoint

# checkpoint settings
model_checkpoint = ModelCheckpoint(folder_location + 'weights.hdf5',
                                   monitor='loss',
                                   verbose=1,
                                   save_best_only=True)
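
# The "Train the model" cell above is cut off before the actual call in this
# fragment; a minimal sketch consistent with the compile/checkpoint settings
# (the generator names are assumptions, not taken from the original source):
#
# history = model.fit(train_generator,
#                     validation_data=validation_generator,
#                     epochs=15,
#                     callbacks=[model_checkpoint],
#                     verbose=1)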
# Optional Attention Mechanisms
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=50, num_hops=16, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='local-p*',
        window_width=25)(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=vocabulary_size, activation='softmax')(encoder_output)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss=loss,
              optimizer='adam',
              metrics=[perplexity, categorical_accuracy])
print(model.summary())

# Train multi-class classification model
model.fit(x=X_train,
          y=Y_train,
          validation_data=(X_test, Y_test),
          epochs=num_epochs,
          batch_size=batch_size)
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024, activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(0.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

model.compile(optimizer=RMSprop(lr=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    steps_per_epoch=100,
                    epochs=20,
                    validation_steps=50,
                    verbose=2)

###########################################
#             Display Output              #
###########################################
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
               activation='relu')(layer)
layer = AveragePooling2D()(layer)
layer = Conv2D(filters=8,
               kernel_size=(3, 3),
               padding='same',
               activation='relu')(layer)
layer = Flatten()(layer)
layer = Dropout(0.01)(layer)
layer = Dense(units=10,
              activation='softmax')(layer)

model = Model(input_layer, layer)

model.summary()

model.compile('adam', 'categorical_crossentropy', ['accuracy'])

# Train model with backprop.
model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2,
          validation_data=(x_test, y_test))

# Store model so SNN Toolbox can find it.
model_name = 'mnist_cnn'
keras.models.save_model(model, os.path.join(path_wd, model_name + '.h5'))

#############################
# SNN TOOLBOX CONFIGURATION #
#############################

reset_mode = 'soft'

# Create a config file with experimental setup for SNN Toolbox.
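# A minimal sketch of that config file, assuming the configparser-based setup
# used in the SNN Toolbox example scripts (exact section names and keys can
# differ between toolbox versions):
import configparser

config = configparser.ConfigParser()
config['paths'] = {'path_wd': path_wd,
                   'dataset_path': path_wd,
                   'filename_ann': model_name}
config['tools'] = {'evaluate_ann': True,
                   'normalize': True}
config['simulation'] = {'simulator': 'INI',
                        'duration': 50,
                        'num_to_test': 100,
                        'batch_size': 50}
# 'soft' reset is assumed to map to reset-by-subtraction
config['cell'] = {'reset': 'Reset by subtraction' if reset_mode == 'soft'
                  else 'Reset to zero'}

with open(os.path.join(path_wd, 'config'), 'w') as configfile:
    config.write(configfile)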
Example #7
    image_input = Input(shape=(2048, ))
    im1 = Dropout(0.5)(image_input)
    im2 = Dense(256, activation='relu')(im1)

    text_input = Input(shape=(MAX_LEN, ))
    sent1 = Embedding(vocab_size, EMBEDDING_DIM,
                      input_length=MAX_LEN)(text_input)
    sent3 = Bidirectional(LSTM(128, return_sequences=False))(sent1)

    decoder1 = Add()([im2, sent3])
    pred = Dense(vocab_size, activation='softmax')(decoder1)

    model = Model(inputs=[image_input, text_input], outputs=pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer="Adam",
                  metrics=['accuracy'])

    model.summary()

    callbacks = [
        EarlyStopping(patience=10, verbose=1),
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1),
        ModelCheckpoint(os.path.join(
            os.path.join(model_workspace_dir, 'weights_best.hdf5')),
                        verbose=1,
                        save_best_only=False),
        CSVLogger(os.path.join(model_workspace_dir, 'training.csv')),
        PerformanceMetrics(os.path.join(model_workspace_dir,
                                        'performance.csv')),
    ]
Example #8
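# Imports assumed by this snippet (base_model, image_size, learn_rate,
# generator_wrapper and the dataframe/test generators are created earlier in
# the original source and are not shown here); a tf.keras setup is assumed:
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD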
inp = Input(shape=(image_size, image_size, 3))
x = base_model(inp)
x = Flatten()(x)

# OUTPUT FUNNELLING TO SWAG_model
# "binary_crossentropy" as loss function and "sigmoid" as the final layer activation
output11 = Dense(1, activation='sigmoid')(x)

# SWAG_MODEL
SWAG_model = Model(inp, [output11])

# STOCHASTIC GRADIENT DESCENT
sgd = SGD(lr=learn_rate, momentum=.9, nesterov=False)

SWAG_model.compile(optimizer=sgd,
                   loss="binary_crossentropy",
                   metrics=["accuracy"])

STEP_SIZE_TRAIN_SWAG = SWAG_dftrain_generator.n // SWAG_dftrain_generator.batch_size
STEP_SIZE_VALID_SWAG = SWAG_dfvalid_generator.n // SWAG_dfvalid_generator.batch_size
STEP_SIZE_TEST_SWAG = test_generator.n // test_generator.batch_size

SWAG_history = SWAG_model.fit(generator_wrapper(SWAG_dftrain_generator, 0, 1),
                              steps_per_epoch=STEP_SIZE_TRAIN_SWAG,
                              validation_data=generator_wrapper(
                                  SWAG_dfvalid_generator, 0, 1),
                              validation_steps=STEP_SIZE_VALID_SWAG,
                              epochs=num_epochs,
                              verbose=2)

test_generator.reset()
Example #9
    for _ in range(adjs):
        input_a = Input((*input_shape, 1))
        x = Conv2D(8, 3)(input_a)
        x = MaxPool2D(2)(x)
        x = Flatten()(x)
        x = Model(inputs=[input_a], outputs=x)
        cnns.append(x)

    combine = Concatenate()([x.output for x in cnns])
    reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
    lstm = LSTM(32)(reshape)
    z = Dense(classes, activation='softmax')(lstm)

    model = Model(inputs=[x.input for x in cnns], outputs=z)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train_cat,
              batch_size=32,
              epochs=50,
              shuffle=True,
              validation_split=0.1)
    y_pred = model.predict(x_test).argmax(axis=-1)
    acc = accuracy_score(y_test, y_pred)
    print("Fold accuracy: {:2.2f}".format(acc))
    matrix = confusion_matrix(y_test, y_pred)
    print("Fold confusion matrix: \n {}".format(matrix))
    total_acc += acc / n_splits

print("Total model accuracy: {:2.2f}".format(total_acc))
Example #10
class MyModel:

    def __init__(self,
                 vocab_size,
                 greedy=False,
                 beam_width=10,
                 top_paths=1,
                 stop_tolerance=20,
                 reduce_tolerance=15):

        self.input_size = config.target_image_size
        self.vocab_size = vocab_size

        self.model = None
        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = max(1, top_paths)

        self.stop_tolerance = stop_tolerance
        self.reduce_tolerance = reduce_tolerance

    def summary(self, output=None, target=None):

        self.model.summary()

        if target is not None:
            os.makedirs(output, exist_ok=True)

            with open(os.path.join(output, target), "w") as f:
                with redirect_stdout(f):
                    self.model.summary()

    def load_checkpoint(self, target):
        if os.path.isfile(target):
            if self.model is None:
                self.compile()

            self.model.load_weights(target)

    def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0):

        callbacks = [
            CSVLogger(
                filename=os.path.join(logdir, "epochs.log"),
                separator=";",
                append=True),
            TensorBoard(
                log_dir=logdir,
                histogram_freq=10,
                profile_batch=0,
                write_graph=True,
                write_images=False,
                update_freq="epoch"),
            ModelCheckpoint(
                filepath=checkpoint,
                monitor=monitor,
                save_best_only=True,
                save_weights_only=True,
                verbose=verbose),
            EarlyStopping(
                monitor=monitor,
                min_delta=1e-8,
                patience=self.stop_tolerance,
                restore_best_weights=True,
                verbose=verbose),
            ReduceLROnPlateau(
                monitor=monitor,
                min_delta=1e-8,
                factor=0.2,
                patience=self.reduce_tolerance,
                verbose=verbose)
        ]

        return callbacks

    def compile(self, learning_rate=None, initial_step=0):

        # define inputs, outputs and optimizer of the chosen architecture
        inputs, outputs = self.architecture(self.input_size, self.vocab_size + 1)

        if learning_rate is None:
            learning_rate = CustomSchedule(d_model=self.vocab_size + 1, initial_step=initial_step)
            self.learning_schedule = True
        else:
            self.learning_schedule = False

        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

        # create and compile
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(
            optimizer=optimizer,
            loss=lambda y_true, y_pred: tf.py_function(
                self.ctc_loss_lambda_func, [y_true, y_pred], [tf.float32]))

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            max_queue_size=10,
            workers=1,
            use_multiprocessing=False,
            **kwargs):

        # remove ReduceLROnPlateau (if present) when a learning rate schedule is used
        if callbacks and self.learning_schedule:
            callbacks = [x for x in callbacks if not isinstance(x, ReduceLROnPlateau)]

        if os.path.isfile(config.json_file):
            with open(config.json_file, "r") as f:
                initial_params = json.load(f)
                initial_epoch = initial_params["epoch"]+1

        out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=verbose,
                             callbacks=callbacks, validation_split=validation_split,
                             validation_data=validation_data, shuffle=shuffle,
                             class_weight=class_weight, sample_weight=sample_weight,
                             initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps, validation_freq=validation_freq,
                             max_queue_size=max_queue_size, workers=workers,
                             use_multiprocessing=use_multiprocessing, **kwargs)
        return out

    def predict(self,
                x,
                batch_size=None,
                verbose=0,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False,
                ctc_decode=True, 
                ensemble=False):

        if verbose == 1:
            print("Model Predict")

        if ensemble:
            outs = []

            for weights in config.ensemble_checkpoint_weights:
                self.load_checkpoint(weights)

                outs.append(self.model.predict(
                    x=x, batch_size=batch_size, verbose=verbose, steps=steps,
                    callbacks=callbacks, max_queue_size=max_queue_size,
                    workers=workers, use_multiprocessing=use_multiprocessing))

            out = np.sum(outs, axis=0)/len(outs)

        else:
            out = self.model.predict(
                x=x, batch_size=batch_size, verbose=verbose, steps=steps,
                callbacks=callbacks, max_queue_size=max_queue_size,
                workers=workers, use_multiprocessing=use_multiprocessing)

        if not ctc_decode:
            return np.log(out.clip(min=1e-8)), []

        steps_done = 0
        if verbose == 1:
            print("CTC Decode")
            progbar = tf.keras.utils.Progbar(target=steps)

        batch_size = int(np.ceil(len(out) / steps))
        input_length = len(max(out, key=len))

        predicts, probabilities = [], []

        while steps_done < steps:
            index = steps_done * batch_size
            until = index + batch_size

            x_test = np.asarray(out[index:until])
            x_test_len = np.asarray([input_length for _ in range(len(x_test))])

            decode, log = self.ctc_decode(x_test,
                                          x_test_len,
                                          greedy=self.greedy,
                                          beam_width=self.beam_width,
                                          top_paths=self.top_paths)

            if not self.greedy:
                probabilities.extend([np.exp(x) for x in log])
            else:
                probabilities.extend([np.exp(-x) for x in log])
            decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode]
            predicts.extend(np.swapaxes(decode, 0, 1))

            steps_done += 1
            if verbose == 1:
                progbar.update(steps_done)
                
        return (predicts, probabilities)

    def ctc_decode(self, y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
        input_shape = y_pred.shape
        num_samples, num_steps = input_shape[0], input_shape[1]
        y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + K.epsilon())
        input_length = tf.cast(input_length, tf.int32)

        if greedy:
            (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
                inputs=y_pred, sequence_length=input_length)
        else:
            (decoded, log_prob) = tf.nn.ctc_beam_search_decoder(
                inputs=y_pred,
                sequence_length=input_length,
                beam_width=beam_width,
                top_paths=top_paths)

        decoded_dense = []
        for st in decoded:
            # st = tf.sparse.SparseTensor(
            #     st.indices, st.values, (num_samples, num_steps))
            decoded_dense.append(
                tf.sparse.to_dense(sp_input=st, default_value=-1))

        return (decoded_dense, log_prob)


    @staticmethod
    def ctc_loss_lambda_func(y_true, y_pred):

        if len(y_true.shape) > 2:
            y_true = tf.squeeze(y_true)

        # y_pred.shape = (batch_size, string_length, alphabet_size_1_hot_encoded)
        # the model output is a softmax, so summing over the one-hot axis gives 1
        # per timestep, and summing over the timesteps gives the input length
        input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
        input_length = tf.math.reduce_sum(input_length, axis=-1, keepdims=True)

        # y_true strings are padded with 0, so counting the non-zero entries
        # gives the number of characters in each string
        label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64")

        loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
        # average loss across all entries in the batch
        loss = tf.reduce_mean(loss)

        return loss

    def architecture(self, input_size, d_model):
    
        input_data = Input(name="input", shape=input_size)
        
        cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 2), padding="same", kernel_initializer="he_uniform")(input_data)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn)
    
        cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn)
    
        cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)
    
        cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)
    
        cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)
    
        cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
    
        shape = cnn.get_shape()
        bgru = Reshape((shape[1], shape[2] * shape[3]))(cnn)
    
        bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru)
        bgru = Dense(units=256)(bgru)
    
        bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru)
        output_data = Dense(units=d_model, activation="softmax")(bgru)
    
        return (input_data, output_data)
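# Hypothetical usage of the class above (the vocabulary, data generator and file
# names are placeholders; CustomSchedule, FullGatedConv2D and the project-level
# `config` module come from the original source and are not shown here):
#
#   htr = MyModel(vocab_size=len(charset), beam_width=10)
#   htr.compile(learning_rate=None)   # falls back to the CustomSchedule
#   callbacks = htr.get_callbacks(logdir="logs", checkpoint="checkpoint_weights.hdf5")
#   history = htr.fit(x=train_generator, epochs=100, callbacks=callbacks)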
Example #11
initial_model = VGG16(weights="imagenet",
                      include_top=False,
                      input_shape=(DIMENSION, DIMENSION, 3))
last = initial_model.output

x = Flatten()(last)
x = Dense(NODE, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(NODE, activation='relu')(x)
x = Dropout(0.4)(x)
preds = Dense(1, activation='sigmoid')(x)
model = Model(initial_model.input, preds)

#7. compile
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='binary_crossentropy',
              metrics=['acc'])

#8. fit
es = EarlyStopping(monitor='val_loss', patience=10)
re = ReduceLROnPlateau(monitor='val_loss', patience=5)
hist = model.fit_generator(datagen.flow(X_train, y_train, batch_size=32),
                           steps_per_epoch=AUG_DATASET_LEN // 32,
                           epochs=100,
                           callbacks=[es, re],
                           validation_data=datagen2.flow(X_val,
                                                         y_val,
                                                         batch_size=32))
# validation_split=0.2
# ValueError: `validation_split` is only supported for Tensors or NumPy arrays
Example #12
# load weights if necessary
model.load_weights('582-4731.7905.hdf5')

# decay learning rate using cosine annealing
lr_decay_schedule = CosineDecayRestarts(initial_learning_rate=1e-4,
                                        first_decay_steps=2000)

current_step = 0
lr = lambda: lr_decay_schedule(current_step)

# construct AdamW optimizer
adamw_optimizer = AdamW(learning_rate=lr, weight_decay=1e-4)

# compile model
model.compile(adamw_optimizer, loss="mean_absolute_error", metrics=['acc'])

#############################################################
# Run training/testing
#############################################################

# save weights when loss improves
checkpoint = ModelCheckpoint('{epoch:04d}-{val_loss:.4f}.hdf5',
                             monitor='val_loss',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True)

# callback that increments the global step value
increment_step_callback = IncrementStepCallback()
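# IncrementStepCallback is a project-specific callback that is not shown in this
# snippet. A minimal sketch of what it is assumed to do, namely advancing the
# module-level `current_step` after every batch so the `lr` lambda above picks
# up the new value:
#
# class IncrementStepCallback(tf.keras.callbacks.Callback):
#     def on_train_batch_end(self, batch, logs=None):
#         global current_step
#         current_step += 1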
Example #13
def layer_test(layer_cls,
               kwargs={},
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.

    Copy of the function in keras-team/keras because it's not in the public API.
    If we use the one from keras-team/keras it won't work with tf.keras.
    """
    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    expected_output_shape = layer.compute_output_shape(input_shape)

    # test in functional API
    if fixed_batch_size:
        x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    assert K.dtype(y) == expected_output_dtype

    # check with the functional API
    model = Model(x, y)

    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            assert expected_dim == actual_dim

    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    custom_objects = {layer.__class__.__name__: layer.__class__}
    recovered_model = model.__class__.from_config(model_config, custom_objects)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
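# Example invocation of the test routine above (hypothetical; it assumes the
# usual test-file imports such as numpy.testing's assert_allclose, Keras'
# has_arg helper, the backend K, and Input/Model/Dense):
#
# layer_test(Dense,
#            kwargs={'units': 3, 'activation': 'relu'},
#            input_shape=(2, 4))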
Example #14
        kernel_regularizer=regularizers.l2(0.001),
        recurrent_regularizer=regularizers.l2(0.001),
        dropout=0.4)(x)
x = BatchNormalization()(x)
x = GRU(256, 
        return_sequences=True, 
        stateful=False, 
        kernel_regularizer=regularizers.l2(0.001),
        recurrent_regularizer=regularizers.l2(0.001),
        dropout=0.4)(x)
x = BatchNormalization()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.4)(x)
train_out = Dense(3, activation='softmax')(x)
training_model = Model(inputs=train_in, outputs=train_out)
training_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'], sample_weight_mode = 'temporal')
training_model.summary()
plot_model(training_model, to_file='training_model.png')
# training
history = training_model.fit(train_data, 
                             train_label, 
                             batch_size=16, 
                             epochs=30, 
                             validation_data=(val_data, val_label), 
                             sample_weight=weight_matrix                             
                             )

training_model.save_weights('weights_0_30_silence_balance.hd5', overwrite=True)

# define streaming model
streaming_in_shape = train_data.shape[2:]
Example #15
# --- Create logits
logits = {}
#logits['lbl'] = layers.AveragePooling3D(pool_size=(1, bf.shape[2], bf.shape[3]), padding='same', name='lbl')(bf)
logits['lbl'] = layers.Conv3D(filters=1,
                              kernel_size=(1, 1, 1),
                              activation='sigmoid',
                              name='lbl')(f0)

# --- Create model
model = Model(inputs=inputs, outputs=logits)

# --- Compile model
model.compile(
    optimizer=optimizers.Adam(learning_rate=5e-5),
    #loss=losses.Huber(delta=0.042),
    loss=losses.MeanAbsoluteError(),
    metrics=['mse', 'mae', 'mape'],
    experimental_run_tf_function=False)

# --- Load data into memory for faster training
client.load_data_in_memory()

# --- TensorBoard
#tensor_board = TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, write_images=True)

# --- Learning rate scheduler
lr_scheduler = callbacks.LearningRateScheduler(lambda epoch, lr: lr * 0.996)

# --- csv Callback
num = '0091'
path = '/home/treuters/breast-density/dense-net/experiments/exp-' + num + '/'
Example #16
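# The class below omits its imports; a minimal set consistent with the
# tf.keras and scikit-learn calls it makes (the training dataframe is produced
# elsewhere in the original source):
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error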
class Traintf:
    def __init__(self, df, split_rat=0.1, cross_valid=10, test_df=None,
                 batch_size=32, epochs=80):
        self.df = df
        self.ratio = split_rat
        if split_rat is None:
            # print("No split selected")
            assert test_df is not None
            self.traindf = self.df
            self.testdf = test_df
            print("Length of the test set ", len(self.traindf), len(test_df))
        else:
            self.testdf, self.traindf = self.__test_train_split()

        if cross_valid is not None:
            self.cross = True
        else:
            self.cross = False
        self.cvalid = cross_valid
        self.X = None
        self.Y = None
        self.trainmodel = None
        self.pred = None
        self.batch_size = batch_size
        self.epochs = epochs
        self.i = None
        self.model = None

    def get_labels(self, test_train=True):
        labels = ['nFix', 'FFD', 'GPT', 'TRT', 'fixProp']
        drop_labels = ['sentence_id', 'word',
                       'Unnamed: 0', 'otag', 'utag',
                       'crossreftime', 'GPT-FFD', 'TRT-GPT', 'pps',
                       'phonem']

        if test_train:
            df = self.traindf
        else:
            df = self.testdf
        df = df[df.columns[~df.columns.isin(drop_labels)]]
        self.Y = df[labels]
        self.X = df[df.columns[~df.columns.isin(labels)]]

    def lr_rate(self, epoch, lr):
        if epoch > 30:
            lr = lr * tf.math.exp(-0.01)
        else:
            lr = 1e-5
        return lr

    def norma(self, x):
        return (x - x.min()) / (x.max() - x.min()), x.min().to_numpy(), \
               x.max().to_numpy()

    def train(self, fields=None, load_previous=False,
              old=None, crnt=0):
        assert fields is not None
        if isinstance(fields, str):
            fields = [fields]
        # val = {}
        self.get_labels(test_train=False)
        X_test, Y_test = self.X, self.Y
        self.get_labels()
        print("Test size", X_test.shape)
        print("training size ", self.X.shape)
        for field in fields:
            print("Traing the NN for field - ", field)
            callback = tf.keras.callbacks.LearningRateScheduler(self.lr_rate,
                                                                verbose=False)
            if not load_previous:
                print("==============Building models=============")
                input = tf.keras.Input(shape=(self.X.to_numpy().shape[-1],),
                                       name='embed')
                x = layers.Dense(1024, activation='relu')(input)
                x = layers.Dropout(rate=0.2, seed=10)(x)
                x = layers.Dense(512, activation='relu')(x)
                x = layers.Dense(512, activation='relu')(x)
                x = layers.Dropout(rate=0.2, seed=10)(x)
                x = layers.Dense(256, activation='relu')(x)
                x = layers.Dense(256, activation='relu')(x)
                nfix = layers.Dropout(rate=0.2)(x)
                nfix = layers.Dense(64, activation='relu', name='nfix0')(nfix)
                # nfix = (layers.Dense(64, activation='relu', name='nfix0',
                #                      kernel_regularizer='l2')(x))
                nfix = layers.Dense(16, activation='relu', name='nfix2')(nfix)
                nfix = layers.Dense(1, activation='relu', name='NFIX')(nfix)
                self.model = Model(input, [nfix])
                self.model.compile(optimizer='adam',
                                   loss='mae'
                                   )
            else:
                print("==============Loading models=============")
                self.model = load_model("temp_model_" + field)
            print(self.model.summary(),
                  self.X.columns.to_list()[:20])
            keras.backend.set_value(self.model.optimizer.learning_rate, 1e-5)
            es = EarlyStopping(monitor='val_loss', mode='min', verbose=False,
                               patience=30)
            mc = ModelCheckpoint('fm_' + str(crnt) + "_" + field,
                                 monitor='val_loss',
                                 mode='min', verbose=0, save_best_only=True)
            self.model.fit(self.X.to_numpy(),
                           self.Y[field].to_numpy(),
                           epochs=self.epochs,
                           batch_size=self.batch_size,
                           verbose=True,
                           # validation_split=0.2,
                           validation_data=(X_test.to_numpy(),
                                            Y_test[field].to_numpy()),
                           callbacks=[callback, mc, es],
                           use_multiprocessing=True)
            self.model.save("temp_model_" + field)

    def test(self, fields=None):
        assert fields is not None
        self.get_labels(test_train=False)
        val = {}
        print(self.X.shape, self.X.columns.to_list())
        x = pd.DataFrame()
        for idx, field in enumerate(fields):
            print("Test data size ", len(self.X))
            model = load_model("fm_0_" + field)
            v = model.predict(self.X.to_numpy())
            # print(v)
            x[field] = np.ravel(v)
            print("Mae for {}".format(field),
                  mean_absolute_error(v, np.ravel(self.Y)[idx::5]))
            # original_val = tr.Y['nFix'].to_list()
            # pred_val = np.ravel(tr.pred)[::5]
        # print("Metrics is ", val)
        return x

    def __test_train_split(self):
        tr, te = train_test_split(self.df, train_size=self.ratio)
        return tr, te

    def model_process(self, fields):
        if not self.cross:
            self.cvalid = 1
        final_val = pd.DataFrame()
        for i in range(self.cvalid):
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Processing cross validation iteration {}".format(i))
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++")
            # if i == 0:
            #     self.train(fields, load_previous=False, old=None, crnt=i)
            # else:
            #     self.train(fields, load_previous=True, old=i-1, crnt=i)
            fv = self.test(fields)
            final_val = pd.concat([final_val, fv], axis=1)
        # print(final_val, "Total val: ", pd.DataFrame(final_val).mean(axis=1))
        # print("Inference val: ", final_val)
        return final_val
Example #17
class RelevanceModel:
    def __init__(
        self,
        feature_config: FeatureConfig,
        tfrecord_type: str,
        file_io: FileIO,
        scorer: Optional[ScorerBase] = None,
        metrics: List[Union[Type[kmetrics.Metric], str]] = [],
        optimizer: Optional[Optimizer] = None,
        model_file: Optional[str] = None,
        initialize_layers_dict: dict = {},
        freeze_layers_list: list = [],
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Constructor to instantiate a RelevanceModel that can be used for
        training and evaluating the search ML task

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        scorer : `ScorerBase` object
            Scorer object that wraps an InteractionModel and converts
            input features into scores
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        logger : `Logger`, optional
            logging handler for status messages
        """
        self.feature_config: FeatureConfig = feature_config
        self.logger: Logger = logger
        self.output_name = output_name
        self.scorer = scorer
        self.tfrecord_type = tfrecord_type
        self.file_io = file_io

        if scorer:
            self.max_sequence_size = scorer.interaction_model.max_sequence_size
        else:
            self.max_sequence_size = 0

        # Load/Build Model
        if model_file and not compile_keras_model:
            """
            If a model file is specified, load it without compiling into a keras model

            NOTE:
            The loaded model can then only be used for inference and
            cannot be retrained.
            """
            self.model: Model = self.load(model_file)
            self.is_compiled = False
        else:

            """
            Specify inputs to the model

            Individual input nodes are defined for each feature
            Each data point represents features for all records in a single query
            """
            inputs: Dict[str, Input] = feature_config.define_inputs()
            scores, train_features, metadata_features = scorer(inputs)

            # Create model with functional Keras API
            self.model = Model(inputs=inputs, outputs={self.output_name: scores})
            self.model.output_names = [self.output_name]

            # Get loss fn
            loss_fn = scorer.loss.get_loss_fn(**metadata_features)

            # Get metric objects
            metrics_impl: List[Union[str, kmetrics.Metric]] = get_metrics_impl(
                metrics=metrics, feature_config=feature_config, metadata_features=metadata_features
            )

            # Compile model
            """
            NOTE:
            Related Github issue: https://github.com/tensorflow/probability/issues/519
            """
            self.model.compile(
                optimizer=optimizer,
                loss=loss_fn,
                metrics=metrics_impl,
                experimental_run_tf_function=False,
            )

            # Write model summary to logs
            model_summary = list()
            self.model.summary(print_fn=lambda x: model_summary.append(x))
            if self.logger:
                self.logger.info("\n".join(model_summary))

            if model_file:
                """
                If model file is specified, load the weights from the SavedModel

                NOTE:
                The architecture, loss and metrics of self.model need to
                be the same as the loaded SavedModel
                """
                self.load_weights(model_file)

            # Initialize layer weights
            for layer_name, layer_file in initialize_layers_dict.items():
                layer = self.model.get_layer(layer_name)
                layer.set_weights(self.file_io.load_numpy_array(layer_file, unzip=True))
                self.logger.info("Setting {} weights from {}".format(layer_name, layer_file))

            # Freeze layer weights
            for layer_name in freeze_layers_list:
                layer = self.model.get_layer(layer_name)
                layer.trainable = False
                self.logger.info("Freezing {} layer".format(layer_name))

            self.is_compiled = True

    @classmethod
    def from_relevance_scorer(
        cls,
        feature_config: FeatureConfig,
        interaction_model: InteractionModel,
        model_config: dict,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        tfrecord_type: str,
        file_io: FileIO,
        model_file: Optional[str] = None,
        initialize_layers_dict: dict = {},
        freeze_layers_list: list = [],
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Create a RelevanceModel with default Scorer function
        constructed from an InteractionModel

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        interaction_model : `InteractionModel` object
            InteractionModel object that converts input features into a
            dense feature representation
        loss : `RelevanceLossBase` object
            Loss object defining the final activation layer and the loss function
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        logger : `Logger`, optional
            logging handler for status messages

        Returns
        -------
        RelevanceModel
            RelevanceModel object with a default scorer build with a custom
            InteractionModel
        """
        assert isinstance(interaction_model, InteractionModel)
        assert isinstance(loss, RelevanceLossBase)

        scorer: ScorerBase = RelevanceScorer(
            model_config=model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=output_name,
        )

        return cls(
            scorer=scorer,
            feature_config=feature_config,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=tfrecord_type,
            model_file=model_file,
            initialize_layers_dict=initialize_layers_dict,
            freeze_layers_list=freeze_layers_list,
            compile_keras_model=compile_keras_model,
            output_name=output_name,
            file_io=file_io,
            logger=logger,
        )

    @classmethod
    def from_univariate_interaction_model(
        cls,
        model_config,
        feature_config: FeatureConfig,
        tfrecord_type: str,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        feature_layer_keys_to_fns: dict = {},
        model_file: Optional[str] = None,
        initialize_layers_dict: dict = {},
        freeze_layers_list: list = [],
        compile_keras_model: bool = False,
        output_name: str = "score",
        max_sequence_size: int = 0,
        file_io: FileIO = None,
        logger=None,
    ):
        """
        Create a RelevanceModel with default UnivariateInteractionModel

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        model_config : dict
            dictionary defining the dense model architecture
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        loss : `RelevanceLossBase` object
            Loss object defining the final activation layer and the loss function
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        feature_layer_keys_to_fns : dict
            Dictionary of custom feature transformation functions to be applied
            on the input features as part of the InteractionModel
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        max_sequence_size : int, optional
            Maximum length of the sequence to be used for SequenceExample protobuf objects
        logger : `Logger`, optional
            logging handler for status messages

        Returns
        -------
        RelevanceModel
            RelevanceModel object with a UnivariateInteractionModel
        """

        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns=feature_layer_keys_to_fns,
            tfrecord_type=tfrecord_type,
            max_sequence_size=max_sequence_size,
        )

        return cls.from_relevance_scorer(
            interaction_model=interaction_model,
            model_config=model_config,
            feature_config=feature_config,
            loss=loss,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=tfrecord_type,
            model_file=model_file,
            initialize_layers_dict=initialize_layers_dict,
            freeze_layers_list=freeze_layers_list,
            compile_keras_model=compile_keras_model,
            output_name=output_name,
            file_io=file_io,
            logger=logger,
        )

    def fit(
        self,
        dataset: RelevanceDataset,
        num_epochs: int,
        models_dir: str,
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
        monitor_metric: str = "",
        monitor_mode: str = "",
        patience=2,
    ):
        """
        Trains model for defined number of epochs
        and returns the training and validation metrics as a dictionary

        Parameters
        ----------
        dataset : `RelevanceDataset` object
            RelevanceDataset object to be used for training and validation
        num_epochs : int
            Value specifying number of epochs to train for
        models_dir : str
            Directory to save model checkpoints
        logs_dir : str, optional
            Directory to save model logs
            If set to False, no progress logs will be written
        logging_frequency : int, optional
            Every #batches to log results
        monitor_metric : str, optional
            Name of the metric to monitor for early stopping, checkpointing
        monitor_mode : {"max", "min"}
            Whether to maximize or minimize the monitoring metric
        patience : int
            Number of epochs to wait before early stopping

        Returns
        -------
        train_metrics : dict
            Train and validation metrics in a single dictionary
            where key is metric name and value is floating point metric value.
            This dictionary will be used for experiment tracking for each ml4ir run
        """
        if not monitor_metric.startswith("val_"):
            monitor_metric = "val_{}".format(monitor_metric)
        callbacks_list: list = self._build_callback_hooks(
            models_dir=models_dir,
            logs_dir=logs_dir,
            is_training=True,
            logging_frequency=logging_frequency,
            monitor_mode=monitor_mode,
            monitor_metric=monitor_metric,
            patience=patience,
        )

        if self.is_compiled:
            history = self.model.fit(
                x=dataset.train,
                validation_data=dataset.validation,
                epochs=num_epochs,
                verbose=True,
                callbacks=callbacks_list,
            )

            # Write metrics for experiment tracking
            # Returns a dictionary
            train_metrics = dict()
            for metric, value in history.history.items():
                if not metric.startswith("val_"):
                    """
                    NOTE:
                    Prepend "train_" to metrics on training dataset
                    to differentiate from validation and test metrics
                    in the final experiment results
                    """
                    # History is a dict of key: list(values per epoch)
                    # We are capturing the metrics of the last epoch (-1)
                    train_metrics["train_{}".format(metric)] = value[-1]
                else:
                    train_metrics[metric] = value[-1]

            return train_metrics
        else:
            raise NotImplementedError(
                "The model could not be trained. "
                "Check if the model was compiled correctly."
                " Training loaded SavedModel is not currently supported."
            )

    def predict(
        self,
        test_dataset: data.TFRecordDataset,
        inference_signature: str = "serving_default",
        additional_features: dict = {},
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
    ):
        """
        Predict the scores on the test dataset using the trained model

        Parameters
        ----------
        test_dataset : `Dataset` object
            `Dataset` object for which predictions are to be made
        inference_signature : str, optional
            If using a SavedModel for prediction, specify the inference signature to be used for computing scores
        additional_features : dict, optional
            Dictionary containing new feature name and function definition to
            compute them. Use this to compute additional features from the scores.
            For example, converting ranking scores for each document into ranks for
            the query
        logs_dir : str, optional
            Path to directory to save logs
        logging_frequency : int
            Value representing how often (in batches) to log status

        Returns
        -------
        `pd.DataFrame`
            pandas DataFrame containing the predictions on the test dataset
            made with the `RelevanceModel`
        """
        if logs_dir:
            outfile = os.path.join(logs_dir, RelevanceModelConstants.MODEL_PREDICTIONS_CSV_FILE)
            # Delete file if it exists
            self.file_io.rm_file(outfile)

        _predict_fn = get_predict_fn(
            model=self.model,
            tfrecord_type=self.tfrecord_type,
            feature_config=self.feature_config,
            inference_signature=inference_signature,
            is_compiled=self.is_compiled,
            output_name=self.output_name,
            features_to_return=self.feature_config.get_features_to_log(),
            additional_features=additional_features,
            max_sequence_size=self.max_sequence_size,
        )

        predictions_df_list = list()
        batch_count = 0
        for predictions_dict in test_dataset.map(_predict_fn).take(-1):
            predictions_df = pd.DataFrame(predictions_dict)
            if logs_dir:
                if os.path.isfile(outfile):
                    predictions_df.to_csv(outfile, mode="a", header=False, index=False)
                else:
                    # If writing first time, write headers to CSV file
                    predictions_df.to_csv(outfile, mode="w", header=True, index=False)
            else:
                predictions_df_list.append(predictions_df)

            batch_count += 1
            if batch_count % logging_frequency == 0:
                self.logger.info("Finished predicting scores for {} batches".format(batch_count))

        predictions_df = None
        if logs_dir:
            self.logger.info("Model predictions written to -> {}".format(outfile))
        else:
            predictions_df = pd.concat(predictions_df_list)

        return predictions_df

    def evaluate(
        self,
        test_dataset: data.TFRecordDataset,
        inference_signature: str = None,
        additional_features: dict = {},
        group_metrics_min_queries: int = 50,
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
        compute_intermediate_stats: bool = True,
    ):
        """
        Evaluate the RelevanceModel

        Parameters
        ----------
        test_dataset : `Dataset` object
            `Dataset` object on which the model is evaluated
        inference_signature : str, optional
            If using a SavedModel for prediction, specify the inference signature to be used for computing scores
        additional_features : dict, optional
            Dictionary containing new feature name and function definition to
            compute them. Use this to compute additional features from the scores.
            For example, converting ranking scores for each document into ranks for
            the query
        group_metrics_min_queries : int, optional
            Minimum count threshold per group to be considered for computing
            groupwise metrics
        logs_dir : str, optional
            Path to directory to save logs
        logging_frequency : int
            Value representing how often (in batches) to log status
        compute_intermediate_stats : bool
            Determines if group metrics and other intermediate stats on the test set should be computed

        Returns
        -------
        df_overall_metrics : `pd.DataFrame` object
            `pd.DataFrame` containing overall metrics
        df_groupwise_metrics : `pd.DataFrame` object
            `pd.DataFrame` containing groupwise metrics if
            group_metric_keys are defined in the FeatureConfig
        metrics_dict : dict
            metrics as a dictionary of metric names mapping to values

        Notes
        -----
        You can call `model.evaluate()` directly only if the keras model is compiled
        (see the usage sketch at the end of this example)

        Override this method to implement your own evaluation metrics.
        """
        if self.is_compiled:
            metrics_dict = self.model.evaluate(test_dataset)
            return None, None, dict(zip(self.model.metrics_names, metrics_dict))
        else:
            raise NotImplementedError

    def save(
        self,
        models_dir: str,
        preprocessing_keys_to_fns={},
        postprocessing_fn=None,
        required_fields_only: bool = True,
        pad_sequence: bool = False,
    ):
        """
        Save the RelevanceModel as a tensorflow SavedModel to the `models_dir`

        There are two different serving signatures currently used to save the model:

        * `default`: default keras model without any pre/post processing wrapper

        * `tfrecord`: serving signature that allows the keras model to be served using TFRecord proto messages.
          Allows definition of custom pre/post processing logic

        Additionally, each model layer is also saved as a separate numpy zipped
        array to enable transfer learning with other ml4ir models.

        Parameters
        ----------
        models_dir : str
            path to directory to save the model
        preprocessing_keys_to_fns : dict
            dictionary mapping function names to tf.functions that should be
            saved in the preprocessing step of the tfrecord serving signature
        postprocessing_fn : function
            custom tensorflow compatible postprocessing function to be used at serving time.
            Saved as part of the postprocessing layer of the tfrecord serving signature
        required_fields_only : bool
            boolean value defining if only required fields
            need to be added to the tfrecord parsing function at serving time
        pad_sequence : bool, optional
            Value defining if sequences should be padded for SequenceExample proto inputs at serving time.
            Set this to False if you do not want to handle padded scores.

        Notes
        -----
        All the functions passed under `preprocessing_keys_to_fns` here must be
        serializable tensor graph operations
        """

        model_file = os.path.join(models_dir, "final")

        # Save model with default signature
        self.model.save(filepath=os.path.join(model_file, "default"))

        """
        Save model with custom signatures

        Currently supported
        - signature to read TFRecord SequenceExample inputs
        """
        self.model.save(
            filepath=os.path.join(model_file, "tfrecord"),
            signatures=define_serving_signatures(
                model=self.model,
                tfrecord_type=self.tfrecord_type,
                feature_config=self.feature_config,
                preprocessing_keys_to_fns=preprocessing_keys_to_fns,
                postprocessing_fn=postprocessing_fn,
                required_fields_only=required_fields_only,
                pad_sequence=pad_sequence,
                max_sequence_size=self.max_sequence_size,
            ),
        )

        # Save individual layer weights
        self.file_io.make_directory(os.path.join(model_file, "layers"), clear_dir=True)
        for layer in self.model.layers:
            try:
                self.file_io.save_numpy_array(
                    np_array=layer.get_weights(),
                    file_path=os.path.join(model_file, "layers", "{}.npz".format(layer.name)),
                    zip=True,
                )
            except FileNotFoundError:
                self.logger.warning("Error saving layer: {} due to FileNotFoundError. Skipping...".format(layer.name))

        self.logger.info("Final model saved to : {}".format(model_file))

    def load(self, model_file: str) -> Model:
        """
        Loads model from the SavedModel file specified

        Parameters
        ----------
        model_file : str
            path to file with saved tf keras model

        Returns
        -------
        `tf.keras.Model`
            Tensorflow keras model loaded from file

        Notes
        -----
        Retraining is currently not supported, as it would require compiling
        the model with the right loss and optimizer states
        """
        """
        NOTE:
        There is currently a bug in Keras Model with saving/loading
        models with custom losses and metrics.

        Therefore, we are currently loading the SavedModel with compile=False
        The saved model signatures can be used for inference at serving time

        Ref:
        https://github.com/keras-team/keras/issues/5916
        https://github.com/tensorflow/tensorflow/issues/32348
        https://github.com/keras-team/keras/issues/3977

        """
        model = tf.keras.models.load_model(model_file, compile=False)

        self.logger.info("Successfully loaded SavedModel from {}".format(model_file))
        self.logger.warning("Retraining is not yet supported. Model is loaded with compile=False")

        return model

    def load_weights(self, model_file: str):
        """
        Load weights from a SavedModel (loaded with compile=False) into the current keras model

        Parameters
        ----------
        model_file : str
            path to file with saved tf keras model
        """
        loaded_model = self.load(model_file)

        # Set weights of Keras model from the loaded model weights
        self.model.set_weights(loaded_model.get_weights())
        self.logger.info("Weights have been set from SavedModel. RankingModel can now be trained.")

    def _build_callback_hooks(
        self,
        models_dir: str,
        logs_dir: Optional[str] = None,
        is_training=True,
        logging_frequency=25,
        monitor_metric: str = "",
        monitor_mode: str = "",
        patience=2,
    ):
        """
        Build callback hooks for the training and evaluation loop

        Parameters
        ----------
        models_dir : str
            Path to directory to save model checkpoints
        logs_dir : str
            Path to directory to save tensorboard logs
        is_training : bool, optional
            Whether we are building callbacks for training or evaluation
        logging_frequency : int, optional
            How often, in number of epochs, to log training and evaluation progress
        monitor_metric : str, optional
            Name of metric to be used for ModelCheckpoint and EarlyStopping callbacks
        monitor_mode : {"max", "min"}, optional
            Mode for maximizing or minimizing the ModelCheckpoint and EarlyStopping
        patience : int, optional
            Number of epochs to wait before early stopping if metric change is below tolerance

        Returns
        -------
        callbacks_list : list
            List of callbacks to be used with the RelevanceModel training and evaluation
        """
        callbacks_list: list = list()

        if is_training:
            # Model checkpoint
            if models_dir and monitor_metric:
                checkpoints_path = os.path.join(
                    models_dir, RelevanceModelConstants.CHECKPOINT_FNAME
                )
                cp_callback = callbacks.ModelCheckpoint(
                    filepath=checkpoints_path,
                    save_weights_only=False,
                    verbose=1,
                    save_best_only=True,
                    mode=monitor_mode,
                    monitor=monitor_metric,
                )
                callbacks_list.append(cp_callback)

            # Early Stopping
            if monitor_metric:
                early_stopping_callback = callbacks.EarlyStopping(
                    monitor=monitor_metric,
                    mode=monitor_mode,
                    patience=patience,
                    verbose=1,
                    restore_best_weights=True,
                )
                callbacks_list.append(early_stopping_callback)

        # TensorBoard
        if logs_dir:
            tensorboard_callback = callbacks.TensorBoard(
                log_dir=logs_dir, histogram_freq=1, update_freq=5
            )
            callbacks_list.append(tensorboard_callback)

        # Debugging/Logging
        callbacks_list.append(DebuggingCallback(self.logger, logging_frequency))

        # Add more here

        return callbacks_list
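
# The methods above cover the inference workflow of this RelevanceModel example:
# batched prediction, evaluation, saving with serving signatures and per-layer
# weights, and warm-starting from a SavedModel. A minimal, hypothetical usage
# sketch follows; `relevance_model` and `test_dataset` are placeholders for an
# already-built (and compiled) model and a parsed tf.data test dataset.
predictions_df = relevance_model.predict(
    test_dataset=test_dataset,
    logs_dir=None,              # keep predictions in memory as a DataFrame instead of streaming to CSV
    logging_frequency=25,
)

# direct evaluation only works if the underlying keras model is compiled
overall_df, groupwise_df, metrics_dict = relevance_model.evaluate(test_dataset=test_dataset)

# writes the `default` and `tfrecord` serving signatures plus per-layer .npz weights
relevance_model.save(models_dir="models/experiment_0")

# warm-start: copies weights from the SavedModel into the current keras model
relevance_model.load_weights("models/experiment_0/final/default")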
Example #18
0
def define_unet(img_rows, img_cols, optimizer):
    '''Defines a U-Net with img_rows x img_cols RGB input.
        Returns a compiled Keras Model.'''

    inputs = Input(shape=(img_rows, img_cols, 3))
    conv1 = Convolution2D(32, (3, 3), activation='relu',
                          padding='same')(inputs)
    conv1 = Convolution2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Convolution2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(128, (3, 3), activation='relu',
                          padding='same')(pool2)
    conv3 = Convolution2D(128, (3, 3), activation='relu',
                          padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(256, (3, 3), activation='relu',
                          padding='same')(pool3)
    conv4 = Convolution2D(256, (3, 3), activation='relu',
                          padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Convolution2D(512, (3, 3), activation='relu',
                          padding='same')(pool4)
    conv5 = Convolution2D(512, (3, 3), activation='relu',
                          padding='same')(conv5)

    up6 = Concatenate()([
        Convolution2D(256, (2, 2), activation='relu',
                      padding='same')(UpSampling2D(size=(2, 2))(conv5)), conv4
    ])
    conv6 = Convolution2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Convolution2D(256, (3, 3), activation='relu',
                          padding='same')(conv6)

    up7 = Concatenate()([
        Convolution2D(128, (2, 2), activation='relu',
                      padding='same')(UpSampling2D(size=(2, 2))(conv6)), conv3
    ])
    conv7 = Convolution2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Convolution2D(128, (3, 3), activation='relu',
                          padding='same')(conv7)

    up8 = Concatenate()([
        Convolution2D(64, (2, 2), activation='relu',
                      padding='same')(UpSampling2D(size=(2, 2))(conv7)), conv2
    ])
    conv8 = Convolution2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Convolution2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = Concatenate()([
        Convolution2D(32, (2, 2), activation='relu',
                      padding='same')(UpSampling2D(size=(2, 2))(conv8)), conv1
    ])
    conv9 = Convolution2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Convolution2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Convolution2D(3, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=inputs, outputs=conv10)

    model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])

    return model
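
# A minimal smoke test for define_unet above, assuming numpy is imported as np
# and the Keras layers used in the function are in scope. Input sizes must be
# divisible by 16 because of the four 2x2 pooling stages; 256x256 works.
import numpy as np

unet = define_unet(img_rows=256, img_cols=256, optimizer='adam')
x_dummy = np.random.rand(2, 256, 256, 3).astype('float32')
y_dummy = np.random.rand(2, 256, 256, 3).astype('float32')
unet.fit(x_dummy, y_dummy, epochs=1, batch_size=1)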
Example #19
0
                                    if pretrain is not None else None,
                                    input_shape=(stack_length, 224, 224,
                                                 3 if mode == 'rgb' else 2))
    x = tf.keras.layers.Reshape((1024, ))(backbone.output)
    x = Dense(64, activation='relu', kernel_initializer='he_uniform')(x)
    x = Dropout(0.5)(x)
    x = Dense(32, activation='relu', kernel_initializer='he_uniform')(x)
    x = Dropout(0.5)(x)
    x = Dense(1, kernel_initializer='he_uniform', use_bias=False)(x)
    x = BiasLayer(y_nums)(x)
    output = Activation('sigmoid')(x)
    model = Model(backbone.input, output)
    model_checkpoint = ModelCheckpoint(str(
        models_path.joinpath('{epoch:02d}-{val_mae_od:.2f}.h5')),
                                       period=1)
    model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(0.0001,
                                                     decay=1e-3 /
                                                     STEP_SIZE_TRAIN),
                  metrics=[mae_od])
    his = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    callbacks=[model_checkpoint, wandbcb],
                    verbose=1)

# %% Save history to csv and images
history = his.history
save_history(history_path, history)
plot_history(history_path, history)
Example #20
0
    def build_model(self, config: dict):
        bs = config['board_size']
        ks = config['kernel_size']
        nf = config['n_filters']
        input_shape = (bs, bs, 1)

        # Input convolutional layer
        inputs = Input(shape=input_shape)
        x = layers.Conv2D(filters=nf,
                          kernel_size=ks,
                          padding='same',
                          input_shape=input_shape,
                          kernel_regularizer=l2(
                              config['weight_decay']))(inputs)
        x = layers.BatchNormalization(axis=1)(x)
        conv_outputs = layers.Activation('relu')(x)

        for i in range(config['n_middle_blocks']):
            conv_outputs = layers.Conv2D(
                filters=nf,
                kernel_size=ks,
                padding='same',
                input_shape=input_shape,
                kernel_regularizer=l2(config['weight_decay']))(conv_outputs)
            conv_outputs = layers.BatchNormalization(axis=1)(conv_outputs)
            conv_outputs = layers.Activation('relu')(conv_outputs)

        head_inputs = input_shape if config.get('head_inputs_fixed',
                                                False) else input_shape[1:]

        # Policy head
        x = layers.Conv2D(filters=nf,
                          kernel_size=ks,
                          input_shape=head_inputs,
                          kernel_regularizer=l2(
                              config['weight_decay']))(conv_outputs)
        x = layers.BatchNormalization(axis=1)(x)
        x = layers.Activation('relu')(x)
        x = layers.Flatten()(x)
        x = layers.Dense(bs**2,
                         kernel_regularizer=l2(config['weight_decay']))(x)
        policy_outputs = layers.Activation('softmax')(x)

        # Value head
        x = layers.Conv2D(filters=1,
                          kernel_size=1,
                          input_shape=head_inputs,
                          kernel_regularizer=l2(
                              config['weight_decay']))(conv_outputs)
        x = layers.BatchNormalization(axis=1)(x)
        x = layers.Activation('relu')(x)
        x = layers.Flatten()(x)
        x = layers.Dense(config['value_head_dense_layer_size'],
                         kernel_regularizer=l2(config['weight_decay']))(x)
        x = layers.Activation('relu')(x)
        x = layers.Dense(1, kernel_regularizer=l2(config['weight_decay']))(x)
        value_output = layers.Activation('tanh')(x)

        model = Model(inputs, [policy_outputs, value_output])
        model.compile(loss=['categorical_crossentropy', 'mean_squared_error'],
                      optimizer=Adam(config['learning_rate']))
        return model
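
# An illustrative config dict for build_model above; the keys mirror the ones
# read in the method, while the values here are only plausible examples.
config = {
    'board_size': 9,                        # bs: the board is bs x bs
    'kernel_size': 3,                       # ks
    'n_filters': 64,                        # nf
    'weight_decay': 1e-4,                   # l2 regularization strength
    'n_middle_blocks': 5,
    'value_head_dense_layer_size': 64,
    'learning_rate': 1e-3,
    # 'head_inputs_fixed': True,            # optional; defaults to False via config.get
}
# model = self.build_model(config)          # called from within the owning class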
Example #21
0
x = resNet(inputs, training=True)
x = GlobalAveragePooling2D()(x)
# A Dense softmax classifier over num_classes classes
outputs = Dense(num_classes, activation='softmax')(x)
model = Model(inputs, outputs)

model.summary()
# Horovod: adjust learning rate based on number of GPUs.
scaled_lr = 1. * hvd.size()
opt = tf.keras.optimizers.Adadelta(scaled_lr)

# Horovod: add Horovod Distributed Optimizer.
opt = hvd.DistributedOptimizer(opt)

model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy,
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir='logs/{}'.format(time()))
callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    # Horovod: average metrics among workers at the end of every epoch.
    #
    # Note: This callback must be in the list before the ReduceLROnPlateau,
    # TensorBoard or other metrics-based callbacks.
    hvd.callbacks.MetricAverageCallback(),

    # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
Example #22
0
class LanguageDEC:
    '''LanguageDEC Model consists of an encoder to extract features
    and a Deep Embedded Clustering layer (DECLayer) which accepts and
    assigns the features into clusters according to a target distribution
    '''
    def __init__(self,
                 encoder=None,
                 dir_path=None,
                 languages=['en', 'de', 'cn', 'fr', 'ru'],
                 model_id='',
                 robust=False):
        self.model_id = model_id
        self.robust = robust

        # creating properties
        if dir_path is None:
            dir_path = os.path.dirname(os.path.realpath(__file__))
        self.dir_path = dir_path
        self.languages = languages
        self.n_lang = len(languages)
        self.encoder = encoder

        # creating model
        flattened = Flatten(name='flattened_encoder_output')(
            self.encoder.output)

        if robust:
            dec = MDECLayer(self.n_lang, name='clustering')
        else:
            dec = DECLayer(self.n_lang, name='clustering')

        prediction = dec(flattened)

        self.model = Model(inputs=self.encoder.input, outputs=prediction)

    def compile(self, optimizer='sgd', loss='kld'):
        self.model.compile(optimizer=optimizer, loss=loss, run_eagerly=True)
        self.model.summary()

    def extract_features(self, x):
        features = self.encoder.predict(x)
        features = features.reshape((features.shape[0], -1))
        return features

    def predict(self, x):  # give cluster prediction
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    def calulate_target_distribution(self, q):
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def write_training_log(self, key=None, value=''):
        '''
        Write to log file 
        '''
        dir_path = os.path.dirname(os.path.realpath(__file__))
        log_path = f'{dir_path}/logs/dec_logs_{self.model_id}.txt'
        if os.path.exists(log_path):
            open_mode = 'a'  # append if already exists
        else:
            open_mode = 'w'

        with open(log_path, open_mode) as text_file:
            if key is None:
                # write model summary
                self.model.summary(
                    print_fn=lambda x: text_file.write(x + '\n'))
            elif key == 'Distance':
                # write distance between cluster centers
                centroids = self.model.get_layer(
                    name='clustering').get_weights()[0]
                dists = euclidean_distances(centroids)
                print(f'Distances:\n{dists}', file=text_file)
            else:
                print(f'{key}: {value}', file=text_file)

    def initialize(self, training_data, training_label=[], robust=False):
        features = self.extract_features(training_data)

        if self.robust:
            mean_list = list()
            inv_cov_list = list()

            for i in range(len(self.languages)):
                lang_features = features[training_label == i, ]
                #is_inlier = isolation_forest_method(lang_features)

                # df = pd.DataFrame(lang_features)
                # outlier, _ = robust_mahalanobis_method(df)
                # is_inlier = np.ones(lang_features.shape[0], dtype=int)
                # is_inlier[outlier] = 0

                # before = int(lang_features.shape[0])
                # lang_features = lang_features[is_inlier == 1]
                # after = int(lang_features.shape[0])
                # n_removed = before - after
                # self.write_training_log(
                #     'Removed', f'{n_removed} outliers in {self.languages[i]}')

                df = pd.DataFrame(lang_features)
                mean, inv_cov = robust_mahalanobis_params(df)
                mean_list.append(mean)
                inv_cov_list.append(inv_cov)
            weights = [np.array(mean_list), np.array(inv_cov_list)]

        if not self.robust:
            cluster_centers = list()
            if len(training_label) == 0:
                # init using kmean
                kmeans = KMeans(n_clusters=self.n_lang)
                kmeans.fit_predict(features)
                cluster_centers = kmeans.cluster_centers_
            else:
                # init using outlier removal & averaging
                for i in range(len(self.languages)):
                    # remove outliers
                    lang_features = features[training_label == i, ]
                    is_inlier = isolation_forest_method(lang_features)

                    #df = pd.DataFrame(lang_features)
                    #outlier, _ = robust_mahalanobis_method(df)
                    #is_inlier = np.ones(lang_features.shape[0], dtype=int)
                    #is_inlier[outlier] = 0

                    before = int(lang_features.shape[0])
                    lang_features = lang_features[is_inlier == 1]
                    after = int(lang_features.shape[0])
                    n_removed = before - after
                    self.write_training_log('Removed', f'{n_removed} outliers')

                    lang_centroid = np.average(lang_features, axis=0)
                    cluster_centers.append(lang_centroid)
                weights = [np.array(cluster_centers)]

        self.model.get_layer(name='clustering').set_weights(weights)

    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            max_iteration=512,
            batch_size=128,
            update_interval=64,
            **kwargs):
        checkpoint_path = f'{self.dir_path}/model_checkpoints/dec_{self.model_id}'
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

        np.save(f'{checkpoint_path}/train_x.npy', x)
        np.save(f'{checkpoint_path}/train_y.npy', y)
        np.save(f'{checkpoint_path}/test_x.npy', x_test)
        np.save(f'{checkpoint_path}/test_y.npy', y_test)

        self.write_training_log()  # write the model summary
        index = 0
        best_acc = 0
        best_loss = float("inf")

        for ite in range(max_iteration):
            q = self.model.predict(x)
            p = self.calulate_target_distribution(q)

            # use idx to select batch from x & y
            # index_array = np.arange(x.shape[0])
            # from_index = index * batch_size
            # to_index = min((index+1) * batch_size, x.shape[0])
            # idx = index_array[from_index:to_index]
            # train_x = x[idx]
            # train_y = p[idx]

            # train all in 1 iteration
            train_x = x
            train_y = p

            loss = self.model.train_on_batch(x=train_x, y=train_y, **kwargs)

            # evaluate the clustering performance
            if ite % update_interval == 0:
                self.write_training_log(
                    '================================================ite', ite)
                self.write_training_log('Distance')
                self.write_training_log('loss: ', loss)

                self.write_training_log('Prediction on train set: ', )
                q = self.model.predict(x)
                y_pred = q.argmax(1)
                Metrics.evaluate(y,
                                 y_pred,
                                 languages=self.languages,
                                 model_id=self.model_id)

                self.write_training_log('Prediction on test set: ', )
                q = self.model.predict(x_test)
                y_pred_test = q.argmax(1)
                test_acc, pred_classes = Metrics.evaluate(
                    y_test,
                    y_pred_test,
                    languages=self.languages,
                    model_id=self.model_id)

                if test_acc > best_acc and np.unique(
                        pred_classes).size == self.n_lang:
                    best_acc = test_acc
                    self.encoder.save(
                        f'{checkpoint_path}/trained_encoder_ite{ite}.h5')

                    centroids = self.model.get_layer(
                        name='clustering').get_weights()
                    if self.robust:
                        # robust model has two weights
                        # first save the inversed variance matrix
                        np.save(f'{checkpoint_path}/VI_ite{ite}.npy',
                                centroids[1])
                        centroids = centroids[0]
                    np.save(f'{checkpoint_path}/centroids_ite{ite}.npy',
                            centroids)

            # update index
            index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

        tf.compat.v1.keras.experimental.export_saved_model(
            self.model, f'{self.dir_path}/models/dec_{self.model_id}')
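
# Tiny numeric check of the target distribution used by calulate_target_distribution
# above: p_ij = (q_ij^2 / f_j) / sum_k (q_ik^2 / f_k), with f_j = sum_i q_ij.
# High-confidence soft assignments get sharpened.
import numpy as np

q = np.array([[0.9, 0.1],
              [0.6, 0.4],
              [0.2, 0.8]])
weight = q ** 2 / q.sum(0)
p = (weight.T / weight.sum(1)).T
print(p.round(3))  # each row sums to 1; the first row becomes ~[0.98, 0.02]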
Example #23
0
    def _build_model(self, x, y):
        """Construct the model using feature and label statistics.
        
        Args:
            - x: temporal feature
            - y: labels
            
        Returns:
            - model: predictor model
        """
        # Parameters
        dim = len(x[0, 0, :])
        seq_len = len(x[0, :, 0])
        dim_y = len(y.shape)
        dilations = [2**(i) for i in range(int(np.log2(seq_len / 4)))]
        # Small hidden dimensions are better
        if self.h_dim > 30:
            self.h_dim = int(self.h_dim / 5)

        # Optimizer
        self.adam = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                             beta_1=0.9,
                                             beta_2=0.999,
                                             amsgrad=False)
        # Input
        inputs = Input(shape=(
            seq_len,
            dim,
        ))
        # First layer
        tcn_out = TCN(nb_filters=self.h_dim,
                      dilations=dilations,
                      return_sequences=True)(inputs)

        # Multi-layer
        for _ in range(self.n_layer - 2):
            tcn_out = TCN(nb_filters=self.h_dim,
                          dilations=dilations,
                          return_sequences=True)(tcn_out)

        # For classification
        if self.task == "classification":
            # For online prediction
            if dim_y == 3:
                tcn_out = TCN(nb_filters=self.h_dim,
                              dilations=dilations,
                              return_sequences=True)(tcn_out)
                output = TimeDistributed(
                    Dense(y.shape[-1], activation="sigmoid",
                          name="output"))(tcn_out)
            # For one-shot prediction
            elif dim_y == 2:
                tcn_out = TCN(nb_filters=self.h_dim,
                              dilations=dilations,
                              return_sequences=False)(tcn_out)
                output = Dense(y.shape[-1],
                               activation="sigmoid",
                               name="output")(tcn_out)
            # Model define and compile
            tcn_model = Model(inputs=[inputs], outputs=[output])
            tcn_model.compile(loss=binary_cross_entropy_loss,
                              optimizer=self.adam)
        # For regression
        elif self.task == "regression":
            # For online prediction
            if dim_y == 3:
                tcn_out = TCN(nb_filters=self.h_dim,
                              dilations=dilations,
                              return_sequences=True)(tcn_out)
                output = TimeDistributed(
                    Dense(y.shape[-1], activation="linear",
                          name="output"))(tcn_out)
            # For one-shot prediction
            elif dim_y == 2:
                tcn_out = TCN(nb_filters=self.h_dim,
                              dilations=dilations,
                              return_sequences=False)(tcn_out)
                output = Dense(y.shape[-1], activation="linear",
                               name="output")(tcn_out)
            # Model define and compile
            tcn_model = Model(inputs=[inputs], outputs=[output])
            tcn_model.compile(loss=mse_loss,
                              optimizer=self.adam,
                              metrics=["mse"])

        return tcn_model
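
# Worked example of the dilation schedule computed in _build_model above:
# the number of TCN dilation levels is int(log2(seq_len / 4)), each a power of two.
import numpy as np

seq_len = 64
dilations = [2 ** i for i in range(int(np.log2(seq_len / 4)))]
print(dilations)  # [1, 2, 4, 8]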
Example #24
0
def train_networks(training_percentage, filename, experiment):

    stages = constants.training_stages

    (data, labels) = get_data(experiment, one_hot=True)

    total = len(data)
    step = total / stages

    # Amount of training data, from which a percentage is used for
    # validation.
    training_size = int(total * training_percentage)

    n = 0
    histories = []
    for k in range(stages):
        i = k * step
        j = int(i + training_size) % total
        i = int(i)

        if j > i:
            training_data = data[i:j]
            training_labels = labels[i:j]
            testing_data = np.concatenate((data[0:i], data[j:total]), axis=0)
            testing_labels = np.concatenate((labels[0:i], labels[j:total]),
                                            axis=0)
        else:
            training_data = np.concatenate((data[i:total], data[0:j]), axis=0)
            training_labels = np.concatenate((labels[i:total], labels[0:j]),
                                             axis=0)
            testing_data = data[j:i]
            testing_labels = labels[j:i]

        training_data, training_labels = expand_data(training_data,
                                                     training_labels)
        truly_training = int(training_size * truly_training_percentage)

        validation_data = training_data[truly_training:]
        validation_labels = training_labels[truly_training:]
        training_data = training_data[:truly_training]
        training_labels = training_labels[:truly_training]

        input_img = Input(shape=(img_columns, img_rows, img_colors))
        encoded = get_encoder(input_img)
        classified = get_classifier(encoded)
        decoded = get_decoder(encoded)

        model = Model(inputs=input_img, outputs=[classified, decoded])
        model.compile(loss=['categorical_crossentropy', 'binary_crossentropy'],
                      optimizer='adam',
                      metrics='accuracy')
        model.summary()

        history = model.fit(training_data, (training_labels, training_data),
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(validation_data, {
                                'classification': validation_labels,
                                'autoencoder': validation_data
                            }),
                            callbacks=[EarlyStoppingAtLossCrossing(patience)],
                            verbose=2)

        histories.append(history)
        history = model.evaluate(testing_data, (testing_labels, testing_data),
                                 return_dict=True)
        histories.append(history)

        model.save(constants.model_filename(filename, n))
        n += 1

    return histories
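
# Illustrative check of the rotating train/test split used in train_networks
# above (hypothetical sizes; not part of the original script).
total, stages, training_percentage = 100, 5, 0.7
step = total / stages                                 # 20.0
training_size = int(total * training_percentage)      # 70
for k in range(stages):
    i = int(k * step)
    j = int(k * step + training_size) % total
    print(k, i, j, 'wraps' if j <= i else 'contiguous')
# k=0 -> (0, 70) contiguous; k=3 -> (60, 30) wraps around the end of the data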
Example #25
0
class DeconvNet:
    def __init__(self, use_cpu=False, print_summary=False):
        self.maybe_download_and_extract()
        self.build(use_cpu=use_cpu, print_summary=print_summary)

    def maybe_download_and_extract(self):
        """Download and unpack VOC data if data folder only contains the .gitignore file"""
        if os.listdir('data') == ['.gitignore']:
            filenames = [
                'VOC_OBJECT.tar.gz', 'VOC2012_SEG_AUG.tar.gz',
                'stage_1_train_imgset.tar.gz', 'stage_2_train_imgset.tar.gz'
            ]
            url = 'http://cvlab.postech.ac.kr/research/deconvnet/data/'

            for filename in filenames:
                wget.download(url + filename,
                              out=os.path.join('data', filename))

                tar = tarfile.open(os.path.join('data', filename))
                tar.extractall(path='data')
                tar.close()

                os.remove(os.path.join('data', filename))

    def predict(self, image):
        return self.model.predict(np.array([image]))

    def save(self, file_path='model.h5'):
        print(self.model.to_json())
        self.model.save_weights(file_path)

    def load(self, file_path='model.h5'):
        self.model.load_weights(file_path)

    def random_crop_or_pad(self, image, truth, size=(224, 224)):
        assert image.shape[:2] == truth.shape[:2]

        if image.shape[0] > size[0]:
            crop_random_y = random.randint(0, image.shape[0] - size[0])
            image = image[crop_random_y:crop_random_y + size[0], :, :]
            truth = truth[crop_random_y:crop_random_y + size[0], :]
        else:
            zeros = np.zeros((size[0], image.shape[1], image.shape[2]),
                             dtype=np.float32)
            zeros[:image.shape[0], :image.shape[1], :] = image
            image = np.copy(zeros)
            zeros = np.zeros((size[0], truth.shape[1]), dtype=np.float32)
            zeros[:truth.shape[0], :truth.shape[1]] = truth
            truth = np.copy(zeros)

        if image.shape[1] > size[1]:
            crop_random_x = random.randint(0, image.shape[1] - size[1])
            image = image[:, crop_random_x:crop_random_x + 224, :]
            truth = truth[:, crop_random_x:crop_random_x + 224]
        else:
            zeros = np.zeros((image.shape[0], size[1], image.shape[2]))
            zeros[:image.shape[0], :image.shape[1], :] = image
            image = np.copy(zeros)
            zeros = np.zeros((truth.shape[0], size[1]))
            zeros[:truth.shape[0], :truth.shape[1]] = truth
            truth = np.copy(zeros)

        return image, truth

    #(0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle, 6=bus, 7=car , 8=cat, 9=chair,
    # 10=cow, 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person, 16=potted plant,
    # 17=sheep, 18=sofa, 19=train, 20=tv/monitor, 255=no_label)

    def max_pool_with_argmax(self, x):
        return tf.nn.max_pool_with_argmax(x,
                                          ksize=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1],
                                          padding='SAME')

    def BatchGenerator(self,
                       train_stage=1,
                       batch_size=8,
                       image_size=(224, 224, 3),
                       labels=21):
        if train_stage == 1:
            trainset = open('data/stage_1_train_imgset/train.txt').readlines()
        else:
            trainset = open('data/stage_2_train_imgset/train.txt').readlines()

        while True:
            images = np.zeros(
                (batch_size, image_size[0], image_size[1], image_size[2]))
            truths = np.zeros(
                (batch_size, image_size[0], image_size[1], labels))

            for i in range(batch_size):
                random_line = random.choice(trainset)
                image_file = random_line.split(' ')[0]
                truth_file = random_line.split(' ')[1]
                image = np.float32(cv2.imread('data' + image_file) / 255.0)

                truth_mask = cv2.imread('data' + truth_file[:-1],
                                        cv2.IMREAD_GRAYSCALE)
                truth_mask[truth_mask ==
                           255] = 0  # replace no_label with background
                images[i], truth = self.random_crop_or_pad(
                    image, truth_mask, image_size)
                truths[i] = (np.arange(labels) == truth[..., None] - 1).astype(
                    int)  # encode to one-hot-vector
            yield images, truths

    def train(self, steps_per_epoch=1000, epochs=10, batch_size=32):
        batch_generator = self.BatchGenerator(batch_size=batch_size)
        self.model.fit_generator(batch_generator,
                                 steps_per_epoch=steps_per_epoch,
                                 epochs=epochs)

    def buildConv2DBlock(self, block_input, filters, block, depth):
        for i in range(1, depth + 1):
            if i == 1:
                conv2d = Conv2D(filters,
                                3,
                                padding='same',
                                name='conv{}-{}'.format(block, i),
                                use_bias=False)(block_input)
            else:
                conv2d = Conv2D(filters,
                                3,
                                padding='same',
                                name='conv{}-{}'.format(block, i),
                                use_bias=False)(conv2d)

            conv2d = BatchNormalization(
                name='batchnorm{}-{}'.format(block, i))(conv2d)
            conv2d = Activation('relu', name='relu{}-{}'.format(block,
                                                                i))(conv2d)

        return conv2d

    def build(self, use_cpu=False, print_summary=False):
        vgg16 = VGG16(weights="imagenet",
                      include_top=False,
                      input_shape=(224, 224, 3))

        if use_cpu:
            device = '/cpu:0'
        else:
            device = '/gpu:0'

        with tf.device(device):
            inputs = Input(shape=(224, 224, 3))

            conv_block_1 = self.buildConv2DBlock(inputs, 64, 1, 2)
            pool1, pool1_argmax = Lambda(self.max_pool_with_argmax,
                                         name='pool1')(conv_block_1)

            conv_block_2 = self.buildConv2DBlock(pool1, 128, 2, 2)
            pool2, pool2_argmax = Lambda(self.max_pool_with_argmax,
                                         name='pool2')(conv_block_2)

            conv_block_3 = self.buildConv2DBlock(pool2, 256, 3, 3)
            pool3, pool3_argmax = Lambda(self.max_pool_with_argmax,
                                         name='pool3')(conv_block_3)

            conv_block_4 = self.buildConv2DBlock(pool3, 512, 4, 3)
            pool4, pool4_argmax = Lambda(self.max_pool_with_argmax,
                                         name='pool4')(conv_block_4)

            conv_block_5 = self.buildConv2DBlock(pool4, 512, 5, 3)
            pool5, pool5_argmax = Lambda(self.max_pool_with_argmax,
                                         name='pool5')(conv_block_5)

            fc6 = Conv2D(512, 7, use_bias=False, padding='valid',
                         name='fc6')(pool5)  #4096
            fc6 = BatchNormalization(name='batchnorm_fc6')(fc6)
            fc6 = Activation('relu', name='relu_fc6')(fc6)

            fc7 = Conv2D(512, 1, use_bias=False, padding='valid',
                         name='fc7')(fc6)  #4096
            fc7 = BatchNormalization(name='batchnorm_fc7')(fc7)
            fc7 = Activation('relu', name='relu_fc7')(fc7)

            x = Conv2DTranspose(512,
                                7,
                                use_bias=False,
                                padding='valid',
                                name='deconv-fc6')(fc7)
            x = BatchNormalization(name='batchnorm_deconv-fc6')(x)
            x = Activation('relu', name='relu_deconv-fc6')(x)
            x = MaxUnpoolWithArgmax(pool5_argmax, name='unpool5')(x)
            x.set_shape(conv_block_5.get_shape())

            x = Conv2DTranspose(512,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv5-1')(x)
            x = BatchNormalization(name='batchnorm_deconv5-1')(x)
            x = Activation('relu', name='relu_deconv5-1')(x)

            x = Conv2DTranspose(512,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv5-2')(x)
            x = BatchNormalization(name='batchnorm_deconv5-2')(x)
            x = Activation('relu', name='relu_deconv5-2')(x)

            x = Conv2DTranspose(512,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv5-3')(x)
            x = BatchNormalization(name='batchnorm_deconv5-3')(x)
            x = Activation('relu', name='relu_deconv5-3')(x)

            x = MaxUnpoolWithArgmax(pool4_argmax, name='unpool4')(x)
            x.set_shape(conv_block_4.get_shape())

            x = Conv2DTranspose(512,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv4-1')(x)
            x = BatchNormalization(name='batchnorm_deconv4-1')(x)
            x = Activation('relu', name='relu_deconv4-1')(x)

            x = Conv2DTranspose(512,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv4-2')(x)
            x = BatchNormalization(name='batchnorm_deconv4-2')(x)
            x = Activation('relu', name='relu_deconv4-2')(x)

            x = Conv2DTranspose(256,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv4-3')(x)
            x = BatchNormalization(name='batchnorm_deconv4-3')(x)
            x = Activation('relu', name='relu_deconv4-3')(x)

            x = MaxUnpoolWithArgmax(pool3_argmax, name='unpool3')(x)
            x.set_shape(conv_block_3.get_shape())

            x = Conv2DTranspose(256,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv3-1')(x)
            x = BatchNormalization(name='batchnorm_deconv3-1')(x)
            x = Activation('relu', name='relu_deconv3-1')(x)

            x = Conv2DTranspose(256,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv3-2')(x)
            x = BatchNormalization(name='batchnorm_deconv3-2')(x)
            x = Activation('relu', name='relu_deconv3-2')(x)

            x = Conv2DTranspose(128,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv3-3')(x)
            x = BatchNormalization(name='batchnorm_deconv3-3')(x)
            x = Activation('relu', name='relu_deconv3-3')(x)

            x = MaxUnpoolWithArgmax(pool2_argmax, name='unpool2')(x)
            x.set_shape(conv_block_2.get_shape())

            x = Conv2DTranspose(128,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv2-1')(x)
            x = BatchNormalization(name='batchnorm_deconv2-1')(x)
            x = Activation('relu', name='relu_deconv2-1')(x)

            x = Conv2DTranspose(64,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv2-2')(x)
            x = BatchNormalization(name='batchnorm_deconv2-2')(x)
            x = Activation('relu', name='relu_deconv2-2')(x)

            x = MaxUnpoolWithArgmax(pool1_argmax, name='unpool1')(x)
            x.set_shape(conv_block_1.get_shape())

            x = Conv2DTranspose(64,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv1-1')(x)
            x = BatchNormalization(name='batchnorm_deconv1-1')(x)
            x = Activation('relu', name='relu_deconv1-1')(x)

            x = Conv2DTranspose(64,
                                3,
                                use_bias=False,
                                padding='same',
                                name='deconv1-2')(x)
            x = BatchNormalization(name='batchnorm_deconv1-2')(x)
            x = Activation('relu', name='relu_deconv1-2')(x)

            output = Conv2DTranspose(21,
                                     1,
                                     activation='softmax',
                                     padding='same',
                                     name='output')(x)

            self.model = Model(inputs=inputs, outputs=output)
            vgg16 = VGG16(weights="imagenet",
                          include_top=False,
                          input_shape=(224, 224, 3))

            if print_summary:
                print(self.model.summary())

            for layer in self.model.layers:
                if layer.name.startswith('conv'):
                    block = layer.name[4:].split('-')[0]
                    depth = layer.name[4:].split('-')[1]
                    # apply vgg16 weights without bias
                    layer.set_weights([
                        vgg16.get_layer('block{}_conv{}'.format(
                            block, depth)).get_weights()[0]
                    ])

            self.model.compile(optimizer='adam',
                               loss='categorical_crossentropy',
                               metrics=['accuracy', 'mse'])
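
# A minimal usage sketch for the DeconvNet class above. It assumes the custom
# MaxUnpoolWithArgmax layer is importable and that the data/ directory layout
# expected by maybe_download_and_extract and BatchGenerator is in place.
net = DeconvNet(use_cpu=True, print_summary=False)
net.train(steps_per_epoch=10, epochs=1, batch_size=4)
net.save('deconvnet_weights.h5')
# image: a float32 array of shape (224, 224, 3) scaled to [0, 1]
# mask = net.predict(image)   # -> (1, 224, 224, 21) per-pixel class probabilities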
Example #26
0
def obtain_features(model_prefix,
                    features_prefix,
                    labels_prefix,
                    data_prefix,
                    training_percentage,
                    am_filling_percentage,
                    experiment,
                    occlusion=None,
                    bars_type=None):
    """ Generate features for images.
    
    Uses the previously trained neural networks for generating the features corresponding
    to the images. It may introduce occlusions.
    """
    (data, labels) = get_data(experiment, occlusion, bars_type)

    total = len(data)
    step = int(total / constants.training_stages)

    # Amount of data used for training the networks
    trdata = int(total * training_percentage)

    # Amount of data used for testing memories
    tedata = step

    n = 0
    histories = []
    for i in range(0, total, step):
        j = (i + tedata) % total

        if j > i:
            testing_data = data[i:j]
            testing_labels = labels[i:j]
            other_data = np.concatenate((data[0:i], data[j:total]), axis=0)
            other_labels = np.concatenate((labels[0:i], labels[j:total]),
                                          axis=0)
            training_data = other_data[:trdata]
            training_labels = other_labels[:trdata]
            filling_data = other_data[trdata:]
            filling_labels = other_labels[trdata:]
        else:
            testing_data = np.concatenate((data[0:j], data[i:total]), axis=0)
            testing_labels = np.concatenate((labels[0:j], labels[i:total]),
                                            axis=0)
            training_data = data[j:j + trdata]
            training_labels = labels[j:j + trdata]
            filling_data = data[j + trdata:i]
            filling_labels = labels[j + trdata:i]

        # Recreate the exact same model, including its weights and the optimizer
        model = tf.keras.models.load_model(
            constants.model_filename(model_prefix, n))

        # Drop the autoencoder and the last layers of the fully connected neural network part.
        classifier = Model(model.input, model.output[0])
        no_hot = to_categorical(testing_labels)
        classifier.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics='accuracy')
        history = classifier.evaluate(testing_data,
                                      no_hot,
                                      batch_size=batch_size,
                                      verbose=1,
                                      return_dict=True)
        print(history)
        histories.append(history)
        model = Model(classifier.input, classifier.layers[-4].output)
        model.summary()

        training_features = model.predict(training_data)
        if len(filling_data) > 0:
            filling_features = model.predict(filling_data)
        else:
            r, c = training_features.shape
            filling_features = np.zeros((0, c))
        testing_features = model.predict(testing_data)

        dict = {
            constants.training_suffix:
            (training_data, training_features, training_labels),
            constants.filling_suffix:
            (filling_data, filling_features, filling_labels),
            constants.testing_suffix:
            (testing_data, testing_features, testing_labels)
        }

        for suffix in dict:
            data_fn = constants.data_filename(data_prefix + suffix, n)
            features_fn = constants.data_filename(features_prefix + suffix, n)
            labels_fn = constants.data_filename(labels_prefix + suffix, n)

            d, f, l = dict[suffix]
            np.save(data_fn, d)
            np.save(features_fn, f)
            np.save(labels_fn, l)

        n += 1

    return histories
Example #27
0
x = layers.Dropout(0.2)(x)
x = layers.Dense(1000, activation='softmax')(x)

model = Model(pre_trained_model.input, x)

model.summary()

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=15)
lr = ReduceLROnPlateau(patience=7, factor=0.6)
mc = ModelCheckpoint('../data/lotte/mc/lotte_v3_3.h5',
                     save_best_only=True,
                     verbose=1)

model.compile(optimizer=RMSprop(lr=1e-6),
              loss='categorical_crossentropy',
              metrics=['acc'])

history = model.fit(x_train,
                    y_train,
                    callbacks=[es, lr],
                    epochs=500,
                    validation_split=0.2)

loss, acc = model.evaluate(x_test, y_test, batch_size=16)
print('loss, acc : ', loss, acc)

result = model.predict(x_pred, verbose=True)

import pandas as pd
submission = pd.read_csv('../lotte/sample.csv')
Example #28
0
                noise1 = layers.GaussianNoise(test_dict[1])(conv1, training=True)
                pool1 = layers.MaxPooling2D(pool_size=(2, 2))(noise1)

                conv2 = layers.Conv2D(64, kernel_size=(3,3), activation='relu')(pool1)
                noise2 = layers.GaussianNoise(test_dict[2])(conv2, training=True)
                pool2 = layers.MaxPooling2D(pool_size=(2, 2))(noise2)

                conv3 = layers.Conv2D(64, kernel_size=(3,3), activation='relu')(pool2)
                noise1 = layers.GaussianNoise(test_dict[3])(conv3, training=True)
                flat = layers.Flatten()(noise1)
                hidden1 = layers.Dense(64, activation='relu')(flat)
                output = layers.Dense(10)(hidden1)

                model1 = Model(inputs=visible, outputs=output)
                model1.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
                model1.set_weights(weights1)
                test_loss, test_acc = model1.evaluate(test_images,  test_labels, verbose=0)
                fintest_trial.append(test_acc)
                parameters[name]=fintest_trial
      #testing clear model
      name=str(m)+'clearmodelLayer'+str(l)
      fintest_trial=[]
      for n in range(0, 2):
                print('n', n)
                del(model1)
                tf.compat.v1.reset_default_graph()
                if n==0:
                    test_dict={1: 0, 2: 0, 3: 0}
                else:
Example #29
0
class VanilllaGAN(gan.Model):
    def __init__(self, model_parameters):
        super().__init__(model_parameters)

    def define_gan(self):
        self.generator = Generator(self.batch_size).\
            build_model(input_shape=(self.noise_dim,), dim=self.layers_dim, data_dim=self.data_dim)

        self.discriminator = Discriminator(self.batch_size).\
            build_model(input_shape=(self.data_dim,), dim=self.layers_dim)

        optimizer = Adam(self.lr, beta_1=self.beta_1, beta_2=self.beta_2)

        # Build and compile the discriminator
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # The generator takes noise as input and generates imgs
        z = Input(shape=(self.noise_dim, ))
        record = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        validity = self.discriminator(record)

        # The combined model  (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self._model = Model(z, validity)
        self._model.compile(loss='binary_crossentropy', optimizer=optimizer)

    def get_data_batch(self, train, batch_size, seed=0):
        # # random sampling - some samples will have excessively low or high sampling, but easy to implement
        # np.random.seed(seed)
        # x = train.loc[ np.random.choice(train.index, batch_size) ].values
        # iterate through shuffled indices, so every sample gets covered evenly

        start_i = (batch_size * seed) % len(train)
        stop_i = start_i + batch_size
        shuffle_seed = (batch_size * seed) // len(train)
        np.random.seed(shuffle_seed)
        train_ix = np.random.choice(
            list(train.index), replace=False,
            size=len(train))  # wasteful to shuffle every time
        train_ix = list(train_ix) + list(
            train_ix)  # duplicate to cover ranges past the end of the set
        x = train.loc[train_ix[start_i:stop_i]].values
        return np.reshape(x, (batch_size, -1))

    def train(self, data, train_arguments):
        [cache_prefix, epochs, sample_interval] = train_arguments

        # Adversarial ground truths
        valid = np.ones((self.batch_size, 1))
        fake = np.zeros((self.batch_size, 1))

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------
            batch_data = self.get_data_batch(data, self.batch_size)
            noise = tf.random.normal((self.batch_size, self.noise_dim))

            # Generate a batch of events
            gen_data = self.generator(noise, training=True)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(batch_data, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_data, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            noise = tf.random.normal((self.batch_size, self.noise_dim))
            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self._model.train_on_batch(noise, valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated events
            if epoch % sample_interval == 0:
                # test the data generation step here
                # save model checkpoints
                if path.exists('./cache') is False:
                    os.mkdir('./cache')
                model_checkpoint_base_name = './cache/' + cache_prefix + '_{}_model_weights_step_{}.h5'
                self.generator.save_weights(
                    model_checkpoint_base_name.format('generator', epoch))
                self.discriminator.save_weights(
                    model_checkpoint_base_name.format('discriminator', epoch))

                # generate the data here
                z = tf.random.normal((432, self.noise_dim))
                gen_data = self.generator(z)
                print('generated_data')
Example #30
0
def main():
    numpy.random.seed(7)

    # data. definition of the problem.
    seq_length = 20
    x_train, y_train = task_add_two_numbers_after_delimiter(20_000, seq_length)
    x_val, y_val = task_add_two_numbers_after_delimiter(4_000, seq_length)

    # just arbitrary values, for visual purposes: easier to inspect than random values.
    test_index_1 = 4
    test_index_2 = 9
    x_test, _ = task_add_two_numbers_after_delimiter(10, seq_length, 0,
                                                     test_index_1,
                                                     test_index_2)
    # x_test_mask is just a mask that, if applied to x_test, would still contain the information to solve the problem.
    # we expect the attention map to look like this mask.
    x_test_mask = np.zeros_like(x_test[..., 0])
    x_test_mask[:, test_index_1:test_index_1 + 1] = 1
    x_test_mask[:, test_index_2:test_index_2 + 1] = 1

    # model
    i = Input(shape=(seq_length, 1))
    x = LSTM(100, return_sequences=True)(i)
    x = attention_3d_block(x)
    x = Dropout(0.2)(x)
    x = Dense(1, activation='linear')(x)

    model = Model(inputs=[i], outputs=[x])
    model.compile(loss='mse', optimizer='adam')
    print(model.summary())

    output_dir = 'task_add_two_numbers'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    max_epoch = int(sys.argv[1]) if len(sys.argv) > 1 else 200

    class VisualiseAttentionMap(Callback):
        def on_epoch_end(self, epoch, logs=None):
            attention_map = get_activations(
                model, x_test,
                layer_name='attention_weight')['attention_weight']

            # top is attention map.
            # bottom is ground truth.
            plt.imshow(np.concatenate([attention_map, x_test_mask]),
                       cmap='hot')

            iteration_no = str(epoch).zfill(3)
            plt.axis('off')
            plt.title(f'Iteration {iteration_no} / {max_epoch}')
            plt.savefig(f'{output_dir}/epoch_{iteration_no}.png')
            plt.close()
            plt.clf()

    model.fit(x_train,
              y_train,
              validation_data=(x_val, y_val),
              epochs=max_epoch,
              batch_size=64,
              callbacks=[VisualiseAttentionMap()])