def create_model(verbose=True):
    """Build and compile a VGG16-based transfer-learning classifier.

    A VGG16 base (``include_top=False``) is created for 250x250 RGB inputs,
    with the channel axis placed according to ``K.image_data_format()``.
    Every base layer except the last two is frozen.  A small head
    (Flatten -> Dense(256) -> ReLU -> Dropout(0.2) -> Dense(120) -> softmax)
    is stacked on the base output.

    The model is compiled with categorical cross-entropy, Adam (0.001) and
    two metrics: categorical accuracy and top-20 categorical accuracy.

    Args:
        verbose: When true, print the model summary after construction.

    Returns:
        The compiled Keras ``Model``.
    """
    img_width, img_height = 250, 250
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # Alternative backbone that takes the same arguments:
    # applications.ResNet50(include_top=False, input_shape=input_shape)
    base_model = applications.VGG16(include_top=False,
                                    input_shape=input_shape)

    # freeze all layers except last two
    for layer in base_model.layers[:-2]:
        layer.trainable = False

    # model topology
    top_model = Sequential()
    top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    top_model.add(Dense(256))
    top_model.add(Activation('relu'))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(120))
    top_model.add(Activation('softmax'))

    model = Model(inputs=base_model.input,
                  outputs=top_model(base_model.output))

    if verbose:
        # summary() prints by itself and returns None; wrapping it in
        # print() only added a stray "None" line to the output.
        model.summary()

    # create custom metric function; Keras reports metrics by __name__,
    # which functools.partial objects do not have by default.
    top20_acc = functools.partial(
        keras.metrics.top_k_categorical_accuracy, k=20)
    top20_acc.__name__ = 'top20_acc'

    # NOTE(review): `lr` is the legacy spelling of `learning_rate`; it is
    # kept because the Keras version this file targets is not visible here.
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(lr=0.001),
                  metrics=[metrics.categorical_accuracy, top20_acc])
    return model
# First branch: a stack of Dense layers applied to `inputs`, ending in a
# single-unit output (out_1).
t = Dense(16, activation="tanh")(inputs)
t = Dense(8, activation="selu")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
out_1 = Dense(1, activation="selu")(t)

# Second branch: the same layer stack built again from `inputs` with its
# own (unshared) layers, ending in out_2.
t = Dense(16, activation="tanh")(inputs)
t = Dense(8, activation="selu")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
t = Dense(8, activation="tanh")(t)
out_2 = Dense(1, activation="selu")(t)

# Two-output model, compiled with `custom_loss` (defined elsewhere).
nn = Model(inputs=inputs, outputs=[out_1, out_2], name="ode_system")
nn.compile(optimizer="adam", loss=custom_loss)

# NOTE: from here on `t` is rebound from a Keras tensor to NumPy data.
# Training points: 20 values drawn from 100 evenly spaced points on [0, 2],
# reshaped to a column vector.
t = np.random.choice(np.linspace(0, 2, 100), 20)
t = np.expand_dims(t, axis=1)
# The sampled points are passed as both the inputs and the targets.
nn.fit(t, t, epochs=10000)

# Evaluate the network on the full 100-point grid.
t = np.linspace(0, 2, 100)
nnx, nny = nn.predict(t)
nnx = nnx.flatten()
nny = nny.flatten()

# Trial solutions built from the network outputs; at t = 0 they evaluate
# to -2.0 and 0 regardless of the network.
x_trial = -2.0 + t * nnx
y_trial = t * nny

# Plot x(t) and y(t) (callables defined elsewhere) in black.
plt.plot(t, x(t), color="black")
plt.plot(t, y(t), color="black")
#%%
model.summary()

#%%
# Compile the model

#%%
from tensorflow.keras.optimizers import RMSprop

# compile the model
# `learning_rate` is the supported argument name in tf.keras; the legacy
# `lr` alias is deprecated and rejected by newer Keras releases.
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(learning_rate=0.001),
              metrics=['acc'])

#%%
# Train the model

#%%
from tensorflow.keras.callbacks import ModelCheckpoint

# checkpoint settings: write the model to <folder_location>weights.hdf5
# only when the monitored training loss improves.
# NOTE: the path is built by plain concatenation, so `folder_location`
# must already end with a path separator.
model_checkpoint = ModelCheckpoint(folder_location + 'weights.hdf5',
                                   monitor='loss',
                                   verbose=1,
                                   save_best_only=True)
# Optional Attention Mechanisms
# config 1 applies self-attention to `encoder_output`; configs 2 and 3
# apply global / local-p* attention to `attention_input` and flatten the
# result.  Any other value leaves `encoder_output` untouched.
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=50, num_hops=16, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='local-p*',
        window_width=25)(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=vocabulary_size, activation='softmax')(encoder_output)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss=loss, optimizer='adam',
              metrics=[perplexity, categorical_accuracy])
# summary() prints by itself and returns None; print(model.summary())
# only appended a stray "None" line.
model.summary()

# Train multi-class classification model
model.fit(x=X_train, y=Y_train,
          validation_data=(X_test, Y_test),
          epochs=num_epochs, batch_size=batch_size)
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024, activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(0.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# releases no longer accept.
model.compile(optimizer=RMSprop(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_generator,
                    validation_data=validation_generator,
                    steps_per_epoch=100,
                    epochs=20,
                    validation_steps=50,
                    verbose=2)

###########################################
#             Display Output              #
###########################################
import matplotlib.pyplot as plt

# Per-epoch curves recorded by fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
activation='relu')(layer)
# (The line above closes a layer call that starts before this excerpt.)
layer = AveragePooling2D()(layer)
layer = Conv2D(filters=8, kernel_size=(3, 3), padding='same',
               activation='relu')(layer)
layer = Flatten()(layer)
layer = Dropout(0.01)(layer)
# 10-way softmax classifier head.
layer = Dense(units=10, activation='softmax')(layer)

model = Model(input_layer, layer)
model.summary()

# Positional arguments are: optimizer, loss, metrics.
model.compile('adam', 'categorical_crossentropy', ['accuracy'])

# Train model with backprop.
model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2,
          validation_data=(x_test, y_test))

# Store model so SNN Toolbox can find it.
model_name = 'mnist_cnn'
keras.models.save_model(model, os.path.join(path_wd, model_name + '.h5'))

# SNN TOOLBOX CONFIGURATION #
#############################

reset_mode = 'soft'

# Create a config file with experimental setup for SNN Toolbox.
# Image branch: 2048-d feature vector -> dropout -> 256-d projection.
image_input = Input(shape=(2048, ))
im1 = Dropout(0.5)(image_input)
im2 = Dense(256, activation='relu')(im1)

# Text branch: token ids -> embedding -> bidirectional LSTM.  With 128
# units per direction the output is 256-d, matching the image branch so
# the two can be added element-wise.
text_input = Input(shape=(MAX_LEN, ))
sent1 = Embedding(vocab_size, EMBEDDING_DIM, input_length=MAX_LEN)(text_input)
sent3 = Bidirectional(LSTM(128, return_sequences=False))(sent1)

# Merge both branches and predict a distribution over the vocabulary.
decoder1 = Add()([im2, sent3])
pred = Dense(vocab_size, activation='softmax')(decoder1)

model = Model(inputs=[image_input, text_input], outputs=pred)
model.compile(loss='categorical_crossentropy', optimizer="Adam",
              metrics=['accuracy'])
model.summary()

callbacks = [
    EarlyStopping(patience=10, verbose=1),
    ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1),
    # The original wrapped this path in a second, single-argument
    # os.path.join(), which is a no-op; one join is enough.
    # NOTE(review): the file is named "weights_best" but
    # save_best_only=False overwrites it every epoch - confirm intent.
    ModelCheckpoint(os.path.join(model_workspace_dir, 'weights_best.hdf5'),
                    verbose=1, save_best_only=False),
    CSVLogger(os.path.join(model_workspace_dir, 'training.csv')),
    PerformanceMetrics(os.path.join(model_workspace_dir, 'performance.csv')),
]
inp = Input(shape=(image_size, image_size, 3))
x = base_model(inp)
x = Flatten()(x)

# OUTPUT FUNNELLING TO SWAG_model
# "binary_crossentropy" as loss function and "sigmoid" as the final layer activation
output11 = Dense(1, activation='sigmoid')(x)

# SWAG_MODEL
SWAG_model = Model(inp, [output11])

# STOCHASTIC GRADIENT DESCENT
# `learning_rate` replaces the deprecated `lr` alias.
sgd = SGD(learning_rate=learn_rate, momentum=.9, nesterov=False)
SWAG_model.compile(optimizer=sgd, loss="binary_crossentropy",
                   metrics=["accuracy"])

# Steps per epoch = samples // batch size, each taken from the SAME
# generator.  The training step count previously divided by the
# validation generator's batch size, which is wrong whenever the two
# generators use different batch sizes.
STEP_SIZE_TRAIN_SWAG = SWAG_dftrain_generator.n // SWAG_dftrain_generator.batch_size
STEP_SIZE_VALID_SWAG = SWAG_dfvalid_generator.n // SWAG_dfvalid_generator.batch_size
STEP_SIZE_TEST_SWAG = test_generator.n // test_generator.batch_size

SWAG_history = SWAG_model.fit(
    generator_wrapper(SWAG_dftrain_generator, 0, 1),
    steps_per_epoch=STEP_SIZE_TRAIN_SWAG,
    validation_data=generator_wrapper(SWAG_dfvalid_generator, 0, 1),
    validation_steps=STEP_SIZE_VALID_SWAG,
    epochs=num_epochs,
    verbose=2)

test_generator.reset()
# NOTE(review): this excerpt appears to sit inside a cross-validation loop
# (it accumulates `total_acc` over `n_splits`); the enclosing loop and the
# initialisation of `cnns` / `total_acc` are not visible here - confirm the
# nesting of the statements below against the full file.

# Build `adjs` independent CNN sub-models, each with its own input.
for _ in range(adjs):
    input_a = Input((*input_shape, 1))
    x = Conv2D(8, 3)(input_a)
    x = MaxPool2D(2)(x)
    x = Flatten()(x)
    x = Model(inputs=[input_a], outputs=x)
    cnns.append(x)

# Concatenate the flattened CNN outputs and reshape them back into one
# row per sub-model so the LSTM consumes them as a sequence of length
# len(cnns).
combine = Concatenate()([x.output for x in cnns])
reshape = Reshape((len(cnns), cnns[0].output_shape[1]))(combine)
lstm = LSTM(32)(reshape)
z = Dense(classes, activation='softmax')(lstm)

model = Model(inputs=[x.input for x in cnns], outputs=z)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train_cat, batch_size=32, epochs=50, shuffle=True,
          validation_split=0.1)

# Class index with the highest predicted probability per sample.
y_pred = model.predict(x_test).argmax(axis=-1)
acc = accuracy_score(y_test, y_pred)
print("Fold accuracy: {:2.2f}".format(acc))
matrix = confusion_matrix(y_test, y_pred)
print("Fold confusion matrix: \n {}".format(matrix))

# Running mean of the fold accuracies.
total_acc += acc / n_splits
print("Total model accuracy: {:2.2f}".format(total_acc))
class MyModel:
    """Keras wrapper around a CTC-trained sequence recognition network.

    The wrapper owns the Keras model (built lazily by :meth:`compile`),
    provides the training callbacks, and turns raw network output into
    decoded label sequences with CTC greedy or beam-search decoding.
    """

    def __init__(self, vocab_size, greedy=False, beam_width=10, top_paths=1,
                 stop_tolerance=20, reduce_tolerance=15):
        """Store decoding and training settings; the network is not built yet.

        Args:
            vocab_size: Number of symbols; the network emits ``vocab_size + 1``
                classes.
            greedy: Use greedy CTC decoding instead of beam search.
            beam_width: Beam width for beam-search decoding.
            top_paths: Number of decoded paths to return (clamped to >= 1).
            stop_tolerance: Patience (epochs) for early stopping.
            reduce_tolerance: Patience (epochs) for learning-rate reduction.
        """
        self.input_size = config.target_image_size
        self.vocab_size = vocab_size
        self.model = None
        # Overwritten by compile(); initialised so that fit() never hits a
        # missing attribute.
        self.learning_schedule = False
        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = max(1, top_paths)
        self.stop_tolerance = stop_tolerance
        self.reduce_tolerance = reduce_tolerance

    def summary(self, output=None, target=None):
        """Print the model summary and optionally also write it to a file.

        Args:
            output: Directory for the summary file (created if missing).
                Defaults to the current directory when ``target`` is given.
            target: File name of the summary file; nothing is written
                when ``None``.
        """
        self.model.summary()

        if target is not None:
            # os.makedirs(None) raises TypeError, so a file name without a
            # directory falls back to the current directory.
            output = "." if output is None else output
            os.makedirs(output, exist_ok=True)
            with open(os.path.join(output, target), "w") as f:
                with redirect_stdout(f):
                    self.model.summary()

    def load_checkpoint(self, target):
        """Load weights from ``target`` if that file exists.

        The model is compiled first when it has not been built yet.
        A missing file is ignored silently.
        """
        if os.path.isfile(target):
            if self.model is None:
                self.compile()
            self.model.load_weights(target)

    def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0):
        """Return the training callbacks.

        The list contains, in order: CSV log, TensorBoard, best-weights
        checkpoint, early stopping and learning-rate reduction on plateau.

        Args:
            logdir: Directory for ``epochs.log`` and the TensorBoard logs.
            checkpoint: File path for the best-weights checkpoint.
            monitor: Metric watched by checkpoint / early stop / LR reduce.
            verbose: Verbosity forwarded to those three callbacks.
        """
        callbacks = [
            CSVLogger(
                filename=os.path.join(logdir, "epochs.log"),
                separator=";",
                append=True),
            TensorBoard(
                log_dir=logdir,
                histogram_freq=10,
                profile_batch=0,
                write_graph=True,
                write_images=False,
                update_freq="epoch"),
            ModelCheckpoint(
                filepath=checkpoint,
                monitor=monitor,
                save_best_only=True,
                save_weights_only=True,
                verbose=verbose),
            EarlyStopping(
                monitor=monitor,
                min_delta=1e-8,
                patience=self.stop_tolerance,
                restore_best_weights=True,
                verbose=verbose),
            ReduceLROnPlateau(
                monitor=monitor,
                min_delta=1e-8,
                factor=0.2,
                patience=self.reduce_tolerance,
                verbose=verbose),
        ]
        return callbacks

    def compile(self, learning_rate=None, initial_step=0):
        """Build the network and compile it with RMSprop and the CTC loss.

        Args:
            learning_rate: Fixed learning rate.  When ``None`` a
                ``CustomSchedule`` is used and ``self.learning_schedule`` is
                set to ``True``.
            initial_step: Starting step handed to the schedule.
        """
        # define inputs, outputs and optimizer of the chosen architecture
        inputs, outputs = self.architecture(self.input_size,
                                            self.vocab_size + 1)

        if learning_rate is None:
            learning_rate = CustomSchedule(d_model=self.vocab_size + 1,
                                           initial_step=initial_step)
            self.learning_schedule = True
        else:
            self.learning_schedule = False

        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

        # create and compile; the loss is routed through tf.py_function
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(
            optimizer=optimizer,
            loss=lambda y1, y2: tf.py_function(
                self.ctc_loss_lambda_func, [y1, y2], [tf.float32]))

    def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1,
            callbacks=None, validation_split=0.0, validation_data=None,
            shuffle=True, class_weight=None, sample_weight=None,
            initial_epoch=0, steps_per_epoch=None, validation_steps=None,
            validation_freq=1, max_queue_size=10, workers=1,
            use_multiprocessing=False, **kwargs):
        """Train the wrapped model; arguments mirror Keras ``Model.fit``.

        Two adjustments are made before delegating:

        * ``ReduceLROnPlateau`` callbacks are dropped when a learning-rate
          schedule is active.
        * If ``config.json_file`` exists, ``initial_epoch`` is overridden
          with the stored ``"epoch"`` value plus one.

        Returns:
            The object returned by the underlying ``Model.fit`` call.
        """
        # remove ReduceLROnPlateau (if exist) when use schedule learning rate
        if callbacks and self.learning_schedule:
            callbacks = [cb for cb in callbacks
                         if not isinstance(cb, ReduceLROnPlateau)]

        if os.path.isfile(config.json_file):
            with open(config.json_file, "r") as f:
                initial_params = json.load(f)
            initial_epoch = initial_params["epoch"] + 1

        out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs,
                             verbose=verbose, callbacks=callbacks,
                             validation_split=validation_split,
                             validation_data=validation_data,
                             shuffle=shuffle, class_weight=class_weight,
                             sample_weight=sample_weight,
                             initial_epoch=initial_epoch,
                             steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps,
                             validation_freq=validation_freq,
                             max_queue_size=max_queue_size, workers=workers,
                             use_multiprocessing=use_multiprocessing,
                             **kwargs)
        return out

    def predict(self, x, batch_size=None, verbose=0, steps=1, callbacks=None,
                max_queue_size=10, workers=1, use_multiprocessing=False,
                ctc_decode=True, ensemble=False):
        """Run the network and, by default, CTC-decode its output.

        Args:
            x, batch_size, verbose, steps, callbacks, max_queue_size,
            workers, use_multiprocessing: Forwarded to Keras
                ``Model.predict``.  ``steps`` also sets the number of
                decoding batches (``None`` is treated as one batch).
            ctc_decode: When false, return the clipped log of the raw
                network output and an empty list.
            ensemble: Average the raw outputs over every checkpoint listed
                in ``config.ensemble_checkpoint_weights``.

        Returns:
            ``(predicts, probabilities)``.  ``predicts`` has one entry per
            sample, each a list of ``top_paths`` label-id lists with the
            ``-1`` padding removed.  ``probabilities`` has one entry per
            sample, derived from the decoder's log-probabilities.
        """
        predict_kwargs = dict(x=x, batch_size=batch_size, verbose=verbose,
                              steps=steps, callbacks=callbacks,
                              max_queue_size=max_queue_size, workers=workers,
                              use_multiprocessing=use_multiprocessing)

        if verbose == 1:
            print("Model Predict")

        if ensemble:
            outs = []
            for weights in config.ensemble_checkpoint_weights:
                self.load_checkpoint(weights)
                outs.append(self.model.predict(**predict_kwargs))
            out = np.sum(outs, axis=0) / len(outs)
        else:
            out = self.model.predict(**predict_kwargs)

        if not ctc_decode:
            return np.log(out.clip(min=1e-8)), []

        # `steps=None` is legal for Keras predict but cannot be divided by.
        num_steps = steps or 1
        steps_done = 0

        if verbose == 1:
            print("CTC Decode")
            progbar = tf.keras.utils.Progbar(target=num_steps)

        batch_size = int(np.ceil(len(out) / num_steps))
        input_length = len(max(out, key=len))
        predicts, probabilities = [], []

        while steps_done < num_steps:
            index = steps_done * batch_size
            until = index + batch_size

            x_test = np.asarray(out[index:until])
            x_test_len = np.full(len(x_test), input_length)

            decode, log = self.ctc_decode(x_test, x_test_len,
                                          greedy=self.greedy,
                                          beam_width=self.beam_width,
                                          top_paths=self.top_paths)

            if not self.greedy:
                probabilities.extend([np.exp(lp) for lp in log])
            else:
                probabilities.extend([np.exp(-lp) for lp in log])

            # decode is indexed [path][sample][label]; drop the -1 padding.
            decode = [[[int(p) for p in seq if p != -1] for seq in path]
                      for path in decode]
            # Transpose to [sample][path].  zip() is used instead of
            # np.swapaxes because the label lists are ragged after the
            # padding is removed, and NumPy >= 1.24 refuses to build an
            # array from ragged nested lists.
            predicts.extend(list(paths) for paths in zip(*decode))

            steps_done += 1
            if verbose == 1:
                progbar.update(steps_done)

        return (predicts, probabilities)

    def ctc_decode(self, y_pred, input_length, greedy=True, beam_width=100,
                   top_paths=1):
        """Decode network output with TensorFlow's CTC decoders.

        Args:
            y_pred: Network output; axes 0 and 1 are swapped before decoding.
            input_length: Sequence length per sample.
            greedy: Use ``tf.nn.ctc_greedy_decoder`` instead of beam search.
            beam_width: Beam width for beam search.
            top_paths: Number of paths returned by beam search.

        Returns:
            ``(decoded_dense, log_prob)`` where ``decoded_dense`` is a list
            of dense tensors (one per decoded path) padded with ``-1``.
        """
        # The decoders take the log of the (epsilon-stabilised) output with
        # the first two axes swapped.
        y_pred = tf.math.log(
            tf.transpose(y_pred, perm=[1, 0, 2]) + K.epsilon())
        input_length = tf.cast(input_length, tf.int32)

        if greedy:
            (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
                inputs=y_pred, sequence_length=input_length)
        else:
            (decoded, log_prob) = tf.nn.ctc_beam_search_decoder(
                inputs=y_pred, sequence_length=input_length,
                beam_width=beam_width, top_paths=top_paths)

        decoded_dense = [
            tf.sparse.to_dense(sp_input=st, default_value=-1)
            for st in decoded
        ]
        return (decoded_dense, log_prob)

    @staticmethod
    def ctc_loss_lambda_func(y_true, y_pred):
        """Return the batch-mean CTC loss.

        Args:
            y_true: Label ids padded with 0; squeezed when it has more than
                two dimensions.
            y_pred: Softmax output of the network.
        """
        # NOTE(review): tf.squeeze without an axis removes every size-1
        # dimension, including a batch dimension of 1 - confirm callers
        # never pass a single-sample batch.
        if len(y_true.shape) > 2:
            y_true = tf.squeeze(y_true)

        # y_pred.shape = (batch_size, string_length, alphabet_size_1_hot_encoded)
        # output of every model is softmax
        # so sum across alphabet_size_1_hot_encoded give 1
        # string_length give string length
        input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
        input_length = tf.math.reduce_sum(input_length, axis=-1,
                                          keepdims=True)

        # y_true strings are padded with 0
        # so sum of non-zero gives number of characters in this string
        label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True,
                                             dtype="int64")

        loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)

        # average loss across all entries in the batch
        loss = tf.reduce_mean(loss)
        return loss

    def architecture(self, input_size, d_model):
        """Build the gated-convolution + bidirectional-GRU network graph.

        Args:
            input_size: Shape of one input sample (without the batch axis).
            d_model: Number of output classes of the final softmax.

        Returns:
            ``(input_data, output_data)`` Keras tensors.
        """
        input_data = Input(name="input", shape=input_size)

        cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 2),
                     padding="same",
                     kernel_initializer="he_uniform")(input_data)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3),
                              padding="same")(cnn)

        cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                     padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3),
                              padding="same")(cnn)

        cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4),
                     padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same",
                              kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)

        cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1),
                     padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same",
                              kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)

        cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4),
                     padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)
        cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same",
                              kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
        cnn = Dropout(rate=0.2)(cnn)

        cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                     padding="same", kernel_initializer="he_uniform")(cnn)
        cnn = PReLU(shared_axes=[1, 2])(cnn)
        cnn = BatchNormalization(renorm=True)(cnn)

        # Merge the last two feature axes so axis 1 becomes the sequence
        # axis consumed by the recurrent layers.
        shape = cnn.get_shape()
        bgru = Reshape((shape[1], shape[2] * shape[3]))(cnn)

        bgru = Bidirectional(GRU(units=128, return_sequences=True,
                                 dropout=0.5))(bgru)
        bgru = Dense(units=256)(bgru)

        bgru = Bidirectional(GRU(units=128, return_sequences=True,
                                 dropout=0.5))(bgru)
        output_data = Dense(units=d_model, activation="softmax")(bgru)

        return (input_data, output_data)
# VGG16 convolutional base with ImageNet weights and no classifier head.
initial_model = VGG16(weights="imagenet", include_top=False,
                      input_shape=(DIMENSION, DIMENSION, 3))
last = initial_model.output

# Binary classification head: two Dense(NODE) + Dropout(0.4) stages and a
# single sigmoid unit.
x = Flatten()(last)
x = Dense(NODE, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(NODE, activation='relu')(x)
x = Dropout(0.4)(x)
preds = Dense(1, activation='sigmoid')(x)
model = Model(initial_model.input, preds)

#7. compile
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='binary_crossentropy', metrics=['acc'])

#8. fit
es = EarlyStopping(monitor='val_loss', patience=10)
# NOTE: the name `re` shadows the stdlib module of the same name; it is
# kept because later code may reference it.
re = ReduceLROnPlateau(monitor='val_loss', patience=5)

# fit() accepts generators directly; fit_generator() is deprecated and
# removed from newer Keras releases.  steps_per_epoch must be a whole
# number, so the batch count is rounded up (-(-a // b) is ceil(a / b)),
# which matches the number of steps the float value used to produce.
hist = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                 steps_per_epoch=int(-(-AUG_DATASET_LEN // 32)),
                 epochs=100,
                 callbacks=[es, re],
                 validation_data=datagen2.flow(X_val, y_val, batch_size=32))
# validation_split=0.2
# ValueError: `validation_split` is only supported for Tensors or NumPy arrays
# load weights if necessary model.load_weights('582-4731.7905.hdf5') # decay learning rate using cosine annealing lr_decay_schedule = CosineDecayRestarts(initial_learning_rate=1e-4, first_decay_steps=2000) current_step = 0 lr = lambda: lr_decay_schedule(current_step) # construct AdamW optimizer adamw_optimizer = AdamW(learning_rate=lr, weight_decay=1e-4) # compile model model.compile(adamw_optimizer, loss="mean_absolute_error", metrics=['acc']) ############################################################# # Run training/testing ############################################################# # save weights when loss improves checkpoint = ModelCheckpoint('{epoch:04d}-{val_loss:.4f}.hdf5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True) # callback that increments the global step value increment_step_callback = IncrementStepCallback()
def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.

    Copy of the function in keras-team/keras because it's not in the public
    API. If we use the one from keras-team/keras it won't work with tf.keras.

    The routine checks, in order: weight get/set on the layer, the output
    dtype and shape through the functional API, an optional comparison with
    ``expected_output``, a model config round-trip, one training step when
    the layer's ``call`` accepts ``training``, and a layer config round-trip.

    Args:
        layer_cls: Layer class under test.
        kwargs: Constructor keyword arguments (``None`` means none).
        input_shape: Input shape including the batch axis; ``None`` entries
            are replaced by a random size in [1, 3] when data is generated.
            Required when ``input_data`` is not given.
        input_dtype: Input dtype; defaults to ``K.floatx()`` for generated
            data, or to the dtype of ``input_data``.
        input_data: Concrete input array; random data is generated if absent.
        expected_output: Optional reference output (compared at rtol=1e-3).
        expected_output_dtype: Expected output dtype; defaults to the input
            dtype.
        fixed_batch_size: Build the ``Input`` with the full batch shape.

    Returns:
        The model output for ``input_data``, for further checks by the caller.
    """
    # A shared mutable default ({}) is avoided; None stands for "no kwargs".
    kwargs = {} if kwargs is None else kwargs

    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = [
            np.random.randint(1, 4) if dim is None else dim
            for dim in input_shape
        ]
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    expected_output_shape = layer.compute_output_shape(input_shape)

    # test in functional API
    if fixed_batch_size:
        x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    assert K.dtype(y) == expected_output_dtype

    # check with the functional API
    model = Model(x, y)

    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        # None marks a dimension whose size is not fixed by the layer.
        if expected_dim is not None:
            assert expected_dim == actual_dim

    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    custom_objects = {layer.__class__.__name__: layer.__class__}
    recovered_model = model.__class__.from_config(model_config,
                                                  custom_objects)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config; the rebuilt layer is not used
    # further, the call only has to succeed.
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
kernel_regularizer=regularizers.l2(0.001),
recurrent_regularizer=regularizers.l2(0.001),
dropout=0.4)(x)
# (The three lines above close a layer call that starts before this excerpt.)
x = BatchNormalization()(x)
# Second recurrent layer; return_sequences=True keeps one output per time
# step.
x = GRU(256, return_sequences=True, stateful=False,
        kernel_regularizer=regularizers.l2(0.001),
        recurrent_regularizer=regularizers.l2(0.001),
        dropout=0.4)(x)
x = BatchNormalization()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.4)(x)
# 3-class softmax.
train_out = Dense(3, activation='softmax')(x)

training_model = Model(inputs=train_in, outputs=train_out)
# sample_weight_mode='temporal' goes together with the `sample_weight`
# matrix passed to fit() below.
training_model.compile(loss='sparse_categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'],
                       sample_weight_mode = 'temporal')
training_model.summary()
# NOTE(review): `streaming_model` is referenced here, but the only visible
# "define streaming model" section comes after this call - confirm it is
# defined earlier in the file, or that `training_model` was intended.
plot_model(streaming_model, to_file='streaming_model.png')

# training
history = training_model.fit(train_data, train_label,
                             batch_size=16, epochs=30,
                             validation_data=(val_data, val_label),
                             sample_weight=weight_matrix )
training_model.save_weights('weights_0_30_silence_balance.hd5',
                            overwrite=True)

# define streaming model
# Shape of train_data with its first two axes dropped.
streaming_in_shape = train_data.shape[2:]
# --- Create logits logits = {} #logits['lbl'] = layers.AveragePooling3D(pool_size=(1, bf.shape[2], bf.shape[3]), padding='same', name='lbl')(bf) logits['lbl'] = layers.Conv3D(filters=1, kernel_size=(1, 1, 1), activation='sigmoid', name='lbl')(f0) # --- Create model model = Model(inputs=inputs, outputs=logits) # --- Compile model model.compile( optimizer=optimizers.Adam(learning_rate=5e-5), #loss=losses.Huber(delta=0.042), loss=losses.MeanAbsoluteError(), metrics=['mse', 'mae', 'mape'], experimental_run_tf_function=False) # --- Load data into memory for faster training client.load_data_in_memory() # --- TensorBoard #tensor_board = TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, write_images=True) # --- Learning rate scheduler lr_scheduler = callbacks.LearningRateScheduler(lambda epoch, lr: lr * 0.996) # --- csv Callback num = '0091' path = '/home/treuters/breast-density/dense-net/experiments/exp-' + num + '/'
class Traintf:
    """Train and evaluate one dense regression network per target field.

    The target columns are ``nFix``, ``FFD``, ``GPT``, ``TRT`` and
    ``fixProp``; every remaining column that is not in the drop list of
    :meth:`get_labels` is used as an input feature.
    """

    def __init__(self, df, split_rat=0.1, cross_valid=10, test_df=None,
                 batch_size=32, epochs=80):
        """Store the data and training settings.

        Args:
            df: Full data frame (features and targets).
            split_rat: Fraction of ``df`` held out as the test set.  When
                ``None``, ``df`` is used entirely for training and
                ``test_df`` must be provided.
            cross_valid: Number of iterations run by :meth:`model_process`;
                ``None`` disables cross validation.
            test_df: Explicit test frame, required when ``split_rat`` is
                ``None``.
            batch_size: Batch size for ``fit``.
            epochs: Number of epochs for ``fit``.
        """
        self.df = df
        self.ratio = split_rat
        if split_rat is None:
            assert test_df is not None
            self.traindf = self.df
            self.testdf = test_df
            print("Length of the test set ", len(self.traindf), len(test_df))
        else:
            # The helper returns (ratio-sized part, remainder): the small
            # part becomes the test set, the remainder the training set.
            self.testdf, self.traindf = self.__test_train_split()
        self.cross = cross_valid is not None
        self.cvalid = cross_valid
        self.X = None
        self.Y = None
        self.trainmodel = None
        self.pred = None
        self.batch_size = batch_size
        self.epochs = epochs
        self.i = None
        self.model = None

    def get_labels(self, test_train=True):
        """Fill ``self.X`` (features) and ``self.Y`` (targets).

        Args:
            test_train: ``True`` reads the training frame, ``False`` the
                test frame.
        """
        labels = ['nFix', 'FFD', 'GPT', 'TRT', 'fixProp']
        drop_labels = ['sentence_id', 'word', 'Unnamed: 0', 'otag', 'utag',
                       'crossreftime', 'GPT-FFD', 'TRT-GPT', 'pps', 'phonem']
        df = self.traindf if test_train else self.testdf
        df = df[df.columns[~df.columns.isin(drop_labels)]]
        self.Y = df[labels]
        self.X = df[df.columns[~df.columns.isin(labels)]]

    def lr_rate(self, epoch, lr):
        """Learning-rate schedule: 1e-5 up to epoch 30, then decay by e^-0.01.

        Args:
            epoch: Epoch index supplied by the scheduler callback.
            lr: Current learning rate.

        Returns:
            The learning rate to use for ``epoch``.
        """
        if epoch > 30:
            lr = lr * tf.math.exp(-0.01)
        else:
            lr = 1e-5
        return lr

    def norma(self, x):
        """Min-max scale ``x``.

        Returns:
            ``(scaled, minimum, maximum)`` with the extrema as NumPy arrays.
        """
        return (x - x.min()) / (x.max() - x.min()), x.min().to_numpy(), \
            x.max().to_numpy()

    def train(self, fields=None, load_previous=False, old=None, crnt=0):
        """Train (or continue training) one network per field.

        The best model per field (lowest ``val_loss``) is checkpointed to
        ``fm_<crnt>_<field>`` and the final model is saved to
        ``temp_model_<field>``.  The test split serves as validation data.

        Args:
            fields: Target column name or list of names (required).
            load_previous: Load ``temp_model_<field>`` instead of building a
                new network.
            old: Unused.
            crnt: Index used in the checkpoint file name.
        """
        assert fields is not None
        if isinstance(fields, str):
            fields = [fields]

        self.get_labels(test_train=False)
        X_test, Y_test = self.X, self.Y
        self.get_labels()
        print("Test size", X_test.shape)
        print("training size ", self.X.shape)

        for field in fields:
            print("Traing the NN for field - ", field)
            callback = tf.keras.callbacks.LearningRateScheduler(
                self.lr_rate, verbose=False)

            if not load_previous:
                print("==============Building models=============")
                n_features = self.X.to_numpy().shape[-1]
                # `shape` must be a tuple; "(n)" without a trailing comma
                # is a bare int, which newer Keras releases reject.
                inputs = tf.keras.Input(shape=(n_features,), name='embed')
                x = layers.Dense(1024, activation='relu')(inputs)
                x = layers.Dropout(rate=0.2, seed=10)(x)
                x = layers.Dense(512, activation='relu')(x)
                x = layers.Dense(512, activation='relu')(x)
                x = layers.Dropout(rate=0.2, seed=10)(x)
                x = layers.Dense(256, activation='relu')(x)
                x = layers.Dense(256, activation='relu')(x)

                nfix = layers.Dropout(rate=0.2)(x)
                nfix = layers.Dense(64, activation='relu',
                                    name='nfix0')(nfix)
                nfix = layers.Dense(16, activation='relu',
                                    name='nfix2')(nfix)
                nfix = layers.Dense(1, activation='relu', name='NFIX')(nfix)

                self.model = Model(inputs, [nfix])
                self.model.compile(optimizer='adam', loss='mae')
            else:
                print("==============Loading models=============")
                self.model = load_model("temp_model_" + field)

            # summary() prints by itself and returns None, so it is no
            # longer wrapped in print().
            self.model.summary()
            print(self.X.columns.to_list()[:20])
            keras.backend.set_value(self.model.optimizer.learning_rate, 1e-5)

            es = EarlyStopping(monitor='val_loss', mode='min', verbose=False,
                               patience=30)
            mc = ModelCheckpoint('fm_' + str(crnt) + "_" + field,
                                 monitor='val_loss', mode='min', verbose=0,
                                 save_best_only=True)
            self.model.fit(self.X.to_numpy(), self.Y[field].to_numpy(),
                           epochs=self.epochs,
                           batch_size=self.batch_size,
                           verbose=True,
                           validation_data=(X_test.to_numpy(),
                                            Y_test[field].to_numpy()),
                           callbacks=[callback, mc, es],
                           use_multiprocessing=True)
            self.model.save("temp_model_" + field)

    def test(self, fields=None):
        """Predict every field on the test split and print its MAE.

        Loads the checkpoint ``fm_0_<field>`` for each field.

        Args:
            fields: Target column name or list of names (required).

        Returns:
            DataFrame with one column of predictions per field.
        """
        assert fields is not None
        if isinstance(fields, str):
            fields = [fields]

        self.get_labels(test_train=False)
        print(self.X.shape, self.X.columns.to_list())
        x = pd.DataFrame()
        # The loop previously iterated an undefined name `t` (NameError).
        for field in fields:
            print("Test data size ", len(self.X))
            model = load_model("fm_0_" + field)
            v = model.predict(self.X.to_numpy())
            x[field] = np.ravel(v)
            # The target is selected by column name; the former positional
            # slice of the flattened label matrix was only right when
            # `fields` listed all five labels in their original order.
            print("Mae for {}".format(field),
                  mean_absolute_error(v, self.Y[field].to_numpy()))
        return x

    def __test_train_split(self):
        """Split ``self.df``; the first returned part has size ``self.ratio``."""
        tr, te = train_test_split(self.df, train_size=self.ratio)
        return tr, te

    def model_process(self, fields):
        """Run :meth:`test` once per cross-validation iteration.

        Training is currently disabled here, so every iteration evaluates
        the existing ``fm_0_<field>`` checkpoints.

        Returns:
            DataFrame with the predictions of all iterations concatenated
            column-wise.
        """
        if not self.cross:
            self.cvalid = 1
        final_val = pd.DataFrame()
        for i in range(self.cvalid):
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Processing cross validation iteration {}".format(i))
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++")
            # Training is intentionally disabled; re-enable with
            # self.train(fields, load_previous=(i > 0), old=i - 1, crnt=i).
            fv = self.test(fields)
            final_val = pd.concat([final_val, fv], axis=1)
        return final_val
class RelevanceModel:
    """
    Wrapper around a Keras model for training, prediction, evaluation and
    saving of a relevance (search ML) model.

    The wrapped Keras model is stored in `self.model`. `self.is_compiled`
    records whether the model was built and compiled by this class (True) or
    loaded from disk with ``compile=False`` (False, inference only).
    """

    def __init__(
        self,
        feature_config: FeatureConfig,
        tfrecord_type: str,
        file_io: FileIO,
        scorer: Optional[ScorerBase] = None,
        metrics: Optional[List[Union[Type[kmetrics.Metric], str]]] = None,
        optimizer: Optional[Optimizer] = None,
        model_file: Optional[str] = None,
        initialize_layers_dict: Optional[dict] = None,
        freeze_layers_list: Optional[list] = None,
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Constructor to instantiate a RelevanceModel that can be used for
        training and evaluating the search ML task

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        scorer : `ScorerBase` object
            Scorer object that wraps an InteractionModel and converts
            input features into scores
        metrics : list, optional
            List of keras Metric classes that will be used for evaluating the trained model.
            Defaults to an empty list
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights.
            Defaults to an empty dictionary
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models.
            Defaults to an empty list
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        logger : `Logger`, optional
            logging handler for status messages. When omitted, status
            messages are silently dropped
        """
        # Fresh containers per call instead of shared mutable defaults
        metrics = [] if metrics is None else metrics
        initialize_layers_dict = (
            {} if initialize_layers_dict is None else initialize_layers_dict
        )
        freeze_layers_list = [] if freeze_layers_list is None else freeze_layers_list

        self.feature_config: FeatureConfig = feature_config
        self.logger: Logger = logger
        self.output_name = output_name
        self.scorer = scorer
        self.tfrecord_type = tfrecord_type
        self.file_io = file_io

        if scorer:
            self.max_sequence_size = scorer.interaction_model.max_sequence_size
        else:
            self.max_sequence_size = 0

        # Load/Build Model
        if model_file and not compile_keras_model:
            # If a model file is specified, load it without compiling into a keras model
            #
            # NOTE:
            # This will allow the model to be only used for inference and
            # cannot be used for retraining.
            self.model: Model = self.load(model_file)
            self.is_compiled = False
        else:
            # Specify inputs to the model
            #
            # Individual input nodes are defined for each feature
            # Each data point represents features for all records in a single query
            inputs: Dict[str, Input] = feature_config.define_inputs()
            scores, train_features, metadata_features = scorer(inputs)

            # Create model with functional Keras API
            self.model = Model(inputs=inputs, outputs={self.output_name: scores})
            self.model.output_names = [self.output_name]

            # Get loss fn
            loss_fn = scorer.loss.get_loss_fn(**metadata_features)

            # Get metric objects
            metrics_impl: List[Union[str, kmetrics.Metric]] = get_metrics_impl(
                metrics=metrics, feature_config=feature_config, metadata_features=metadata_features
            )

            # Compile model
            #
            # NOTE:
            # Related Github issue: https://github.com/tensorflow/probability/issues/519
            self.model.compile(
                optimizer=optimizer,
                loss=loss_fn,
                metrics=metrics_impl,
                experimental_run_tf_function=False,
            )

            # Write model summary to logs
            model_summary = list()
            self.model.summary(print_fn=lambda x: model_summary.append(x))
            self._log_info("\n".join(model_summary))

            if model_file:
                # If model file is specified, load the weights from the SavedModel
                #
                # NOTE:
                # The architecture, loss and metrics of self.model need to
                # be the same as the loaded SavedModel
                self.load_weights(model_file)

            # Initialize layer weights
            for layer_name, layer_file in initialize_layers_dict.items():
                layer = self.model.get_layer(layer_name)
                layer.set_weights(self.file_io.load_numpy_array(layer_file, unzip=True))
                self._log_info("Setting {} weights from {}".format(layer_name, layer_file))

            # Freeze layer weights
            for layer_name in freeze_layers_list:
                layer = self.model.get_layer(layer_name)
                layer.trainable = False
                self._log_info("Freezing {} layer".format(layer_name))

            self.is_compiled = True

    def _log_info(self, message: str) -> None:
        """Emit `message` at INFO level if a logger was provided; otherwise do nothing."""
        if self.logger:
            self.logger.info(message)

    def _log_warning(self, message: str) -> None:
        """Emit `message` at WARNING level if a logger was provided; otherwise do nothing."""
        if self.logger:
            self.logger.warning(message)

    @classmethod
    def from_relevance_scorer(
        cls,
        feature_config: FeatureConfig,
        interaction_model: InteractionModel,
        model_config: dict,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        tfrecord_type: str,
        file_io: FileIO,
        model_file: Optional[str] = None,
        initialize_layers_dict: Optional[dict] = None,
        freeze_layers_list: Optional[list] = None,
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Create a RelevanceModel with default Scorer function
        constructed from an InteractionModel

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        interaction_model : `InteractionModel` object
            InteractionModel object that converts input features into a
            dense feature representation
        model_config : dict
            Model configuration forwarded to `RelevanceScorer`
        loss : `RelevanceLossBase` object
            Loss object defining the final activation layer and the loss function
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        logger : `Logger`, optional
            logging handler for status messages

        Returns
        -------
        RelevanceModel
            RelevanceModel object with a default scorer build with a custom
            InteractionModel
        """
        assert isinstance(interaction_model, InteractionModel)
        assert isinstance(loss, RelevanceLossBase)

        scorer: ScorerBase = RelevanceScorer(
            model_config=model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=output_name,
        )

        # Normalise here so the constructor always receives real containers
        return cls(
            scorer=scorer,
            feature_config=feature_config,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=tfrecord_type,
            model_file=model_file,
            initialize_layers_dict=(
                {} if initialize_layers_dict is None else initialize_layers_dict
            ),
            freeze_layers_list=[] if freeze_layers_list is None else freeze_layers_list,
            compile_keras_model=compile_keras_model,
            output_name=output_name,
            file_io=file_io,
            logger=logger,
        )

    @classmethod
    def from_univariate_interaction_model(
        cls,
        model_config,
        feature_config: FeatureConfig,
        tfrecord_type: str,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        feature_layer_keys_to_fns: Optional[dict] = None,
        model_file: Optional[str] = None,
        initialize_layers_dict: Optional[dict] = None,
        freeze_layers_list: Optional[list] = None,
        compile_keras_model: bool = False,
        output_name: str = "score",
        max_sequence_size: int = 0,
        file_io: FileIO = None,
        logger=None,
    ):
        """
        Create a RelevanceModel with default UnivariateInteractionModel

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        model_config : dict
            dictionary defining the dense model architecture
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        loss : `RelevanceLossBase` object
            Loss object defining the final activation layer and the loss function
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        feature_layer_keys_to_fns : dict, optional
            Dictionary of custom feature transformation functions to be applied
            on the input features as part of the InteractionModel
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        max_sequence_size : int, optional
            Maximum length of the sequence to be used for SequenceExample protobuf objects
        logger : `Logger`, optional
            logging handler for status messages

        Returns
        -------
        RelevanceModel
            RelevanceModel object with a UnivariateInteractionModel
        """
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns=(
                {} if feature_layer_keys_to_fns is None else feature_layer_keys_to_fns
            ),
            tfrecord_type=tfrecord_type,
            max_sequence_size=max_sequence_size,
        )

        return cls.from_relevance_scorer(
            interaction_model=interaction_model,
            model_config=model_config,
            feature_config=feature_config,
            loss=loss,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=tfrecord_type,
            model_file=model_file,
            initialize_layers_dict=(
                {} if initialize_layers_dict is None else initialize_layers_dict
            ),
            freeze_layers_list=[] if freeze_layers_list is None else freeze_layers_list,
            compile_keras_model=compile_keras_model,
            output_name=output_name,
            file_io=file_io,
            logger=logger,
        )

    def fit(
        self,
        dataset: RelevanceDataset,
        num_epochs: int,
        models_dir: str,
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
        monitor_metric: str = "",
        monitor_mode: str = "",
        patience=2,
    ):
        """
        Trains model for defined number of epochs
        and returns the training and validation metrics as a dictionary

        Parameters
        ----------
        dataset : `RelevanceDataset` object
            RelevanceDataset object to be used for training and validation
        num_epochs : int
            Value specifying number of epochs to train for
        models_dir : str
            Directory to save model checkpoints
        logs_dir : str, optional
            Directory to save model logs
            If set to False, no progress logs will be written
        logging_frequency : int, optional
            Every #batches to log results
        monitor_metric : str, optional
            Name of the metric to monitor for early stopping, checkpointing.
            A "val_" prefix is added when missing. When left empty, no
            checkpoint or early stopping callback is created
        monitor_mode : {"max", "min"}
            Whether to maximize or minimize the monitoring metric
        patience : int
            Number of epochs to wait before early stopping

        Returns
        -------
        train_metrics : dict
            Train and validation metrics in a single dictionary
            where key is metric name and value is floating point metric value.
            This dictionary will be used for experiment tracking for each ml4ir run

        Raises
        ------
        NotImplementedError
            If the model was loaded without compiling (`is_compiled` is False)
        """
        # Only prefix a metric that was actually provided. Prefixing the empty
        # default would yield the truthy string "val_" and make
        # _build_callback_hooks monitor a metric that does not exist.
        if monitor_metric and not monitor_metric.startswith("val_"):
            monitor_metric = "val_{}".format(monitor_metric)

        callbacks_list: list = self._build_callback_hooks(
            models_dir=models_dir,
            logs_dir=logs_dir,
            is_training=True,
            logging_frequency=logging_frequency,
            monitor_mode=monitor_mode,
            monitor_metric=monitor_metric,
            patience=patience,
        )

        if self.is_compiled:
            history = self.model.fit(
                x=dataset.train,
                validation_data=dataset.validation,
                epochs=num_epochs,
                verbose=True,
                callbacks=callbacks_list,
            )

            # Write metrics for experiment tracking
            # Returns a dictionary
            train_metrics = dict()
            for metric, value in history.history.items():
                if not metric.startswith("val_"):
                    # NOTE:
                    # Prepend "train_" to metrics on training dataset
                    # to differentiate from validation and test metrics
                    # in the final experiment results
                    #
                    # History is a dict of key: list(values per epoch)
                    # We are capturing the metrics of the last epoch (-1)
                    train_metrics["train_{}".format(metric)] = value[-1]
                else:
                    train_metrics[metric] = value[-1]

            return train_metrics
        else:
            raise NotImplementedError(
                "The model could not be trained. "
                "Check if the model was compiled correctly."
                " Training loaded SavedModel is not currently supported."
            )

    def predict(
        self,
        test_dataset: data.TFRecordDataset,
        inference_signature: str = "serving_default",
        additional_features: Optional[dict] = None,
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
    ):
        """
        Predict the scores on the test dataset using the trained model

        Parameters
        ----------
        test_dataset : `Dataset` object
            `Dataset` object for which predictions are to be made
        inference_signature : str, optional
            If using a SavedModel for prediction, specify the inference
            signature to be used for computing scores
        additional_features : dict, optional
            Dictionary containing new feature name and function definition to
            compute them. Use this to compute additional features from the scores.
            For example, converting ranking scores for each document into ranks for
            the query
        logs_dir : str, optional
            Path to directory to save logs
        logging_frequency : int
            Value representing how often(in batches) to log status

        Returns
        -------
        `pd.DataFrame` or None
            pandas DataFrame containing the predictions on the test dataset
            made with the `RelevanceModel`. When `logs_dir` is given the
            predictions are written to a CSV file in that directory instead
            and None is returned
        """
        additional_features = {} if additional_features is None else additional_features

        if logs_dir:
            outfile = os.path.join(logs_dir, RelevanceModelConstants.MODEL_PREDICTIONS_CSV_FILE)
            # Delete file if it exists
            self.file_io.rm_file(outfile)

        _predict_fn = get_predict_fn(
            model=self.model,
            tfrecord_type=self.tfrecord_type,
            feature_config=self.feature_config,
            inference_signature=inference_signature,
            is_compiled=self.is_compiled,
            output_name=self.output_name,
            features_to_return=self.feature_config.get_features_to_log(),
            additional_features=additional_features,
            max_sequence_size=self.max_sequence_size,
        )

        predictions_df_list = list()
        batch_count = 0
        for predictions_dict in test_dataset.map(_predict_fn).take(-1):
            predictions_df = pd.DataFrame(predictions_dict)
            if logs_dir:
                if os.path.isfile(outfile):
                    predictions_df.to_csv(outfile, mode="a", header=False, index=False)
                else:
                    # If writing first time, write headers to CSV file
                    predictions_df.to_csv(outfile, mode="w", header=True, index=False)
            else:
                predictions_df_list.append(predictions_df)

            batch_count += 1
            if batch_count % logging_frequency == 0:
                self._log_info("Finished predicting scores for {} batches".format(batch_count))

        predictions_df = None
        if logs_dir:
            self._log_info("Model predictions written to -> {}".format(outfile))
        else:
            predictions_df = pd.concat(predictions_df_list)

        return predictions_df

    def evaluate(
        self,
        test_dataset: data.TFRecordDataset,
        inference_signature: str = None,
        additional_features: Optional[dict] = None,
        group_metrics_min_queries: int = 50,
        logs_dir: Optional[str] = None,
        logging_frequency: int = 25,
        compute_intermediate_stats: bool = True,
    ):
        """
        Evaluate the RelevanceModel

        Parameters
        ----------
        test_dataset: an instance of tf.data.dataset
        inference_signature : str, optional
            If using a SavedModel for prediction, specify the inference
            signature to be used for computing scores
        additional_features : dict, optional
            Dictionary containing new feature name and function definition to
            compute them. Use this to compute additional features from the scores.
            For example, converting ranking scores for each document into ranks for
            the query
        group_metrics_min_queries : int, optional
            Minimum count threshold per group to be considered for computing
            groupwise metrics
        logs_dir : str, optional
            Path to directory to save logs
        logging_frequency : int
            Value representing how often(in batches) to log status
        compute_intermediate_stats : bool
            Determines if group metrics and other intermediate stats on the test set should be computed

        Returns
        -------
        df_overall_metrics : `pd.DataFrame` object
            `pd.DataFrame` containing overall metrics
            (always None in this base implementation)
        df_groupwise_metrics : `pd.DataFrame` object
            `pd.DataFrame` containing groupwise metrics if
            group_metric_keys are defined in the FeatureConfig
            (always None in this base implementation)
        metrics_dict : dict
            metrics as a dictionary of metric names mapping to values

        Raises
        ------
        NotImplementedError
            If the model was loaded without compiling (`is_compiled` is False)

        Notes
        -----
        You can directly do a `model.evaluate()` only if the keras model is compiled

        Override this method to implement your own evaluation metrics.
        Only `test_dataset` is used by this base implementation.
        """
        if self.is_compiled:
            metrics_dict = self.model.evaluate(test_dataset)
            return None, None, dict(zip(self.model.metrics_names, metrics_dict))
        else:
            raise NotImplementedError

    def save(
        self,
        models_dir: str,
        preprocessing_keys_to_fns=None,
        postprocessing_fn=None,
        required_fields_only: bool = True,
        pad_sequence: bool = False,
    ):
        """
        Save the RelevanceModel as a tensorflow SavedModel to the `models_dir`

        There are two different serving signatures currently used to save the model:

        * `default`: default keras model without any pre/post processing wrapper

        * `tfrecord`: serving signature that allows keras model to be served using TFRecord proto messages.
          Allows definition of custom pre/post processing logic

        Additionally, each model layer is also saved as a separate numpy zipped
        array to enable transfer learning with other ml4ir models.

        Parameters
        ----------
        models_dir : str
            path to directory to save the model
        preprocessing_keys_to_fns : dict, optional
            dictionary mapping function names to tf.functions that should be
            saved in the preprocessing step of the tfrecord serving signature.
            Defaults to an empty dictionary
        postprocessing_fn: function
            custom tensorflow compatible postprocessing function to be used at serving time.
            Saved as part of the postprocessing layer of the tfrecord serving signature
        required_fields_only: bool
            boolean value defining if only required fields
            need to be added to the tfrecord parsing function at serving time
        pad_sequence: bool, optional
            Value defining if sequences should be padded for SequenceExample proto inputs at serving time.
            Set this to False if you want to not handle padded scores.

        Notes
        -----
        All the functions passed under `preprocessing_keys_to_fns` here must be
        serializable tensor graph operations
        """
        preprocessing_keys_to_fns = (
            {} if preprocessing_keys_to_fns is None else preprocessing_keys_to_fns
        )

        model_file = os.path.join(models_dir, "final")

        # Save model with default signature
        self.model.save(filepath=os.path.join(model_file, "default"))

        # Save model with custom signatures
        #
        # Currently supported
        # - signature to read TFRecord SequenceExample inputs
        self.model.save(
            filepath=os.path.join(model_file, "tfrecord"),
            signatures=define_serving_signatures(
                model=self.model,
                tfrecord_type=self.tfrecord_type,
                feature_config=self.feature_config,
                preprocessing_keys_to_fns=preprocessing_keys_to_fns,
                postprocessing_fn=postprocessing_fn,
                required_fields_only=required_fields_only,
                pad_sequence=pad_sequence,
                max_sequence_size=self.max_sequence_size,
            ),
        )

        # Save individual layer weights
        self.file_io.make_directory(os.path.join(model_file, "layers"), clear_dir=True)
        for layer in self.model.layers:
            try:
                self.file_io.save_numpy_array(
                    np_array=layer.get_weights(),
                    file_path=os.path.join(model_file, "layers", "{}.npz".format(layer.name)),
                    zip=True,
                )
            except FileNotFoundError:
                # Best effort: a layer that cannot be written is skipped
                self._log_warning(
                    "Error saving layer: {} due to FileNotFoundError. Skipping...".format(layer.name)
                )

        self._log_info("Final model saved to : {}".format(model_file))

    def load(self, model_file: str) -> Model:
        """
        Loads model from the SavedModel file specified

        Parameters
        ----------
        model_file : str
            path to file with saved tf keras model

        Returns
        -------
        `tf.keras.Model`
            Tensorflow keras model loaded from file

        Notes
        -----
        Retraining currently not supported!
        Would require compiling the model with the right loss and optimizer states
        """
        # NOTE:
        # There is currently a bug in Keras Model with saving/loading
        # models with custom losses and metrics.
        #
        # Therefore, we are currently loading the SavedModel with compile=False
        # The saved model signatures can be used for inference at serving time
        #
        # Ref:
        # https://github.com/keras-team/keras/issues/5916
        # https://github.com/tensorflow/tensorflow/issues/32348
        # https://github.com/keras-team/keras/issues/3977
        model = tf.keras.models.load_model(model_file, compile=False)

        self._log_info("Successfully loaded SavedModel from {}".format(model_file))
        self._log_warning("Retraining is not yet supported. Model is loaded with compile=False")

        return model

    def load_weights(self, model_file: str):
        """
        Load saved model with compile=False

        Parameters
        ----------
        model_file : str
            path to file with saved tf keras model
        """
        loaded_model = self.load(model_file)

        # Set weights of Keras model from the loaded model weights
        self.model.set_weights(loaded_model.get_weights())
        self._log_info("Weights have been set from SavedModel. RankingModel can now be trained.")

    def _build_callback_hooks(
        self,
        models_dir: str,
        logs_dir: Optional[str] = None,
        is_training=True,
        logging_frequency=25,
        monitor_metric: str = "",
        monitor_mode: str = "",
        patience=2,
    ):
        """
        Build callback hooks for the training and evaluation loop

        Parameters
        ----------
        models_dir : str
            Path to directory to save model checkpoints
        logs_dir : str
            Path to directory to save tensorboard logs
        is_training : bool, optional
            Whether we are building callbacks for training or evaluation
        logging_frequency : int, optional
            Logging frequency handed to the DebuggingCallback
        monitor_metric : str, optional
            Name of metric to be used for ModelCheckpoint and EarlyStopping callbacks.
            When empty, neither of those callbacks is created
        monitor_mode : {"max", "min"}, optional
            Mode for maximizing or minimizing the ModelCheckpoint and EarlyStopping
        patience : int, optional
            Number of epochs to wait before early stopping if metric change is below tolerance

        Returns
        -------
        callbacks_list : list
            List of callbacks to be used with the RelevanceModel training and evaluation
        """
        callbacks_list: list = list()

        if is_training:
            # Model checkpoint
            if models_dir and monitor_metric:
                checkpoints_path = os.path.join(
                    models_dir, RelevanceModelConstants.CHECKPOINT_FNAME
                )
                cp_callback = callbacks.ModelCheckpoint(
                    filepath=checkpoints_path,
                    save_weights_only=False,
                    verbose=1,
                    save_best_only=True,
                    mode=monitor_mode,
                    monitor=monitor_metric,
                )
                callbacks_list.append(cp_callback)

            # Early Stopping
            if monitor_metric:
                early_stopping_callback = callbacks.EarlyStopping(
                    monitor=monitor_metric,
                    mode=monitor_mode,
                    patience=patience,
                    verbose=1,
                    restore_best_weights=True,
                )
                callbacks_list.append(early_stopping_callback)

        # TensorBoard
        if logs_dir:
            tensorboard_callback = callbacks.TensorBoard(
                log_dir=logs_dir, histogram_freq=1, update_freq=5
            )
            callbacks_list.append(tensorboard_callback)

        # Debugging/Logging
        callbacks_list.append(DebuggingCallback(self.logger, logging_frequency))

        # Add more here

        return callbacks_list
def define_unet(img_rows, img_cols, optimizer):
    """Build and compile a U-net taking `img_rows` x `img_cols` 3-channel inputs.

    The network has four down-sampling stages (32, 64, 128, 256 filters), a
    512-filter bottleneck and four mirrored up-sampling stages, each of which
    is concatenated with the output of the matching down-sampling stage. The
    final layer is a 1x1 sigmoid convolution with 3 output channels.

    Args:
        img_rows: first spatial dimension of the input shape.
        img_cols: second spatial dimension of the input shape.
        optimizer: optimizer handed to `Model.compile`.

    Returns:
        The Keras Model, compiled with 'mse' as both loss and metric.
    """

    def conv_pair(tensor, n_filters):
        # Two stacked 3x3 same-padded ReLU convolutions.
        for _ in range(2):
            tensor = Convolution2D(
                n_filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def up_and_merge(tensor, skip, n_filters):
        # Up-sample 2x, apply a 2x2 convolution, then concatenate with the
        # skip tensor (convolved branch first, skip second).
        enlarged = UpSampling2D(size=(2, 2))(tensor)
        reduced = Convolution2D(
            n_filters, (2, 2), activation='relu', padding='same')(enlarged)
        return Concatenate()([reduced, skip])

    inputs = Input(shape=(img_rows, img_cols, 3))

    # Contracting path: remember each stage's output for the skip links.
    skip_tensors = []
    current = inputs
    for n_filters in (32, 64, 128, 256):
        current = conv_pair(current, n_filters)
        skip_tensors.append(current)
        current = MaxPooling2D(pool_size=(2, 2))(current)

    # Bottleneck.
    current = conv_pair(current, 512)

    # Expanding path: deepest skip tensor is consumed first.
    for n_filters, skip in zip((256, 128, 64, 32), reversed(skip_tensors)):
        current = up_and_merge(current, skip, n_filters)
        current = conv_pair(current, n_filters)

    outputs = Convolution2D(3, (1, 1), activation='sigmoid')(current)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return model
if pretrain is not None else None, input_shape=(stack_length, 224, 224, 3 if mode == 'rgb' else 2)) x = tf.keras.layers.Reshape((1024, ))(backbone.output) x = Dense(64, activation='relu', kernel_initializer='he_uniform')(x) x = Dropout(0.5)(x) x = Dense(32, activation='relu', kernel_initializer='he_uniform')(x) x = Dropout(0.5)(x) x = Dense(1, kernel_initializer='he_uniform', use_bias=False)(x) x = BiasLayer(y_nums)(x) output = Activation('sigmoid')(x) model = Model(backbone.input, output) model_checkpoint = ModelCheckpoint(str( models_path.joinpath('{epoch:02d}-{val_mae_od:.2f}.h5')), period=1) model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0001, decay=1e-3 / STEP_SIZE_TRAIN), metrics=[mae_od]) his = model.fit(train_dataset, validation_data=val_dataset, epochs=epochs, callbacks=[model_checkpoint, wandbcb], verbose=1) # %% Save history to csv and images history = his.history save_history(history_path, history) plot_history(history_path, history)
def build_model(self, config: dict):
    """Build and compile a two-headed (policy, value) convolutional network.

    Reads from `config`: 'board_size', 'kernel_size', 'n_filters',
    'weight_decay', 'n_middle_blocks', 'value_head_dense_layer_size',
    'learning_rate' and, optionally, 'head_inputs_fixed'.

    The trunk is one Conv2D/BatchNormalization/ReLU block followed by
    `n_middle_blocks` identical blocks. The policy head ends in a softmax
    over board_size**2 units; the value head ends in a single tanh unit.

    Returns:
        A Keras Model with outputs [policy, value], compiled with
        categorical cross-entropy and mean squared error losses and Adam.
    """
    board_size = config['board_size']
    kernel = config['kernel_size']
    n_filters = config['n_filters']
    input_shape = (board_size, board_size, 1)

    def weight_penalty():
        # A fresh L2 regularizer per layer, as each layer gets its own instance.
        return l2(config['weight_decay'])

    def norm_relu(tensor):
        # Batch normalization (axis=1) followed by ReLU.
        normalized = layers.BatchNormalization(axis=1)(tensor)
        return layers.Activation('relu')(normalized)

    # Input convolutional layer
    inputs = Input(shape=input_shape)
    stem = layers.Conv2D(
        filters=n_filters,
        kernel_size=kernel,
        padding='same',
        input_shape=input_shape,
        kernel_regularizer=weight_penalty(),
    )(inputs)
    trunk = norm_relu(stem)

    # Middle blocks
    for _ in range(config['n_middle_blocks']):
        trunk = layers.Conv2D(
            filters=n_filters,
            kernel_size=kernel,
            padding='same',
            input_shape=input_shape,
            kernel_regularizer=weight_penalty(),
        )(trunk)
        trunk = norm_relu(trunk)

    if config.get('head_inputs_fixed', False):
        head_inputs = input_shape
    else:
        head_inputs = input_shape[1:]

    # Policy head
    policy = layers.Conv2D(
        filters=n_filters,
        kernel_size=kernel,
        input_shape=head_inputs,
        kernel_regularizer=weight_penalty(),
    )(trunk)
    policy = norm_relu(policy)
    policy = layers.Flatten()(policy)
    policy = layers.Dense(
        board_size**2, kernel_regularizer=weight_penalty())(policy)
    policy_outputs = layers.Activation('softmax')(policy)

    # Value head
    value = layers.Conv2D(
        filters=1,
        kernel_size=1,
        input_shape=head_inputs,
        kernel_regularizer=weight_penalty(),
    )(trunk)
    value = norm_relu(value)
    value = layers.Flatten()(value)
    value = layers.Dense(
        config['value_head_dense_layer_size'],
        kernel_regularizer=weight_penalty(),
    )(value)
    value = layers.Activation('relu')(value)
    value = layers.Dense(1, kernel_regularizer=weight_penalty())(value)
    value_output = layers.Activation('tanh')(value)

    model = Model(inputs, [policy_outputs, value_output])
    model.compile(
        loss=['categorical_crossentropy', 'mean_squared_error'],
        optimizer=Adam(config['learning_rate']),
    )
    return model
x = resNet(inputs, training=True) x = GlobalAveragePooling2D()(x) # A Dense classifier with a single unit (binary classification) outputs = Dense(num_classes, activation='softmax')(x) model = Model(inputs, outputs) model.summary() # Horovod: adjust learning rate based on number of GPUs. scaled_lr = 1. * hvd.size() opt = tf.keras.optimizers.Adadelta(scaled_lr) # Horovod: add Horovod Distributed Optimizer. opt = hvd.DistributedOptimizer(opt) model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy, optimizer=opt, metrics=['accuracy']) tensorboard = TensorBoard(log_dir='logs/{}'.format(time())) callbacks = [ # Horovod: broadcast initial variable states from rank 0 to all other processes. # This is necessary to ensure consistent initialization of all workers when # training is started with random weights or restored from a checkpoint. hvd.callbacks.BroadcastGlobalVariablesCallback(0), # Horovod: average metrics among workers at the end of every epoch. # # Note: This callback must be in the list before the ReduceLROnPlateau, # TensorBoard or other metrics-based callbacks. hvd.callbacks.MetricAverageCallback(), # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
class LanguageDEC:
    '''LanguageDEC model.

    Combines an encoder that extracts features with a Deep Embedded
    Clustering layer (``DECLayer``, or ``MDECLayer`` when ``robust`` is set)
    that assigns the flattened encoder output to one cluster per language
    according to a target distribution.
    '''

    def __init__(self,
                 encoder=None,
                 dir_path=None,
                 languages=None,
                 model_id='',
                 robust=False):
        '''Wire the encoder to a clustering layer.

        Args:
            encoder: Keras model whose ``input`` / ``output`` tensors are
                reused to build the clustering model.
            dir_path: base directory for checkpoints and exported models;
                defaults to the directory containing this file.
            languages: cluster names, one cluster per entry; defaults to
                ``['en', 'de', 'cn', 'fr', 'ru']``.
            model_id: suffix used in log and checkpoint file names.
            robust: use ``MDECLayer`` instead of ``DECLayer``.
        '''
        self.model_id = model_id
        self.robust = robust
        # creating properties
        if dir_path is None:
            dir_path = os.path.dirname(os.path.realpath(__file__))
        self.dir_path = dir_path
        # A fresh list per instance avoids sharing a mutable default.
        if languages is None:
            languages = ['en', 'de', 'cn', 'fr', 'ru']
        self.languages = languages
        self.n_lang = len(languages)
        self.encoder = encoder
        # creating model
        flattened = Flatten(name='flattened_encoder_output')(
            self.encoder.output)
        if robust:
            dec = MDECLayer(self.n_lang, name='clustering')
        else:
            dec = DECLayer(self.n_lang, name='clustering')
        prediction = dec(flattened)
        self.model = Model(inputs=self.encoder.input, outputs=prediction)

    def compile(self, optimizer='sgd', loss='kld'):
        '''Compile the clustering model (eagerly) and print its summary.'''
        self.model.compile(optimizer=optimizer, loss=loss, run_eagerly=True)
        self.model.summary()

    def extract_features(self, x):
        '''Return the encoder output for ``x`` flattened to 2-D.'''
        features = self.encoder.predict(x)
        features = features.reshape((features.shape[0], -1))
        return features

    def predict(self, x):
        '''Return the index of the most likely cluster for each sample.'''
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    def calulate_target_distribution(self, q):
        '''Sharpen the soft assignments ``q`` into the target distribution.

        Each entry is squared and divided by its column sum, then every row
        is renormalised to sum to one.
        '''
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    # Correctly spelled alias; the original name is kept for existing callers.
    calculate_target_distribution = calulate_target_distribution

    def write_training_log(self, key=None, value=''):
        '''Append an entry to ``logs/dec_logs_<model_id>.txt``.

        Args:
            key: ``None`` writes the model summary, ``'Distance'`` writes the
                pairwise euclidean distances between the first weight array
                of the clustering layer, anything else writes
                ``"<key>: <value>"``.
            value: text written next to ``key``.
        '''
        dir_path = os.path.dirname(os.path.realpath(__file__))
        log_path = f'{dir_path}/logs/dec_logs_{self.model_id}.txt'
        # The logs directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        # Append mode creates the file when it is missing.
        with open(log_path, 'a') as text_file:
            if key is None:
                # write model summary
                self.model.summary(
                    print_fn=lambda x: text_file.write(x + '\n'))
            elif key == 'Distance':
                # write distance between cluster centers
                centroids = self.model.get_layer(
                    name='clustering').get_weights()[0]
                dists = euclidean_distances(centroids)
                print(f'Distances:\n{dists}', file=text_file)
            else:
                print(f'{key}: {value}', file=text_file)

    def initialize(self, training_data, training_label=None, robust=False):
        '''Initialise the weights of the clustering layer.

        Args:
            training_data: samples passed through the encoder.
            training_label: integer language index per sample. When empty or
                omitted (non-robust model only) the centres come from KMeans.
            robust: unused; kept for interface compatibility. The instance
                attribute ``self.robust`` decides the path.

        Raises:
            ValueError: the model is robust but no labels were given.
        '''
        features = self.extract_features(training_data)
        # Comparing a plain list with an int yields a scalar, not a mask, so
        # always work on an array.
        labels = np.asarray([] if training_label is None else training_label)

        if self.robust:
            if labels.size == 0:
                raise ValueError(
                    'robust initialization requires training labels')
            mean_list = []
            inv_cov_list = []
            for i in range(len(self.languages)):
                lang_features = features[labels == i]
                df = pd.DataFrame(lang_features)
                mean, inv_cov = robust_mahalanobis_params(df)
                mean_list.append(mean)
                inv_cov_list.append(inv_cov)
            weights = [np.array(mean_list), np.array(inv_cov_list)]
        else:
            if labels.size == 0:
                # init using kmean
                kmeans = KMeans(n_clusters=self.n_lang)
                kmeans.fit_predict(features)
                cluster_centers = kmeans.cluster_centers_
            else:
                # init using outlier removal & averaging
                cluster_centers = []
                for i in range(len(self.languages)):
                    lang_features = features[labels == i]
                    # Rows flagged 1 by the helper are kept.
                    is_inlier = isolation_forest_method(lang_features)
                    before = int(lang_features.shape[0])
                    lang_features = lang_features[is_inlier == 1]
                    after = int(lang_features.shape[0])
                    n_removed = before - after
                    self.write_training_log('Removed',
                                            f'{n_removed} outliers')
                    cluster_centers.append(
                        np.average(lang_features, axis=0))
            weights = [np.array(cluster_centers)]

        self.model.get_layer(name='clustering').set_weights(weights)

    def fit(self,
            x,
            y,
            x_test=None,
            y_test=None,
            max_iteration=512,
            batch_size=128,
            update_interval=64,
            **kwargs):
        '''Train on the full data set for ``max_iteration`` steps.

        Every step recomputes the target distribution from the current
        predictions and calls ``train_on_batch`` on all of ``x``. Every
        ``update_interval`` steps the model is evaluated and logged; when a
        test set is supplied, the encoder and clustering weights are
        checkpointed whenever the test accuracy improves and every language
        is predicted at least once.

        Args:
            x, y: training samples and their language indices.
            x_test, y_test: optional held-out set. Without it no test
                evaluation or checkpointing takes place.
            max_iteration: number of training steps.
            batch_size: unused (training is full-batch); kept for interface
                compatibility.
            update_interval: evaluation period in steps.
            **kwargs: forwarded to ``train_on_batch``.
        '''
        checkpoint_path = (
            f'{self.dir_path}/model_checkpoints/dec_{self.model_id}')
        os.makedirs(checkpoint_path, exist_ok=True)
        has_test = x_test is not None and y_test is not None

        np.save(f'{checkpoint_path}/train_x.npy', x)
        np.save(f'{checkpoint_path}/train_y.npy', y)
        if has_test:
            np.save(f'{checkpoint_path}/test_x.npy', x_test)
            np.save(f'{checkpoint_path}/test_y.npy', y_test)
        self.write_training_log()  # write the model summary

        best_acc = 0
        for ite in range(max_iteration):
            q = self.model.predict(x)
            p = self.calulate_target_distribution(q)
            # train all in 1 iteration
            loss = self.model.train_on_batch(x=x, y=p, **kwargs)

            # evaluate the clustering performance
            if ite % update_interval != 0:
                continue
            self.write_training_log(
                '================================================ite', ite)
            self.write_training_log('Distance')
            self.write_training_log('loss: ', loss)

            self.write_training_log('Prediction on train set: ', )
            y_pred = self.model.predict(x).argmax(1)
            Metrics.evaluate(y,
                             y_pred,
                             languages=self.languages,
                             model_id=self.model_id)

            if not has_test:
                continue
            self.write_training_log('Prediction on test set: ', )
            y_pred_test = self.model.predict(x_test).argmax(1)
            test_acc, pred_classes = Metrics.evaluate(
                y_test,
                y_pred_test,
                languages=self.languages,
                model_id=self.model_id)
            every_language_seen = (
                np.unique(pred_classes).size == self.n_lang)
            if test_acc > best_acc and every_language_seen:
                best_acc = test_acc
                self.encoder.save(
                    f'{checkpoint_path}/trained_encoder_ite{ite}.h5')
                centroids = self.model.get_layer(
                    name='clustering').get_weights()
                if self.robust:
                    # robust model has two weights
                    # first save the inversed variance matrix
                    np.save(f'{checkpoint_path}/VI_ite{ite}.npy',
                            centroids[1])
                    centroids = centroids[0]
                np.save(f'{checkpoint_path}/centroids_ite{ite}.npy',
                        centroids)

        # NOTE(review): this compat.v1 experimental export may be missing in
        # recent TensorFlow releases -- confirm against the pinned version.
        tf.compat.v1.keras.experimental.export_saved_model(
            self.model, f'{self.dir_path}/models/dec_{self.model_id}')
def _build_model(self, x, y):
    """Construct the model using feature and label statistics.

    Args:
        - x: temporal feature, indexed as x[sample, time, feature]
        - y: labels; a 3-D array selects online (per time step) prediction,
          a 2-D array selects one-shot prediction

    Returns:
        - model: compiled predictor model

    Raises:
        - ValueError: if ``self.task`` is neither "classification" nor
          "regression", or if ``y`` is not 2-D or 3-D.
    """
    # Validate first so a bad configuration fails with a clear message
    # instead of an unbound local variable further down.
    if self.task not in ("classification", "regression"):
        raise ValueError(
            "task must be 'classification' or 'regression', got %r" %
            (self.task, ))
    dim_y = len(y.shape)
    if dim_y not in (2, 3):
        raise ValueError("y must be 2-D or 3-D, got %d dimensions" % dim_y)

    # Parameters
    dim = len(x[0, 0, :])
    seq_len = len(x[0, :, 0])
    # NOTE(review): this list is empty for seq_len < 8 -- confirm TCN
    # accepts that or that such short sequences never occur.
    dilations = [2**i for i in range(int(np.log2(seq_len / 4)))]

    # Small hidden dimensions are better
    # NOTE(review): this rewrites self.h_dim, so building twice on the same
    # instance shrinks it twice -- confirm that is acceptable.
    if self.h_dim > 30:
        self.h_dim = int(self.h_dim / 5)

    # Optimizer
    self.adam = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         amsgrad=False)

    # Input
    inputs = Input(shape=(
        seq_len,
        dim,
    ))

    # First layer
    tcn_out = TCN(nb_filters=self.h_dim,
                  dilations=dilations,
                  return_sequences=True)(inputs)

    # Multi-layer
    for _ in range(self.n_layer - 2):
        tcn_out = TCN(nb_filters=self.h_dim,
                      dilations=dilations,
                      return_sequences=True)(tcn_out)

    # The two tasks differ only in output activation, loss and metrics.
    is_classification = self.task == "classification"
    activation = "sigmoid" if is_classification else "linear"
    online = dim_y == 3

    # Last TCN layer: keep the time axis only for online prediction.
    tcn_out = TCN(nb_filters=self.h_dim,
                  dilations=dilations,
                  return_sequences=online)(tcn_out)
    head = Dense(y.shape[-1], activation=activation, name="output")
    if online:
        output = TimeDistributed(head)(tcn_out)
    else:
        output = head(tcn_out)

    # Model define and compile
    tcn_model = Model(inputs=[inputs], outputs=[output])
    if is_classification:
        tcn_model.compile(loss=binary_cross_entropy_loss, optimizer=self.adam)
    else:
        tcn_model.compile(loss=mse_loss,
                          optimizer=self.adam,
                          metrics=["mse"])

    return tcn_model
def train_networks(training_percentage, filename, experiment):
    """Train one classifier/autoencoder network per training stage.

    For every stage a window of ``int(total * training_percentage)``
    consecutive samples (wrapping around the end of the data) is used for
    training and validation; the remaining samples are used for testing.
    Each trained model is saved under
    ``constants.model_filename(filename, stage)``.

    Returns:
        A list that holds, for every stage, the ``fit`` history followed by
        the ``evaluate`` result dictionary.
    """
    (data, labels) = get_data(experiment, one_hot=True)
    total = len(data)
    stages = constants.training_stages
    step = total / stages

    # Amount of training data, from which a percentage is used for
    # validation.
    training_size = int(total * training_percentage)

    def inside(array, lo, hi):
        # Samples in [lo, hi).
        return array[lo:hi]

    def outside(array, lo, hi):
        # Samples before lo followed by samples from hi onwards.
        return np.concatenate((array[0:lo], array[hi:total]), axis=0)

    def wrapped(array, lo, hi):
        # Samples from lo to the end followed by samples before hi.
        return np.concatenate((array[lo:total], array[0:hi]), axis=0)

    histories = []
    for stage in range(stages):
        offset = stage * step
        stop = int(offset + training_size) % total
        start = int(offset)

        if stop > start:
            training_data = inside(data, start, stop)
            training_labels = inside(labels, start, stop)
            testing_data = outside(data, start, stop)
            testing_labels = outside(labels, start, stop)
        else:
            # The training window wraps around the end of the data.
            training_data = wrapped(data, start, stop)
            training_labels = wrapped(labels, start, stop)
            testing_data = inside(data, stop, start)
            testing_labels = inside(labels, stop, start)

        training_data, training_labels = expand_data(training_data,
                                                     training_labels)
        # NOTE(review): the split point is derived from the size before
        # expand_data -- confirm this is intended if that helper changes the
        # number of samples.
        split = int(training_size * truly_training_percentage)
        validation_data = training_data[split:]
        validation_labels = training_labels[split:]
        training_data = training_data[:split]
        training_labels = training_labels[:split]

        input_img = Input(shape=(img_columns, img_rows, img_colors))
        encoded = get_encoder(input_img)
        classified = get_classifier(encoded)
        decoded = get_decoder(encoded)
        model = Model(inputs=input_img, outputs=[classified, decoded])
        model.compile(
            loss=['categorical_crossentropy', 'binary_crossentropy'],
            optimizer='adam',
            metrics='accuracy')
        model.summary()

        validation_targets = {
            'classification': validation_labels,
            'autoencoder': validation_data
        }
        fit_history = model.fit(
            training_data, (training_labels, training_data),
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(validation_data, validation_targets),
            callbacks=[EarlyStoppingAtLossCrossing(patience)],
            verbose=2)
        histories.append(fit_history)

        test_results = model.evaluate(testing_data,
                                      (testing_labels, testing_data),
                                      return_dict=True)
        histories.append(test_results)

        model.save(constants.model_filename(filename, stage))

    return histories
class DeconvNet:
    """Encoder/decoder segmentation network with a VGG16-shaped encoder.

    The encoder convolution kernels are initialised from ImageNet VGG16
    weights; the decoder mirrors the encoder with transposed convolutions and
    argmax-based unpooling. The output is a 21-channel per-pixel softmax.
    """

    def __init__(self, use_cpu=False, print_summary=False):
        self.maybe_download_and_extract()
        self.build(use_cpu=use_cpu, print_summary=print_summary)

    def maybe_download_and_extract(self):
        """Download and unpack VOC data if data folder only contains the .gitignore file"""
        if os.listdir('data') != ['.gitignore']:
            return
        filenames = [
            'VOC_OBJECT.tar.gz', 'VOC2012_SEG_AUG.tar.gz',
            'stage_1_train_imgset.tar.gz', 'stage_2_train_imgset.tar.gz'
        ]
        url = 'http://cvlab.postech.ac.kr/research/deconvnet/data/'
        for filename in filenames:
            archive_path = os.path.join('data', filename)
            wget.download(url + filename, out=archive_path)
            # NOTE(review): the archive is fetched over plain HTTP and
            # extracted without member validation -- only safe if the
            # source is trusted.
            with tarfile.open(archive_path) as tar:
                tar.extractall(path='data')
            os.remove(archive_path)

    def predict(self, image):
        """Run the model on a single image (wrapped into a batch of one)."""
        return self.model.predict(np.array([image]))

    def save(self, file_path='model.h5'):
        """Print the model architecture as JSON and save the weights."""
        print(self.model.to_json())
        self.model.save_weights(file_path)

    def load(self, file_path='model.h5'):
        """Load weights previously written by ``save``."""
        self.model.load_weights(file_path)

    def random_crop_or_pad(self, image, truth, size=(224, 224)):
        """Bring ``image`` and ``truth`` to ``size`` by random crop or zero pad.

        Each axis is handled independently: an axis longer than the target is
        cropped at a random offset (the same offset for image and truth), any
        other axis is zero-padded at the end.

        Args:
            image: array indexed as (row, column, channel).
            truth: array indexed as (row, column); its first two dimensions
                must match ``image``.
            size: target (rows, columns).

        Returns:
            The ``(image, truth)`` pair, both with ``size`` as their first
            two dimensions.
        """
        assert image.shape[:2] == truth.shape[:2]
        target_h, target_w = size[0], size[1]

        if image.shape[0] > target_h:
            top = random.randint(0, image.shape[0] - target_h)
            image = image[top:top + target_h, :, :]
            truth = truth[top:top + target_h, :]
        else:
            padded = np.zeros((target_h, image.shape[1], image.shape[2]),
                              dtype=np.float32)
            padded[:image.shape[0], :image.shape[1], :] = image
            image = padded
            padded = np.zeros((target_h, truth.shape[1]), dtype=np.float32)
            padded[:truth.shape[0], :truth.shape[1]] = truth
            truth = padded

        if image.shape[1] > target_w:
            left = random.randint(0, image.shape[1] - target_w)
            # Crop to the requested width rather than a fixed 224 so that
            # non-default sizes work as well.
            image = image[:, left:left + target_w, :]
            truth = truth[:, left:left + target_w]
        else:
            padded = np.zeros((image.shape[0], target_w, image.shape[2]))
            padded[:image.shape[0], :image.shape[1], :] = image
            image = padded
            padded = np.zeros((truth.shape[0], target_w))
            padded[:truth.shape[0], :truth.shape[1]] = truth
            truth = padded

        return image, truth

    def max_pool_with_argmax(self, x):
        """2x2 / stride-2 max pooling that also returns the argmax indices."""
        return tf.nn.max_pool_with_argmax(x,
                                          ksize=[1, 2, 2, 1],
                                          strides=[1, 2, 2, 1],
                                          padding='SAME')

    def BatchGenerator(self,
                       train_stage=1,
                       batch_size=8,
                       image_size=(224, 224, 3),
                       labels=21):
        """Yield ``(images, one_hot_truths)`` batches forever.

        Samples are drawn at random (with replacement) from the train list of
        the selected stage. Images are scaled to [0, 1].

        Label ids: 0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat,
        5=bottle, 6=bus, 7=car, 8=cat, 9=chair, 10=cow, 11=diningtable,
        12=dog, 13=horse, 14=motorbike, 15=person, 16=potted plant, 17=sheep,
        18=sofa, 19=train, 20=tv/monitor, 255=no_label.
        """
        if train_stage == 1:
            list_path = 'data/stage_1_train_imgset/train.txt'
        else:
            list_path = 'data/stage_2_train_imgset/train.txt'
        with open(list_path) as list_file:
            trainset = list_file.readlines()

        while True:
            images = np.zeros(
                (batch_size, image_size[0], image_size[1], image_size[2]))
            truths = np.zeros(
                (batch_size, image_size[0], image_size[1], labels))

            for i in range(batch_size):
                fields = random.choice(trainset).split(' ')
                image_file = fields[0]
                # Strip only the line terminator so that a final line without
                # a newline keeps its last character.
                truth_file = fields[1].rstrip('\r\n')
                image = np.float32(cv2.imread('data' + image_file) / 255.0)
                truth_mask = cv2.imread('data' + truth_file,
                                        cv2.IMREAD_GRAYSCALE)
                truth_mask[truth_mask ==
                           255] = 0  # replace no_label with background
                images[i], truth = self.random_crop_or_pad(
                    image, truth_mask, image_size)
                # encode to one-hot-vector
                # NOTE(review): the ``- 1`` shifts every class id down by
                # one, so id 0 yields an all-zero vector and the last channel
                # is never set for ids 0..20 -- confirm this is intended.
                truths[i] = (np.arange(labels) == truth[..., None] -
                             1).astype(int)
            yield images, truths

    def train(self, steps_per_epoch=1000, epochs=10, batch_size=32):
        """Fit the model on batches produced by ``BatchGenerator``."""
        batch_generator = self.BatchGenerator(batch_size=batch_size)
        # NOTE(review): fit_generator is deprecated in newer Keras releases;
        # kept so as not to change the required Keras version.
        self.model.fit_generator(batch_generator,
                                 steps_per_epoch=steps_per_epoch,
                                 epochs=epochs)

    def buildConv2DBlock(self, block_input, filters, block, depth):
        """Stack ``depth`` conv -> batch-norm -> relu units on ``block_input``.

        Layers are named ``conv{block}-{i}``, ``batchnorm{block}-{i}`` and
        ``relu{block}-{i}`` for ``i`` in 1..depth.
        """
        conv2d = block_input
        for i in range(1, depth + 1):
            conv2d = Conv2D(filters,
                            3,
                            padding='same',
                            name='conv{}-{}'.format(block, i),
                            use_bias=False)(conv2d)
            conv2d = BatchNormalization(
                name='batchnorm{}-{}'.format(block, i))(conv2d)
            conv2d = Activation('relu',
                                name='relu{}-{}'.format(block, i))(conv2d)
        return conv2d

    @staticmethod
    def _conv_bn_relu(tensor, layer_cls, filters, kernel, padding, name):
        """Apply ``layer_cls`` (bias-free), batch-norm and relu to ``tensor``.

        The three layers are named ``name``, ``batchnorm_<name>`` and
        ``relu_<name>``.
        """
        tensor = layer_cls(filters,
                           kernel,
                           use_bias=False,
                           padding=padding,
                           name=name)(tensor)
        tensor = BatchNormalization(name='batchnorm_' + name)(tensor)
        return Activation('relu', name='relu_' + name)(tensor)

    def build(self, use_cpu=False, print_summary=False):
        """Build and compile ``self.model`` and load the VGG16 conv kernels.

        Args:
            use_cpu: place the graph on ``/cpu:0`` instead of ``/gpu:0``.
            print_summary: print the Keras model summary after building.
        """
        # Loaded once; only the convolution kernels are copied below.
        vgg16 = VGG16(weights="imagenet",
                      include_top=False,
                      input_shape=(224, 224, 3))
        device = '/cpu:0' if use_cpu else '/gpu:0'

        # (filters, depth) of encoder blocks 1..5
        encoder_spec = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
        # (block, filters of each transposed convolution), deepest first
        decoder_spec = (
            (5, (512, 512, 512)),
            (4, (512, 512, 256)),
            (3, (256, 256, 128)),
            (2, (128, 64)),
            (1, (64, 64)),
        )

        with tf.device(device):
            inputs = Input(shape=(224, 224, 3))

            # Encoder: keep each block output (for its shape) and the
            # pooling argmax (for unpooling in the decoder).
            conv_blocks = []
            argmaxes = []
            x = inputs
            for block, (filters, depth) in enumerate(encoder_spec, start=1):
                conv_block = self.buildConv2DBlock(x, filters, block, depth)
                x, argmax = Lambda(self.max_pool_with_argmax,
                                   name='pool{}'.format(block))(conv_block)
                conv_blocks.append(conv_block)
                argmaxes.append(argmax)

            # Fully-convolutional bottleneck (512 filters per layer)
            x = self._conv_bn_relu(x, Conv2D, 512, 7, 'valid', 'fc6')
            x = self._conv_bn_relu(x, Conv2D, 512, 1, 'valid', 'fc7')
            x = self._conv_bn_relu(x, Conv2DTranspose, 512, 7, 'valid',
                                   'deconv-fc6')

            # Decoder: unpool with the matching argmax, then deconvolve.
            for block, filter_list in decoder_spec:
                x = MaxUnpoolWithArgmax(argmaxes[block - 1],
                                        name='unpool{}'.format(block))(x)
                x.set_shape(conv_blocks[block - 1].get_shape())
                for depth, filters in enumerate(filter_list, start=1):
                    x = self._conv_bn_relu(
                        x, Conv2DTranspose, filters, 3, 'same',
                        'deconv{}-{}'.format(block, depth))

            output = Conv2DTranspose(21,
                                     1,
                                     activation='softmax',
                                     padding='same',
                                     name='output')(x)

            self.model = Model(inputs=inputs, outputs=output)

        if print_summary:
            # summary() prints by itself; wrapping it in print() would add a
            # stray "None" line.
            self.model.summary()

        for layer in self.model.layers:
            if layer.name.startswith('conv'):
                block, depth = layer.name[4:].split('-')[:2]
                # apply vgg16 weights without bias
                layer.set_weights([
                    vgg16.get_layer('block{}_conv{}'.format(
                        block, depth)).get_weights()[0]
                ])

        self.model.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics=['accuracy', 'mse'])
def obtain_features(model_prefix,
                    features_prefix,
                    labels_prefix,
                    data_prefix,
                    training_percentage,
                    am_filling_percentage,
                    experiment,
                    occlusion=None,
                    bars_type=None):
    """ Generate features for images.

    Uses the previously trained neural networks for generating the features
    corresponding to the images. It may introduce occlusions.

    For every training stage the data is split into testing, training and
    filling subsets. The stage's saved model is loaded and its classifier
    output is evaluated on the testing subset. The activations of
    ``classifier.layers[-4]`` are then saved as features, next to the data
    and labels of every subset.

    Args:
        model_prefix: prefix of the saved model files.
        features_prefix, labels_prefix, data_prefix: prefixes of the output
            ``.npy`` files.
        training_percentage: fraction of all samples used as training data.
        am_filling_percentage: unused here; kept for interface compatibility.
        experiment, occlusion, bars_type: forwarded to ``get_data``.

    Returns:
        A list with one evaluation dictionary per stage.

    Raises:
        ValueError: if there are fewer samples than training stages.
    """
    (data, labels) = get_data(experiment, occlusion, bars_type)

    total = len(data)
    step = int(total / constants.training_stages)
    if step <= 0:
        raise ValueError('not enough data for the number of training stages')

    # Amount of data used for training the networks
    trdata = int(total * training_percentage)

    # Amount of data used for testing memories
    tedata = step

    histories = []
    # Iterate over the stages themselves: stepping through the data could
    # yield one iteration more than there are trained models when the number
    # of samples is not a multiple of the number of stages.
    for n in range(constants.training_stages):
        i = n * step
        j = (i + tedata) % total

        if j > i:
            testing_data = data[i:j]
            testing_labels = labels[i:j]
            other_data = np.concatenate((data[0:i], data[j:total]), axis=0)
            other_labels = np.concatenate((labels[0:i], labels[j:total]),
                                          axis=0)
            training_data = other_data[:trdata]
            training_labels = other_labels[:trdata]
            filling_data = other_data[trdata:]
            filling_labels = other_labels[trdata:]
        else:
            # The testing window wraps around the end of the data.
            testing_data = np.concatenate((data[0:j], data[i:total]), axis=0)
            testing_labels = np.concatenate((labels[0:j], labels[i:total]),
                                            axis=0)
            training_data = data[j:j + trdata]
            training_labels = labels[j:j + trdata]
            filling_data = data[j + trdata:i]
            filling_labels = labels[j + trdata:i]

        # Recreate the exact same model, including its weights and the optimizer
        model = tf.keras.models.load_model(
            constants.model_filename(model_prefix, n))

        # Drop the autoencoder and the last layers of the full connected neural network part.
        classifier = Model(model.input, model.output[0])
        # NOTE(review): without num_classes the width depends on the largest
        # label present in this fold -- confirm every fold contains the
        # highest class.
        one_hot = to_categorical(testing_labels)
        classifier.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics='accuracy')
        history = classifier.evaluate(testing_data,
                                      one_hot,
                                      batch_size=batch_size,
                                      verbose=1,
                                      return_dict=True)
        print(history)
        histories.append(history)

        model = Model(classifier.input, classifier.layers[-4].output)
        model.summary()

        training_features = model.predict(training_data)
        if len(filling_data) > 0:
            filling_features = model.predict(filling_data)
        else:
            # Empty placeholder with the same feature width.
            filling_features = np.zeros((0, training_features.shape[1]))
        testing_features = model.predict(testing_data)

        subsets = {
            constants.training_suffix:
            (training_data, training_features, training_labels),
            constants.filling_suffix:
            (filling_data, filling_features, filling_labels),
            constants.testing_suffix:
            (testing_data, testing_features, testing_labels)
        }

        for suffix, (subset_data, subset_features,
                     subset_labels) in subsets.items():
            data_fn = constants.data_filename(data_prefix + suffix, n)
            features_fn = constants.data_filename(features_prefix + suffix, n)
            labels_fn = constants.data_filename(labels_prefix + suffix, n)

            np.save(data_fn, subset_data)
            np.save(features_fn, subset_features)
            np.save(labels_fn, subset_labels)

    return histories
# Classification head on top of the pre-trained network: dropout followed by
# a 1000-way softmax.
x = layers.Dropout(0.2)(x)
x = layers.Dense(1000, activation='softmax')(x)

model = Model(pre_trained_model.input, x)
model.summary()

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=15)
lr = ReduceLROnPlateau(patience=7, factor=0.6)
mc = ModelCheckpoint('../data/lotte/mc/lotte_v3_3.h5',
                     save_best_only=True,
                     verbose=1)

# `learning_rate` is the supported spelling; `lr` is a deprecated alias.
model.compile(optimizer=RMSprop(learning_rate=1e-6),
              loss='categorical_crossentropy',
              metrics=['acc'])
# The checkpoint callback was created but never handed to fit(), so the best
# model was never written to disk; it is now part of the callback list.
history = model.fit(x_train,
                    y_train,
                    callbacks=[es, lr, mc],
                    epochs=500,
                    validation_split=0.2)

loss, acc = model.evaluate(x_test, y_test, batch_size=16)
print('loss, acc : ', loss, acc)

result = model.predict(x_pred, verbose=True)

import pandas as pd
submission = pd.read_csv('../lotte/sample.csv')
noise1 = layers.GaussianNoise(test_dict[1])(conv1, training=True) pool1 = layers.MaxPooling2D(pool_size=(2, 2))(noise1) conv2 = layers.Conv2D(64, kernel_size=(3,3), activation='relu')(pool1) noise2 = layers.GaussianNoise(test_dict[2])(conv2, training=True) pool2 = layers.MaxPooling2D(pool_size=(2, 2))(noise2) conv3 = layers.Conv2D(64, kernel_size=(3,3), activation='relu')(pool2) noise1 = layers.GaussianNoise(test_dict[3])(conv3, training=True) flat = layers.Flatten()(noise1) hidden1 = layers.Dense(64, activation='relu')(flat) output = layers.Dense(10)(hidden1) model1 = Model(inputs=visible, outputs=output) model1.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) model1.set_weights(weights1) test_loss, test_acc = model1.evaluate(test_images, test_labels, verbose=0) fintest_trial.append(test_acc) parameters[name]=fintest_trial #testing clear model name=str(m)+'clearmodelLayer'+str(l) fintest_trial=[] for n in range(0, 2): print('n', n) del(model1) tf.compat.v1.reset_default_graph() if n==0: test_dict={1: 0, 2: 0, 3: 0} else:
class VanilllaGAN(gan.Model):
    """GAN with a generator and a discriminator trained with binary
    cross-entropy."""

    def __init__(self, model_parameters):
        super().__init__(model_parameters)

    def define_gan(self):
        """Build the generator, the discriminator and the combined model.

        The discriminator is compiled on its own. The combined model
        (``self._model``) feeds generated records into the discriminator,
        whose ``trainable`` flag is switched off first so that only the
        generator is updated through it.
        """
        self.generator = Generator(self.batch_size).\
            build_model(input_shape=(self.noise_dim,), dim=self.layers_dim, data_dim=self.data_dim)

        self.discriminator = Discriminator(self.batch_size).\
            build_model(input_shape=(self.data_dim,), dim=self.layers_dim)

        # One optimizer per compiled model: an optimizer instance keeps
        # per-variable state, and the two models train different variables.
        d_optimizer = Adam(self.lr, beta_1=self.beta_1, beta_2=self.beta_2)
        g_optimizer = Adam(self.lr, beta_1=self.beta_1, beta_2=self.beta_2)

        # Build and compile the discriminator
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=d_optimizer,
                                   metrics=['accuracy'])

        # The generator takes noise as input and generates imgs
        z = Input(shape=(self.noise_dim, ))
        record = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        validity = self.discriminator(record)

        # The combined model  (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self._model = Model(z, validity)
        self._model.compile(loss='binary_crossentropy', optimizer=g_optimizer)

    def get_data_batch(self, train, batch_size, seed=0):
        """Return batch number ``seed`` from a shuffled pass over ``train``.

        ``seed`` works as a step counter: consecutive values walk through a
        permutation of ``train.index`` in strides of ``batch_size``, and the
        permutation changes each time ``batch_size * seed`` passes another
        multiple of ``len(train)``.

        Note: reseeds the global NumPy random generator on every call.

        Args:
            train: frame supporting ``.index`` and ``.loc``.
            batch_size: number of rows per batch.
            seed: step counter selecting the batch.

        Returns:
            Array of shape ``(batch_size, -1)``.
        """
        # iterate through shuffled indices, so every sample gets covered evenly
        start_i = (batch_size * seed) % len(train)
        stop_i = start_i + batch_size
        shuffle_seed = (batch_size * seed) // len(train)
        np.random.seed(shuffle_seed)
        train_ix = np.random.choice(
            list(train.index), replace=False,
            size=len(train))  # wasteful to shuffle every time
        train_ix = list(train_ix) + list(
            train_ix)  # duplicate to cover ranges past the end of the set
        x = train.loc[train_ix[start_i:stop_i]].values
        return np.reshape(x, (batch_size, -1))

    def train(self, data, train_arguments):
        """Alternate discriminator and generator updates.

        Args:
            data: training frame, passed to ``get_data_batch``.
            train_arguments: ``[cache_prefix, epochs, sample_interval]``.
                Weights of both networks are saved under ``./cache`` every
                ``sample_interval`` steps.
        """
        [cache_prefix, epochs, sample_interval] = train_arguments

        # Adversarial ground truths
        valid = np.ones((self.batch_size, 1))
        fake = np.zeros((self.batch_size, 1))

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------
            # Pass the step as seed; with the default seed every step would
            # train on the very same batch.
            batch_data = self.get_data_batch(data,
                                             self.batch_size,
                                             seed=epoch)
            noise = tf.random.normal((self.batch_size, self.noise_dim))

            # Generate a batch of events
            gen_data = self.generator(noise, training=True)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(batch_data, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_data, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            noise = tf.random.normal((self.batch_size, self.noise_dim))
            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self._model.train_on_batch(noise, valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated events
            if epoch % sample_interval == 0:
                # Test here data generation step
                # save model checkpoints
                os.makedirs('./cache', exist_ok=True)
                model_checkpoint_base_name = './cache/' + cache_prefix + '_{}_model_weights_step_{}.h5'
                self.generator.save_weights(
                    model_checkpoint_base_name.format('generator', epoch))
                self.discriminator.save_weights(
                    model_checkpoint_base_name.format('discriminator', epoch))

                # Here is generating the data
                z = tf.random.normal((432, self.noise_dim))
                gen_data = self.generator(z)
                print('generated_data')
def main():
    """Train an LSTM + attention model on the "add two numbers" task.

    After every epoch the attention map for a fixed test batch is saved next
    to the expected mask as ``task_add_two_numbers/epoch_<NNN>.png``. The
    number of epochs is taken from the first command-line argument
    (default 200).
    """
    numpy.random.seed(7)

    # data. definition of the problem.
    seq_length = 20
    x_train, y_train = task_add_two_numbers_after_delimiter(
        20_000, seq_length)
    x_val, y_val = task_add_two_numbers_after_delimiter(4_000, seq_length)

    # just arbitrary values. it's for visual purposes. easy to see than random values.
    test_index_1 = 4
    test_index_2 = 9
    x_test, _ = task_add_two_numbers_after_delimiter(10, seq_length, 0,
                                                     test_index_1,
                                                     test_index_2)
    # x_test_mask is just a mask that, if applied to x_test, would still contain the information to solve the problem.
    # we expect the attention map to look like this mask.
    x_test_mask = np.zeros_like(x_test[..., 0])
    x_test_mask[:, test_index_1:test_index_1 + 1] = 1
    x_test_mask[:, test_index_2:test_index_2 + 1] = 1

    # model
    i = Input(shape=(seq_length, 1))
    x = LSTM(100, return_sequences=True)(i)
    x = attention_3d_block(x)
    x = Dropout(0.2)(x)
    x = Dense(1, activation='linear')(x)

    model = Model(inputs=[i], outputs=[x])
    model.compile(loss='mse', optimizer='adam')
    # summary() prints by itself; wrapping it in print() adds a stray "None".
    model.summary()

    output_dir = 'task_add_two_numbers'
    os.makedirs(output_dir, exist_ok=True)

    max_epoch = int(sys.argv[1]) if len(sys.argv) > 1 else 200

    class VisualiseAttentionMap(Callback):
        """Save the attention map of ``x_test`` after every epoch."""

        def on_epoch_end(self, epoch, logs=None):
            attention_map = get_activations(
                model, x_test,
                layer_name='attention_weight')['attention_weight']

            # top is attention map.
            # bottom is ground truth.
            plt.imshow(np.concatenate([attention_map, x_test_mask]),
                       cmap='hot')

            iteration_no = str(epoch).zfill(3)
            plt.axis('off')
            plt.title(f'Iteration {iteration_no} / {max_epoch}')
            plt.savefig(f'{output_dir}/epoch_{iteration_no}.png')
            # Closing the figure is enough; clearing afterwards would only
            # create a new empty figure.
            plt.close()

    model.fit(x_train,
              y_train,
              validation_data=(x_val, y_val),
              epochs=max_epoch,
              batch_size=64,
              callbacks=[VisualiseAttentionMap()])