def DNNclassifier_crps(self, p, num_cut, optimizer, seeding):
    """Build and compile a DNN classifier trained with the CRPS loss.

    The network stacks Dense/ELU/BatchNorm/Dropout blocks (sizes from
    self.hidden_list, dropout rates from self.dropout_list), ends in a
    (num_cut + 1)-way softmax, and converts that PMF to a CDF via
    self.tf_cumsum so the CRPS loss can be applied.

    Args:
        p: number of input features.
        num_cut: number of cut points; the softmax layer has num_cut + 1 units.
        optimizer: optimizer name (str) or a Keras optimizer instance; an
            instance is re-created from its config so the model does not
            share optimizer state with the caller's object.
        seeding: seed passed to tf.set_random_seed (TF 1.x API) for
            reproducibility.

    Returns:
        A compiled Keras Model mapping (p,)-shaped inputs to the cumulative
        class distribution.
    """
    tf.set_random_seed(seeding)
    inputs = Input(shape=(p,))
    if isinstance(optimizer, str):
        opt = optimizer
    else:
        # Clone the optimizer: look up its class by name and rebuild it from
        # its config, yielding a fresh instance with identical hyperparameters.
        opt_name = optimizer.__class__.__name__
        opt_config = optimizer.get_config()
        opt_class = getattr(optimizers, opt_name)
        opt = opt_class(**opt_config)
    for i, n_neuron in enumerate(self.hidden_list):
        # First hidden layer consumes the input tensor; later ones chain.
        if i == 0:
            net = Dense(n_neuron, kernel_initializer = 'he_uniform')(inputs)
        else:
            net = Dense(n_neuron, kernel_initializer = 'he_uniform')(net)
        net = Activation(activation = 'elu')(net)
        net = BatchNormalization()(net)
        net = Dropout(rate=self.dropout_list[i])(net)
    softmaxlayer = Dense(num_cut + 1, activation='softmax',
                         kernel_initializer = 'he_uniform')(net)
    # Cumulative sum turns the softmax probabilities into a CDF, which is
    # what the CRPS loss expects.
    output = Lambda(self.tf_cumsum)(softmaxlayer)
    model = Model(inputs = [inputs], outputs=[output])
    model.compile(optimizer=opt, loss=self.crps_loss)
    return model
def build_model(units, inputs_dim, output="regression", sparse_dim=[], with_ts=False, ts_maxlen=0):
    """Build a regression or binary-classification model over dense features,
    per-field embedded one-hot features, and (optionally) a time series.

    Args:
        units: width of the Dense/Embedding/LSTM layers.
        inputs_dim: sequence of input sizes; [0] is the dense-feature width,
            [1] the number of one-hot fields, [2] (if with_ts) the per-step
            time-series width.
        output: "regression" (MSE, adam) or "binary_clf" (sigmoid + binary
            cross-entropy, adam, 'acc' metric).
        sparse_dim: vocabulary size per one-hot field; must have
            inputs_dim[1] entries.  NOTE(review): mutable default argument
            ([]) — harmless here since it is never mutated, but `None` would
            be the safer idiom.
        with_ts: whether to add the LSTM branch (inputs3).
        ts_maxlen: time-series length hint passed to the LSTM's input_shape.

    Returns:
        A compiled Keras Model taking [inputs1, inputs2] (+ inputs3 when
        with_ts) and producing a single scalar output.
    """
    assert output == "regression" or output == "binary_clf", "This output type is not supported."
    assert len(sparse_dim) == inputs_dim[1], "Dimension not match."

    # Inputs for basic features.
    inputs1 = Input(shape=(inputs_dim[0], ), name="basic_input")
    x1 = Dense(units, kernel_regularizer='l2', activation="relu")(inputs1)

    # Inputs for long one-hot features.
    inputs2 = Input(shape=(inputs_dim[1], ), name="one_hot_input")
    for i in range(len(sparse_dim)):
        # NOTE(review): `slice` here is presumably a project helper that
        # extracts field i from inputs2 (it shadows the Python builtin, which
        # would not work as a layer input) — verify where it is defined.
        if i == 0:
            x2 = Embedding(sparse_dim[i], units, mask_zero=True)(slice(inputs2, i))
        else:
            tmp = Embedding(sparse_dim[i], units, mask_zero=True)(slice(inputs2, i))
            x2 = Concatenate()([x2, tmp])
    # Flatten the concatenated per-field embeddings into one vector per sample.
    x2 = tf.reshape(x2, [-1, units * inputs_dim[1]])
    x = Concatenate()([x1, x2])

    if with_ts:
        # Variable-length time-series branch reduced to its last LSTM state.
        # NOTE(review): return_sequences=0 relies on 0 being falsy; `False`
        # would be clearer.
        inputs3 = Input(shape=( None, inputs_dim[2], ), name="ts_input")
        x3 = LSTM(units, input_shape=(ts_maxlen, inputs_dim[2]), return_sequences=0)(inputs3)
        x = Concatenate()([x, x3])

    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)

    if output == "regression":
        x = Dense(1, kernel_regularizer='l2')(x)
        model = Model(inputs=[inputs1, inputs2], outputs=x)
        if with_ts:
            model = Model(inputs=[inputs1, inputs2, inputs3], outputs=x)
        model.compile(optimizer='adam', loss='mean_squared_error')
    elif output == "binary_clf":
        x = Dense(1, kernel_regularizer='l2', activation="sigmoid")(x)
        model = Model(inputs=[inputs1, inputs2], outputs=x)
        if with_ts:
            model = Model(inputs=[inputs1, inputs2, inputs3], outputs=x)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    #model.summary()
    return model
def gru_keras(max_features, maxlen, bidirectional, dropout_rate, embed_dim, rec_units, mtype='GRU', reduction=None, classes=4, lr=0.001):
    """Build and compile a CuDNN GRU/LSTM text classifier.

    Args:
        max_features: vocabulary size for the embedding layer.
        maxlen: input sequence length.
        bidirectional: if True, wrap the recurrent layer in Bidirectional.
        dropout_rate: SpatialDropout1D rate applied to the embeddings.
        embed_dim: embedding dimensionality.
        rec_units: number of recurrent units.
        mtype: 'GRU' or 'LSTM' (CuDNN variants).
        reduction: None to use only the final recurrent state; 'average' or
            'maximum' to pool over the full output sequence instead.
        classes: number of output classes.
        lr: learning rate for RMSprop (gradients clipped by value and norm).

    Returns:
        A compiled Keras Model.
    """
    # BUG FIX: K.backend is a function; the original compared the function
    # object itself to 'tensorflow', which is always False, so the session
    # was never cleared. Call it to get the backend name string.
    if K.backend() == 'tensorflow':
        K.clear_session()

    input_layer = Input(shape=(maxlen, ))
    embedding_layer = Embedding(max_features, output_dim=embed_dim, trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)

    if reduction:
        # Keep the whole output sequence so it can be pooled below.
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(CuDNNGRU(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=True)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(CuDNNLSTM(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=True)(x)

        if reduction == 'average':
            x = GlobalAveragePooling1D()(x)
        elif reduction == 'maximum':
            x = GlobalMaxPool1D()(x)
    else:
        # No pooling requested: use only the last recurrent state.
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(CuDNNGRU(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=False)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(CuDNNLSTM(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=False)(x)

    # NOTE(review): 'sigmoid' paired with categorical_crossentropy is unusual
    # for a single-label multi-class head ('softmax' is the standard pairing).
    # Left unchanged because switching it alters trained behavior — confirm
    # whether multi-label output was intended.
    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model
def create_model(self):
    """Assemble a 3-output linear regression head on an untrained Xception
    backbone and compile it with MSE loss and Adam.

    Returns:
        The compiled Keras Model taking (IM_HEIGHT, IM_WIDTH, 3) images.
    """
    backbone = Xception(weights=None, include_top=False,
                        input_shape=(IM_HEIGHT, IM_WIDTH, 3))
    # Collapse the spatial dimensions, then attach the 3-way linear head.
    pooled = GlobalAveragePooling2D()(backbone.output)
    head = Dense(3, activation="linear")(pooled)
    model = Model(inputs=backbone.input, outputs=head)
    model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"])
    # model.enable_eager_execution()
    return model
def build_model(hidden_size):
    """Build, compile, train, and evaluate a small MLP on 28x28 inputs.

    Trains for 5 epochs on the module-level x_train/y_train and evaluates on
    x_test/y_test (these globals must exist before calling).

    Args:
        hidden_size: number of units in the single hidden layer.

    Returns:
        [model, test_accuracy].
    """
    inputs = Input(shape=(28, 28))
    flat = Flatten()(inputs)
    hidden = Dense(hidden_size, activation=tf.nn.relu)(flat)
    regularized = Dropout(0.2)(hidden)
    probs = Dense(10, activation=tf.nn.softmax)(regularized)
    model = Model(inputs=inputs, outputs=probs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Train and fit model
    model.fit(x_train, y_train, epochs=5)
    [loss, acc] = model.evaluate(x_test, y_test)
    return [model, acc]
def create_critic_network(self):
    """Build the critic network: separate state and action branches merged by
    concatenation into per-action value estimates, compiled with MSE.

    Returns:
        (model, action_input, state_input) — the compiled Model plus its two
        input tensors, in that order.
    """
    # State branch: ReLU layer followed by a linear projection.
    state_input = Input(shape = [self.obs_dim])
    state_hidden = Dense(self.hidden_dim, activation = 'relu')(state_input)
    state_proj = Dense(self.hidden_dim, activation = 'linear')(state_hidden)
    # Action branch: a single linear projection.
    action_input = Input(shape = [self.act_dim], name = 'action2')
    action_proj = Dense(self.hidden_dim, activation = 'linear')(action_input)
    # Merge the branches by concatenation and map to one value per action.
    merged = concatenate([state_proj, action_proj])
    merged_hidden = Dense(self.hidden_dim, activation = 'relu')(merged)
    value_out = Dense(self.act_dim, activation = 'linear')(merged_hidden)
    model = Model(inputs = [state_input, action_input], outputs = [value_out])
    model.compile(loss = 'mse', optimizer = Adam(self.lr))
    return model, action_input, state_input
def cnn_keras(max_features, maxlen, dropout_rate, embed_dim, num_filters=300, classes=4, lr=0.001):
    """Build and compile a 1-D CNN text classifier.

    Args:
        max_features: vocabulary size for the embedding layer.
        maxlen: input sequence length.
        dropout_rate: SpatialDropout1D rate applied to the embeddings.
        embed_dim: embedding dimensionality.
        num_filters: number of Conv1D filters (kernel size 7, 'same' padding).
        classes: number of output classes.
        lr: learning rate for RMSprop (gradients clipped by value and norm).

    Returns:
        A compiled Keras Model.
    """
    # BUG FIX: K.backend is a function; the original compared the function
    # object itself to 'tensorflow', which is always False, so the session
    # was never cleared. Call it to get the backend name string.
    if K.backend() == 'tensorflow':
        K.clear_session()

    input_layer = Input(shape=(maxlen, ))
    embedding_layer = Embedding(max_features, output_dim=embed_dim, trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)
    x = Conv1D(num_filters, 7, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)

    # NOTE(review): 'sigmoid' paired with categorical_crossentropy is unusual
    # for a single-label multi-class head ('softmax' is the standard pairing).
    # Left unchanged because switching it alters trained behavior.
    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model
def keras_build_fn(num_feature, num_output, is_sparse, embedding_dim=-1,
                   num_hidden_layer=2, hidden_layer_dim=512, activation='elu',
                   learning_rate=1e-3, dropout=0.5, l1=0.0, l2=0.0,
                   loss='categorical_crossentropy'):
  """Initializes and compiles a Keras DNN model using the Adam optimizer.

  Args:
    num_feature: number of features
    num_output: number of outputs (targets, e.g., classes))
    is_sparse: boolean whether input data is in sparse format
    embedding_dim: int number of nodes in embedding layer; if value is <= 0
      then no embedding layer will be present in the model
    num_hidden_layer: number of hidden layers
    hidden_layer_dim: int number of nodes in the hidden layer(s)
    activation: string activation function for hidden layers; see
      https://keras.io/activations/
    learning_rate: float learning rate for Adam
    dropout: float proportion of nodes to dropout; values in [0, 1]
    l1: float strength of L1 regularization on weights
    l2: float strength of L2 regularization on weights
    loss: string loss function; see https://keras.io/losses/

  Returns:
    model: Keras.models.Model
      compiled Keras model
  """
  assert num_hidden_layer >= 1

  inputs = Input(shape=(num_feature, ), sparse=is_sparse)

  # Resolve the activation: advanced activations are layer classes taking no
  # positional args; anything else goes through the generic Activation layer.
  activation_func_args = ()
  if activation.lower() == 'prelu':
    activation_func = PReLU
  elif activation.lower() == 'leakyrelu':
    activation_func = LeakyReLU
  elif activation.lower() == 'elu':
    activation_func = ELU
  elif activation.lower() == 'thresholdedrelu':
    activation_func = ThresholdedReLU
  else:
    activation_func = Activation
    # BUG FIX: the original wrote (activation), which is just the string, so
    # activation_func(*activation_func_args) unpacked it character-by-character
    # (e.g. Activation('r', 'e', 'l', 'u')) and raised a TypeError. A
    # one-element tuple is what star-unpacking needs.
    activation_func_args = (activation,)

  # Build a fresh regularizer per layer (regularizer objects must not be
  # shared between layers).
  if l1 > 0 and l2 > 0:
    reg_init = lambda: regularizers.l1_l2(l1, l2)
  elif l1 > 0:
    reg_init = lambda: regularizers.l1(l1)
  elif l2 > 0:
    reg_init = lambda: regularizers.l2(l2)
  else:
    reg_init = lambda: None

  if embedding_dim > 0:
    # embedding layer
    e = Dense(embedding_dim)(inputs)
    x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(e)
    x = activation_func(*activation_func_args)(x)
    x = Dropout(dropout)(x)
  else:
    x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(inputs)
    x = activation_func(*activation_func_args)(x)
    x = Dropout(dropout)(x)

  # add additional hidden layers
  for _ in range(num_hidden_layer - 1):
    x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(x)
    x = activation_func(*activation_func_args)(x)
    x = Dropout(dropout)(x)

  x = Dense(num_output)(x)
  preds = Activation('softmax')(x)
  model = Model(inputs=inputs, outputs=preds)
  model.compile(optimizer=Adam(lr=learning_rate), loss=loss)
  return model
class seq2seq_train:
    """GRU-based encoder-decoder (seq2seq) model builder and trainer.

    All hyperparameters, paths, and the logger come from the cfg object
    passed to the constructor; the full model is built eagerly in __init__.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.enc_inp = None    # encoder input tensor
        self.enc_outp = None   # encoder final hidden state (decoder init state)
        self.dec_inp = None    # decoder input tensor (teacher forcing)
        self.dec_outp = None   # decoder softmax output tensor
        self.enc_model = None  # standalone encoder Model
        self.model = None      # full encoder+decoder training Model
        self.__get_model__()

    def __get_model__(self):
        """Construct the encoder and decoder graphs and the combined Model.

        Also logs both model summaries and writes a plot of the full model to
        cfg.scratch_dir()/seq2seq.png.
        """
        # Encoder: embedding -> batchnorm -> GRU; only the final hidden state
        # is kept (return_state=True, sequence output discarded).
        self.enc_inp = Input(shape=(self.cfg.input_seq_len(), ), name="Encoder-Input")
        embd = Embedding(self.cfg.num_input_tokens(), self.cfg.latent_dim(),
                         name='Encoder-Embedding', mask_zero=False)
        embd_outp = embd(self.enc_inp)
        x = BatchNormalization(name='Encoder-Batchnorm-1')(embd_outp)
        _, state_h = GRU(self.cfg.latent_dim(), return_state=True,
                         name='Encoder-Last-GRU')(x)
        self.enc_model = Model(inputs=self.enc_inp, outputs=state_h,
                               name='Encoder-Model')
        self.enc_outp = self.enc_model(self.enc_inp)
        # NOTE(review): Model.summary() prints and returns None, so this logs
        # the summary to stdout and then logs None.
        self.cfg.logger.info("********** Encoder Model summary **************")
        self.cfg.logger.info(self.enc_model.summary())

        # get the decoder
        # Decoder: embedding -> batchnorm -> GRU seeded with the encoder
        # state -> batchnorm -> softmax over the output vocabulary.
        self.dec_inp = Input(shape=(None, ), name='Decoder-Input')
        dec_emb = Embedding(self.cfg.num_output_tokens(), self.cfg.latent_dim(),
                            name='Decoder-Embedding', mask_zero=False)(self.dec_inp)
        dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)
        decoder_gru = GRU(self.cfg.latent_dim(), return_state=True,
                          return_sequences=True, name='Decoder-GRU')
        decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=self.enc_outp)
        x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)
        dec_dense = Dense(self.cfg.num_output_tokens(), activation='softmax',
                          name='Final-Output-Dense')
        self.dec_outp = dec_dense(x)

        model_inp = [self.enc_inp, self.dec_inp]
        self.model = Model(model_inp, self.dec_outp)
        self.cfg.logger.info("********** Full Model summary **************")
        self.cfg.logger.info(str(self.model.summary()))
        plot_model(self.model,
                   to_file=self.cfg.scratch_dir() + os.sep + "seq2seq.png")

    def fit_model(self, input_vecs, output_vecs):
        """Compile and fit the full model with teacher forcing.

        The decoder input is output_vecs shifted right (drop last token) and
        the target is output_vecs shifted left (drop first token), expanded to
        a trailing dim of 1 for sparse_categorical_crossentropy.

        Args:
            input_vecs: encoder input sequences.
            output_vecs: decoder target sequences (2-D array-like).

        Returns:
            The Keras History object from model.fit.
        """
        input_data = [input_vecs, output_vecs[:, :-1]]
        output_data = output_vecs[:, 1:]
        self.model.compile(optimizer=optimizers.Nadam(lr=0.001),
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])
        model_checkpoint = ModelCheckpoint(
            self.cfg.output_dir() + os.sep + 'model.hdf5',
            monitor='val_loss', save_best_only=True, period=1)
        csv_logger = CSVLogger(self.cfg.log_dir() + os.sep + 'history.csv')
        tb_dir = self.cfg.log_dir() + os.sep + "tensorboard"
        # NOTE(review): os.path.isfile on a directory returns False — this
        # likely intends os.path.isdir to clear an old tensorboard dir; as
        # written, rmtree only runs if tb_dir is a regular file. Confirm.
        if os.path.isfile(tb_dir):
            rmtree(tb_dir)
        tensorboard = TensorBoard(log_dir=tb_dir, histogram_freq=10,
                                  batch_size=self.cfg.batch_size(),
                                  write_graph=True, write_grads=False,
                                  write_images=False, embeddings_freq=0,
                                  embeddings_layer_names=None,
                                  embeddings_metadata=None,
                                  embeddings_data=None)
        history = self.model.fit(
            input_data,
            np.expand_dims(output_data, -1),
            batch_size=self.cfg.batch_size(),
            epochs=self.cfg.nepochs(),
            validation_split=self.cfg.validation_split(),
            callbacks=[csv_logger, model_checkpoint, tensorboard])
        return (history)
class SiameseNet(object):
    """Class for Siamese Network.

    Builds a twin network from `arch` via project helpers (util.make_layer_list,
    stack_layers), joins the twin outputs with a Euclidean-distance Lambda
    layer, and compiles a contrastive-loss model on top.
    """

    def __init__(self, inputs, arch, siam_reg, main_path, y_true):
        # inputs: dict with 'Unlabeled' and 'Labeled' input tensors, kept for
        # later use by predict().
        self.orig_inputs = inputs
        # set up inputs: branch A reuses the unlabeled input; branch B is a
        # fresh Input with the same per-sample shape.
        self.inputs = {
            'A': inputs['Unlabeled'],
            'B': Input(shape=inputs['Unlabeled'].get_shape().as_list()[1:]),
            'Labeled': inputs['Labeled'],
        }
        # NOTE(review): 'siemese/' looks like a typo for 'siamese/', but it is
        # a runtime path — changing it would break existing saved weights.
        self.main_path = os.path.join(main_path, 'siemese/')
        self.y_true = y_true
        # generate layers
        self.layers = []
        self.layers += util.make_layer_list(arch, 'siamese', siam_reg)
        # create the siamese net: stack_layers applies the same layer list to
        # every entry in self.inputs (shared weights across branches).
        self.outputs = stack_layers(self.inputs, self.layers)
        # add the distance layer
        self.distance = Lambda(affinities.euclidean_distance,
                               output_shape=affinities.eucl_dist_output_shape)(
                                   [self.outputs['A'], self.outputs['B']])
        # create the distance model for training
        self.net = Model([self.inputs['A'], self.inputs['B']], self.distance)
        # compile the siamese network
        self.net.compile(loss=affinities.get_contrastive_loss(m_neg=1, m_pos=0.05),
                         optimizer='rmsprop')

    def train(self, pairs_train, dist_train, pairs_val, dist_val, lr, drop,
              patience, num_epochs, batch_size, dset, load=True):
        """Train the Siamese Network.

        NOTE(review): with the default load=True this does NOT train — it
        loads previously saved weights from main_path/dset and returns.
        Pass load=False to actually fit and then save the weights.
        """
        if load:
            # load weights into model
            output_path = os.path.join(self.main_path, dset)
            load_model(self.net, output_path, '_siamese')
            return
        # create handler for early stopping and learning rate scheduling
        self.lh = util.LearningHandler(lr=lr, drop=drop,
                                       lr_tensor=self.net.optimizer.lr,
                                       patience=patience)
        # initialize the training generator
        train_gen_ = util.train_gen(pairs_train, dist_train, batch_size)
        # format the validation data for keras: pairs arrive as (n, 2, ...)
        # arrays and are split into the two branch inputs.
        validation_data = ([pairs_val[:, 0], pairs_val[:, 1]], dist_val)
        # compute the steps per epoch
        steps_per_epoch = int(len(pairs_train) / batch_size)
        # train the network
        self.net.fit_generator(train_gen_, epochs=num_epochs,
                               validation_data=validation_data,
                               steps_per_epoch=steps_per_epoch,
                               callbacks=[self.lh])
        # persist architecture + weights for later load-only runs
        model_json = self.net.to_json()
        output_path = os.path.join(self.main_path, dset)
        save_model(self.net, model_json, output_path, '_siamese')

    def predict(self, x, batch_sizes):
        # compute the siamese embeddings of the input data (branch A output),
        # delegating batching to the project-level train.predict helper.
        return train.predict(self.outputs['A'], x_unlabeled=x,
                             inputs=self.orig_inputs, y_true=self.y_true,
                             batch_sizes=batch_sizes)
epsilon=1e-3, center=True, scale=True)(x) # Flatten layer # x = Flatten()(x) # Dense Layer 1 x = Dense(256, activation='relu')(x) outputs = Dense(len(labels), activation="softmax")(x) model = Model(inputs, outputs) model.summary() model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy']) early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.0001) checkpoint = ModelCheckpoint('speech2text_model.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max') hist = model.fit(x=x_train, y=y_train, epochs=100,
class SiameseNet:
    """Siamese network with a Euclidean-distance head and contrastive loss.

    Variant without persistence: unlike the other SiameseNet in this file it
    takes no main_path/dset and always trains when train() is called.
    """

    def __init__(self, inputs, arch, siam_reg, y_true):
        # inputs: dict with 'Unlabeled' and 'Labeled' input tensors, kept for
        # later use by predict().
        self.orig_inputs = inputs
        # set up inputs: branch A reuses the unlabeled input; branch B is a
        # fresh Input with the same per-sample shape.
        self.inputs = {
            'A': inputs['Unlabeled'],
            'B': Input(shape=inputs['Unlabeled'].get_shape().as_list()[1:]),
            'Labeled': inputs['Labeled'],
        }
        self.y_true = y_true
        # generate layers
        self.layers = []
        self.layers += make_layer_list(arch, 'siamese', siam_reg)
        # create the siamese net: the same layer list is applied to every
        # entry in self.inputs (shared weights across branches).
        self.outputs = stack_layers(self.inputs, self.layers)
        # add the distance layer
        self.distance = Lambda(costs.euclidean_distance,
                               output_shape=costs.eucl_dist_output_shape)(
                                   [self.outputs['A'], self.outputs['B']])
        #create the distance model for training
        self.net = Model([self.inputs['A'], self.inputs['B']], self.distance)
        # compile the siamese network
        self.net.compile(loss=costs.get_contrastive_loss(m_neg=1, m_pos=0.05),
                         optimizer='rmsprop')

    def train(self, pairs_train, dist_train, pairs_val, dist_val, lr, drop,
              patience, num_epochs, batch_size):
        """Fit the contrastive model on pre-built pairs; returns the History."""
        # create handler for early stopping and learning rate scheduling
        self.lh = LearningHandler(lr=lr, drop=drop,
                                  lr_tensor=self.net.optimizer.lr,
                                  patience=patience)
        # initialize the training generator
        train_gen_ = train_gen(pairs_train, dist_train, batch_size)
        # format the validation data for keras: pairs arrive as (n, 2, ...)
        # arrays and are split into the two branch inputs.
        validation_data = ([pairs_val[:, 0], pairs_val[:, 1]], dist_val)
        # compute the steps per epoch
        steps_per_epoch = int(len(pairs_train) / batch_size)
        # train the network
        hist = self.net.fit_generator(train_gen_, epochs=num_epochs,
                                      validation_data=validation_data,
                                      steps_per_epoch=steps_per_epoch,
                                      callbacks=[self.lh])
        return hist

    def predict(self, x, batch_sizes):
        # compute the siamese embeddings of the input data (branch A output),
        # delegating batching to the project-level train.predict helper.
        return train.predict(self.outputs['A'], x_unlabeled=x,
                             inputs=self.orig_inputs, y_true=self.y_true,
                             batch_sizes=batch_sizes)
# Script fragment: finishes building a 2-class classification head on top of a
# frozen pre-trained base model, trains only the head with an augmented data
# generator, and predicts on the test set. Relies on names defined earlier in
# the full script (headModel, baseModel, DROP, INIT_LR, DEC, BS, EPOCHS,
# trainAug, trainX/trainY, testX/testY) — presumably a transfer-learning setup;
# confirm against the complete file.
headModel = LeakyReLU(alpha=0.2)(headModel)
headModel = Dropout(DROP)(headModel)
headModel = Dense(2, activation="softmax")(headModel)

# place the head FC model on top of the base model (this will become the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
model.summary()

# loop over all layers in the base model and freeze them so they will *not* be updated during the first training process
for layer in baseModel.layers:
    layer.trainable = False

# compile our model
print("[INFO] compiling model...")
opt = Adam(lr=INIT_LR, decay=DEC)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the head of the network
print("[INFO] training head...")
H = model.fit_generator(trainAug.flow(trainX, trainY, batch_size=BS),
                        steps_per_epoch=len(trainX) // BS,
                        validation_data=(testX, testY),
                        validation_steps=len(testX) // BS,
                        epochs=EPOCHS)

# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)

# for each image in the testing set we need to find the index of the label with corresponding largest predicted probability
predIdxs = np.argmax(predIdxs, axis=1)
def train(self, dataset):
    """Full training pipeline for the wrapped model (TF 1.x graph mode).

    Loads encoded data from `dataset`, trims it to whole batches, builds the
    Keras model on CPU or GPU(s) (multi-GPU via multi_gpu_model when
    self.multi_gpu and >1 GPUs are available), constructs the configured
    optimizer, compiles, sets up checkpoint/TensorBoard/epoch-tracking
    callbacks, optionally resumes from the last checkpoint, and fits.

    Args:
        dataset: dataset object passed to
            self.trainable_model.load_encoded_data().

    Returns:
        (single_gpu_model, x_valid_feed, y_valid_feed) — the single-GPU model
        (used for weight saving) plus the validation feeds, for downstream
        benchmarking (e.g. federated training).
    """
    # Transform data into format to be fed into model
    # Below code is more suitable for run mode than train mode
    '''
    (X, Y, X_valid, Y_valid) = dataset.load_as_list()
    X = self.trainable_model.encode_input(X)
    Y = self.trainable_model.encode_output(Y)
    X_valid = self.trainable_model.encode_input(X_valid)
    Y_valid = self.trainable_model.encode_output(Y_valid)
    '''

    # If using multi-gpu, then we save model/log files in other directory than normal one
    dir_suffix = ''
    gpu_count = len(self.get_available_gpus())
    if self.multi_gpu:
        gpu_count = len(self.get_available_gpus())
        # Changed to save multi-gpu model at the same path as single gpu model
        #if gpu_count > 1:
        #  dir_suffix = '_' + str(gpu_count) + 'gpus'
    print('Training on ' + str(gpu_count) + ' GPU(s)')

    # In case of train mode, we can load data in the way that we can utilize the caching feature.
    # We separate the call between input and output because they use different transformation approaches.
    (X, Y, X_valid, Y_valid) = self.trainable_model.load_encoded_data(dataset)
    print(len(X[0]))
    print(len(Y))
    print(len(X_valid[0]))
    print(len(Y_valid))
    '''
    xx = X[0:5]
    yy = Y[0:5]
    print('xx')
    print(xx)
    print('yy')
    print(yy)
    '''
    # Multi-dimensional inputs arrive as a list of arrays; 1-D as one array.
    training_data_count = 0
    if self.input_transform.get_data_dimension() > 1:
        training_data_count = X[0].shape[0]
    else:
        training_data_count = X.shape[0]
    print('Training data count = ' + str(training_data_count))
    # Trim training data to a whole number of batches.
    batch_count = int(training_data_count / self.training_config['batch_size'])
    print('Batch count = ' + str(batch_count))
    training_data_count = int(batch_count * self.training_config['batch_size'])
    print('Training data used = ' + str(training_data_count))
    epochs_count = int(self.training_config['epochs'])
    # Federated learning will have this value overridden.
    if 'final_epochs' in self.training_config:
        epochs_count = int(self.training_config['final_epochs'])
    training_steps = int(batch_count) * epochs_count
    training_batch_count = batch_count

    # Same whole-batch trimming for the validation split.
    validation_data_count = 0
    if self.input_transform.get_data_dimension() > 1:
        validation_data_count = X_valid[0].shape[0]
    else:
        validation_data_count = X_valid.shape[0]
    print('Validation data count = ' + str(validation_data_count))
    batch_count = int(validation_data_count / self.training_config['batch_size'])
    print('Batch count = ' + str(batch_count))
    validation_data_count = int(batch_count * self.training_config['batch_size'])
    print('Validation data used = ' + str(validation_data_count))

    if self.input_transform.get_data_dimension() > 1:
        X = [a[0:training_data_count] for a in X]
        X_valid = [a[0:validation_data_count] for a in X_valid]
        print('>>> X len = ' + str(len(X[0])))
        print('>>> X_valid len = ' + str(len(X_valid[0])))
    else:
        X = X[0:training_data_count]
        X_valid = X_valid[0:validation_data_count]
        print('>>>> X len = ' + str(X.shape[0]))
        print('>>>> X_valid len = ' + str(X_valid.shape[0]))
    if self.output_transform.get_data_dimension() > 1:
        Y = [a[0:training_data_count] for a in Y]
        Y_valid = [a[0:validation_data_count] for a in Y_valid]
        # NOTE(review): these log X lengths while trimming Y — probably a
        # copy-paste slip in the log message (output values are unaffected).
        print('>>> Y len = ' + str(len(X[0])))
        print('>>> Y_valid len = ' + str(len(X_valid[0])))
    else:
        Y = Y[0:training_data_count]
        Y_valid = Y_valid[0:validation_data_count]
        print('>>>> Y len = ' + str(Y.shape[0]))
        print('>>>> Y_valid len = ' + str(Y_valid.shape[0]))

    # If multi-model, wrap it as Data Parallel trainable model
    if gpu_count > 1:
        # Template model lives on CPU; replicas are created per GPU.
        with tf.device('/cpu'):
            [input_tensors, output_tensors] = self.trainable_model.get_forward_tensors()
            print("=== INPUT_TENSOR ===")
            print(input_tensors)
            print("=== OUTPUT_TENSOR ===")
            print(output_tensors)
            model = Model(input_tensors, output_tensors)
        print("=== CPU TEMPLATE MODEL ===")
        model.summary()
        single_gpu_model = model  # For saving weight
        model = multi_gpu_model(model, gpus=gpu_count)
        print("=== MULTI-GPU MODEL ===")
        model.summary()
    elif gpu_count == 1:
        with tf.device('/gpu'):
            [input_tensors, output_tensors] = self.trainable_model.get_forward_tensors()
            model = Model(input_tensors, output_tensors)
        single_gpu_model = model
    elif gpu_count == 0:
        with tf.device('/cpu'):
            [input_tensors, output_tensors] = self.trainable_model.get_forward_tensors()
            model = Model(input_tensors, output_tensors)
        single_gpu_model = model

    current_epoch_wrapper = LogCurrentEpochWrapper(self.training_config, dir_suffix)
    initial_epoch = 0
    if 'resume_if_possible' in self.training_config and self.training_config[
            'resume_if_possible'] == True:
        initial_epoch = current_epoch_wrapper.get_current_epoch()

    # Home of output directory (support multi-OS)
    output_dir = os.path.join(
        *re.split('/|\\\\', self.training_config['output_dir']))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Optimizer selection: 'adam', 'bert_adam', or 'bert' (project-local
    # optimizers are imported lazily only when requested).
    optimizer = self.training_config['optimizer']
    if optimizer == 'adam':
        optimizer_params = self.training_config['optimizer_params']
        optimizer = Adam(optimizer_params[0], optimizer_params[1],
                         optimizer_params[2], epsilon=optimizer_params[3])
    elif optimizer == 'bert_adam':
        optimizer_params = self.training_config['optimizer_params']
        # Calculate total step and set it to decay_steps (learning rate reaches 0 at the very end)
        total_steps = batch_count * self.training_config['epochs']
        print('[INFO] Training with BERT Optimizer with decay_steps = ' +
              str(total_steps))
        from NLP_LIB.optimizer.bert_optimizer import BERTOptimizer
        optimizer = BERTOptimizer(
            decay_steps=total_steps,  # 100000,
            warmup_steps=optimizer_params[2],  # 10000,
            learning_rate=optimizer_params[0],  # 1e-4,
            weight_decay=optimizer_params[1],  # 0.01,
            weight_decay_pattern=[
                'embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'
            ],
        )
    elif optimizer == 'bert':
        optimizer_params = self.training_config['optimizer_params']
        from NLP_LIB.ext.bert.optimization import AdamWeightDecayOptimizer
        print('initial_epoch = ' + str(initial_epoch))
        print('training_batch_count = ' + str(training_batch_count))
        initial_step = initial_epoch * training_batch_count
        print('initial_step = ' + str(initial_step))
        optimizer = AdamWeightDecayOptimizer(
            initial_step=initial_step,  # Start from current epoch to keep model running with correct LR
            learning_rate=optimizer_params[0],  # 0.0001,
            num_train_steps=training_steps,  # 100,
            warmup_steps=optimizer_params[4],  # 10,
            lr_decay_power=optimizer_params[5],
            weight_decay_rate=optimizer_params[6],
            beta_1=optimizer_params[1],  # 0.9,
            beta_2=optimizer_params[2],  # 0.999,
            epsilon=optimizer_params[3],  # 1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    # Add model metric names and tensors to tracking list
    metric_names = self.trainable_model.get_metric_names()
    metric_funcs = self.trainable_model.get_metric_functions()
    '''
    metric_names = self.trainable_model.get_metric_names()
    metric_tensors = self.trainable_model.get_metric_tensors()
    for metric_name, metric_tensor in zip(metric_names, metric_tensors):
        print('Add Metric: ' + metric_name)
        model.metrics_names.append(metric_name)
        model.metrics_tensors.append(metric_tensor)
    '''
    model.compile(optimizer=optimizer,
                  loss=self.trainable_model.get_loss_function(),
                  metrics=metric_funcs)
    model.summary()

    # Keras expects a list of arrays; wrap single-array data accordingly.
    if self.input_transform.get_data_dimension() > 1:
        x_feed = X
        x_valid_feed = X_valid
    else:
        x_feed = [X]
        x_valid_feed = [X_valid]
        #exit(0)
    if self.output_transform.get_data_dimension() > 1:
        y_feed = Y
        y_valid_feed = Y_valid
    else:
        y_feed = [Y]
        y_valid_feed = [Y_valid]

    # If model is sequence model, we have to feed prev_output too.
    # TODO: Can we embed the flow to generate input list into the data transformation class?
    if isinstance(self.trainable_model, SequenceModelWrapper):
        print('OH NOOO!!!')
        #exit(0)
        x_feed.append(Y)
        x_valid_feed.append(Y_valid)
        # Also, if we are running Sequence Model, output will be logits but label will be sparse value.
        # Keras loss function needs label and output to be in the same dimension, thus we need to convert label to dense value too.
        # The conversion to Dense is done in a custom loss function in the model, but we need to "prepare" an additional dimension on the sparse label.
        y_feed = [np.expand_dims(Y, axis=2)]
        y_valid_feed = [np.expand_dims(Y_valid, axis=2)]

    class CustomTensorBoard(TensorBoard):
        """TensorBoard callback that additionally logs the optimizer's
        learning rate tensor (when the optimizer exposes one)."""

        def __init__(self, log_dir, **kwargs):  # add other arguments to __init__ if you need
            super().__init__(log_dir=log_dir, **kwargs)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            # If there is learning_rate_tensor in the optimizer, we want to log it too.
            if hasattr(optimizer, 'learning_rate_tensor'):
                logs.update({
                    'learning_rate': K.eval(optimizer.learning_rate_tensor)
                })
            '''
            # Also add gradient norm as a default metric
            # Get a "l2 norm of gradients" tensor
            def get_gradient_norm(model):
                with K.name_scope('gradient_norm'):
                    grads = K.gradients(model.total_loss, model.trainable_weights)
                    norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
                return norm
            logs.update({'gradient_norm': K.eval(get_gradient_norm(model))})
            '''
            super().on_epoch_end(epoch, logs)

    # Tensorboard log directory
    tboard_log_dir = os.path.join(output_dir, 'tboard_log' + dir_suffix)
    if not os.path.exists(tboard_log_dir):
        os.makedirs(tboard_log_dir)
    tboard_log_saver = CustomTensorBoard(tboard_log_dir, write_graph=False,
                                         write_images=False)

    # For saving weight history along with accuracy in each epoch (May use a lot of disk)
    verbose_model_saver = None
    if self.training_config['save_weight_history']:
        verbose_log_dir = os.path.join(output_dir, 'weight_history' + dir_suffix)
        if not os.path.exists(verbose_log_dir):
            os.makedirs(verbose_log_dir)
        verbose_weight_history_filepath = os.path.join(
            verbose_log_dir, 'weights.{epoch:02d}-{' +
            self.training_config['watch_metric'] + ':.4f}.h5')

        # If there is an option for the number of epochs between saves
        if 'save_weight_every' in self.training_config:
            save_weight_every = self.training_config['save_weight_every']
            print('[INFO] Save weight every = ' + str(save_weight_every))
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True,
                period=save_weight_every)
        else:
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True)

    model.summary()

    # Initialize all variables, including local variables created by metrics calculations and optimizers.
    sess = K.get_session()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    #####
    ## DEBUG Print some training variable before loading checkpoint
    #global_vars = tf.global_variables()
    #print('[DEBUG]: First Weight Name = ' + str(global_vars[0].name))
    #print('[DEBUG]: First Weight = ' + str(sess.run(global_vars[0])))

    # Callback to model after finish variable initialization, init_from_checkpoint is loaded here.
    self.trainable_model.on_after_init(single_gpu_model)

    # If resume training, load latest checkpoint
    # Checkpoint saving directory
    checkpoint_dir = os.path.join(output_dir, 'checkpoint')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    last_checkpoint_filepath = os.path.join(
        checkpoint_dir, 'last_weight' + dir_suffix + '.h5')
    if 'resume_if_possible' in self.training_config and self.training_config[
            'resume_if_possible'] == True:
        print('Init model ' + str(self) + ' from epoch: ' + str(initial_epoch))
        if os.path.exists(last_checkpoint_filepath):
            print('Init model ' + str(self) + ' from checkpoint: ' +
                  last_checkpoint_filepath)
            single_gpu_model.load_weights(last_checkpoint_filepath)

    self.training_config['initial_epoch'] = initial_epoch

    checkpoint_filepath = os.path.join(checkpoint_dir,
                                       'best_weight' + dir_suffix + '.h5')
    # Checkpoints always reference the single-GPU model so saved weights are
    # usable without the multi-GPU wrapper.
    model_saver = RefModelCheckpoint(checkpoint_filepath, single_gpu_model,
                                     save_best_only=True,
                                     save_weights_only=True)

    # Also always save latest model for continued training
    last_model_saver = RefModelCheckpoint(last_checkpoint_filepath,
                                          single_gpu_model,
                                          save_best_only=False,
                                          save_weights_only=True)

    # Construct all training callbacks
    training_callbacks = [model_saver, last_model_saver, tboard_log_saver]
    if verbose_model_saver is not None:
        training_callbacks.append(verbose_model_saver)
    if self.callback_list is not None:
        for callback in self.callback_list:
            training_callbacks.append(callback.get_keras_callback())

    # Save current epoch
    training_callbacks.append(current_epoch_wrapper.get_keras_callback())

    #####
    ## DEBUG Print some training variable before after checkpoint
    #global_vars = tf.global_variables()
    #print('[DEBUG]: First Weight Name = ' + str(global_vars[0].name))
    #print('[DEBUG]: First Weight = ' + str(sess.run(global_vars[0])))

    print('Start training.')
    '''
    with tf.Session(config = tf.ConfigProto(log_device_placement = False, allow_soft_placement=False)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        model.fit(x=x_feed, y=y_feed,
            batch_size=self.training_config['batch_size'],
            epochs=self.training_config['epochs'],
            validation_data=(x_valid_feed, y_valid_feed),
            callbacks=training_callbacks,
            initial_epoch=initial_epoch
        )
    '''
    # print(model.trainable_weights)
    model.fit(x=x_feed, y=y_feed,
              batch_size=self.training_config['batch_size'],
              epochs=self.training_config['epochs'],
              validation_data=(x_valid_feed, y_valid_feed),
              callbacks=training_callbacks,
              initial_epoch=initial_epoch)
    print('Finished training.')

    # Return trained model (single_gpu_model) and validation set as output.
    # They are used for further benchmarking like in federated training.
    return (single_gpu_model, x_valid_feed, y_valid_feed)