def __init__(self, inp_dim, out_dim, lr, tau, min_max=-1):
    """Store dimensions/hyperparameters and build the compiled network.

    Args:
        inp_dim: observation (environment) dimension.
        out_dim: action dimension.
        lr: learning rate for the Keras Adam optimizer.
        tau: step size handed to the custom AdamOpt optimizer.
        min_max: optimization direction, -1 = minimize, +1 = maximize.
    """
    # Dimensions and hyperparameters.
    self.env_dim = inp_dim
    self.act_dim = out_dim
    self.tau = tau
    self.lr = lr
    # Build and compile the underlying Keras model with an MSE loss.
    self.model = self.network()
    self.model.compile(Adam(self.lr), 'mse')
    # Custom optimizer wrapper; sign selects min (-1) or max (+1).
    self.AdamOpt = AdamOpt.AdamOpt(sign=min_max, step=self.tau)
def create_dqn():
    """Build and compile the two-hidden-layer DQN.

    Architecture: tanh(36) -> relu(28) -> linear(|actions|), trained with
    MSE under Adam (learning rate ALPHA, decay ALPHA_DECAY).
    """
    network = Sequential([
        Dense(36, input_dim=OBSERVATION_SPACE_DIMS, activation='tanh'),
        Dense(28, activation='relu'),
        Dense(len(ACTION_SPACE), activation='linear'),
    ])
    network.compile(loss='mse', optimizer=Adam(lr=ALPHA, decay=ALPHA_DECAY))
    return network
def _get_optimizer(optimizer_params, init_lr):
    """Instantiate the optimizer named by ``optimizer_params.name``.

    Args:
        optimizer_params: object with ``name`` plus, for SGD, ``momentum``
            and ``nesterov`` attributes.
        init_lr: initial learning rate.

    Raises:
        NotImplementedError: if the name is neither 'sgd' nor 'adam'.
    """
    name = optimizer_params.name.lower()
    if name == 'sgd':
        return SGD(lr=init_lr,
                   momentum=optimizer_params.momentum,
                   nesterov=optimizer_params.nesterov)
    if name == 'adam':
        return Adam(init_lr)
    raise NotImplementedError('Valid optimizers are: SGD and Adam')
def create_model(nb_class, anchors, max_box_per_image, max_grid, batch_size,
                 warmup_batches, ignore_thresh, multi_gpu, saved_weights_name,
                 lr, grid_scales, obj_scale, noobj_scale, xywh_scale,
                 class_scale):
    """Build the YOLOv3 train/infer model pair and compile the train model.

    Args:
        nb_class .. class_scale: forwarded to ``create_yolov3_model``.
        multi_gpu: number of GPUs; >1 triggers CPU-hosted template weights
            plus a ``multi_gpu_model`` data-parallel wrapper.
        saved_weights_name: checkpoint path; loaded if it exists, otherwise
            the pretrained backend weights are loaded by name.
        lr: learning rate for the Adam optimizer (gradients clipped to
            norm 0.001).

    Returns:
        (train_model, infer_model) tuple.
    """
    # Single source of truth for the model kwargs; only batch_size differs
    # between the single- and multi-GPU paths (it is split per replica).
    yolo_kwargs = dict(
        nb_class=nb_class,
        anchors=anchors,
        max_box_per_image=max_box_per_image,
        max_grid=max_grid,
        batch_size=batch_size,
        warmup_batches=warmup_batches,
        ignore_thresh=ignore_thresh,
        grid_scales=grid_scales,
        obj_scale=obj_scale,
        noobj_scale=noobj_scale,
        xywh_scale=xywh_scale,
        class_scale=class_scale)
    if multi_gpu > 1:
        # Keep the template weights on the CPU so all GPU replicas share them.
        yolo_kwargs['batch_size'] = batch_size // multi_gpu
        with tf.device('/cpu:0'):
            template_model, infer_model = create_yolov3_model(**yolo_kwargs)
    else:
        template_model, infer_model = create_yolov3_model(**yolo_kwargs)

    # Load the pretrained weights if they exist, otherwise the backend
    # (backbone-only) weights.
    if os.path.exists(saved_weights_name):
        print("\nLoading pretrained weights.\n")
        template_model.load_weights(saved_weights_name)
    else:
        template_model.load_weights(
            "workspace/models/yolo/keras-pretrained-backbone/backend.h5",
            by_name=True)

    if multi_gpu > 1:
        train_model = multi_gpu_model(template_model, gpus=multi_gpu)
    else:
        train_model = template_model

    # dummy_loss: the real YOLO loss is computed inside the model graph.
    optimizer = Adam(lr=lr, clipnorm=0.001)
    train_model.compile(loss=dummy_loss, optimizer=optimizer)
    return train_model, infer_model
def define_gan(g_model, d_model):
    """Stack generator and (frozen) discriminator into a composite GAN.

    Freezing the discriminator's weights means that training this composite
    model updates only the generator.
    """
    # Make weights in the discriminator not trainable.
    d_model.trainable = False
    # Generator output feeds straight into the discriminator.
    gan = Sequential([g_model, d_model])
    gan.compile(loss='binary_crossentropy',
                optimizer=Adam(lr=0.0002, beta_1=0.5))
    return gan
def build_model(self):
    """Construct and compile the 128-64 MLP Q-network (MSE loss, Adam)."""
    # he_uniform initializes weights from a He-scaled uniform distribution.
    layer_stack = [
        Dense(128, input_dim=self.state_size, activation='relu',
              kernel_initializer='he_uniform'),
        Dense(64, activation='relu'),
        # NOTE(review): 'relu' on the output clips negative Q-values —
        # preserved as-is, confirm this is intentional.
        Dense(self.action_size, activation='relu'),
    ]
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    network.compile(loss='mse',
                    optimizer=Adam(lr=self.learning_rate, decay=0.0))
    return network
def create_critic_network(self):
    """Build the critic network Q(state, action).

    Two parallel branches — one over the state, one over the action — are
    concatenated, passed through a hidden relu layer, and regressed to a
    linear value output.

    Returns:
        (model, action_input, state_input) tuple.
    """
    # Parallel branch 1: state.
    state_input = Input(shape=[self.obs_dim])
    state_hidden = Dense(self.hidden_dim, activation='relu')(state_input)
    state_proj = Dense(self.hidden_dim, activation='linear')(state_hidden)
    # Parallel branch 2: action.
    action_input = Input(shape=[self.act_dim], name='action2')
    action_proj = Dense(self.hidden_dim, activation='linear')(action_input)
    # Merge branches by concatenation and regress the value.
    merged = concatenate([state_proj, action_proj])
    hidden = Dense(self.hidden_dim, activation='relu')(merged)
    value_out = Dense(self.act_dim, activation='linear')(hidden)
    model = Model(inputs=[state_input, action_input], outputs=[value_out])
    model.compile(loss='mse', optimizer=Adam(self.lr))
    return model, action_input, state_input
def define_discriminator(in_shape=(32, 32, 3)):
    """Build and compile the GAN discriminator for 32x32x3 inputs.

    One full-resolution conv stage, three stride-2 downsampling stages,
    then a dropout-regularized sigmoid real/fake classifier.
    """
    model = Sequential()
    # Full-resolution feature extraction.
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    # Three successive stride-2 downsampling stages.
    for n_filters in (128, 128, 256):
        model.add(Conv2D(n_filters, (3, 3), strides=(2, 2), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
    # Classifier head.
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=0.0002, beta_1=0.5),
                  metrics=['accuracy'])
    return model
def keras_build_fn(num_feature, num_output, is_sparse, embedding_dim=-1,
                   num_hidden_layer=2, hidden_layer_dim=512, activation='elu',
                   learning_rate=1e-3, dropout=0.5, l1=0.0, l2=0.0,
                   loss='categorical_crossentropy'):
    """Initializes and compiles a Keras DNN model using the Adam optimizer.

    Args:
        num_feature: number of features
        num_output: number of outputs (targets, e.g., classes)
        is_sparse: boolean whether input data is in sparse format
        embedding_dim: int number of nodes in embedding layer; if value is <= 0
            then no embedding layer will be present in the model
        num_hidden_layer: number of hidden layers
        hidden_layer_dim: int number of nodes in the hidden layer(s)
        activation: string activation function for hidden layers; see
            https://keras.io/activations/
        learning_rate: float learning rate for Adam
        dropout: float proportion of nodes to dropout; values in [0, 1]
        l1: float strength of L1 regularization on weights
        l2: float strength of L2 regularization on weights
        loss: string loss function; see https://keras.io/losses/

    Returns:
        model: Keras.models.Model compiled Keras model
    """
    assert num_hidden_layer >= 1

    inputs = Input(shape=(num_feature,), sparse=is_sparse)

    # Resolve the activation layer class. The parametric layers (PReLU, ...)
    # take no constructor argument; the generic Activation layer takes the
    # activation name.
    activation_func_args = ()
    if activation.lower() == 'prelu':
        activation_func = PReLU
    elif activation.lower() == 'leakyrelu':
        activation_func = LeakyReLU
    elif activation.lower() == 'elu':
        activation_func = ELU
    elif activation.lower() == 'thresholdedrelu':
        activation_func = ThresholdedReLU
    else:
        activation_func = Activation
        # BUG FIX: was `(activation)`, a bare string rather than a tuple, so
        # `activation_func(*activation_func_args)` unpacked it character by
        # character (e.g. Activation('e', 'l', 'u')). A one-element tuple
        # yields the intended Activation(activation) call.
        activation_func_args = (activation,)

    # Weight-regularizer factory; a fresh regularizer instance per layer.
    if l1 > 0 and l2 > 0:
        reg_init = lambda: regularizers.l1_l2(l1, l2)
    elif l1 > 0:
        reg_init = lambda: regularizers.l1(l1)
    elif l2 > 0:
        reg_init = lambda: regularizers.l2(l2)
    else:
        reg_init = lambda: None

    # Optional linear embedding layer, then the hidden stack. Folding both
    # branches into one loop removes the duplicated first-hidden-layer code.
    x = inputs
    if embedding_dim > 0:
        x = Dense(embedding_dim)(x)
    for _ in range(num_hidden_layer):
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(x)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)

    x = Dense(num_output)(x)
    preds = Activation('softmax')(x)

    model = Model(inputs=inputs, outputs=preds)
    model.compile(optimizer=Adam(lr=learning_rate), loss=loss)
    return model
def main():
    """Train the EfficientDet heatmap model on data from sys.argv[1].

    Loads images/heatmaps, optionally resumes from 'efficientdet2' weights,
    trains with sigmoid focal loss, saves weights, and writes a side-by-side
    prediction/ground-truth plot to testres.png.
    """
    dir_path = sys.argv[1]
    phi = 0
    cont_training = False
    weighted_bifpn = True
    freeze_backbone = False
    tf.compat.v1.keras.backend.set_session(get_session())

    # Load the training data (generator path left disabled).
    # train_generator = trainGenerator(dir_path)
    images, heatmaps = get_trainData(dir_path, multi_dim=True)
    print("Number of images: %s and heatmaps: %s\n" %
          (len(images), len(heatmaps)))

    model = efficientdet(phi,
                         weighted_bifpn=weighted_bifpn,
                         freeze_bn=freeze_backbone)

    # Freeze backbone layers; layer counts per phi: 227,329,329,374,464,566,656.
    if freeze_backbone:
        for i in range(1, [227, 329, 329, 374, 464, 566, 656][phi]):
            model.layers[i].trainable = False

    # Compile with sigmoid focal loss (softmax/linear variants were tried
    # and abandoned).
    print("Compiling model ... \n")
    focalloss = SigmoidFocalCrossEntropy(
        reduction=Reduction.SUM_OVER_BATCH_SIZE)
    model.compile(optimizer=Adam(lr=1e-3), loss=focalloss)

    # 'efficientdet2' holds the stacked-heatmap weights; resume if requested.
    if cont_training:
        model.load_weights('efficientdet2')
        model.fit(images, heatmaps, batch_size=16, epochs=60, verbose=1)
    else:
        model.fit(images, heatmaps, batch_size=16, epochs=10, verbose=1)
    model.save_weights('efficientdet2')

    preds = model.predict(images[0:3])
    # save_preds(dir_path, preds)

    # Sanity-check plot: channel-summed prediction vs. ground-truth heatmap.
    plt.subplot(1, 2, 1)
    plt.imshow(np.sum(preds[0], axis=-1))
    plt.subplot(1, 2, 2)
    plt.imshow(np.sum(heatmaps[0], axis=-1))
    # BUG FIX: savefig must run before show() — show() finishes/clears the
    # current figure in non-interactive backends, so the original order
    # wrote an empty testres.png.
    plt.savefig("testres.png")
    plt.show()
def get_model(input_shape=(640, 480, 3)):
    """Build and compile the separable-conv classifier (3 softmax outputs).

    The body is a stack of five (stride-2 downsample, BatchNorm, stride-1
    refine) separable-conv stages with doubling filter counts, a final
    1024-filter downsample, global average pooling, and a small dense head.
    """
    def _sep_conv(filters, strides):
        # All separable convs share bias-free 3x3 kernels, relu, and a
        # depth multiplier of 3.
        return SeparableConv2D(use_bias=False,
                               filters=filters,
                               kernel_size=(3, 3),
                               strides=strides,
                               padding='same',
                               activation='relu',
                               depth_multiplier=3)

    model = Sequential()
    model.add(Input((None, None, 3)))
    model.add(Reshape(input_shape))
    model.add(Conv2D(input_shape=(640, 480, 3),
                     use_bias=False,
                     filters=32,
                     kernel_size=(3, 3),
                     strides=(1, 1),
                     padding='same',
                     activation='relu'))
    # Five downsample/refine stages with doubling filter counts.
    for filters in (32, 64, 128, 256, 512):
        model.add(_sep_conv(filters, (2, 2)))
        model.add(BatchNormalization())
        model.add(_sep_conv(filters, (1, 1)))
    # Final downsample to 1024 filters.
    model.add(_sep_conv(1024, (2, 2)))
    model.add(BatchNormalization())
    # Classifier head.
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.25))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(),
                  metrics=['accuracy'])
    return model
# NOTE(review): this chunk continues a fine-tuning script whose earlier part
# (baseModel, the start of headModel, and constants MOM, DROP, INIT_LR, DEC,
# BS, EPOCHS, the data arrays and trainAug) lies outside this view.

# Finish the classification head: batch-norm, leaky-relu, dropout, then a
# 2-way softmax output.
headModel = BatchNormalization(momentum=MOM)(headModel)
headModel = LeakyReLU(alpha=0.2)(headModel)
headModel = Dropout(DROP)(headModel)
headModel = Dense(2, activation="softmax")(headModel)

# place the head FC model on top of the base model (this will become the
# actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
model.summary()

# loop over all layers in the base model and freeze them so they will *not*
# be updated during the first training process
for layer in baseModel.layers:
    layer.trainable = False

# compile our model
print("[INFO] compiling model...")
opt = Adam(lr=INIT_LR, decay=DEC)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the head of the network on augmented batches
print("[INFO] training head...")
H = model.fit_generator(trainAug.flow(trainX, trainY, batch_size=BS),
                        steps_per_epoch=len(trainX) // BS,
                        validation_data=(testX, testY),
                        validation_steps=len(testX) // BS,
                        epochs=EPOCHS)

# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)
# for each image in the testing set we need to find the index of the label
# with corresponding largest predicted probability
# Concatenate all words. text = " ".join(words) return text #Create the RNN model = Sequential() embedding_size = 8 model.add( Embedding(input_dim=num_words, output_dim=embedding_size, input_length=max_tokens, name='layer_embedding')) model.add(GRU(units=16, return_sequences=True)) model.add(GRU(units=8, return_sequences=True)) model.add(GRU(units=4)) model.add(Dense(1, activation='sigmoid')) optimizer = Adam(lr=1e-3) model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) x = np.array(x_train_pad) y = np.array(y_train) model.fit(x, y, validation_split=0.06, epochs=3, batch_size=64) result = model.evaluate(x_test_pad, y_test) print("Accuracy: {0:.2%}".format(result[1]))
def train(self, dataset):
    """Train the wrapped model on `dataset` and return the trained model.

    Loads pre-encoded data, trims it to whole batches, builds the Keras
    model (optionally data-parallel across GPUs), selects/configures the
    optimizer from training_config, wires up checkpoint/TensorBoard
    callbacks, resumes from the last checkpoint when requested, and runs
    model.fit.

    Returns:
        (single_gpu_model, x_valid_feed, y_valid_feed) — the single-GPU
        model (for weight saving) plus the validation feeds, used for
        further benchmarking like in federated training.
    """
    # Transform data into format to be fed into model
    # Below code is more suitable for run mode than train mode
    '''
    (X, Y, X_valid, Y_valid) = dataset.load_as_list()
    X = self.trainable_model.encode_input(X)
    Y = self.trainable_model.encode_output(Y)
    X_valid = self.trainable_model.encode_input(X_valid)
    Y_valid = self.trainable_model.encode_output(Y_valid)
    '''

    # If using multi-gpu, then we save model/log files in other directory
    # than normal one.
    dir_suffix = ''
    gpu_count = len(self.get_available_gpus())
    if self.multi_gpu:
        gpu_count = len(self.get_available_gpus())
        # Changed to save multi-gpu model at the same path as single gpu model
        #if gpu_count > 1:
        #  dir_suffix = '_' + str(gpu_count) + 'gpus'
    print('Training on ' + str(gpu_count) + ' GPU(s)')

    # In case of train mode, we can load data in the way that we can utilize
    # the caching feature. Input and output use different transformations,
    # hence the separate encoded-data call.
    (X, Y, X_valid, Y_valid) = self.trainable_model.load_encoded_data(dataset)
    print(len(X[0]))
    print(len(Y))
    print(len(X_valid[0]))
    print(len(Y_valid))
    '''
    xx = X[0:5]
    yy = Y[0:5]
    print('xx')
    print(xx)
    print('yy')
    print(yy)
    '''

    # Trim training data to a whole number of batches.
    # (data dimension > 1 means X is a list of arrays, one per input.)
    training_data_count = 0
    if self.input_transform.get_data_dimension() > 1:
        training_data_count = X[0].shape[0]
    else:
        training_data_count = X.shape[0]
    print('Training data count = ' + str(training_data_count))
    batch_count = int(training_data_count /
                      self.training_config['batch_size'])
    print('Batch count = ' + str(batch_count))
    training_data_count = int(batch_count *
                              self.training_config['batch_size'])
    print('Training data used = ' + str(training_data_count))
    epochs_count = int(self.training_config['epochs'])
    # Federated learning will have this value overridden.
    if 'final_epochs' in self.training_config:
        epochs_count = int(self.training_config['final_epochs'])
    training_steps = int(batch_count) * epochs_count
    training_batch_count = batch_count

    # Trim validation data to a whole number of batches as well.
    validation_data_count = 0
    if self.input_transform.get_data_dimension() > 1:
        validation_data_count = X_valid[0].shape[0]
    else:
        validation_data_count = X_valid.shape[0]
    print('Validation data count = ' + str(validation_data_count))
    batch_count = int(validation_data_count /
                      self.training_config['batch_size'])
    print('Batch count = ' + str(batch_count))
    validation_data_count = int(batch_count *
                                self.training_config['batch_size'])
    print('Validation data used = ' + str(validation_data_count))

    if self.input_transform.get_data_dimension() > 1:
        X = [a[0:training_data_count] for a in X]
        X_valid = [a[0:validation_data_count] for a in X_valid]
        print('>>> X len = ' + str(len(X[0])))
        print('>>> X_valid len = ' + str(len(X_valid[0])))
    else:
        X = X[0:training_data_count]
        X_valid = X_valid[0:validation_data_count]
        print('>>>> X len = ' + str(X.shape[0]))
        print('>>>> X_valid len = ' + str(X_valid.shape[0]))

    if self.output_transform.get_data_dimension() > 1:
        Y = [a[0:training_data_count] for a in Y]
        Y_valid = [a[0:validation_data_count] for a in Y_valid]
        print('>>> Y len = ' + str(len(X[0])))
        print('>>> Y_valid len = ' + str(len(X_valid[0])))
    else:
        Y = Y[0:training_data_count]
        Y_valid = Y_valid[0:validation_data_count]
        print('>>>> Y len = ' + str(Y.shape[0]))
        print('>>>> Y_valid len = ' + str(Y_valid.shape[0]))

    # If multi-GPU, wrap it as a Data Parallel trainable model; the template
    # model lives on the CPU so its weights are shared across replicas.
    if gpu_count > 1:
        with tf.device('/cpu'):
            [input_tensors, output_tensors] = \
                self.trainable_model.get_forward_tensors()
            print("=== INPUT_TENSOR ===")
            print(input_tensors)
            print("=== OUTPUT_TENSOR ===")
            print(output_tensors)
            model = Model(input_tensors, output_tensors)
        print("=== CPU TEMPLATE MODEL ===")
        model.summary()
        single_gpu_model = model  # For saving weight
        model = multi_gpu_model(model, gpus=gpu_count)
        print("=== MULTI-GPU MODEL ===")
        model.summary()
    elif gpu_count == 1:
        with tf.device('/gpu'):
            [input_tensors, output_tensors] = \
                self.trainable_model.get_forward_tensors()
            model = Model(input_tensors, output_tensors)
            single_gpu_model = model
    elif gpu_count == 0:
        with tf.device('/cpu'):
            [input_tensors, output_tensors] = \
                self.trainable_model.get_forward_tensors()
            model = Model(input_tensors, output_tensors)
            single_gpu_model = model

    current_epoch_wrapper = LogCurrentEpochWrapper(self.training_config,
                                                   dir_suffix)
    initial_epoch = 0
    if 'resume_if_possible' in self.training_config and self.training_config[
            'resume_if_possible'] == True:
        initial_epoch = current_epoch_wrapper.get_current_epoch()

    # Home of output directory (support multi-OS path separators).
    output_dir = os.path.join(
        *re.split('/|\\\\', self.training_config['output_dir']))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Build the optimizer from config: plain Adam, BERT-style Adam with LR
    # decay, or AdamWeightDecayOptimizer (BERT) with warmup.
    optimizer = self.training_config['optimizer']
    if optimizer == 'adam':
        optimizer_params = self.training_config['optimizer_params']
        optimizer = Adam(optimizer_params[0],
                         optimizer_params[1],
                         optimizer_params[2],
                         epsilon=optimizer_params[3])
    elif optimizer == 'bert_adam':
        optimizer_params = self.training_config['optimizer_params']
        # Calculate total step and set it to decay_steps (learning rate
        # reaches 0 at the very end).
        total_steps = batch_count * self.training_config['epochs']
        print('[INFO] Training with BERT Optimizer with decay_steps = ' +
              str(total_steps))
        from NLP_LIB.optimizer.bert_optimizer import BERTOptimizer
        optimizer = BERTOptimizer(
            decay_steps=total_steps,  # 100000,
            warmup_steps=optimizer_params[2],  # 10000,
            learning_rate=optimizer_params[0],  # 1e-4,
            weight_decay=optimizer_params[1],  # 0.01,
            weight_decay_pattern=[
                'embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'
            ],
        )
    elif optimizer == 'bert':
        optimizer_params = self.training_config['optimizer_params']
        from NLP_LIB.ext.bert.optimization import AdamWeightDecayOptimizer
        print('initial_epoch = ' + str(initial_epoch))
        print('training_batch_count = ' + str(training_batch_count))
        initial_step = initial_epoch * training_batch_count
        print('initial_step = ' + str(initial_step))
        optimizer = AdamWeightDecayOptimizer(
            # Start from current epoch to keep model running with correct LR.
            initial_step=initial_step,
            learning_rate=optimizer_params[0],  # 0.0001,
            num_train_steps=training_steps,  # 100,
            warmup_steps=optimizer_params[4],  # 10,
            lr_decay_power=optimizer_params[5],
            weight_decay_rate=optimizer_params[6],
            beta_1=optimizer_params[1],  # 0.9,
            beta_2=optimizer_params[2],  # 0.999,
            epsilon=optimizer_params[3],  # 1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    # Add model metric names and tensors to tracking list.
    metric_names = self.trainable_model.get_metric_names()
    metric_funcs = self.trainable_model.get_metric_functions()
    '''
    metric_names = self.trainable_model.get_metric_names()
    metric_tensors = self.trainable_model.get_metric_tensors()
    for metric_name, metric_tensor in zip(metric_names, metric_tensors):
      print('Add Metric: ' + metric_name)
      model.metrics_names.append(metric_name)
      model.metrics_tensors.append(metric_tensor)
    '''

    model.compile(optimizer=optimizer,
                  loss=self.trainable_model.get_loss_function(),
                  metrics=metric_funcs)
    model.summary()

    # Keras expects a list of arrays per input/output; wrap single-array
    # cases in a one-element list.
    if self.input_transform.get_data_dimension() > 1:
        x_feed = X
        x_valid_feed = X_valid
    else:
        x_feed = [X]
        x_valid_feed = [X_valid]
    #exit(0)
    if self.output_transform.get_data_dimension() > 1:
        y_feed = Y
        y_valid_feed = Y_valid
    else:
        y_feed = [Y]
        y_valid_feed = [Y_valid]

    # If model is sequence model, we have to feed prev_output too.
    # TODO: Can we embed the flow to generate input list into the data
    # transformation class?
    if isinstance(self.trainable_model, SequenceModelWrapper):
        print('OH NOOO!!!')
        #exit(0)
        x_feed.append(Y)
        x_valid_feed.append(Y_valid)
        # Also, if we are running Sequence Model, output will be logits but
        # label will be sparse value. Keras loss functions need label and
        # output to be in the same dimension, thus we need to convert the
        # label to dense value too. The conversion to Dense is done in the
        # custom loss function in the model, but we need to "prepare" an
        # additional dimension on the sparse label.
        y_feed = [np.expand_dims(Y, axis=2)]
        y_valid_feed = [np.expand_dims(Y_valid, axis=2)]

    class CustomTensorBoard(TensorBoard):
        # TensorBoard subclass that additionally logs the optimizer's
        # current learning rate each epoch (when the optimizer exposes it).
        def __init__(self, log_dir,
                     **kwargs):  # add other arguments to __init__ if you need
            super().__init__(log_dir=log_dir, **kwargs)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            # If there is learning_rate_tensor in the optimizer, we want to
            # log it too.
            if hasattr(optimizer, 'learning_rate_tensor'):
                logs.update({
                    'learning_rate':
                    K.eval(optimizer.learning_rate_tensor)
                })
            '''
            # Also add gradient norm as a default metric
            # Get a "l2 norm of gradients" tensor
            def get_gradient_norm(model):
              with K.name_scope('gradient_norm'):
                grads = K.gradients(model.total_loss, model.trainable_weights)
                norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
              return norm
            logs.update({'gradient_norm': K.eval(get_gradient_norm(model))})
            '''
            super().on_epoch_end(epoch, logs)

    # Tensorboard log directory.
    tboard_log_dir = os.path.join(output_dir, 'tboard_log' + dir_suffix)
    if not os.path.exists(tboard_log_dir):
        os.makedirs(tboard_log_dir)
    tboard_log_saver = CustomTensorBoard(tboard_log_dir,
                                         write_graph=False,
                                         write_images=False)

    # For saving weight history along with accuracy in each epoch
    # (may use a lot of disk).
    verbose_model_saver = None
    if self.training_config['save_weight_history']:
        verbose_log_dir = os.path.join(output_dir,
                                       'weight_history' + dir_suffix)
        if not os.path.exists(verbose_log_dir):
            os.makedirs(verbose_log_dir)
        verbose_weight_history_filepath = os.path.join(
            verbose_log_dir, 'weights.{epoch:02d}-{' +
            self.training_config['watch_metric'] + ':.4f}.h5')

        # If there is an option specifying the number of epochs between saves.
        if 'save_weight_every' in self.training_config:
            save_weight_every = self.training_config['save_weight_every']
            print('[INFO] Save weight every = ' + str(save_weight_every))
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True,
                period=save_weight_every)
        else:
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True)

    model.summary()

    # Initialize all variables, including local variables created by metrics
    # calculations and optimizers.
    sess = K.get_session()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    #####
    ## DEBUG Print some training variable before loading checkpoint
    #global_vars = tf.global_variables()
    #print('[DEBUG]: First Weight Name = ' + str(global_vars[0].name))
    #print('[DEBUG]: First Weight = ' + str(sess.run(global_vars[0])))

    # Callback to model after finishing variable initialization;
    # init_from_checkpoint is loaded here.
    self.trainable_model.on_after_init(single_gpu_model)

    # If resume training, load latest checkpoint.
    # Checkpoint saving directory.
    checkpoint_dir = os.path.join(output_dir, 'checkpoint')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    last_checkpoint_filepath = os.path.join(
        checkpoint_dir, 'last_weight' + dir_suffix + '.h5')
    if 'resume_if_possible' in self.training_config and self.training_config[
            'resume_if_possible'] == True:
        print('Init model ' + str(self) + ' from epoch: ' +
              str(initial_epoch))
        if os.path.exists(last_checkpoint_filepath):
            print('Init model ' + str(self) + ' from checkpoint: ' +
                  last_checkpoint_filepath)
            single_gpu_model.load_weights(last_checkpoint_filepath)

    self.training_config['initial_epoch'] = initial_epoch

    checkpoint_filepath = os.path.join(checkpoint_dir,
                                       'best_weight' + dir_suffix + '.h5')
    model_saver = RefModelCheckpoint(checkpoint_filepath,
                                     single_gpu_model,
                                     save_best_only=True,
                                     save_weights_only=True)

    # Also always save latest model for continued training.
    last_model_saver = RefModelCheckpoint(last_checkpoint_filepath,
                                          single_gpu_model,
                                          save_best_only=False,
                                          save_weights_only=True)

    # Construct all training callbacks.
    training_callbacks = [model_saver, last_model_saver, tboard_log_saver]
    if verbose_model_saver is not None:
        training_callbacks.append(verbose_model_saver)
    if self.callback_list is not None:
        for callback in self.callback_list:
            training_callbacks.append(callback.get_keras_callback())

    # Save current epoch.
    training_callbacks.append(current_epoch_wrapper.get_keras_callback())

    #####
    ## DEBUG Print some training variable before after checkpoint
    #global_vars = tf.global_variables()
    #print('[DEBUG]: First Weight Name = ' + str(global_vars[0].name))
    #print('[DEBUG]: First Weight = ' + str(sess.run(global_vars[0])))

    print('Start training.')
    '''
    with tf.Session(config = tf.ConfigProto(log_device_placement = False,
        allow_soft_placement=False)) as sess:
      init = tf.global_variables_initializer()
      sess.run(init)
      model.fit(x=x_feed, y=y_feed,
        batch_size=self.training_config['batch_size'],
        epochs=self.training_config['epochs'],
        validation_data=(x_valid_feed, y_valid_feed),
        callbacks=training_callbacks,
        initial_epoch=initial_epoch
      )
    '''
    # print(model.trainable_weights)
    model.fit(x=x_feed,
              y=y_feed,
              batch_size=self.training_config['batch_size'],
              epochs=self.training_config['epochs'],
              validation_data=(x_valid_feed, y_valid_feed),
              callbacks=training_callbacks,
              initial_epoch=initial_epoch)

    print('Finished training.')

    # Return trained model (single_gpu_model) and validation set as output.
    # They are used for further benchmarking like in federated training.
    return (single_gpu_model, x_valid_feed, y_valid_feed)