def do_train(self):
    print("***************************start training***************************")
    save_callback = SaveCallback(save_path=conf.SAVE_DIR,
                                 backbone=conf.backbone,
                                 model=self.model,
                                 timestamp=self.timestamp,
                                 save_name=self.save_name)  # , validation_data=[x_test, y_test])
    early_stop_callback = callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=conf.early_stop_patience,
                                                  verbose=1,
                                                  mode='auto',
                                                  restore_best_weights=True)
    # Note: the old `epsilon` argument was renamed `min_delta` in tf.keras.
    reduce_lr_callback = callbacks.ReduceLROnPlateau(monitor='val_acc',
                                                     factor=conf.reduce_lr_factor,
                                                     patience=conf.reduce_lr_patience,
                                                     verbose=1,
                                                     mode='auto',
                                                     min_delta=0.0001,
                                                     cooldown=0,
                                                     min_lr=0.00001)
    tensorboard_callback = TensorBoard(log_dir=conf.OUT_DIR)
    callbacks_list = [save_callback, early_stop_callback, tensorboard_callback]
    # callbacks_list.append(reduce_lr_callback)

    if conf.FIT_GENERATE:
        self.model.fit(self.fit_gen.generate(),
                       epochs=conf.epochs,
                       steps_per_epoch=self.corpus_size // conf.batch_size,  # must be an integer
                       callbacks=callbacks_list,
                       validation_data=([self.x_test, self.y_test], self.y_test),
                       verbose=1)
    else:
        self.model.fit(x=[self.x_train, self.y_train],
                       y=self.y_train,
                       batch_size=conf.batch_size,
                       epochs=conf.epochs,
                       callbacks=callbacks_list,
                       validation_data=([self.x_test, self.y_test], self.y_test),
                       # validation_split=0.02,
                       verbose=1)
    print("***************************train done***************************")
def fit_model(model, x_train, y_train, x_valid, y_valid, ckpt_path):
    monitor = "val_loss"
    # Caution: clear_session() resets Keras' global state; since `model` is
    # built before this call, prefer clearing the session before model
    # construction instead.
    K.clear_session()
    history = model.fit(x=x_train,
                        y=y_train,
                        batch_size=16,
                        epochs=50,
                        verbose=1,
                        callbacks=[
                            callbacks.ModelCheckpoint(filepath=ckpt_path,
                                                      monitor=monitor,
                                                      verbose=2,
                                                      save_best_only=True,
                                                      save_weights_only=True),
                            callbacks.EarlyStopping(monitor=monitor,
                                                    min_delta=1e-4,
                                                    patience=25,
                                                    verbose=2),
                            callbacks.ReduceLROnPlateau(monitor=monitor,
                                                        factor=0.8,
                                                        patience=3,
                                                        verbose=2,
                                                        min_lr=1e-4)
                        ],
                        validation_data=(x_valid, y_valid))
    return history
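# A minimal, hypothetical usage sketch for fit_model above; the toy model,
# the random data, and the checkpoint filename are illustrative assumptions,
# not part of the original code.
import numpy as np
from tensorflow import keras

toy_model = keras.Sequential([keras.layers.Dense(1, input_shape=(8,))])
toy_model.compile(optimizer='adam', loss='mse')
x = np.random.rand(64, 8).astype('float32')
y = np.random.rand(64, 1).astype('float32')
history = fit_model(toy_model, x[:48], y[:48], x[48:], y[48:], 'toy.weights.h5')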
def trainForRotorTemp(train_frame, num_epochs):
    targets = ['pm', 'stator_yoke', 'stator_tooth', 'stator_winding']
    train_X = train_frame.drop(columns=['profile_id'] + targets)
    train_Y = train_frame[targets]
    test_X = train_X  # predictions below are made on the training features themselves
    n_cols = train_X.shape[1]

    model = keras.Sequential()
    model.add(keras.layers.Dense(len(train_X.columns) + 1, activation='relu',
                                 input_shape=(n_cols,)))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(len(train_Y.columns)))

    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                            patience=5, min_lr=0.001)
    # 'accuracy' is not meaningful for this regression task; track MAE instead.
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    model.fit(train_X, train_Y, validation_split=0.2, epochs=num_epochs,
              shuffle=True, callbacks=[reduce_lr])

    X_predictions = model.predict(test_X)
    XP = pd.DataFrame(X_predictions, columns=targets)
    print(XP.head())
def train(self, epoch, batch_size):
    print("Training the model...")
    # tensorboard = callbacks.TensorBoard(log_dir=self.LOG_DIR)
    reduceLR = callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                           factor=0.1,
                                           patience=3,
                                           verbose=1,
                                           mode='auto',
                                           min_delta=0.0001,
                                           cooldown=0,
                                           min_lr=0.000001)
    early = callbacks.EarlyStopping(monitor='val_accuracy',
                                    min_delta=0.0001,
                                    patience=5,
                                    mode='auto',
                                    restore_best_weights=True)
    self.model.fit([self.train_x[:, 0], self.train_x[:, 1], self.train_x[:, 2]],
                   self.train_y,
                   epochs=epoch,
                   batch_size=batch_size,
                   validation_split=.2,
                   shuffle=True,
                   callbacks=[reduceLR, early])
    self.model.save(self.SAVE_DIR + 'final.hdf5')
def model_train(self, params):
    clear_session()
    model = self.build_model(params)
    # `lr` was renamed `learning_rate` in tf.keras optimizers.
    adam = optimizers.Adam(learning_rate=params['LR'])
    model.compile(optimizer=adam, loss='mae', metrics=['accuracy'])
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                            factor=0.5,
                                            patience=params['LR_patience'],
                                            verbose=0,
                                            mode='min')
    earlystop = callbacks.EarlyStopping(monitor='val_loss',
                                        patience=params['ES_patience'],
                                        verbose=0,
                                        restore_best_weights=False,
                                        mode='min')
    model.fit(self.x_train, self.y_train,
              batch_size=256,
              epochs=250,
              validation_data=(self.x_valid, self.y_valid),
              verbose=0,
              callbacks=[reduce_lr, earlystop])
    return model
def get_callbacks(args, partition_idx):
    import tensorflow.keras.callbacks as bk
    # from CustomEarlyStopping import CustomEarlyStopping

    model_type = args.model_type
    timestamp = args.timestamp
    early_stop = args.early_stop

    t_name = args.weights_dir + '/tensorboard_logs/{}_{}_{}'.format(
        model_type, timestamp, partition_idx)
    t_name = t_name.replace('/', '\\')  # Correction for Windows paths

    callbacks = list()
    callbacks.append(None)  # Position for Checkpoint
    # CustomEarlyStopping(patience_loss=args.patience, patience_acc=10, threshold=.95)
    callbacks.append(bk.CSVLogger(args.weights_dir + '/log.csv'))
    # CustomEarlyStopping(patience_loss=10, threshold=0.95)
    callbacks.append(bk.TensorBoard(log_dir=t_name, histogram_freq=args.debug))
    if early_stop > 0:
        # TODO - Test multiple EarlyStopping
        callbacks.append(
            bk.EarlyStopping(monitor='val_loss', patience=early_stop, verbose=0))
        # callbacks.append(bk.EarlyStopping(monitor='val_accuracy', patience=early_stop, verbose=0))
    callbacks.append(
        bk.ReduceLROnPlateau(monitor='val_loss',
                             factor=.9,
                             patience=10,
                             min_lr=0.00001,
                             cooldown=0,
                             verbose=0))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * (args.lr_decay ** epoch)))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * math.cos(1 + ((epoch - 1 % (args.epochs / cycles))) / (args.epochs / cycles))))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.)))
    return callbacks
def train():
    # Define the optimizer
    opt = optimizers.Adam(lr)
    # Callbacks: overfitting guards, log collection, and automatic model saving
    callback = [
        callbacks.TensorBoard(log_dir=log_dir, update_freq='batch'),
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, patience=3),
        callbacks.EarlyStopping(monitor='loss', patience=4),
        callbacks.ModelCheckpoint(filepath=checkpoint_filepath, verbose=1),
        callbacks.ModelCheckpoint(filepath=save_path,
                                  monitor='val_categorical_accuracy',
                                  save_best_only=True,
                                  mode='max',
                                  verbose=1)
    ]
    # Distributed training
    with strategy.scope():
        model = make_model()
        if os.path.exists(save_path):
            model.load_weights(save_path)
        model.compile(optimizer=opt,
                      loss='categorical_crossentropy',
                      metrics=['categorical_accuracy'])
    model.fit(train_db, epochs=epochs, validation_data=test_db,
              callbacks=callback)  # Start training
    model.evaluate(test_db)  # Evaluate the model on the test set
    model.save(save_path)  # Save the final model
def train_by_fit(optimizer, loss, train_data, train_steps, validation_data, validation_steps):
    """
    Train with model.fit: gives the wall-clock time of the run and a cleaner
    way to attach callbacks.
    :param optimizer: the optimizer
    :param loss: a custom loss function
    :param train_data: training set wrapped with tf.data
    :param validation_data: validation set data
    :param train_steps: steps per training epoch
    :param validation_steps: same, for validation
    :return: None
    """
    cbk = [
        callbacks.ReduceLROnPlateau(verbose=1),
        callbacks.EarlyStopping(patience=10, verbose=1),
        callbacks.ModelCheckpoint('./model/yolov3_{val_loss:.04f}.h5',
                                  save_best_only=True,
                                  save_weights_only=True)
    ]

    model = yolo_body()
    model.compile(optimizer=optimizer, loss=loss)

    # initial_epoch can be used to resume earlier training
    model.fit(train_data,
              steps_per_epoch=max(1, train_steps),
              validation_data=validation_data,
              validation_steps=max(1, validation_steps),
              epochs=cfg.epochs,
              callbacks=cbk)
def get_callbacks(model_name):
    with open('Pathfile.txt', 'r') as myfile:
        filepath = myfile.read().split("\n")[0]

    tb_log_dir = os.path.join(filepath, 'Logs', model_name)
    lg_log_dir = os.path.join(filepath, 'History', model_name + '.csv')

    lg = callbacks.CSVLogger(lg_log_dir, separator=',', append=False)
    es = callbacks.EarlyStopping(monitor='loss',
                                 min_delta=0.0001,
                                 patience=40,
                                 verbose=1,
                                 mode='auto',
                                 restore_best_weights=True)
    # lr = callbacks.LearningRateScheduler(scheduler, verbose=1)
    # callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False,
    #                           save_weights_only=False, mode='auto', save_freq='epoch')
    rop = callbacks.ReduceLROnPlateau(monitor='loss',
                                      factor=0.3,
                                      patience=5,
                                      verbose=1,
                                      mode='auto',
                                      min_delta=0.001,
                                      cooldown=0,
                                      min_lr=0.00000001)
    tb = callbacks.TensorBoard(log_dir=tb_log_dir,
                               histogram_freq=0,
                               write_graph=False,
                               write_images=False,
                               update_freq='epoch',
                               profile_batch=0)
    # embeddings_freq=0, embeddings_metadata=None)
    return [es, rop, tb, lg]
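# The commented-out LearningRateScheduler above references a `scheduler`
# function that is not shown in this snippet. A minimal sketch of one
# plausible shape (the step-decay rule itself is an assumption):
def scheduler(epoch, lr):
    # Halve the learning rate every 20 epochs; otherwise leave it unchanged.
    return lr * 0.5 if epoch > 0 and epoch % 20 == 0 else lr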
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    train_dir = './dataset/train'
    val_dir = './dataset/validation'
    epochs = 200
    batch_size = 256
    lr = 2e-3
    class_name = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    num_classes = len(class_name)
    is_train = False

    train_image, train_label = read_data(train_dir, class_name)
    val_image, val_label = read_data(val_dir, class_name)
    train_step = len(train_image) // batch_size
    val_step = len(val_image) // batch_size

    train_dataset = make_datasets(train_image, train_label, batch_size, mode='train')
    val_dataset = make_datasets(val_image, val_label, batch_size, mode='train')

    model = SE_ResNet18(224, 224, num_classes)
    optimizer = optimizers.Adam(lr)
    lr_metric = get_lr_metric(optimizer)
    model.compile(optimizer=optimizer,
                  loss=losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy', lr_metric])

    cbk = [
        callbacks.ModelCheckpoint("./model_weights/SEResNet.h5",
                                  save_weights_only=True,
                                  save_best_only=True),
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2)
    ]

    if is_train:
        model.fit(train_dataset,
                  steps_per_epoch=train_step,
                  epochs=epochs,
                  validation_data=val_dataset,
                  validation_steps=val_step,
                  callbacks=cbk,
                  verbose=1)
    else:
        model.load_weights("./model_weights/SEResNet.h5")
        img_path = './dataset/dandelion.jpg'
        image, _ = val_parse(img_path, 0)
        pred = model.predict(tf.expand_dims(image, axis=0))[0]
        index = tf.argmax(pred).numpy()
        print("Predicted class: {}, probability: {:.03f}%".format(
            class_name[index], pred[index] * 100))
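# `get_lr_metric` is used above but not defined in this snippet. A common
# implementation (an assumption here, not the original) exposes the
# optimizer's current learning rate as an extra metric, so the effect of
# ReduceLROnPlateau shows up in the training logs:
def get_lr_metric(optimizer):
    def lr(y_true, y_pred):
        # Report the optimizer's live learning-rate variable each batch.
        return optimizer.learning_rate
    return lr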
def init_callbacks(self):
    self.callbacks.append(
        callbacks.ReduceLROnPlateau(**self.config.trainer.reduce_lr_on_plateau))
    if self.config.trainer.tensorboard_enabled:
        self.callbacks.append(
            callbacks.LambdaCallback(
                # on_epoch_begin receives (epoch, logs), not (epoch, loss)
                on_epoch_begin=lambda epoch, logs: self.log_lr(epoch)))
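# `log_lr` is not shown in this class; a plausible sketch (the attribute
# names are assumptions) reads the optimizer's current learning rate so the
# LambdaCallback above can print it at the start of each epoch:
import tensorflow as tf

def log_lr(self, epoch):
    lr = float(tf.keras.backend.get_value(self.model.optimizer.learning_rate))
    print('Epoch {}: learning rate = {:.6g}'.format(epoch + 1, lr))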
def cb(model_weights):
    early_stopping = tkc.EarlyStopping(monitor='val_loss', patience=20, verbose=2)
    checkpointer = tkc.ModelCheckpoint(filepath=model_weights,
                                       verbose=2,
                                       save_weights_only=True,
                                       monitor='val_accuracy',
                                       mode='max',
                                       save_best_only=True)
    lr_reduction = tkc.ReduceLROnPlateau(monitor='val_loss',
                                         verbose=2,
                                         patience=5,
                                         factor=.8)
    return [checkpointer, early_stopping, lr_reduction]
def set_reducer():
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_cls_position_loss',
                                            factor=0.9,
                                            verbose=1,
                                            patience=8,
                                            cooldown=3,
                                            min_lr=config.DECAY)
    return reduce_lr
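# 'val_cls_position_loss' is the name Keras auto-generates for the validation
# loss of an output layer named 'cls_position' in a multi-output model. A
# minimal sketch of a model that would emit this metric (the shapes and the
# second head are assumptions):
from tensorflow import keras

inputs = keras.Input(shape=(64,))
cls_position = keras.layers.Dense(4, name='cls_position')(inputs)
cls_label = keras.layers.Dense(10, name='cls_label')(inputs)
demo_model = keras.Model(inputs, [cls_position, cls_label])
demo_model.compile(optimizer='adam',
                   loss={'cls_position': 'mse',
                         'cls_label': 'sparse_categorical_crossentropy'})
# With validation data, fit() reports 'val_cls_position_loss', which the
# reducer returned by set_reducer() monitors.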
def model_callbacks(weights):
    checkpoint = tkc.ModelCheckpoint(filepath=weights, verbose=2, save_best_only=True)
    lr_reduce = tkc.ReduceLROnPlateau(monitor='val_loss',
                                      patience=5,
                                      verbose=2,
                                      factor=.8,
                                      min_lr=1e-6)  # was .1**6, i.e. 1e-6
    estop = tkc.EarlyStopping(monitor='val_loss', patience=20, verbose=2)
    return [checkpoint, lr_reduce, estop]
def cb(fw):
    early_stopping = tkc.EarlyStopping(monitor='val_loss', patience=10, verbose=2)
    checkpointer = tkc.ModelCheckpoint(filepath=fw, save_best_only=True, verbose=2)
    lr_reduction = tkc.ReduceLROnPlateau(monitor='val_loss',
                                         verbose=2,
                                         patience=5,
                                         factor=.75)
    return [checkpointer, early_stopping, lr_reduction]
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    train_dir = r'C:\Software\Code\Work_Python\BasicNet\dataset\train'
    val_dir = r'C:\Software\Code\Work_Python\BasicNet\dataset\validation'
    epochs = 50
    batch_size = 4
    lr = 1e-4
    num_classes = 5
    resolution = 224

    train_data = read_data(train_dir)
    val_data = read_data(val_dir)
    train_step = len(train_data) // batch_size
    val_step = len(val_data) // batch_size

    train_dataset = get_batch_data(train_data, resolution, num_classes, batch_size, mode='train')
    val_dataset = get_batch_data(val_data, resolution, num_classes, batch_size, mode='validation')

    model = efn.EfficientNetB0(alpha=1.0, beta=1.0, r=resolution, classes=num_classes)
    model.compile(optimizer=optimizers.Adam(lr),
                  loss=losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    cbk = [
        callbacks.ModelCheckpoint("Efn-B0.h5", save_weights_only=True, save_best_only=True),
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=4)
    ]

    model.fit(train_dataset,
              steps_per_epoch=train_step,
              epochs=epochs,
              validation_data=val_dataset,
              validation_steps=val_step,
              callbacks=cbk,
              verbose=1)
def init_callbacks(self):
    self.callbacks.append(
        callbacks.ReduceLROnPlateau(**self.config.trainer.reduce_lr_on_plateau))
    if "model_checkpoint" in self.config.trainer:
        self.callbacks.append(
            callbacks.ModelCheckpoint(save_weights_only=True,
                                      **self.config.trainer.model_checkpoint))
    self.callbacks.append(
        callbacks.LambdaCallback(
            # on_epoch_begin receives (epoch, logs), not (epoch, loss)
            on_epoch_begin=lambda epoch, logs: self.log_lr(epoch)))
def fit(self, train_x, train_y, val_x, val_y, epochs, batch_size, verbose):
    # 'val_logloss' assumes the model was compiled with a metric named 'logloss'.
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_logloss',
                                            factor=0.3,
                                            patience=5,
                                            mode='min',
                                            min_lr=5e-6)
    early_stopping = callbacks.EarlyStopping(monitor='val_logloss',
                                             min_delta=5e-6,
                                             patience=5,
                                             mode='min',
                                             restore_best_weights=True)
    history = self.model.fit(train_x, train_y,
                             epochs=epochs,
                             validation_data=(val_x, val_y),
                             batch_size=batch_size,
                             verbose=verbose,
                             callbacks=[reduce_lr, early_stopping])
    return history
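# Monitoring 'val_logloss' only works if the model was compiled with a metric
# named 'logloss'; that metric is not shown in this snippet. One way to
# provide it (binary cross-entropy is an assumption about the task):
import tensorflow as tf

def logloss(y_true, y_pred):
    # The function's name becomes the metric name, so fit() logs 'val_logloss'.
    return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))

# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[logloss])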
def run_task():
    training_generator, validation_generator, test_generator = data_generator(data_h5)
    output_path = "./model.h5"
    model = build_model(return_sequences=True,
                        num_feat=33,
                        nb_filters=12,
                        kernel_size=3,
                        dilations=[2**i for i in range(6)],
                        nb_stacks=1,
                        max_len=39,
                        lr=0.001,
                        dropout_rate=0.2,
                        use_layer_norm=True,
                        use_batch_norm=False,
                        use_skip_connections=True)
    model.summary()

    callback = [
        callbacks.ModelCheckpoint(output_path, verbose=1, save_best_only=True),
        callbacks.ReduceLROnPlateau(factor=0.5, patience=20, min_lr=0.0001),
        callbacks.EarlyStopping(patience=20)
    ]
    model.fit(x=training_generator,
              epochs=200,
              callbacks=callback,
              validation_data=validation_generator)

    model = load_model(output_path, custom_objects={'TCN': TCN})
    nmodel = NNmodel(model=model)
    data_list = [{
        "data_generator": training_generator,
        "set_key": "train_tf",
        "display_name": "TF Train"
    }, {
        "data_generator": validation_generator,
        "set_key": "validation_tf",
        "display_name": "TF Validation"
    }, {
        "data_generator": test_generator,
        "set_key": "test_tf",
        "display_name": "TF Test"
    }]
    metrics = nmodel.evaluate(data_list=data_list)
    print(metrics)
def trainVAE(x_train, epochs, save_path, z_dim, batch_size=64):
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    print(x_train.shape)

    # Set model
    encoder, decoder, vae = build_vae(x_train, z_dim)
    vae.summary()

    # Custom vae_loss; note that it re-runs the encoder/decoder on x and
    # shadows the `rec_x` argument Keras passes in (the model's own output).
    def vae_loss(x, rec_x):
        z_mean, z_log_var, z = encoder(x)
        # 1. reconstruction loss
        rec_x = decoder(z)
        rec_loss = tf.keras.losses.binary_crossentropy(x, rec_x)
        rec_loss = tf.reduce_mean(rec_loss)
        rec_loss *= (128 * 64)  # scale by the number of pixels
        # 2. KL divergence loss
        kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
        kl_loss = -0.5 * tf.reduce_mean(kl_loss)
        return rec_loss + kl_loss

    # Compile with the custom loss
    vae.compile(optimizer='adam', loss=vae_loss)

    # Set callbacks
    ckp = callbacks.ModelCheckpoint(filepath=save_path + '/model.h5',
                                    monitor='loss',
                                    verbose=1,
                                    save_best_only=True)
    csv_logger = callbacks.CSVLogger(save_path + '/logger.csv')
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='loss',
                                            factor=0.2,
                                            patience=5,
                                            min_lr=1e-5)

    # Train
    history = vae.fit(x_train, x_train,
                      epochs=epochs,
                      batch_size=batch_size,
                      callbacks=[ckp, reduce_lr, csv_logger])

    # Plotting
    plot_loss(history, save_path)
    plot_model(encoder, to_file=save_path + '/vae_encoder.png', show_shapes=True)
    plot_model(decoder, to_file=save_path + '/vae_decoder.png', show_shapes=True)
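# A hypothetical call to trainVAE: the 128x64 input size matches the
# `rec_loss *= (128 * 64)` pixel-count scaling above, but the random data,
# channel count, and hyperparameters are placeholders for illustration only.
import numpy as np

x_demo = np.random.rand(256, 128, 64, 1).astype('float32')
trainVAE(x_demo, epochs=5, save_path='./vae_demo', z_dim=16, batch_size=32)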
def run_task():
    training_generator, validation_generator, test_generator = data_generator(
        data_h5, Num_classes, batch_size)
    output_path = "models/model.h5"
    model = build_model(num_feat=33,
                        data_len=39,
                        nb_filters=3,
                        lr=0.001,
                        kernel_size=[20, 30, 40],
                        nb_stacks=3,
                        dropout_rate=0.12)
    model.summary()

    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    callback = [
        callbacks.ModelCheckpoint(output_path, verbose=1, save_best_only=True),
        callbacks.ReduceLROnPlateau(factor=0.5, patience=10, min_lr=0.0001),
        callbacks.EarlyStopping(patience=50),
        callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    ]
    model.fit(x=training_generator,
              epochs=200,
              callbacks=callback,
              validation_data=validation_generator)

    model = load_model(output_path, custom_objects={'inception': inception})
    nmodel = NNmodel(model=model)
    data_list = [{
        "data_generator": training_generator,
        "set_key": "train_tf",
        "display_name": "TF Train"
    }, {
        "data_generator": validation_generator,
        "set_key": "validation_tf",
        "display_name": "TF Validation"
    }, {
        "data_generator": test_generator,
        "set_key": "test_tf",
        "display_name": "TF Test"
    }]
    metrics = nmodel.evaluate(data_list=data_list)
    print(metrics)
def main():
    parser = argparse.ArgumentParser(description='Running Settings')
    parser.add_argument('--model',
                        help='valid options: Vgg, ResNet and Squeeze Excitation models',
                        required=True)
    parser.add_argument('--batch', help='batch size', type=int, default=32)
    parser.add_argument('--data', help='path where the data is stored', default='data')
    args = parser.parse_args()

    if MODELS.get(args.model) is None:
        raise ValueError("Model Does not Exist")

    builder = DatasetBuilder(args.data, shape=(256, 256))
    builder()

    data_train = TFRecordDataset(join(args.data, 'train.records'))
    data_train = data_train.map(builder.decode)
    data_train = data_train.map(builder.augmentation)
    data_train = data_train.shuffle(7000)
    data_train = data_train.batch(batch_size=args.batch)

    data_test = TFRecordDataset(join(args.data, 'test.records'))
    data_test = data_test.map(builder.decode)
    data_test = data_test.batch(batch_size=args.batch)

    model = MODELS.get(args.model)()
    model.build((1, 256, 256, 3))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    log_dir = join('logs', args.model)
    tensor_board_callback = callbacks.TensorBoard(log_dir=log_dir)
    model_checkpoint = callbacks.ModelCheckpoint('models/{}.h5'.format(args.model),
                                                 save_best_only=True)
    reduce_lr = callbacks.ReduceLROnPlateau(factor=0.2, patience=5, min_lr=1e-6)
    early_stop = callbacks.EarlyStopping(patience=10)
    _callbacks = [model_checkpoint, reduce_lr, early_stop, tensor_board_callback]

    model.fit(data_train, epochs=100, validation_data=data_test, callbacks=_callbacks)
def main():
    units = 128   # number of LSTM units
    epochs = 150  # training epochs
    model = MyRNN(units)
    log_dir = "logs/"

    # Reduce the learning rate on plateau; after enough epochs this helps the
    # model fit the data better
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss',  # watch the validation loss
        factor=0.8,          # multiply the learning rate by 0.8 when triggered
        min_delta=0.1,
        patience=10,         # trigger after 10 epochs without val_loss improvement
        verbose=1)

    # Save weights automatically every 30 epochs (note: `period` is deprecated
    # in newer tf.keras; use `save_freq` there)
    checkpoint_period = callbacks.ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=30)

    # Optional early stopping: when val_loss stops improving, the model is
    # essentially trained and the run can be halted (defined here but not
    # passed to fit below)
    early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                             min_delta=0.05,
                                             patience=20,
                                             verbose=1)

    # Compile: initial learning rate 0.01, MSE loss
    model.compile(optimizer=keras.optimizers.Adam(0.01),
                  loss='mse',
                  metrics=['mse'])
    # model.build(input_shape=(None, 25, 102))
    # model.load_weights(log_dir + 'last1.h5')  # resume from earlier weights

    # Train the model
    history = model.fit(train_db,
                        epochs=epochs,
                        validation_data=val_db,
                        callbacks=[reduce_lr, checkpoint_period])
    model.save_weights(log_dir + 'last1.h5')  # save the final weights
    model.summary()

    # Plot training and validation MSE over the course of training
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
def Train_Epochs(self, train_x, train_y, epochs=10000):
    # Note: reduce_lr is created but not passed to fit() below, so it has no effect.
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='loss',
                                            factor=0.2,
                                            patience=5,
                                            min_lr=0.001,
                                            verbose=1)
    # self.model.fit(train_x, train_y, epochs=epochs, batch_size=10000,
    #                callbacks=[self.tensorboard_callback])
    self.model.fit(train_x, train_y, epochs=epochs, batch_size=20000)

    if time.time() - self.last_save_time > 60:
        self.last_save_time = time.time()
        if self.save_model_after_epoch:
            self.model.save_weights(self.model_path)
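# The time-based save above only runs once, after fit() returns. If the goal
# is to checkpoint at most once per minute *during* training, a small custom
# callback can do it; this is an alternative sketch, not part of the original:
import time
from tensorflow.keras import callbacks

class TimedCheckpoint(callbacks.Callback):
    """Saves model weights at most once every `interval_sec` seconds."""

    def __init__(self, model_path, interval_sec=60):
        super().__init__()
        self.model_path = model_path
        self.interval_sec = interval_sec
        self.last_save_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        if time.time() - self.last_save_time > self.interval_sec:
            self.last_save_time = time.time()
            # `self.model` is attached automatically by Keras during fit().
            self.model.save_weights(self.model_path)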
def train(model, train_db, val_db, epochs):
    """Train the ML model for a given training and validation set"""
    reduce_lr = callbacks.ReduceLROnPlateau(monitor="loss", factor=0.5, patience=10)
    model_checkpoint_callback = callbacks.ModelCheckpoint(
        filepath=MODEL_CHECKPOINT, monitor="loss", save_best_only=True)
    model.fit(
        train_db,
        epochs=epochs,
        validation_data=val_db,
        validation_freq=1,
        callbacks=[reduce_lr, model_checkpoint_callback],
    )
    print("Finished training!")
def train(model, x, y, batch_size=64, epochs=40, tag=None):
    """
    Fits @model with the given data.
    WARNING: this can be lengthy on a non-GPU local computer
    """
    # get before/after weights (make sure there is a change)
    untrained_weights = np.array(model.get_layer(index=1).get_weights()[1])

    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                            factor=0.2,
                                            patience=3,
                                            min_lr=0.0001,
                                            verbose=1)
    early_stop = callbacks.EarlyStopping(monitor="val_loss", verbose=1, patience=5)
    loss = model.fit(x, y,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_split=0.15,
                     callbacks=[reduce_lr, early_stop])
    trained_weights = np.array(model.get_layer(index=1).get_weights()[1])

    # plot the loss
    plt.figure()
    plt.plot(loss.history['loss'], label='loss')
    plt.plot(loss.history['val_loss'], label='val_loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.title("Evolution of loss per epoch")
    plt.grid(True)
    plt.legend()
    if tag:
        plt.savefig(evaluation_path + tag + "_loss.png", dpi=300)
    plt.show()

    weight_diff = trained_weights - untrained_weights
    # was `np.all(weight_diff) == 0`, which tests the wrong condition
    if np.all(weight_diff == 0):
        print("Training does not seem to have changed the weights. "
              "Something might have gone wrong.")
    else:
        print("Model was trained successfully.")
def get_default_callbacks(
        model_path: Path,
        monitor: str = 'val_acc',
        base_patience: int = 3,
        lr_reduce_factor: float = 0.5,
        min_lr: float = 1e-7,
        verbose: int = 1,
        log_dir: Path = None,
        gradients: bool = True,  # change to False
        confusion_matrix: bool = True,  # change to False
        loss: Callable = None,
        data: Tuple[np.ndarray, np.ndarray] = None,
        classes: list = None,
        heatmap_options: dict = None,
        csv_logdir: Path = None,
        csv_append: bool = False,
        save_latest: bool = False):
    callbacks = [
        clb.ReduceLROnPlateau(monitor=monitor,
                              factor=lr_reduce_factor,
                              min_lr=min_lr,
                              patience=base_patience,
                              verbose=verbose),
        clb.EarlyStopping(monitor=monitor,
                          patience=(2 * base_patience + 1),
                          verbose=verbose),
        clb.ModelCheckpoint(monitor=monitor,
                            filepath=model_path,
                            save_best_only=True,
                            verbose=verbose)
    ]
    if log_dir:
        callbacks.append(
            ExtendedTensorBoard(log_dir, gradients, confusion_matrix, loss,
                                data, classes, heatmap_options))
    if csv_logdir:
        callbacks.append(clb.CSVLogger(csv_logdir, append=csv_append))
    if save_latest:
        latest_path = model_path.parent / f'{model_path.stem}_latest{model_path.suffix}'
        callbacks.append(clb.ModelCheckpoint(monitor=monitor, filepath=latest_path))
    return callbacks
def get_callbacks(model_path,
                  save_weights_only=False,
                  lr_sched=None,
                  tensorboard_log_dir=None,
                  reduce_lr_on_plateau=False,
                  monitor='val_loss',
                  verbose=1):
    """Returns a list of callbacks used for training

    Args:
        model_path: (str) path for the h5 model file.
        save_weights_only: (bool) if True, then only the model's weights
            will be saved.
        lr_sched (function): learning rate scheduler per epoch,
            from deepcell.utils.train_utils.rate_scheduler.
        tensorboard_log_dir (str): log directory for tensorboard.
        reduce_lr_on_plateau (bool): whether to add a ReduceLROnPlateau callback.
        monitor (str): quantity to monitor.
        verbose (int): verbosity mode, 0 or 1.

    Returns:
        list: a list of callbacks to be passed to model.fit()
    """
    cbs = [
        callbacks.ModelCheckpoint(model_path,
                                  monitor=monitor,
                                  save_best_only=True,
                                  verbose=verbose,
                                  save_weights_only=save_weights_only),
    ]

    if lr_sched:
        cbs.append(callbacks.LearningRateScheduler(lr_sched))

    if reduce_lr_on_plateau:
        cbs.append(
            callbacks.ReduceLROnPlateau(monitor=monitor,
                                        factor=0.1,
                                        patience=10,
                                        verbose=verbose,
                                        mode='auto',
                                        min_delta=0.0001,
                                        cooldown=0,
                                        min_lr=0))

    if tensorboard_log_dir:
        cbs.append(callbacks.TensorBoard(log_dir=tensorboard_log_dir))

    return cbs
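# The docstring above points to deepcell.utils.train_utils.rate_scheduler for
# `lr_sched`. A self-contained stand-in with the same epoch -> learning-rate
# shape (the decay constants are assumptions, not deepcell's defaults):
def example_rate_scheduler(epoch):
    # LearningRateScheduler calls this once per epoch with the epoch index.
    return 0.01 * (0.95 ** epoch)

# cbs = get_callbacks('model.h5', lr_sched=example_rate_scheduler)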
def get_callbacks(path_train_log, path_checkpoint):
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                            min_lr=1e-6,
                                            factor=0.5,
                                            patience=3,
                                            verbose=1,
                                            mode='auto')
    early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                             patience=30,
                                             verbose=1,
                                             min_delta=0.001)
    csv_logger = callbacks.CSVLogger(path_train_log)
    checkpointer = callbacks.ModelCheckpoint(filepath=path_checkpoint,
                                             save_best_only=True,
                                             save_weights_only=True)
    return [reduce_lr, early_stopping, csv_logger, checkpointer]
def train():
    x_train, y_train = get_dataset()
    print(x_train.shape)
    print(y_train.shape)

    model.compile(optimizer=optimizers.Adam(5e-5), loss='mean_squared_error')
    model.summary()
    model.fit(x_train, y_train,
              batch_size=2048,
              epochs=1000,
              verbose=1,
              validation_split=0.1,
              callbacks=[callbacks.ReduceLROnPlateau(monitor='loss', patience=10),
                         callbacks.EarlyStopping(monitor='loss', patience=15, min_delta=1e-4)])
    model.save(root + '/models/model.h5')