def _validate_path(path):
    """Resolve ``path`` to an absolute path and check that it exists.

    The given path is first passed through ``path_possibly_formatted``; if the
    result is not absolute it is converted with ``config.__abspath__``
    (presumably relative to the project root -- confirm against ``config``).

    :param path: path to validate, absolute or relative.
    :return: the resolved path.
    :raises ValueError: if the resolved path does not exist.
    """
    from config import __abspath__

    resolved = path_possibly_formatted(path)
    if not osp.isabs(resolved):
        resolved = __abspath__(resolved)

    if osp.exists(resolved):
        return resolved
    raise ValueError(f"Given path is invalid: {resolved}")
def ensure_web_app():
    """Create the web application from the deploy config and start it.

    Loads the deploy configuration, builds the app via ``web.get_webapp`` using
    the ``web`` section of that configuration, then calls ``async_run`` on it
    with host/port (and an SSL context when ``use_https`` is set).

    :return: the web app object returned by ``get_webapp``.
    """
    import os.path as osp
    from config import Path, __abspath__

    deploy_cfg = ConfigSerializer.load(Path.DeployConfigAbs)

    # Arguments for constructing the web app.
    app_kwargs = Params(upload_folder=None).cross_join(deploy_cfg.web)
    # NOTE: relative path should relate to project root, not webapp's
    upload_folder = app_kwargs.upload_folder
    if not osp.isabs(upload_folder):
        app_kwargs.upload_folder = __abspath__(upload_folder)

    # Imported lazily, only once the app is actually about to be built.
    from web import get_webapp
    webapp = get_webapp(**app_kwargs)

    # Arguments for running the web app; the config keys `local_ip` and
    # `local_port` are mapped onto `host` and `port`.
    run_defaults = Params(host="127.0.0.1", port="2020", ssl_context=None)
    run_kwargs = run_defaults.left_join(
        deploy_cfg.web, {"host": "local_ip", "port": "local_port"})
    if deploy_cfg.web.use_https:
        certfile = deploy_cfg.web.certfile_path
        keyfile = deploy_cfg.web.keyfile_path
        run_kwargs.ssl_context = (certfile, keyfile)

    webapp.async_run(**run_kwargs)
    return webapp
def model_train(model: object, data: object, **params):
    """Compile and fit a Keras model, with optional checkpoints and result output.

    NOTE: Common Keras pitfall: Keras applies ``validation_split`` before
    shuffling, so if (for example) the negative samples all sit at the end of
    ``data``, shuffle the data yourself beforehand.

    :param model: model to train; only ``tf.keras.Model`` instances are supported.
    :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair
        of such data if y is available.
    :param params: overrides for the default training parameters below
        (``optimizer``, ``loss``, ``metrics``, ``validation_split``, ``epochs``,
        ``batch_size``, ``checkpoint``, ``show_result``). The optional entries
        ``early_stop``, ``collect_batch_stats`` and ``save_model`` are also read.
    :return: the ``model`` that was passed in, after training.
    :raises TypeError: if ``model`` is not a ``tf.keras.Model``.
    """
    # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
    params_train = Params(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['acc'],
                          validation_split=0.1,
                          epochs=5,
                          batch_size=None,
                          checkpoint=Params(load_weights="latest",
                                            save_weights=Params(
                                                frequency="epoch",
                                                max_to_keep=5)),
                          show_result=Params()).update_to(params)
    x_train, y_train = ModelManager._validate_input(data)

    import tensorflow as tf  # IMPROVE: check availability of ml backends
    if not isinstance(model, tf.keras.Model):
        raise TypeError(f"Unsupported model type: {type(model)}")

    # 1.compile and load variables from checkpoint
    model.compile(**params_train.fromkeys(['optimizer', 'loss', 'metrics']))
    # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
    ckpt_dir, ckpt_path_to_load = None, None
    if params_train.checkpoint.format == "CKPT_dir":
        from config import __abspath__
        ckpt_dir = path_possibly_formatted(params_train.checkpoint.path)
        ckpt_dir = __abspath__(ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
        ensure_dir_exists(ckpt_dir)
        ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
    # NOTE: in delayed-build mode the weights are only determined after
    # build(batch_input_shape) or compile()+fit(x, y, batch_size) has been called.
    # ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
    if params_train.checkpoint.load_weights == "latest" \
            and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
            and ckpt_path_to_load is not None:
        model.load_weights(ckpt_path_to_load)

    # 2.prepare callbacks
    callbacks = []

    # callback :: save medium CKPT
    if params_train.checkpoint.save_weights.is_defined() and ckpt_dir is not None:
        ckpt_path_to_save = osp.join(ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
        # NOTE: if save_freq is not equal to 'epoch', which means num of steps,
        # it will be less reliable
        _params = Params(save_freq='epoch').left_join(
            params_train.checkpoint.save_weights,
            key_map={"save_freq": "frequency"})
        _callback = tf.keras.callbacks.ModelCheckpoint(
            ckpt_path_to_save,  # not checkpoint_dir
            save_weights_only=True,
            save_best_only=True,
            verbose=1,
            **_params)
        callbacks.append(_callback)

    # callback :: early stop
    if params_train.early_stop.is_defined():
        _params = Params(monitor='val_loss', patience=10).left_join(
            params_train.early_stop)
        _callback = tf.keras.callbacks.EarlyStopping(**_params)
        callbacks.append(_callback)

    # callback :: progress indicator / verbose
    # IMPROVE: use config for training verbose / progress indicator
    # (ProgbarLogger was tried here, but it cannot output in real time under PyTest.)
    def _format_metric(logs, name):
        # A metric may be absent from `logs` (e.g. the model was compiled with
        # a metric named differently from 'acc'); formatting None with ':.4f'
        # would raise TypeError and abort training.
        value = (logs or {}).get(name)
        return f"{name}=n/a" if value is None else f"{name}={value:.4f}"

    _callback = tf.keras.callbacks.LambdaCallback(
        on_batch_end=lambda batch, logs: INFO(
            f"batch{batch:05d}: "
            f"{_format_metric(logs, 'loss')},{_format_metric(logs, 'acc')}"))
    callbacks.append(_callback)

    cb_batch_stats = None
    if params_train.collect_batch_stats:
        # when only train several epochs, may collect stats of each batch
        # instead of the epoch average.
        class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
            def __init__(self):
                # Let the Keras base class initialise its own attributes.
                super().__init__()
                self.current_batch = 0
                self.batch = []
                self.loss = []
                self.acc = []

            def on_train_batch_end(self, batch, logs=None):
                logs = logs or {}
                self.batch.append(self.current_batch)
                # Missing metrics are recorded as None rather than raising.
                self.loss.append(logs.get('loss'))
                self.acc.append(logs.get('acc'))
                # Reset metric state after every batch; presumably so each
                # batch's logs are not averaged with earlier ones -- TODO
                # confirm the effect on the epoch-level history values.
                self.model.reset_metrics()
                self.current_batch += 1

        # TODO: can plot batch_loss and batch_acc using this
        cb_batch_stats = CallbackCollectBatchStats()
        callbacks.append(cb_batch_stats)

    # 3.train the model, and save checkpoints if configured
    # TODO: use model.fit_generator() for batch feeding.
    #       `steps_per_epoch` = np.ceil(samples / param.batch_size)
    # NOTE: core API for model training
    params_train_fit = params_train.fromkeys(
        ['validation_split', 'batch_size', 'epochs'])
    INFO(f"Beginning to train: {params_train_fit}")
    history = model.fit(x_train, y_train,
                        **params_train_fit,
                        callbacks=callbacks)  # == core ==
    if cb_batch_stats is not None:
        # accumulated batch number through epochs
        history.history['batch'] = cb_batch_stats.batch
        history.history['batch_loss'] = cb_batch_stats.loss
        history.history['batch_acc'] = cb_batch_stats.acc

    # 4.save checkpoint at last
    if params_train.save_model.is_defined() and ckpt_dir is not None:
        _params = Params(format="SavedModel").left_join(params_train.save_model)
        save_format, ckpt_path_to_save = None, None
        if _params.format == "HDF5":
            save_format = _ext = "h5"
            ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained.{_ext}")
        else:  # default=SavedModel
            save_format = "tf"
            ckpt_path_to_save = osp.join(ckpt_dir, "model_trained")
            ensure_dir_exists(ckpt_path_to_save)
        # IMPROVE: consider using tf.saved_model.save()
        # by default, TF2 saves as 'tf' (SavedModel)
        model.save(ckpt_path_to_save, save_format=save_format)

    # Optional: output history
    if params_train.show_result.is_defined():
        plot_history = None
        if params_train.show_result.plotter == 'matplot':
            from helpers.plt_helper import plot_history_by_metrics as plot_history
        if len(params_train.show_result.plotter) > 0 and plot_history is None:
            WARN(f"Unsupported history plotter: {params_train.show_result.plotter}")
        if plot_history is not None:
            plot_history(history, params_train.show_result.get('metrics', None))
        else:
            # Text fallback when no plotter is available. Report whatever
            # metrics were actually recorded: hard-coded 'accuracy'/'mse' keys
            # are absent with the default metrics=['acc'] and raised KeyError.
            batch_keys = ('batch', 'batch_loss', 'batch_acc')
            summary = ", ".join(
                f"{name}={values[-1]}"
                for name, values in history.history.items()
                if name not in batch_keys and len(values) > 0)
            INFO(f"Last epoch: {summary}")

    return model