def _validate_path(path):
    """
    Resolve `path` to an absolute location and make sure it exists.

    The value is first passed through `path_possibly_formatted`; a relative
    result is then converted with `config.__abspath__`.

    :param path: the path to check.
    :return: the resolved path.
    :raises ValueError: if the resolved path does not exist.
    """
    from config import __abspath__

    resolved = path_possibly_formatted(path)
    if not osp.isabs(resolved):
        resolved = __abspath__(resolved)
    if osp.exists(resolved):
        return resolved
    raise ValueError(f"Given path is invalid: {resolved}")
def model_train(model: object, data: object, **params):
    """
    Compile and fit a Keras model, optionally restoring and saving checkpoints.

    NOTE: common Keras pitfall: Keras applies `validation_split` before shuffling, so if the
    samples in `data` are ordered (e.g. negative samples placed last) shuffle them yourself first.

    :param model: the model to train; only `tf.keras.Model` instances are supported.
    :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
    :param params: overrides for the defaults built below (`optimizer`, `loss`, `metrics`,
        `validation_split`, `epochs`, `batch_size`, `checkpoint`, `show_result`). The keys
        `early_stop`, `collect_batch_stats`, `save_model` and `checkpoint.format/path/signature`
        are also read when present.
    :return: the same `model` object, after training.
    :raises TypeError: if `model` is not a `tf.keras.Model`.
    """
    # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
    params_train = Params(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['acc'],
                          validation_split=0.1,
                          epochs=5,
                          batch_size=None,
                          checkpoint=Params(load_weights="latest",
                                            save_weights=Params(frequency="epoch", max_to_keep=5)),
                          show_result=Params()).update_to(params)
    x_train, y_train = ModelManager._validate_input(data)

    import tensorflow as tf  # IMPROVE: check availability of ml backends
    if not isinstance(model, tf.keras.Model):
        raise TypeError(f"Unsupported model type: {type(model)}")

    def _fmt_stat(value):
        # A metric may be missing from `logs` (e.g. `metrics` overridden); formatting None
        # with ':.4f' would raise TypeError and abort the training.
        return "n/a" if value is None else f"{value:.4f}"

    def _last_value(hist, *keys):
        # Last recorded value of the first key that is present and non-empty, else None.
        for key in keys:
            values = hist.get(key)
            if values:
                return values[-1]
        return None

    # 1.compile and load variables from checkpoint
    model.compile(**params_train.fromkeys(['optimizer', 'loss', 'metrics']))
    # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
    ckpt_dir, ckpt_path_to_load = None, None
    if params_train.checkpoint.format == "CKPT_dir":
        from config import __abspath__
        ckpt_dir = path_possibly_formatted(params_train.checkpoint.path)
        if not osp.isabs(ckpt_dir):
            ckpt_dir = __abspath__(ckpt_dir)
        ensure_dir_exists(ckpt_dir)
        ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
    # NOTE: in delayed-build mode the weights are only determined after build(batch_input_shape)
    # or compile()+fit(x,y,batch_size) has been called.
    # ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
    if params_train.checkpoint.load_weights == "latest" \
            and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
            and ckpt_path_to_load is not None:
        model.load_weights(ckpt_path_to_load)

    # 2.prepare callbacks
    callbacks = []
    # callback :: save medium CKPT
    if params_train.checkpoint.save_weights.is_defined() and ckpt_dir is not None:
        ckpt_path_to_save = osp.join(ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
        # NOTE: if save_freq is not equal to 'epoch', which means num of steps, it will be less reliable
        _params = Params(save_freq='epoch').left_join(params_train.checkpoint.save_weights,
                                                      key_map={"save_freq": "frequency"})
        _callback = tf.keras.callbacks.ModelCheckpoint(
            ckpt_path_to_save,  # not checkpoint_dir
            save_weights_only=True,
            save_best_only=True,
            verbose=1,
            **_params)
        callbacks.append(_callback)
    # callback :: early stop
    if params_train.early_stop.is_defined():
        _params = Params(monitor='val_loss', patience=10).left_join(params_train.early_stop)
        callbacks.append(tf.keras.callbacks.EarlyStopping(**_params))

    # callback :: progress indicator / verbose
    # IMPROVE: use config for training verbose / progress indicator
    # (ProgbarLogger cannot output in real time under PyTest, hence the lambda below)
    def _log_batch(batch, logs):
        logs = logs or {}
        INFO(f"batch{batch:05d}: loss={_fmt_stat(logs.get('loss'))},acc={_fmt_stat(logs.get('acc'))}")

    callbacks.append(tf.keras.callbacks.LambdaCallback(on_batch_end=_log_batch))

    cb_batch_stats = None
    if params_train.collect_batch_stats:
        # when only train several epochs, may collect stats of each batch instead of the epoch average.
        class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
            def __init__(self):
                super().__init__()  # let the Keras base class set up its own attributes
                self.current_batch = 0
                self.batch = []
                self.loss = []
                self.acc = []

            def on_train_batch_end(self, batch, logs=None):
                logs = logs or {}  # `logs` defaults to None; missing metrics are recorded as None
                self.batch.append(self.current_batch)
                self.loss.append(logs.get('loss'))
                self.acc.append(logs.get('acc'))
                # presumably reset so that each batch reports its own stats -- TODO confirm
                self.model.reset_metrics()
                self.current_batch += 1

        cb_batch_stats = CallbackCollectBatchStats()  # TODO: can plot batch_loss and batch_acc using this
        callbacks.append(cb_batch_stats)
    if len(callbacks) == 0:
        callbacks = None

    # 3.train the model, and save checkpoints if configured
    # TODO: use model.fit_generator() for batch feeding. `steps_per_epoch` = np.ceil(samples / param.batch_size)
    # NOTE: core API for model training
    params_train_fit = params_train.fromkeys(['validation_split', 'batch_size', 'epochs'])
    INFO(f"Beginning to train: {params_train_fit}")
    history = model.fit(x_train, y_train, **params_train_fit, callbacks=callbacks)  # == core ==
    if cb_batch_stats is not None:
        history.history['batch'] = cb_batch_stats.batch  # accumulated batch number through epochs
        history.history['batch_loss'] = cb_batch_stats.loss
        history.history['batch_acc'] = cb_batch_stats.acc

    # 4.save checkpoint at last
    if params_train.save_model.is_defined() and ckpt_dir is not None:
        _params = Params(format="SavedModel").left_join(params_train.save_model)
        if _params.format == "HDF5":
            save_format = "h5"
            ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained.{save_format}")
        else:  # default=SavedModel
            save_format = "tf"
            ckpt_path_to_save = osp.join(ckpt_dir, "model_trained")
            # NOTE(review): directory creation is applied to the SavedModel target only (nesting
            # reconstructed from a whitespace-collapsed source) -- confirm.
            ensure_dir_exists(ckpt_path_to_save)
        # IMPROVE: consider using tf.saved_model.save()
        model.save(ckpt_path_to_save, save_format=save_format)  # by default, TF2 saves as 'tf' (SavedModel)

    # Optional: output history
    if params_train.show_result.is_defined():
        plot_history = None
        if params_train.show_result.plotter == 'matplot':
            from helpers.plt_helper import plot_history_by_metrics as plot_history
        if params_train.show_result.plotter.__len__() > 0 and plot_history is None:
            WARN(f"Unsupported history plotter: {params_train.show_result.plotter}")
        if plot_history is not None:
            plot_history(history, params_train.show_result.get('metrics', None))
        else:
            # Text fallback. The history keys depend on the configured metrics (the default
            # `metrics=['acc']` records 'acc', not 'accuracy'), so look them up defensively
            # instead of indexing, which raised KeyError.
            hist = history.history
            INFO(f"Last epoch: "
                 f"ACC(train,val)=({_last_value(hist, 'accuracy', 'acc')}, "
                 f"{_last_value(hist, 'val_accuracy', 'val_acc')}), "
                 f"MSE(train,val)=({_last_value(hist, 'mse')}, {_last_value(hist, 'val_mse')})")
    return model
def model_predict(model: object, data: object, **params) -> object:
    """
    Run `model` on `data`, optionally decode the raw outputs and log/plot the results.

    NOTE(review): another `model_predict` with the same signature is defined later in this
    source; if both live in the same scope the later one shadows this one -- confirm.
    NOTE(review): the nesting below was reconstructed from a whitespace-collapsed source;
    the `else` pairings marked with NOTE(review) should be confirmed.

    :param model: a `tf.keras.Model` (its `predict` is called) or any callable applied to the inputs.
    :param data: passed to `ModelManager._validate_input`, which yields the `(x, y)` pair used below.
    :param params: overrides for the defaults built below (`decode_prediction`, `show_result`).
    :return: the (possibly decoded) predictions, or None when they are blank.
    :raises TypeError: for an unsupported model type, or an unsupported prediction type while decoding.
    :raises ValueError: for an unknown `decode_prediction.name`.
    """
    params_predict = Params(
        decode_prediction=Params(name='logits_to_index'),
        show_result=Params(top_k=20, only_difference=True)).update_to(params)
    predictions = None
    x, y = ModelManager._validate_input(data)

    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        elif callable(model):
            # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
            if isinstance(inputs, tf.data.Dataset):
                # IMPROVE: stack result as a tensor
                # the callable is applied element by element, then the outputs are stacked
                result = []
                for t in inputs:
                    result.append(model(t))
                return tf.stack(result)
            else:
                return model(inputs)
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return None

    # Optional decoding of the raw model output, selected by `decode_prediction.name`.
    if params_predict.decode_prediction.is_defined():
        if params_predict.decode_prediction.name == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(
                    f"Unsupported type for logits_to_index: {type(predictions)}"
                )
        elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get(
                'top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(
                    f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}"
                )
        else:
            raise ValueError(
                f"Unsupported result decoding: {params_predict.decode_prediction.name}"
            )
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    # Optional reporting; the detailed path only handles ndarray predictions.
    if params_predict.show_result.is_defined() and isinstance(
            predictions, np.ndarray):
        x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
        if params_predict.show_result.only_difference:
            if hasattr(y_show, '__len__'):
                if p_show.__len__() == y_show.__len__():
                    # keep only the samples whose prediction differs from the target
                    differences = p_show != y_show
                    x_show, p_show, y_show = x_show[differences], p_show[
                        differences], y_show[differences]
                else:
                    WARN(
                        f"Cannot dump differences: len of targets is not same as predictions"
                        f"({y_show.__len__()} vs {p_show.__len__()})")
            else:
                WARN(
                    f"Cannot dump differences: unsupported y type(={type(y_show)})"
                )
            # NOTE(review): this is logged even when the comparison above was skipped, in which
            # case len(p_show) is the number of all predictions -- confirm intent.
            INFO(
                f"Number of mismatch between prediction and truth: {len(p_show)}"
            )
        if params_predict.show_result.get('top_k', None) is not None:
            top_k = params_predict.show_result.top_k
            # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
            x_show, p_show, y_show = (safe_slice(_, end=top_k)
                                      for _ in (x_show, p_show, y_show))
        if len(p_show) > 0:
            # one text label per shown sample: "p" alone, or "(p vs y)" when a target exists
            dumps = []
            for i, p in enumerate(p_show):
                if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                    dumps.append(f"{p}")
                else:
                    dumps.append(f"({p} vs {y_show[i]})")
            need_to_show = params_predict.show_result.plotter.__len__() > 0
            need_to_save = params_predict.show_result.save_path.__len__(
            ) > 0
            only_save = params_predict.show_result.only_save
            if need_to_show or need_to_save:

                def denormalize(x):
                    # scale by 255 and cast to int32
                    x = x * 255
                    if hasattr(x, 'astype'):  # np.ndarray
                        return x.astype(np.int32)
                    else:
                        return tf.cast(x, tf.int32)  # tf.Tensor

                # IMPROVE: use signature to match normalize and `un-normalize` routines
                # float inputs are denormalized before display (presumably they were scaled
                # to [0, 1] upstream -- TODO confirm)
                if hasattr(
                        x_show,
                        "dtype") and x_show.dtype.name.startswith('float'):
                    x_show = denormalize(x_show)
                elif hasattr(x_show, "element_spec") and \
                    hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                    x_show = x_show.map(denormalize)
                save_dir, save_paths = None, None
                if need_to_save:
                    save_dir = path_possibly_formatted(
                        params_predict.show_result.save_path)
                    # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                if params_predict.show_result.plotter == "matplot":
                    onlysave_path = None
                    if only_save:
                        if need_to_save:
                            from helpers.util import tmp_filename_by_time
                            onlysave_path = osp.join(
                                save_dir, tmp_filename_by_time('jpg'))
                            need_to_save = False
                        else:
                            WARN(
                                'only_save is true, but save_path is not specified. ignored'
                            )
                    show_image_mats(x_show,
                                    texts=dumps,
                                    title="Predictions",
                                    onlysave_path=onlysave_path)
                else:
                    # NOTE(review): paired with the `plotter == "matplot"` test (reconstructed) -- confirm.
                    # `'differences' in vars()` is true only when the mismatch mask was computed above.
                    INFO(
                        f"Predictions{'(only diff)' if 'differences' in vars() else ''}: "
                        + ", ".join(dumps))
                # if need_to_save:
                #     save_image_mats(x_show, save_paths)
    else:
        # NOTE(review): paired with the outer `show_result.is_defined() and isinstance(...)`
        # test (reconstructed) -- confirm.
        top_k = params_predict.show_result.top_k
        INFO(
            f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}"
        )
    return predictions
def model_predict(model: object, data: object, **params) -> object:
    """
    Run `model` on `data`, optionally decode the raw outputs and log/plot the results.

    NOTE(review): the nesting below was reconstructed from a whitespace-collapsed source;
    the `else` pairings marked with NOTE(review) should be confirmed.

    :param model: a `tf.keras.Model` (its `predict` is called) or any callable applied to the inputs.
    :param data: passed to `ModelManager._validate_input`, which yields the `(x, y)` pair used below.
    :param params: overrides for the (empty) `decode_prediction` / `show_result` defaults; `input_num`
        is also read when a callable model is fed a `tf.data.Dataset`.
    :return: the (possibly decoded) predictions; the blank value itself when the model returned
        nothing, or None when the predictions are blank after decoding.
    :raises TypeError: for an unsupported model type, or an unsupported prediction type while decoding.
    :raises ValueError: for an invalid `input_num` or an unknown `decode_prediction.name`.
    """
    params_predict = Params(decode_prediction=Params({}), show_result=Params({})).update_to(params)
    x, y = ModelManager._validate_input(data)

    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        if not callable(model):
            raise TypeError(f"Unsupported model type: {type(model)}")

        # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
        input_spec = Params(input_num=None).left_join(params_predict)
        call_kwargs = {}  # extra keyword arguments for the callable (kept apart from the outer `params`)
        # IMPROVE: judge the base class of model, to append required params
        # `__module__` may be None for some callables, hence the `or ''`.
        if (getattr(model, '__module__', None) or '').startswith("modules.models.tensorlayer"):
            call_kwargs['is_train'] = False

        if not isinstance(inputs, tf.data.Dataset):
            return model(inputs, **call_kwargs)

        # TODO: specify InputSpec (inputs element_spec) for prediction
        input_num = input_spec.input_num
        if input_num is None:
            pass
        elif isinstance(input_num, int):
            # Explicit check instead of `assert`, which is stripped when Python runs with -O.
            if input_num <= 0:
                raise ValueError("input_num must > 0")
            # TODO: more test cases needed
            if input_num > 1:
                inputs = inputs.batch(input_num)  # NOTE: headed with a `batch_size` dim by this step
        else:
            raise ValueError(f'cannot handle input_spec.input_num={input_spec.input_num}')

        # NOTE: callable model might not support batch feeding. so it's up to caller to constrain the size.
        results = [model(item, **call_kwargs) for item in inputs.as_numpy_iterator()]
        if not results:
            return None
        return results[0] if len(results) == 1 else results

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return predictions  # None

    # Optional decoding of the raw model output, selected by `decode_prediction.name`.
    if params_predict.decode_prediction.is_defined():
        decoder = params_predict.decode_prediction.name
        if decoder == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
        elif decoder == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
        elif decoder == 'image_denormalize':
            from modules.data.data_manager import DataManager
            predictions = DataManager.denormalize(predictions)
        else:
            raise ValueError(
                f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    if not params_predict.show_result.is_defined():
        return predictions

    if not isinstance(predictions, np.ndarray):
        # NOTE(review): this fallback is paired with the `isinstance(predictions, np.ndarray)`
        # test (reconstructed) -- confirm.
        top_k = params_predict.show_result.top_k
        INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")
        return predictions

    # IMPROVE: support `show_result.inputs_type/outputs_type` e.g.'images''features''label_indexes'
    x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
    only_diff_applied = False  # True once the mismatch mask has really been applied
    if params_predict.show_result.only_difference:
        if hasattr(y_show, '__len__'):
            if p_show.__len__() == y_show.__len__():
                differences = p_show != y_show
                x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                only_diff_applied = True
            else:
                WARN(f"Cannot dump differences: len of targets is not same as predictions"
                     f"({y_show.__len__()} vs {p_show.__len__()})")
        else:
            WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
        # Only report a mismatch count when a comparison actually happened; otherwise
        # len(p_show) is simply the number of all predictions.
        if only_diff_applied:
            INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
    if params_predict.show_result.get('top_k', None) is not None:
        top_k = params_predict.show_result.top_k
        # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
        x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
    if len(p_show) == 0:
        return predictions

    # one text label per shown sample: "p" alone, or "(p vs y)" when a target exists
    has_targets = hasattr(y_show, '__len__')
    dumps = [
        f"({p} vs {y_show[i]})" if has_targets and i < y_show.__len__() else f"{p}"
        for i, p in enumerate(p_show)
    ]
    need_to_show = params_predict.show_result.plotter.__len__() > 0
    need_to_save = params_predict.show_result.save_path.__len__() > 0
    only_save = params_predict.show_result.only_save
    if need_to_show or need_to_save:
        # IMPROVE: use signature to match normalize and `denormalize` routines
        from modules.data.data_manager import DataManager
        if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
            x_show = DataManager.denormalize(x_show)
        elif hasattr(x_show, "element_spec") and \
                hasattr(x_show.element_spec, "dtype") and \
                x_show.element_spec.dtype.name.startswith('float'):
            x_show = x_show.map(DataManager.denormalize)
        save_dir = None
        if need_to_save:
            save_dir = path_possibly_formatted(params_predict.show_result.save_path)
        if params_predict.show_result.plotter == "matplot":
            onlysave_path = None
            if only_save:
                if need_to_save:
                    from helpers.util import tmp_filename_by_time
                    onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                    need_to_save = False
                else:
                    WARN('only_save is true, but save_path is not specified. ignored')
            show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
        else:
            # NOTE(review): paired with the `plotter == "matplot"` test (reconstructed) -- confirm.
            INFO(f"Predictions{'(only diff)' if only_diff_applied else ''}: " + ", ".join(dumps))
        # TODO: save each image (save_image_mats) when `need_to_save` is still set
    return predictions