Example #1
def _validate_path(path):
    import os.path as osp
    from config import __abspath__
    path = path_possibly_formatted(path)
    path = __abspath__(path) if not osp.isabs(path) else path
    if not osp.exists(path):
        raise ValueError(f"Given path is invalid: {path}")
    return path
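
A minimal usage sketch for `_validate_path` (hypothetical paths; assumes `config.__abspath__` resolves relative paths against the project root):

    p = _validate_path("data/train.csv")   # -> <project_root>/data/train.csv, if it exists
    p = _validate_path("/etc/hosts")       # absolute paths pass through unchanged
    # _validate_path("no/such/file")       # raises ValueError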
Example #2
def ensure_web_app():
    from config import Path, __abspath__
    import os.path as osp
    config_deploy = ConfigSerializer.load(Path.DeployConfigAbs)
    params_webapp = Params(upload_folder=None).cross_join(config_deploy.web)
    # NOTE: a relative path should be resolved against the project root, not the webapp's directory
    if not osp.isabs(params_webapp.upload_folder):
        params_webapp.upload_folder = __abspath__(params_webapp.upload_folder)

    from web import get_webapp
    webapp = get_webapp(**params_webapp)

    params_webapp_run = Params(host="127.0.0.1", port="2020", ssl_context=None) \
        .left_join(config_deploy.web, {"host": "local_ip", "port": "local_port"})
    if config_deploy.web.use_https:
        params_webapp_run.ssl_context = (config_deploy.web.certfile_path, config_deploy.web.keyfile_path)
    webapp.async_run(**params_webapp_run)
    return webapp
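
The `cross_join`/`left_join` helpers above come from the project's `Params` class, which is not shown here. A minimal sketch of the `left_join` semantics this code appears to rely on, assuming `Params` wraps a dict (`ParamsSketch` is illustrative, not the real implementation):

    class ParamsSketch(dict):
        def left_join(self, other, key_map=None):
            # keep only the keys already present here, refreshing their values
            # from `other`; key_map={my_key: other_key} renames source keys
            key_map = key_map or {}
            for k in list(self):
                src = key_map.get(k, k)
                if src in other:
                    self[k] = other[src]
            return self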
Example #3
    def model_train(model: object, data: object, **params):
        """
        NOTE: Keras常见陷阱:1.Keras先validation_split再shuffle,因此data中如果是负样本排在最后、宜自行先shuffle
        :param model:
        :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
        """
        # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
        params_train = Params(optimizer='adam',
                              loss='sparse_categorical_crossentropy',
                              metrics=['acc'],
                              validation_split=0.1,
                              epochs=5,
                              batch_size=None,
                              checkpoint=Params(load_weights="latest",
                                                save_weights=Params(
                                                    frequency="epoch",
                                                    max_to_keep=5)),
                              show_result=Params()).update_to(params)
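        # `update_to(params)` is assumed to overlay the caller's **params onto these
        #  defaults, e.g. model_train(model, data, epochs=20) yields epochs=20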
        x_train, y_train = ModelManager._validate_input(data)

        import tensorflow as tf  # IMPROVE: check availability of ml backends
        if isinstance(model, tf.keras.Model):
            # 1.compile and load variables from checkpoint
            model.compile(**params_train.fromkeys(['optimizer', 'loss', 'metrics']))
            # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
            ckpt_dir, ckpt_path_to_load = None, None
            if params_train.checkpoint.format == "CKPT_dir":
                from config import __abspath__
                ckpt_dir = path_possibly_formatted(params_train.checkpoint.path)
                ckpt_dir = __abspath__(ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
                ensure_dir_exists(ckpt_dir)
                ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
            # NOTE: in delayed-build mode, the weights are only determined after calling
            #  build(batch_input_shape), or compile() followed by fit(x, y, batch_size)
            #  ref: https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
            if params_train.checkpoint.load_weights == "latest" \
                    and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
                    and ckpt_path_to_load is not None:
                model.load_weights(ckpt_path_to_load)
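            # The "tf.train.Checkpoint.restore" signature listed above is not exercised
            #  here; a sketch of that alternative would be:
            #   ckpt = tf.train.Checkpoint(model=model)
            #   ckpt.restore(ckpt_path_to_load).expect_partial()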

            # 2.prepare callbacks
            callbacks = []
            # callback :: save medium CKPT
            if params_train.checkpoint.save_weights.is_defined() and ckpt_dir is not None:
                ckpt_path_to_save = osp.join(ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
                # NOTE: if save_freq is not 'epoch', it is interpreted as a number of steps,
                #  which is less reliable
                _params = Params(save_freq='epoch').left_join(
                    params_train.checkpoint.save_weights,
                    key_map={"save_freq": "frequency"})
                _callback = tf.keras.callbacks.ModelCheckpoint(
                    ckpt_path_to_save,  # not checkpoint_dir
                    save_weights_only=True,
                    save_best_only=True,
                    verbose=1,
                    **_params)
                callbacks.append(_callback)
            # callback :: early stop
            if params_train.early_stop.is_defined():
                _params = Params(monitor='val_loss', patience=10).left_join(
                    params_train.early_stop)
                _callback = tf.keras.callbacks.EarlyStopping(**_params)
                callbacks.append(_callback)
            # callback :: progress indicator / verbose
            # IMPROVE: use config for training verbose / progress indicator
            # _callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps', stateful_metrics=None)  # cannot stream output in real time under PyTest
            _callback = tf.keras.callbacks.LambdaCallback(
                on_batch_end=lambda batch, logs: INFO(
                    f"batch{batch:05d}: loss={logs.get('loss', float('nan')):.4f},"
                    f"acc={logs.get('acc', float('nan')):.4f}"))
            callbacks.append(_callback)
            cb_batch_stats = None
            if params_train.collect_batch_stats:
                # when training for only a few epochs, collect per-batch stats instead of the epoch average
                class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
                    def __init__(self):
                        self.current_batch = 0
                        self.batch = []
                        self.loss = []
                        self.acc = []

                    def on_train_batch_end(self, batch, logs=None):
                        self.batch.append(self.current_batch)
                        self.loss.append(logs['loss'])
                        self.acc.append(logs['acc'])
                        self.model.reset_metrics()
                        self.current_batch += 1

                cb_batch_stats = CallbackCollectBatchStats()  # TODO: can plot batch_losses and batch_acc using this
                callbacks.append(cb_batch_stats)
            if len(callbacks) == 0:
                callbacks = None

            # 3.train the model, and save checkpoints if configured
            # TODO: use generator-based feeding for large datasets (in TF2, model.fit accepts
            #  generators directly; fit_generator is deprecated). `steps_per_epoch` = np.ceil(samples / param.batch_size)
            # NOTE: core API for model training
            params_train_fit = params_train.fromkeys(
                ['validation_split', 'batch_size', 'epochs'])
            INFO(f"Beginning to train: {params_train_fit}")
            history = model.fit(x_train,
                                y_train,
                                **params_train_fit,
                                callbacks=callbacks)  # == core ==
            if cb_batch_stats is not None:
                # accumulated batch numbers across epochs
                history.history['batch'] = cb_batch_stats.batch
                history.history['batch_loss'] = cb_batch_stats.loss
                history.history['batch_acc'] = cb_batch_stats.acc
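            # A plotting sketch for these per-batch stats (assumes matplotlib; illustrative only):
            #   import matplotlib.pyplot as plt
            #   plt.plot(history.history['batch'], history.history['batch_loss'])
            #   plt.plot(history.history['batch'], history.history['batch_acc'])
            #   plt.show()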

            # 4.save the final checkpoint
            if params_train.save_model.is_defined() and ckpt_dir is not None:
                _params = Params(format="SavedModel").left_join(
                    params_train.save_model)
                save_format, ckpt_path_to_save = None, None
                if _params.format == "HDF5":
                    save_format = _ext = "h5"
                    ckpt_path_to_save = osp.join(ckpt_dir,
                                                 f"model_trained.{_ext}")
                else:  # default=SavedModel
                    save_format = "tf"
                    ckpt_path_to_save = osp.join(ckpt_dir, "model_trained")
                    ensure_dir_exists(ckpt_path_to_save)
                # IMPROVE: consider using tf.saved_model.save()
                # by default, TF2 saves as 'tf' (SavedModel)
                model.save(ckpt_path_to_save, save_format=save_format)
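                # The tf.saved_model.save() alternative from the IMPROVE note above would be:
                #   tf.saved_model.save(model, ckpt_path_to_save)  # SavedModel format only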

            # Optional: output history
            if params_train.show_result.is_defined():
                plot_history = None
                if params_train.show_result.plotter == 'matplot':
                    from helpers.plt_helper import plot_history_by_metrics as plot_history
                if len(params_train.show_result.plotter) > 0 and plot_history is None:
                    WARN(f"Unsupported history plotter: {params_train.show_result.plotter}")
                if plot_history is not None:
                    plot_history(history,
                                 params_train.show_result.get('metrics', None))
                else:
                    # TODO: check this section
                    # NOTE: history keys depend on the configured metrics ('acc' with the
                    #  defaults above); use .get() so missing metrics do not raise KeyError
                    hist = history.history
                    INFO(f"Last epoch: "
                         f"ACC(train,val)=({hist.get('acc', [None])[-1]}, {hist.get('val_acc', [None])[-1]}), "
                         f"MSE(train,val)=({hist.get('mse', [None])[-1]}, {hist.get('val_mse', [None])[-1]})")
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

        return model
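
For reference, the core Keras calls that `model_train` orchestrates reduce to roughly this standalone sketch (toy data and the default params above; independent of the Params/checkpoint machinery):

    import numpy as np
    import tensorflow as tf

    x = np.random.rand(100, 4).astype("float32")
    y = np.random.randint(0, 3, size=(100,))
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(4,)),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(3, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["acc"])
    history = model.fit(x, y, validation_split=0.1, epochs=5, batch_size=32)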