Example #1
def preload_gpu_devices(active_indexes: list = None, memory_limit: int = None):
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')

    __preloaded_gpu___ = True
    if gpus:
        try:
            if active_indexes is not None:
                # NOTE: set_visible_devices replaces the visible set on each call,
                # so pass all selected GPUs in a single call instead of looping.
                tf.config.experimental.set_visible_devices(
                    [gpus[index] for index in active_indexes], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(f"Num of Physical GPU vs Logical ones: {len(gpus)} vs {len(logical_gpus)}, "
                 f"{len(gpus)-len(logical_gpus)} disabled")
            if memory_limit is None:
                tf.config.experimental.set_memory_growth(gpus[0], True)
                INFO("Physical GPU Memory Growth is turned ON.")
            else:
                tf.config.experimental.set_virtual_device_configuration(gpus[0], [
                    tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
                INFO(f"Physical GPU Memory Growth is limited under: {memory_limit}")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
def on_done_consume_inputs(result):
    """
    If the coroutine used task.set_result/set_exception and the caller awaited the task
    instead of polling `data`, these callbacks would be optional.
    """
    nonlocal data
    INFO(f'on_done_consume_inputs: {result}')
    data = result.get('data', None)
Example #3
    def run(self, pCallback: callable, pIsRunOnce: bool = False):
        """
        :param pCallback: func like (pMode:str, pVal:any). pMode is defined in __init__()
        :param pIsRunOnce: bool, True: stop after the first callback invocation; False: keep monitoring
        """
        self.Callback = pCallback
        self.IsRunOnce = pIsRunOnce

        # --- Connect & Run
        INFO("")
        INFO("/////////////////////////////////////")
        INFO("// --- Begin Monitor Clipboard --- //")
        INFO("/////////////////////////////////////")

        # --- Run Monitor App
        self.QtApp = QtWidgets.QApplication([]) if self.QtApp is None else self.QtApp
        self.QtClipboard = self.QtApp.clipboard()
        self.QtClipboard.dataChanged.connect(self._slot_Clipboard_OnChanged)
        self.QtApp.exec()
        return self.CallResults
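A usage sketch, assuming this run() belongs to the ClipboardMonitor class imported from helpers.qt_helper in the load_data example further below, and that the constructor takes the list of monitored modes:
# Hypothetical usage: print each clipboard change; pIsRunOnce=True stops after the first event.
from helpers.qt_helper import ClipboardMonitor  # assumed location (see load_data below)

def on_clipboard(pMode: str, pVal):
    print(f"clipboard[{pMode}] = {pVal!r}")

results = ClipboardMonitor(["Path_File"]).run(on_clipboard, pIsRunOnce=True)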
Example #4
    def run(self, cb: callable, onetime: bool = False):
        """
        :param cb: func like (mode:str, value:any). mode is defined in __init__()
        :param onetime: bool, True: stop after the first callback invocation; False: keep monitoring
        """
        self.Callback = cb
        self.IsRunOnce = onetime

        # --- Connect & Run
        INFO("")
        INFO("/////////////////////////////////////")
        INFO("// --- Begin Monitor Clipboard --- //")
        INFO("/////////////////////////////////////")

        # --- Run Monitor App
        self.QtApp = QtWidgets.QApplication(
            []) if self.QtApp is None else self.QtApp
        self.QtClipboard = self.QtApp.clipboard()
        self.QtClipboard.dataChanged.connect(self._slot_Clipboard_OnChanged)
        self.QtApp.exec()
        return self.CallResults if self.CallResults is not self.__class__.FLAG_NO_RESULT else None
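This variant differs from Example #3 only in its return value: the FLAG_NO_RESULT sentinel is mapped to None, so a caller can tell "nothing usable was copied" apart from a real result. A hedged sketch of that check, with the same assumed ClipboardMonitor class as above:
# Hypothetical usage: None means the monitor exited without producing a result.
results = ClipboardMonitor(["PathList"]).run(lambda mode, value: print(mode, value), onetime=True)
if results is None:
    print("clipboard monitor finished without a usable result")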
Example #5
def preload_gpu_devices():
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
            INFO("Physical GPU Memory Growth is turned ON.")
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(
                f"Num of Physical GPUs: {len(gpus)}, Num of Logical GPU: {len(logical_gpus)}"
            )
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
    def model_evaluate(model: object, data: object, **params) -> object:
        eval_metrics = None
        x_test, y_test = ModelManager._validate_input(data)

        import tensorflow as tf   # IMPROVE: check availability of ml backends
        if isinstance(model, tf.keras.Model):
            # NOTE: core API for model evaluation
            eval_metrics = model.evaluate(x_test, y_test)
            dumps = [f"{name}={value:8.4}" for name, value in zip(model.metrics_names, eval_metrics)]
            INFO("Evaluation: " + ", ".join(dumps))
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")
        return eval_metrics
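A usage sketch for model_evaluate, assuming it is a static method on ModelManager and that _validate_input accepts an (x, y) pair of NumPy arrays (both assumptions, not shown in this excerpt):
# Hypothetical usage: evaluate a tiny compiled Keras model on dummy MNIST-shaped data.
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

x_test = np.random.rand(8, 28, 28).astype(np.float32)
y_test = np.random.randint(0, 10, size=(8,))
eval_metrics = ModelManager.model_evaluate(model, (x_test, y_test))  # [loss, acc]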
Example #7
    def model_predict(model: object, data: object, **params) -> object:
        params_predict = Params(
            decode_prediction=Params(name='logits_to_index'),
            show_result=Params(top_k=20,
                               only_difference=True)).update_to(params)
        predictions = None
        x, y = ModelManager._validate_input(data)

        import numpy as np
        import tensorflow as tf  # IMPROVE: check availability of ml backends

        # wrapper for different model types
        def _predict(inputs):
            # NOTE: core API for prediction
            if isinstance(model, tf.keras.Model):
                # NOTE: if x is ndarray, result will be ndarray too
                return model.predict(inputs)
            elif callable(model):
                # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
                if isinstance(inputs, tf.data.Dataset):
                    # IMPROVE: stack result as a tensor
                    result = []
                    for t in inputs:
                        result.append(model(t))
                    return tf.stack(result)
                else:
                    return model(inputs)
            else:
                raise TypeError(f"Unsupported model type: {type(model)}")

        predictions = _predict(x)
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank.")
            return None

        if params_predict.decode_prediction.is_defined():
            if params_predict.decode_prediction.name == 'logits_to_index':
                # one-hot array -> index
                if isinstance(predictions, np.ndarray):
                    predictions = np.argmax(predictions, axis=-1)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.argmax(predictions, axis=-1)
                else:
                    raise TypeError(
                        f"Unsupported type for logits_to_index: {type(predictions)}"
                    )
            elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
                # for retrain, prediction should be a probs array and need to be sorted by `top_k`
                # NOTE: length of each prediction must be equivalent.
                top_k = params_predict.decode_prediction.get(
                    'top_k', safe_get_len(predictions[0]))
                # returns: top_values(=probs), top_idxs
                if isinstance(predictions, np.ndarray):
                    predictions = np_top_k(predictions, top_k)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.top_k(input=predictions, k=top_k)
                else:
                    raise TypeError(
                        f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}"
                    )
            else:
                raise ValueError(
                    f"Unsupported result decoding: {params_predict.decode_prediction.name}"
                )
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

        if params_predict.show_result.is_defined() and isinstance(
                predictions, np.ndarray):
            x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
            if params_predict.show_result.only_difference:
                if hasattr(y_show, '__len__'):
                    if p_show.__len__() == y_show.__len__():
                        differences = p_show != y_show
                        x_show, p_show, y_show = x_show[differences], p_show[
                            differences], y_show[differences]
                    else:
                        WARN(
                            f"Cannot dump differences: length of targets does not match predictions "
                            f"({y_show.__len__()} vs {p_show.__len__()})")
                else:
                    WARN(
                        f"Cannot dump differences: unsupported y type(={type(y_show)})"
                    )
                INFO(
                    f"Number of mismatch between prediction and truth: {len(p_show)}"
                )
            if params_predict.show_result.get('top_k', None) is not None:
                top_k = params_predict.show_result.top_k
                # TODO: sorting? 1.use tf.math.top_k  2.diff algorithm needs to be specified
                x_show, p_show, y_show = (safe_slice(_, end=top_k)
                                          for _ in (x_show, p_show, y_show))
            if len(p_show) > 0:
                dumps = []
                for i, p in enumerate(p_show):
                    if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                        dumps.append(f"{p}")
                    else:
                        dumps.append(f"({p} vs {y_show[i]})")
                need_to_show = params_predict.show_result.plotter.__len__() > 0
                need_to_save = params_predict.show_result.save_path.__len__(
                ) > 0
                only_save = params_predict.show_result.only_save
                if need_to_show or need_to_save:

                    def denormalize(x):
                        x = x * 255
                        if hasattr(x, 'astype'):  # np.ndarray
                            return x.astype(np.int32)
                        else:
                            return tf.cast(x, tf.int32)  # tf.Tensor

                    # IMPROVE: use signature to match normalize and `un-normalize` routines
                    if hasattr(
                            x_show,
                            "dtype") and x_show.dtype.name.startswith('float'):
                        x_show = denormalize(x_show)
                    elif hasattr(x_show, "element_spec") and \
                        hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                        x_show = x_show.map(denormalize)
                save_dir, save_paths = None, None
                if need_to_save:
                    save_dir = path_possibly_formatted(
                        params_predict.show_result.save_path)
                    # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                if params_predict.show_result.plotter == "matplot":
                    onlysave_path = None
                    if only_save:
                        if need_to_save:
                            from helpers.util import tmp_filename_by_time
                            onlysave_path = osp.join(
                                save_dir, tmp_filename_by_time('jpg'))
                            need_to_save = False
                        else:
                            WARN(
                                'only_save is true, but save_path is not specified. ignored'
                            )
                    show_image_mats(x_show,
                                    texts=dumps,
                                    title="Predictions",
                                    onlysave_path=onlysave_path)
                else:
                    INFO(
                        f"Predictions{'(only diff)' if 'differences' in vars() else ''}: "
                        + ", ".join(dumps))
                # if need_to_save:
                #     save_image_mats(x_show, save_paths)
        else:
            top_k = params_predict.show_result.top_k
            INFO(
                f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}"
            )
        return predictions
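A usage sketch, assuming model_predict is a static method on ModelManager and that the Params defaults shown above apply (logits_to_index decoding, top_k=20, only_difference=True):
# Hypothetical usage, reusing `model`, `x_test`, `y_test` from the evaluation sketch above.
# With the defaults, logits are argmax-decoded to class indexes and up to 20 mismatches
# against y_test are reported via INFO (or the configured plotter, if any).
predicted_classes = ModelManager.model_predict(model, (x_test, y_test))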
Example #8
    def model_train(model: object, data: object, **params):
        """
        NOTE: common Keras pitfall: Keras applies validation_split before shuffling, so if negative
        samples are grouped at the end of `data`, shuffle the data yourself first.
        :param model:
        :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
        """
        # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
        params_train = Params(optimizer='adam',
                              loss='sparse_categorical_crossentropy',
                              metrics=['acc'],
                              validation_split=0.1,
                              epochs=5,
                              batch_size=None,
                              checkpoint=Params(load_weights="latest",
                                                save_weights=Params(
                                                    frequency="epoch",
                                                    max_to_keep=5)),
                              show_result=Params()).update_to(params)
        x_train, y_train = ModelManager._validate_input(data)

        import tensorflow as tf  # IMPROVE: check availability of ml backends
        if isinstance(model, tf.keras.Model):
            # 1.compile and load variables from checkpoint
            model.compile(
                **params_train.fromkeys(['optimizer', 'loss', 'metrics']))
            # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
            ckpt_dir, ckpt_path_to_load = None, None
            if params_train.checkpoint.format == "CKPT_dir":
                from config import __abspath__
                ckpt_dir = path_possibly_formatted(
                    params_train.checkpoint.path)
                ckpt_dir = __abspath__(
                    ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
                ensure_dir_exists(ckpt_dir)
                ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
            # NOTE: in delayed-build mode, weights are only determined after calling build(batch_input_shape)
            #  or compile()+fit(x, y, batch_size).
            #  ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
            if params_train.checkpoint.load_weights == "latest" \
                    and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
                    and ckpt_path_to_load is not None:
                model.load_weights(ckpt_path_to_load)

            # 2.prepare callbacks
            callbacks = []
            # callback :: save medium CKPT
            if params_train.checkpoint.save_weights.is_defined(
            ) and ckpt_dir is not None:
                ckpt_path_to_save = osp.join(
                    ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
                # NOTE: if save_freq is not 'epoch' (i.e. it is a number of steps), saving is less reliable
                _params = Params(save_freq='epoch').left_join(
                    params_train.checkpoint.save_weights,
                    key_map={"save_freq": "frequency"})
                _callback = tf.keras.callbacks.ModelCheckpoint(
                    ckpt_path_to_save,  # not checkpoint_dir
                    save_weights_only=True,
                    save_best_only=True,
                    verbose=1,
                    **_params)
                callbacks.append(_callback)
            # callback :: early stop
            if params_train.early_stop.is_defined():
                _params = Params(monitor='val_loss', patience=10).left_join(
                    params_train.early_stop)
                _callback = tf.keras.callbacks.EarlyStopping(**_params)
                callbacks.append(_callback)
            # callback :: progress indicator / verbose
            # IMPROVE: use config for training verbose / progress indicator
            # _callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps', stateful_metrics=None)  # cannot emit real-time output under PyTest
            _callback = tf.keras.callbacks.LambdaCallback(
                on_batch_end=lambda batch, logs: INFO(
                    f"batch{batch:05d}: loss={logs.get('loss',None):.4f},acc={logs.get('acc',None):.4f}"
                ))
            callbacks.append(_callback)
            cb_batch_stats = None
            if params_train.collect_batch_stats:
                # when training only a few epochs, collect stats for each batch instead of the epoch average.
                class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
                    def __init__(self):
                        self.current_batch = 0
                        self.batch = []
                        self.loss = []
                        self.acc = []

                    def on_train_batch_end(self, batch, logs=None):
                        self.batch.append(self.current_batch)
                        self.loss.append(logs['loss'])
                        self.acc.append(logs['acc'])
                        self.model.reset_metrics()
                        self.current_batch += 1

                cb_batch_stats = CallbackCollectBatchStats(
                )  # TODO: can plot batch_losses and batch_acc using this
                callbacks.append(cb_batch_stats)
            if len(callbacks) == 0:
                callbacks = None

            # 3.train the model, and save checkpoints if configured
            # TODO: use model.fit_generator() for batch feeding. `steps_per_epoch` = np.ceil(samples / param.batch_size)
            # NOTE: core API for model training
            params_train_fit = params_train.fromkeys(
                ['validation_split', 'batch_size', 'epochs'])
            INFO(f"Beginning to train: {params_train_fit}")
            history = model.fit(x_train,
                                y_train,
                                **params_train_fit,
                                callbacks=callbacks)  # == core ==
            if cb_batch_stats is not None:
                history.history[
                    'batch'] = cb_batch_stats.batch  # accumulated batch number across epochs
                history.history['batch_loss'] = cb_batch_stats.loss
                history.history['batch_acc'] = cb_batch_stats.acc

            # 4.save the final checkpoint
            if params_train.save_model.is_defined() and ckpt_dir is not None:
                _params = Params(format="SavedModel").left_join(
                    params_train.save_model)
                save_format, ckpt_path_to_save = None, None
                if _params.format == "HDF5":
                    save_format = _ext = "h5"
                    ckpt_path_to_save = osp.join(ckpt_dir,
                                                 f"model_trained.{_ext}")
                else:  # default=SavedModel
                    save_format = "tf"
                    ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained")
                    ensure_dir_exists(ckpt_path_to_save)
                # IMPROVE: consider using tf.saved_model.save()
                model.save(ckpt_path_to_save, save_format=save_format
                           )  # by default, TF2 saves as 'tf' (SavedModel)

            # Optional: output history
            if params_train.show_result.is_defined():
                plot_history = None
                if params_train.show_result.plotter == 'matplot':
                    from helpers.plt_helper import plot_history_by_metrics as plot_history
                if params_train.show_result.plotter.__len__(
                ) > 0 and plot_history is None:
                    WARN(
                        f"Unsupported history plotter: {params_train.show_result.plotter}"
                    )
                if plot_history is not None:
                    plot_history(history,
                                 params_train.show_result.get('metrics', None))
                else:
                    # TODO: check this section
                    hist = history.history
                    INFO(
                        f"Last epoch: "
                        f"ACC(train,val)=({hist['accuracy'][-1]}, {hist['val_accuracy'][-1]}), "
                        f"MSE(train,val)=({hist['mse'][-1]}, {hist['val_mse'][-1]})"
                    )
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

        return model
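A usage sketch for model_train, assuming it is a static method on ModelManager; unspecified keys fall back to the Params defaults above (adam optimizer, sparse_categorical_crossentropy, 5 epochs, validation_split=0.1):
# Hypothetical usage: override a few fit() parameters and keep the remaining defaults.
# `model` is a compiled tf.keras.Model, e.g. the one from the evaluation sketch above.
import numpy as np

x_train = np.random.rand(64, 28, 28).astype(np.float32)
y_train = np.random.randint(0, 10, size=(64,))
trained_model = ModelManager.model_train(model, (x_train, y_train),
                                         epochs=2, batch_size=16)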
Example #9
    def load_model(model_signature: str, **params) -> object:
        """
        NOTE: common Keras pitfall: TF and Theano convolution kernels share the same shape, and Keras
        cannot tell them apart; verify the loaded model's behavior with test samples.
        :param model_signature:
        :param params:
        """
        model = None
        inputs, outputs = {}, {}  # {name: shape} dicts
        if model_signature == _ModelSignature.TFSavedModel.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
            path = ModelManager._validate_path(params.get('path', None))
            model = tf.saved_model.load(path, params.get('tags',
                                                         None))  # == core ==
            if params.get('signature_', None) is not None:
                model = model.signatures[params['signature_']]
            # TODO: append inputs, outputs spec to model object? so that predict() can adapt the fed inputs
            if hasattr(model, 'inputs') and hasattr(model,
                                                    'structured_outputs'):
                inputs = {model.inputs[0].name: model.inputs[0].shape}
                outputs = {
                    'default': model.structured_outputs['default']
                }  # IMPROVE: iterate
            pass
        elif model_signature == _ModelSignature.TFHub_KerasLayer.signature:
            import tensorflow_hub as tf_hub
            # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
            path = ModelManager._validate_path(params.get('path', None))
            params_model = Params(input_shape=None,
                                  trainable=False).update_to(params)
            if params_model.input_shape.__len__() == 4:
                params_model.input_shape = params_model.input_shape[1:]
            # NOTE: it will be delayed-build pattern when `input_shape` is None. no weights info available until build.
            model = tf_hub.KerasLayer(path,
                                      input_shape=params_model.input_shape)
            model.trainable = params_model.trainable
            pass
        elif model_signature == _ModelSignature.KerasSequential.signature:
            # IMPROVE: check availability of ml backends
            from tensorflow.keras import Sequential, layers
            name = params['name']
            # IMPROVE:parse name -> layers, or use structural config for iteration
            if name == '{conv-pool}*2-flat-dense-drop-dense':
                # NOTE: only for _test_\TF_1x_to_2x_3, output is len=10 logits
                model = Sequential([
                    # NOTE: 1.TF 2.x no longer requires constraining the Input layer's shape; layers even connect automatically
                    #      2.Conv layers need no (h, w) from the previous layer, only the filter count, kernel size and padding (to preserve h, w)
                    #      3.however, without input_shape the optimizer cannot restore previously saved parameters and must re-initialize
                    layers.Conv2D(32, (5, 5),
                                  strides=(1, 1),
                                  padding='same',
                                  activation='relu'),
                    layers.MaxPooling2D(pool_size=(2, 2),
                                        strides=(2, 2),
                                        padding='same'),
                    layers.Conv2D(64, (5, 5),
                                  strides=(1, 1),
                                  padding='same',
                                  activation='relu'),
                    layers.MaxPooling2D(pool_size=(2, 2),
                                        strides=(2, 2),
                                        padding='same'),
                    layers.Flatten(),  # the dense layers below expect 1-D data
                    layers.Dense(1024, activation='relu'),
                    layers.Dropout(0.5),  # TODO: disable Dropout during evaluate/predict
                    layers.Dense(10, activation='softmax')
                ])
            elif name == 'dense-dense_softmax':
                params_model = Params(embedding_size=1024,
                                      class_count=None).update_to(params)
                if params_model.class_count is None:
                    raise ValueError('class_count must be specified')
                model = Sequential([
                    layers.Dense(params_model.embedding_size,
                                 activation='relu'),
                    layers.Dense(params_model.class_count,
                                 activation='softmax')
                ])
                # TODO: need to return intermediate tf.Tensor required by embedding, loss calculation and evaluation.
            else:
                raise ValueError(f"Undefined model: {name}")
            pass
        elif model_signature == _ModelSignature.KerasModels_LoadModel.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            format_ = ModelManager._validate_format(
                params['format'], _ModelSignature.KerasModels_LoadModel)
            params_model = Params(path='', path_formatted='').update_to(params)
            path = ModelManager._validate_path(params_model.path)
            model = tf.keras.models.load_model(path)  # == core ==
        elif model_signature == _ModelSignature.TF_ImportGraphDef.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            format_ = ModelManager._validate_format(
                params['format'], _ModelSignature.TF_ImportGraphDef)
            params_model = Params(inputs='', outputs='').update_to(params)
            path = ModelManager._validate_path(params_model.path)

            # import PB model (frozen) in TF2.x. ref:https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            # ref:https://www.tensorflow.org/api_docs/python/tf/compat/v1/wrap_function
            def wrap_frozen_graph(pb_path, inputs, outputs, prefix=""):
                def _imports_graph_def():
                    tf.compat.v1.import_graph_def(
                        graph_def,
                        name=prefix)  # turn off the default prefix "import/"

                graph_def = tf.compat.v1.GraphDef()
                loaded = graph_def.ParseFromString(open(
                    pb_path, 'rb').read())  # == core ==
                wrapped_import = tf.compat.v1.wrap_function(
                    _imports_graph_def, [])  # == core ==
                import_graph = wrapped_import.graph
                return wrapped_import.prune(
                    tf.nest.map_structure(import_graph.as_graph_element,
                                          inputs),
                    tf.nest.map_structure(import_graph.as_graph_element,
                                          outputs))

            model = wrap_frozen_graph(path,
                                      inputs=params_model.inputs,
                                      outputs=params_model.outputs)
            test_img = tf.ones(
                [1, 224, 224, 3],
                dtype=tf.float32)  # fixed shape is for test ONLY
            DEBUG(f"wrap_func test result: {model(test_img).shape}")
        else:
            raise ValueError(f"Unsupported model signature: {model_signature}")
        INFO(f"type of loaded model={type(model)}")
        INFO(f"  inputs={inputs}, outputs={outputs}")
        return model
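A usage sketch for load_model, assuming a static call on ModelManager and that _ModelSignature.KerasSequential.signature resolves to the string checked in the branch above (its actual value is defined elsewhere):
# Hypothetical usage: build the small CNN defined under the KerasSequential branch by name.
cnn = ModelManager.load_model(_ModelSignature.KerasSequential.signature,
                              name='{conv-pool}*2-flat-dense-drop-dense')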
    def load_data(data_signature: str,
                  category="all",
                  meta_info=None,
                  **params) -> object:
        """
        :param data_signature:
        :param category: 'train', 'test' or 'all'
        :param meta_info: if given as a dict, caller may get meta info of the dataset through it
        :param params:
        :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
        """
        data = None
        params_data = Params(timeout=0,
                             need_shuffle=False,
                             shuffle_seed=None,
                             test_split=0.2,
                             decode_x=Params(colormode=None,
                                             resize_w=None,
                                             resize_h=None,
                                             preserve_aspect_ratio=True,
                                             normalize=True,
                                             reshape=None),
                             decode_y=Params()).update_to(params)
        if data_signature == _DataSignature.LabeledFolders.signature:
            params_data = Params(
                file_exts=['jpg'],
                labels_ordered_in_train=None).update_to(params_data)
            import modules.data.dataset_labeled_folders as dataset_labeled_folders
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
            path = DataManager._validate_path(params_data.path)
            ds = dataset_labeled_folders.dataset(path,
                                                 category=category,
                                                 meta_info=meta_info,
                                                 **params_data)
            DEBUG(f"loaded tf.data.Dataset: {ds}")
            data = ds
        elif data_signature == _DataSignature.TFKerasDataset.signature:
            # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
            from importlib import import_module
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
            lib_dataset = import_module(
                f"tensorflow.keras.datasets.{params_data.name}")
            (x_train, y_train), (x_test,
                                 y_test) = lib_dataset.load_data()  # Tensors
            WARN(
                f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio"
            )
            if params_data.decode_x.normalize:
                x_train, x_test = x_train / 255.0, x_test / 255.0
            if params_data.decode_x.reshape.__len__() > 0:
                # TODO: decode_x reshape means image reshape, not matrix reshape
                x_train = x_train.reshape(params_data.decode_x.reshape)
                x_test = x_test.reshape(params_data.decode_x.reshape)
            DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
            if category == 'all':
                data = ((x_train, y_train), (x_test, y_test))
            elif category == 'train':
                data = (x_train, y_train)
            elif category == 'test':
                data = (x_test, y_test)
            else:
                raise ValueError(f"Unknown category: {category}")
            # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
        elif data_signature == _DataSignature.SingleFile.signature:
            path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = DataManager._process_files(path, **params_decode)
        elif data_signature == _DataSignature.UI_Copy_Files.signature:
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)

            def _process(event_type, abspath_or_list):
                nonlocal data
                INFO(f"clipboard event: path={abspath_or_list}")
                data = DataManager._process_files(abspath_or_list,
                                                  **params_decode)

            from helpers.qt_helper import ClipboardMonitor
            monitor_type = "Path_File" if params_data.format == "Path" else "PathList"

            # NOTE: use AsyncTask to impl async clipboard monitoring loop.
            # data = ClipboardMonitor([monitor_type]).run(_process, True)  # <- would get a blank result on a faulty copy
            from async_ import AsyncLoop, AsyncManager

            async def coro_clipboard_monitor():
                ClipboardMonitor([monitor_type]).run(_process, onetime=True)

            task = AsyncManager.run_task(coro_clipboard_monitor(),
                                         loop=None)  # block current loop
            DEBUG(
                f"[input_loop] monitoring clipboard with type {monitor_type} ..."
            )

            # wait until task done TODO: impl a context_manager for simple await
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))

        elif data_signature == _DataSignature.UI_Web_Files.signature:
            # path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = None

            webapp = ensure_web_app(
            )  # will load config from Path.DeployConfigAbs
            INFO(
                f'waiting for data input from web app {webapp.host}:{webapp.port}'
            )  # IMPROVE: hint upload url
            from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
            from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
            import asyncio
            this_task: asyncio.Task or None = None

            @track_entry_and_exit.coro()
            async def coro_consume_files(abspath_or_list, cbs):
                # nonlocal this_task
                # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'

                import modules.data.decode_tf as decode_tf
                import tensorflow as tf

                DEBUG(f'[coro_consume_files]: {locals()}')
                on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                    cbs)
                filepaths = abspath_or_list if isinstance(
                    abspath_or_list, list) else [abspath_or_list]
                result = {
                }  # data: tf.data.Dataset::{image_t}, error: optional(str)

                # from helpers.tf_helper import image_example
                # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
                data = DataManager._process_files(filepaths, **params_decode)

                result.update({'data': data})
                # # if show inputs
                # try:
                #     asynctask = async_show_image_mats(image_mats)
                #     result.update({'asynctask_id': asynctask.id})
                # except Exception as e:
                #     result.update({'error': e.__repr__()})
                on_done(result)
                # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
                return result  # == this_task.set_result(result)

            def on_done_consume_inputs(result):
                """
                If using task.set_result, set_exception etc and wait for task instead of data,
                callbacks will be optional.
                """
                nonlocal data
                INFO(f'on_done_consume_inputs: {result}')
                data = result.get('data', None)

            @webapp.on_uploads(namespace="data_manager::ui_web_files",
                               onetime=True)
            def handle_ui_web_files(abspath_or_list):
                nonlocal this_task
                this_task = AsyncManager.run_task(
                    coro_consume_files(abspath_or_list,
                                       (on_done_consume_inputs, )))
                handler_result = {'asynctask_id': this_task.id}
                return handler_result

            # wait until get data uploaded
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))
            pass
        else:
            raise ValueError(f"Unsupported data signature: {data_signature}")
        # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
        #   data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
        # tf = safe_import_module("tensorflow")
        # if tf and isinstance(data, tf.data.Dataset):
        #     if params_data.shuffle.fixed_seed:
        #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
        return data
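A usage sketch for load_data, assuming a static call on DataManager, that _DataSignature.TFKerasDataset.signature selects the Keras-dataset branch above, and that Params accepts the plain keyword values shown:
# Hypothetical usage: load MNIST via tensorflow.keras.datasets; decode_x.normalize defaults
# to True, so x is scaled to [0, 1].
meta = {}
(x_train, y_train), (x_test, y_test) = DataManager.load_data(
    _DataSignature.TFKerasDataset.signature, category='all', meta_info=meta, name='mnist')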
def _process(event_type, abspath_or_list):
    nonlocal data
    INFO(f"clipboard event: path={abspath_or_list}")
    data = DataManager._process_files(abspath_or_list, **params_decode)
Example #12
    def upload_create_or_update(filename_to_update=None, key="file"):
        """
        :return: json object. contains relative filename on success, and error message on failure.
        """
        # redirect_url = request.values.get('redirect', default=request.url, type=str)

        # 1. request -> files(data) -> local uploads folder + json response(error+filename)
        # Accept multiple files
        # file = request.files[key]
        files = request.files.getlist(key)
        if files is None or len(files) == 0:
            ret = {'error': 'no file part found in multipart/form-data'}
            return str(json.dumps(ret)), 400, RESPONSE_JSON_ACCESSCONTROL
        # NOTE: use [] * len(..) carefully: it only copies references (shallow copy); use a `for` comprehension instead.
        ret = [{} for _ in range(len(files))
               ]  # [{filename: str, error: optional(str)}]
        dispatch_arg = []

        error_count = 0
        for idx, file in enumerate(files):
            if file.filename == "":
                ret[idx].update({
                    'error':
                    "no file name is given or no file selected for uploading"
                })
                error_count += 1
                continue  # bypass to the next one

            if file and osp.splitext(
                    file.filename)[1].lower() in ALLOWED_EXTENSIONS:
                if filename_to_update is None:
                    # TODO: handle chinese filename. str.encode('utf-8')?
                    filepath = secure_filename(file.filename)
                    filepath = get_new_name_if_exists(
                        osp.join(app.config['UPLOAD_FOLDER'], filepath))
                else:
                    filepath = osp.join(app.config['UPLOAD_FOLDER'],
                                        filename_to_update)
                if not osp.isabs(filepath):
                    filepath = osp.join(app.root_path, filepath)
                try:
                    file.save(filepath)  # NOTE: overwrite existed one
                except Exception as e:
                    ret[idx].update(
                        {'error': f"Failed to upload file to {filepath}"})
                    error_count += 1
                    continue  # bypass to the next one
                INFO('file uploaded to: ' + filepath)
                dispatch_arg.append(filepath)
                ret[idx].update({'filename': osp.basename(filepath)})
            else:
                ret[idx].update({
                    'error':
                    f"only accept these image types: {ALLOWED_EXTENSIONS}"
                })
                error_count += 1
                continue  # bypass to the next one
        ret = {'uploaded': ret}

        # 2. dispatch to subscribers of `on_uploads` event
        if error_count < len(files):  # error_count == 0:
            dispatch_results = app.dispatch_handlers(
                app.__class__.EventUploads,
                dispatch_arg if len(dispatch_arg) > 1 else dispatch_arg[0])
            # NOTE: multiple inputs can be consumed at once, so the number of results may be less than the number of inputs.
            ret.update({'dispatched': dispatch_results})

        return str(json.dumps(ret)), 200 if error_count < len(
            files) else 400, RESPONSE_JSON_ACCESSCONTROL
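A hedged client-side sketch for the upload handler above; the actual route and port are registered elsewhere, so the URL below is only a placeholder. The form field name matches the default key="file":
# Hypothetical client call (placeholder URL); the response body is the JSON built above,
# e.g. {'uploaded': [...], 'dispatched': [...]} on success.
import requests

with open('sample.jpg', 'rb') as f:
    resp = requests.post('http://localhost:5000/uploads',  # placeholder URL
                         files={'file': f})
print(resp.status_code, resp.text)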
Example #13
def _process(event_type, abspath_or_list):
    INFO(f"clipboard event: path={abspath_or_list}")
    return DataManager._process_files(abspath_or_list, **params_decode)
    def model_predict(model: object, data: object, **params) -> object:
        params_predict = Params(decode_prediction=Params({}), show_result=Params({})).update_to(params)
        predictions = None
        x, y = ModelManager._validate_input(data)

        import numpy as np
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # wrapper for different model types
        def _predict(inputs):
            # NOTE: core API for prediction
            if isinstance(model, tf.keras.Model):
                # NOTE: if x is ndarray, result will be ndarray too
                return model.predict(inputs)
            elif callable(model):
                # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
                input_spec = Params(input_num=None).left_join(params_predict)
                params = {}
                # IMPROVE: judge the base class of model, to append required params
                if model.__module__.startswith("modules.models.tensorlayer"):
                    params.update({'is_train': False})
                if isinstance(inputs, tf.data.Dataset):
                    # TODO: specify InputSpec (inputs element_spec) for prediction
                    if input_spec.input_num is None:
                        pass
                    elif isinstance(input_spec.input_num, int):
                        assert input_spec.input_num > 0, "input_num must be > 0"
                        # TODO: more test cases needed
                        # inputs.batch(input_spec.input_num)
                        # result = []
                        # assert iterable(inputs)
                        # for batch in inputs:
                        #     inputs_list = [_ for _ in batch]
                        #     inputs_list = inputs_list[0] if len(inputs_list) == 1 else inputs_list
                        #     result.append(model(inputs_list, **params))
                        # # return tf.stack(result)
                        # return result[0] if len(result) == 1 else None if len(result) == 0 else result
                        if input_spec.input_num > 1:
                            inputs = inputs.batch(
                                input_spec.input_num)  # NOTE: headed with a `batch_size` dim by this step
                            # inputs = inputs.unbatch()
                    else:
                        raise ValueError(f'cannot handle input_spec.input_num={input_spec.input_num}')
                    # NOTE: callable model might not support batch feeding. so it's up to caller to constrain the size.
                    result = []
                    for inputs_ in inputs.as_numpy_iterator():
                        result.append(model(inputs_, **params))  # NOTE: if input_num > 1
                    return result[0] if len(result) == 1 else None if len(result) == 0 else result
                else:
                    result = model(inputs, **params)
                    return result
            else:
                raise TypeError(f"Unsupported model type: {type(model)}")

        predictions = _predict(x)
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank.")
            return predictions  # None

        if params_predict.decode_prediction.is_defined():
            if params_predict.decode_prediction.name == 'logits_to_index':
                # one-hot array -> index
                if isinstance(predictions, np.ndarray):
                    predictions = np.argmax(predictions, axis=-1)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.argmax(predictions, axis=-1)
                else:
                    raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
            elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
                # for retrain, prediction should be a probs array and need to be sorted by `top_k`
                # NOTE: length of each prediction must be equivalent.
                top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
                # returns: top_values(=probs), top_idxs
                if isinstance(predictions, np.ndarray):
                    predictions = np_top_k(predictions, top_k)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.top_k(input=predictions, k=top_k)
                else:
                    raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
            elif params_predict.decode_prediction.name == 'image_denormalize':
                from modules.data.data_manager import DataManager
                predictions = DataManager.denormalize(predictions)
            else:
                raise ValueError(
                    f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

        if params_predict.show_result.is_defined():
            if isinstance(predictions, np.ndarray):
                # IMPROVE: support `show_result.inputs_type/outputs_type` e.g.'images''features''label_indexes'
                x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
                if params_predict.show_result.only_difference:
                    if hasattr(y_show, '__len__'):
                        if p_show.__len__() == y_show.__len__():
                            differences = p_show != y_show
                            x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                        else:
                            WARN(f"Cannot dump differences: len of targets is not same as predictions"
                                 f"({y_show.__len__()} vs {p_show.__len__()})")
                    else:
                        WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
                    INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
                if params_predict.show_result.get('top_k', None) is not None:
                    top_k = params_predict.show_result.top_k
                    # TODO: sorting? 1.use tf.math.top_k  2.diff algorithm needs to be specified
                    x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
                if len(p_show) > 0:
                    dumps = []
                    for i, p in enumerate(p_show):
                        if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                            dumps.append(f"{p}")
                        else:
                            dumps.append(f"({p} vs {y_show[i]})")
                    need_to_show = params_predict.show_result.plotter.__len__() > 0
                    need_to_save = params_predict.show_result.save_path.__len__() > 0
                    only_save = params_predict.show_result.only_save
                    if need_to_show or need_to_save:
                        # IMPROVE: use signature to match normalize and `denormalize` routines
                        from modules.data.data_manager import DataManager
                        if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
                            x_show = DataManager.denormalize(x_show)
                        elif hasattr(x_show, "element_spec") and \
                            hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                            x_show = x_show.map(DataManager.denormalize)
                    save_dir, save_paths = None, None
                    if need_to_save:
                        save_dir = path_possibly_formatted(params_predict.show_result.save_path)
                        # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                    if params_predict.show_result.plotter == "matplot":
                        onlysave_path = None
                        if only_save:
                            if need_to_save:
                                from helpers.util import tmp_filename_by_time
                                onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                                need_to_save = False
                            else:
                                WARN('only_save is true, but save_path is not specified. ignored')
                        show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
                    else:
                        INFO(f"Predictions{'(only diff)' if 'differences' in vars() else ''}: " + ", ".join(dumps))
                    # if need_to_save:
                    #     save_image_mats(x_show, save_paths)
            else:
                top_k = params_predict.show_result.top_k
                INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")

        return predictions
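np_top_k is called above but not defined in this excerpt; here is a minimal NumPy sketch, under the assumption that it mirrors tf.math.top_k and returns (top_values, top_indices) along the last axis, as the "returns: top_values(=probs), top_idxs" comment suggests:
import numpy as np

def np_top_k(arr: np.ndarray, k: int):
    """Hypothetical helper mirroring tf.math.top_k: top-k values and indices along the last axis."""
    idx = np.argsort(arr, axis=-1)[..., ::-1][..., :k]  # indices of the k largest entries, descending
    values = np.take_along_axis(arr, idx, axis=-1)      # gather the corresponding probabilities/logits
    return values, idx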