Example #1
 def run_task(cls, task_or_coro, new_thread=False, loop=None):
     """
     :param task_or_coro: a coro will be wrapped into a task (in a new thread or the given loop)
        and then run; a task will be run directly and the other args are ignored.
     :param new_thread: an asynctask is always created (and run) in a thread;
       set `True` here if you want to spawn another one for this task.
     :param loop: you may choose to create (and run) an asynctask in an existing loop.
     :return: an asynctask, which provides access to `id`, `coro`, `loop`.
     """
     cls.__ensure_init__()
     with cls.__mutex__:
         if new_thread and loop is not None:
             WARN(
                 f'A task run in a new thread gets a new loop (arg loop ignored: {loop})'
             )
         if asyncio.iscoroutine(task_or_coro):
             if loop is None:
             loop = cls.__instance__.current_loop if not new_thread else cls.append_new_loop()
             task = cls.create_task(task_or_coro,
                                    new_thread=new_thread,
                                    loop=loop)
         elif isinstance(task_or_coro, asyncio.Task):
             task = task_or_coro
             task_loop = getattr(task, 'loop', None)
             task_coro = getattr(task, 'coro',
                                 None)  # Py3.8 implemented task.get_coro()
             if task_coro is not None and (new_thread or
                                           (loop is not None
                                            and task_loop is not None
                                            and loop != task_loop)):
                 task = cls.create_task(task_coro,
                                        new_thread=new_thread,
                                        loop=loop)
                 WARN(
                     'Task was requested to run in a new thread or loop; a new task will be created.'
                 )
                 loop = task.loop
             else:
                 loop = task_loop or cls.__instance__.current_loop
             # if new_thread:
             #     WARN('Task is always bound with a loop and cannot be run in new thread. (arg ignored)')
             # if loop is not None and loop != task_or_coro.loop:
             #     WARN(f'Task is always bound with an existing loop. (arg loop ignored: {loop})')
         else:
             raise TypeError(
                 f'Only accepts a coro object or task, but got a {type(task_or_coro)}'
             )
         # a task created via loop.create_task() after loop.run_forever() has started will not run, unless a `call_soon` schedules a new batch.
         if getattr(loop, 'id', None) == AsyncLoop.Main:
             # loop.call_soon(task)  # will be pending unless main_loop is running
             loop.run_until_complete(task)
         else:
             loop.call_soon_threadsafe(lambda: {})  # no-op callback: just wakes the loop so newly created tasks get scheduled
         return task
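A minimal usage sketch (the coro is hypothetical; `AsyncManager` is the class hosting this classmethod, as the `AsyncManager.run_task(...)` calls in Example #9 show):

import asyncio
from async_ import AsyncManager

async def fetch_answer():
    await asyncio.sleep(0.1)
    return 42

# run on the manager's current loop ...
task = AsyncManager.run_task(fetch_answer())
# ... or in a dedicated new thread (which implies a new loop)
task2 = AsyncManager.run_task(fetch_answer(), new_thread=True)
print(task.id, task.loop)  # the returned asynctask exposes `id`, `coro` and `loop`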
Example #2
 def dispatch_handlers(self,
                       event_name: str,
                       *args,
                       namespace=None,
                       **kwargs):
     """
     :param event_name:
     :param args:
     :param namespace: if None, namespace comparison is skipped
     :param kwargs:
     :return:
     """
     handler_results = []
     to_delete = set()
     for _event_name, _namespace, _handler, _is_onetime in self.handlers:
         if _event_name == event_name and (namespace is None
                                           or _namespace == namespace):
             try:
                 # import inspect
                 # DEBUG(f"_handler signature: ({[param.kind.description for param in inspect.signature(_handler).parameters.values()]})")
                 DEBUG(
                     f"[{_event_name}{'@'+(_namespace or '')}] dispatch({args}, {kwargs})"
                 )
                 if _is_onetime:
                     to_delete.add(
                         (_event_name, _namespace, _handler, _is_onetime))
                 handler_result = _handler(*args, **kwargs)
                 handler_results.append(handler_result)
             except Exception as e:
                 WARN(
                     f"Registered handler caused exception ({_event_name}@{_namespace}, "
                     f"which should have been caught in handler side): {e}")
     self.handlers -= to_delete
     return handler_results
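A usage sketch, assuming `bus` is an instance of the hosting class and `self.handlers` is a set of `(event_name, namespace, handler, is_onetime)` tuples, as the iteration implies:

# register a one-time handler, then dispatch an event to it
bus.handlers.add(('upload_done', 'web', lambda path: print('got', path), True))
results = bus.dispatch_handlers('upload_done', '/tmp/a.jpg', namespace='web')
# the one-time handler has been removed after this dispatch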
Example #3
def preload_gpu_devices(active_indexes: list = None, memory_limit: int = None):
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')

    __preloaded_gpu___ = True
    if gpus:
        try:
            if active_indexes is not None:
                # NOTE: set_visible_devices() replaces the visible set on each call,
                # so pass all selected GPUs as one list instead of calling it per index
                tf.config.experimental.set_visible_devices(
                    [gpus[index] for index in active_indexes], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(f"Num of Physical GPU vs Logical ones: {len(gpus)} vs {len(logical_gpus)}, "
                 f"{len(gpus)-len(logical_gpus)} disabled")
            if memory_limit is None:
                tf.config.experimental.set_memory_growth(gpus[0], True)
                INFO("Physical GPU Memory Growth is turned ON.")
            else:
                tf.config.experimental.set_virtual_device_configuration(gpus[0], [
                    tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
                INFO(f"Physical GPU Memory Growth is limited under: {memory_limit}")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
Example #4
    def gather_task(cls,
                    *task_or_coros,
                    given_id=None,
                    new_thread=False,
                    loop: asyncio.AbstractEventLoop = None):
        """
        :param task_or_coros:
        :param given_id: task_id is normally retrieved after task creation; if the
          task_id is needed inside the coro, however, the caller may get `new_id()` and pass it in.
        :param new_thread: an asynctask is always created (and run) in a thread;
          set `True` here if you want to spawn another one for this task.
        :param loop: you may choose to create (and run) an asynctask in an existing loop.
        :return: an asyncio.Future, which has extra attributes `id`, `coro` and `loop`.
        """
        cls.__ensure_init__()
        with cls.__mutex__:
            if loop is None:
                loop = cls.__instance__.current_loop if not new_thread else cls.append_new_loop()
            last_type = None
            for task_or_coro in task_or_coros:
                last_type = last_type or type(task_or_coro)
                if last_type != type(task_or_coro):
                    raise TypeError(
                        'All items in task_or_coros must have the same type.')
            if last_type is asyncio.Task:
                last_task_loop = None
                for task in task_or_coros:
                    last_task_loop = last_task_loop or getattr(
                        task, 'loop', None)
                    if last_task_loop is None or last_task_loop != getattr(
                            task, 'loop', None):
                        raise ValueError('All tasks must have the same loop.')
                if loop != last_task_loop:
                    loop = last_task_loop
                    WARN(
                        "Given loop is not same with loop of the tasks and is ignored."
                    )
            task_id = cls.new_id(prefix=getattr(
                loop, 'id', None)) if given_id is None else given_id

            # wrap future as a task. ensure_future() cannot do this.
            async def coro_wait_future(fut):
                await fut

            coroutine = coro_wait_future(
                asyncio.gather(*task_or_coros, loop=loop))
            task = cls.create_task(coroutine,
                                   given_id=task_id,
                                   new_thread=False,
                                   loop=loop)
            cls.__instance__.all_tasks[task_id] = task
            cls.hack_task(task, task_id, coroutine, loop)
            return task
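A usage sketch (hypothetical coros) gathering several coroutines into a single awaitable asynctask:

import asyncio
from async_ import AsyncManager

async def job(n):
    await asyncio.sleep(0.1)
    return n * n

# all three coros run in one fresh thread+loop; the returned task wraps the gather
task = AsyncManager.gather_task(job(1), job(2), job(3), new_thread=True)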
Example #5
 def denormalize(obj, max=255, type_=int):
     if hasmethod(obj, 'max'):
         if obj.max() > 1:
             WARN(
                 f"obj.max exceeds 1.0, no denormalize (nor type cast) will be done."
             )
             return obj
     obj *= max
     if hasattr(obj, 'astype'):
         return obj.astype(type_)  # np.ndarray
     else:
         import tensorflow as tf
         return tf.cast(
             obj, tf.uint8 if type_ is int else tf.float32)  # tf.Tensor
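A usage sketch; `hasmethod` and `WARN` are helpers from the surrounding project. Note that `obj *= max` mutates a mutable input in place:

import numpy as np

img = np.random.rand(4, 4, 3)   # floats in [0, 1]
out = denormalize(img)          # scaled to [0, 255] and cast to int (img itself is mutated)
same = denormalize(img)         # now obj.max() > 1: returned unchanged, with a WARN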
Example #6
def preload_gpu_devices():
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
            INFO("Physical GPU Memory Growth is turned ON.")
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(
                f"Num of Physical GPUs: {len(gpus)}, Num of Logical GPU: {len(logical_gpus)}"
            )
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
Example #7
    def model_predict(model: object, data: object, **params) -> object:
        params_predict = Params(
            decode_prediction=Params(name='logits_to_index'),
            show_result=Params(top_k=20,
                               only_difference=True)).update_to(params)
        predictions = None
        x, y = ModelManager._validate_input(data)

        import numpy as np
        import tensorflow as tf  # IMPROVE: check availability of ml backends

        # wrapper for different model types
        def _predict(inputs):
            # NOTE: core API for prediction
            if isinstance(model, tf.keras.Model):
                # NOTE: if x is ndarray, result will be ndarray too
                return model.predict(inputs)
            elif callable(model):
                # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
                if isinstance(inputs, tf.data.Dataset):
                    # IMPROVE: stack result as a tensor
                    result = []
                    for t in inputs:
                        result.append(model(t))
                    return tf.stack(result)
                else:
                    return model(inputs)
            else:
                raise TypeError(f"Unsupported model type: {type(model)}")

        predictions = _predict(x)
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank.")
            return None

        if params_predict.decode_prediction.is_defined():
            if params_predict.decode_prediction.name == 'logits_to_index':
                # one-hot array -> index
                if isinstance(predictions, np.ndarray):
                    predictions = np.argmax(predictions, axis=-1)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.argmax(predictions, axis=-1)
                else:
                    raise TypeError(
                        f"Unsupported type for logits_to_index: {type(predictions)}"
                    )
            elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
                # for retrain, prediction should be a probs array and need to be sorted by `top_k`
                # NOTE: length of each prediction must be equivalent.
                top_k = params_predict.decode_prediction.get(
                    'top_k', safe_get_len(predictions[0]))
                # returns: top_values(=probs), top_idxs
                if isinstance(predictions, np.ndarray):
                    predictions = np_top_k(predictions, top_k)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.top_k(input=predictions, k=top_k)
                else:
                    raise TypeError(
                        f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}"
                    )
            else:
                raise ValueError(
                    f"Unsupported result decoding: {params_predict.decode_prediction.name}"
                )
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

        if params_predict.show_result.is_defined() and isinstance(
                predictions, np.ndarray):
            x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
            if params_predict.show_result.only_difference:
                if hasattr(y_show, '__len__'):
                    if p_show.__len__() == y_show.__len__():
                        differences = p_show != y_show
                        x_show, p_show, y_show = x_show[differences], p_show[
                            differences], y_show[differences]
                    else:
                        WARN(
                            f"Cannot dump differences: len of targets is not same as predictions"
                            f"({y_show.__len__()} vs {p_show.__len__()})")
                else:
                    WARN(
                        f"Cannot dump differences: unsupported y type(={type(y_show)})"
                    )
                INFO(
                    f"Number of mismatch between prediction and truth: {len(p_show)}"
                )
            if params_predict.show_result.get('top_k', None) is not None:
                top_k = params_predict.show_result.top_k
                # TODO: sorting? 1. use tf.math.top_k  2. the diff algorithm needs to be specified
                x_show, p_show, y_show = (safe_slice(_, end=top_k)
                                          for _ in (x_show, p_show, y_show))
            if len(p_show) > 0:
                dumps = []
                for i, p in enumerate(p_show):
                    if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                        dumps.append(f"{p}")
                    else:
                        dumps.append(f"({p} vs {y_show[i]})")
                need_to_show = params_predict.show_result.plotter.__len__() > 0
                need_to_save = params_predict.show_result.save_path.__len__() > 0
                only_save = params_predict.show_result.only_save
                if need_to_show or need_to_save:

                    def denormalize(x):
                        x = x * 255
                        if hasattr(x, 'astype'):  # np.ndarray
                            return x.astype(np.int32)
                        else:
                            return tf.cast(x, tf.int32)  # tf.Tensor

                    # IMPROVE: use signature to match normalize and `un-normalize` routines
                    if hasattr(
                            x_show,
                            "dtype") and x_show.dtype.name.startswith('float'):
                        x_show = denormalize(x_show)
                    elif hasattr(x_show, "element_spec") and \
                        hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                        x_show = x_show.map(denormalize)
                save_dir, save_paths = None, None
                if need_to_save:
                    save_dir = path_possibly_formatted(
                        params_predict.show_result.save_path)
                    # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                if params_predict.show_result.plotter == "matplot":
                    onlysave_path = None
                    if only_save:
                        if need_to_save:
                            from helpers.util import tmp_filename_by_time
                            onlysave_path = osp.join(
                                save_dir, tmp_filename_by_time('jpg'))
                            need_to_save = False
                        else:
                            WARN(
                                'only_save is True, but save_path is not specified; ignored'
                            )
                    show_image_mats(x_show,
                                    texts=dumps,
                                    title="Predictions",
                                    onlysave_path=onlysave_path)
                else:
                    INFO(
                        f"Predictions{'(only diff)' if 'differences' in vars() else ''}: "
                        + ", ".join(dumps))
                # if need_to_save:
                #     save_image_mats(x_show, save_paths)
        else:
            top_k = params_predict.show_result.top_k
            INFO(
                f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}"
            )
        return predictions
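A hedged usage sketch with hypothetical `model`, `x_test` and `y_test`, passing the same `Params` structures the method declares as defaults:

predictions = ModelManager.model_predict(
    model, (x_test, y_test),
    decode_prediction=Params(name='logits_to_index'),
    show_result=Params(top_k=20, only_difference=True,
                       plotter='matplot', save_path=''))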
Example #8
    def model_train(model: object, data: object, **params):
        """
        NOTE: common Keras pitfall: Keras applies validation_split BEFORE shuffling, so if
          negative samples are grouped at the end of `data`, shuffle it yourself first.
        :param model:
        :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
        """
        # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
        params_train = Params(optimizer='adam',
                              loss='sparse_categorical_crossentropy',
                              metrics=['acc'],
                              validation_split=0.1,
                              epochs=5,
                              batch_size=None,
                              checkpoint=Params(load_weights="latest",
                                                save_weights=Params(
                                                    frequency="epoch",
                                                    max_to_keep=5)),
                              show_result=Params()).update_to(params)
        x_train, y_train = ModelManager._validate_input(data)

        import tensorflow as tf  # IMPROVE: check availability of ml backends
        if isinstance(model, tf.keras.Model):
            # 1.compile and load variables from checkpoint
            model.compile(
                **params_train.fromkeys(['optimizer', 'loss', 'metrics']))
            # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
            ckpt_dir, ckpt_path_to_load = None, None
            if params_train.checkpoint.format == "CKPT_dir":
                from config import __abspath__
                ckpt_dir = path_possibly_formatted(
                    params_train.checkpoint.path)
                ckpt_dir = __abspath__(
                    ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
                ensure_dir_exists(ckpt_dir)
                ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
            # NOTE: in delayed-build mode, the weights are determined only after calling
            #  build(batch_input_shape), or compile() plus fit(x, y, batch_size)
            #  ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
            if params_train.checkpoint.load_weights == "latest" \
                    and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
                    and ckpt_path_to_load is not None:
                model.load_weights(ckpt_path_to_load)

            # 2.prepare callbacks
            callbacks = []
            # callback :: save medium CKPT
            if params_train.checkpoint.save_weights.is_defined() and ckpt_dir is not None:
                ckpt_path_to_save = osp.join(
                    ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
                # NOTE: if save_freq is not 'epoch' (i.e. given as a number of steps), it will be less reliable
                _params = Params(save_freq='epoch').left_join(
                    params_train.checkpoint.save_weights,
                    key_map={"save_freq": "frequency"})
                _callback = tf.keras.callbacks.ModelCheckpoint(
                    ckpt_path_to_save,  # not checkpoint_dir
                    save_weights_only=True,
                    save_best_only=True,
                    verbose=1,
                    **_params)
                callbacks.append(_callback)
            # callback :: early stop
            if params_train.early_stop.is_defined():
                _params = Params(monitor='val_loss', patience=10).left_join(
                    params_train.early_stop)
                _callback = tf.keras.callbacks.EarlyStopping(**_params)
                callbacks.append(_callback)
            # callback :: progress indicator / verbose
            # IMPROVE: use config for training verbose / progress indicator
            # _callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps', stateful_metrics=None)  # cannot print in real time under PyTest
            _callback = tf.keras.callbacks.LambdaCallback(
                on_batch_end=lambda batch, logs: INFO(
                    f"batch{batch:05d}: loss={logs.get('loss',None):.4f},acc={logs.get('acc',None):.4f}"
                ))
            callbacks.append(_callback)
            cb_batch_stats = None
            if params_train.collect_batch_stats:
                # when only train several epochs, may collect stats of each batch instead of the epoch average.
                class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
                    def __init__(self):
                        self.current_batch = 0
                        self.batch = []
                        self.loss = []
                        self.acc = []

                    def on_train_batch_end(self, batch, logs=None):
                        self.batch.append(self.current_batch)
                        self.loss.append(logs['loss'])
                        self.acc.append(logs['acc'])
                        self.model.reset_metrics()
                        self.current_batch += 1

                cb_batch_stats = CallbackCollectBatchStats()  # TODO: can plot batch_losses and batch_acc using this
                callbacks.append(cb_batch_stats)
            if len(callbacks) == 0:
                callbacks = None

            # 3.train the model, and save checkpoints if configured
            # TODO: use model.fit_generator() for batch feeding. `steps_per_epoch` = np.ceil(samples / param.batch_size)
            # NOTE: core API for model training
            params_train_fit = params_train.fromkeys(
                ['validation_split', 'batch_size', 'epochs'])
            INFO(f"Beginning to train: {params_train_fit}")
            history = model.fit(x_train,
                                y_train,
                                **params_train_fit,
                                callbacks=callbacks)  # == core ==
            if cb_batch_stats is not None:
                history.history[
                    'batch'] = cb_batch_stats.batch  # accumulated batch number through epochs
                history.history['batch_loss'] = cb_batch_stats.loss
                history.history['batch_acc'] = cb_batch_stats.acc

            # 4.save checkpoint at the end
            if params_train.save_model.is_defined() and ckpt_dir is not None:
                _params = Params(format="SavedModel").left_join(
                    params_train.save_model)
                save_format, ckpt_path_to_save = None, None
                if _params.format == "HDF5":
                    save_format = _ext = "h5"
                    ckpt_path_to_save = osp.join(ckpt_dir,
                                                 f"model_trained.{_ext}")
                else:  # default=SavedModel
                    save_format = "tf"
                    ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained")
                    ensure_dir_exists(ckpt_path_to_save)
                # IMPROVE: consider using tf.saved_model.save()
                model.save(ckpt_path_to_save, save_format=save_format
                           )  # by default, TF2 saves as 'tf' (SavedModel)

            # Optional: output history
            if params_train.show_result.is_defined():
                plot_history = None
                if params_train.show_result.plotter == 'matplot':
                    from helpers.plt_helper import plot_history_by_metrics as plot_history
                if params_train.show_result.plotter.__len__() > 0 and plot_history is None:
                    WARN(
                        f"Unsupported history plotter: {params_train.show_result.plotter}"
                    )
                if plot_history is not None:
                    plot_history(history,
                                 params_train.show_result.get('metrics', None))
                else:
                    # TODO: check this section
                    hist = history.history
                    INFO(
                        f"Last epoch: "
                        f"ACC(train,val)=({hist['accuracy'][-1]}, {hist['val_accuracy'][-1]}), "
                        f"MSE(train,val)=({hist['mse'][-1]}, {hist['val_mse'][-1]})"
                    )
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

        return model
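A hedged usage sketch (hypothetical model, data and checkpoint path), mirroring the `Params` keys the method reads:

model = ModelManager.model_train(
    model, (x_train, y_train),
    epochs=10,
    batch_size=32,
    checkpoint=Params(format='CKPT_dir', path='ckpt/my_model',  # hypothetical path
                      load_weights='latest',
                      signature='tf.keras.Model.load_weights',
                      save_weights=Params(frequency='epoch')),
    save_model=Params(format='SavedModel'),
    show_result=Params(plotter='matplot'))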
Example #9
    def load_data(data_signature: str,
                  category="all",
                  meta_info=None,
                  **params) -> object:
        """
        :param data_signature:
        :param category: 'train', 'test' or 'all'
        :param meta_info: if given as a dict, caller may get meta info of the dataset through it
        :param params:
        :return: if `category`='all', the 'train' and 'test' datasets are returned as a tuple
        """
        data = None
        params_data = Params(timeout=0,
                             need_shuffle=False,
                             shuffle_seed=None,
                             test_split=0.2,
                             decode_x=Params(colormode=None,
                                             resize_w=None,
                                             resize_h=None,
                                             preserve_aspect_ratio=True,
                                             normalize=True,
                                             reshape=None),
                             decode_y=Params()).update_to(params)
        if data_signature == _DataSignature.LabeledFolders.signature:
            params_data = Params(
                file_exts=['jpg'],
                labels_ordered_in_train=None).update_to(params_data)
            import modules.data.dataset_labeled_folders as dataset_labeled_folders
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
            path = DataManager._validate_path(params_data.path)
            ds = dataset_labeled_folders.dataset(path,
                                                 category=category,
                                                 meta_info=meta_info,
                                                 **params_data)
            DEBUG(f"loaded tf.data.Dataset: {ds}")
            data = ds
        elif data_signature == _DataSignature.TFKerasDataset.signature:
            # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
            from importlib import import_module
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
            lib_dataset = import_module(
                f"tensorflow.keras.datasets.{params_data.name}")
            (x_train, y_train), (x_test,
                                 y_test) = lib_dataset.load_data()  # Tensors
            WARN(
                f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio"
            )
            if params_data.decode_x.normalize:
                x_train, x_test = x_train / 255.0, x_test / 255.0
            if params_data.decode_x.reshape.__len__() > 0:
                # TODO: decode_x reshape means image reshape, not matrix reshape
                x_train = x_train.reshape(params_data.decode_x.reshape)
                x_test = x_test.reshape(params_data.decode_x.reshape)
            DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
            if category == 'all':
                data = ((x_train, y_train), (x_test, y_test))
            elif category == 'train':
                data = (x_train, y_train)
            elif category == 'test':
                data = (x_test, y_test)
            else:
                raise ValueError(f"Unknown category: {category}")
            # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
        elif data_signature == _DataSignature.SingleFile.signature:
            path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = DataManager._process_files(path, **params_decode)
        elif data_signature == _DataSignature.UI_Copy_Files.signature:
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)

            def _process(event_type, abspath_or_list):
                nonlocal data
                INFO(f"clipboard event: path={abspath_or_list}")
                data = DataManager._process_files(abspath_or_list,
                                                  **params_decode)

            from helpers.qt_helper import ClipboardMonitor
            monitor_type = "Path_File" if params_data.format == "Path" else "PathList"

            # NOTE: use AsyncTask to impl async clipboard monitoring loop.
            # data = ClipboardMonitor([monitor_type]).run(_process, True)  # <- would get a blank result on a faulty copy
            from async_ import AsyncLoop, AsyncManager

            async def coro_clipboard_monitor():
                ClipboardMonitor([monitor_type]).run(_process, onetime=True)

            task = AsyncManager.run_task(coro_clipboard_monitor(),
                                         loop=None)  # block current loop
            DEBUG(
                f"[input_loop] monitoring clipboard with type {monitor_type} ..."
            )

            # wait until task done TODO: impl a context_manager for simple await
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))

        elif data_signature == _DataSignature.UI_Web_Files.signature:
            # path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = None

            webapp = ensure_web_app()  # will load config from Path.DeployConfigAbs
            INFO(
                f'waiting for data input from web app {webapp.host}:{webapp.port}'
            )  # IMPROVE: hint upload url
            from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
            from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
            import asyncio
            this_task: asyncio.Task or None = None

            @track_entry_and_exit.coro()
            async def coro_consume_files(abspath_or_list, cbs):
                # nonlocal this_task
                # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'

                import modules.data.decode_tf as decode_tf
                import tensorflow as tf

                DEBUG(f'[coro_consume_files]: {locals()}')
                on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                    cbs)
                filepaths = abspath_or_list if isinstance(
                    abspath_or_list, list) else [abspath_or_list]
                result = {}  # data: tf.data.Dataset::{image_t}, error: optional(str)

                # from helpers.tf_helper import image_example
                # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
                data = DataManager._process_files(filepaths, **params_decode)

                result.update({'data': data})
                # # if show inputs
                # try:
                #     asynctask = async_show_image_mats(image_mats)
                #     result.update({'asynctask_id': asynctask.id})
                # except Exception as e:
                #     result.update({'error': e.__repr__()})
                on_done(result)
                # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
                return result  # == this_task.set_result(result)

            def on_done_consume_inputs(result):
                """
                If using task.set_result, set_exception etc and wait for task instead of data,
                callbacks will be optional.
                """
                nonlocal data
                INFO(f'on_done_consume_inputs: {result}')
                data = result.get('data', None)

            @webapp.on_uploads(namespace="data_manager::ui_web_files",
                               onetime=True)
            def handle_ui_web_files(abspath_or_list):
                nonlocal this_task
                this_task = AsyncManager.run_task(
                    coro_consume_files(abspath_or_list,
                                       (on_done_consume_inputs, )))
                handler_result = {'asynctask_id': this_task.id}
                return handler_result

            # wait until get data uploaded
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))
            pass
        else:
            raise ValueError(f"Unsupported data signature: {data_signature}")
        # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
        #   data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
        # tf = safe_import_module("tensorflow")
        # if tf and isinstance(data, tf.data.Dataset):
        #     if params_data.shuffle.fixed_seed:
        #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
        return data
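A hedged usage sketch (hypothetical dataset path); `meta_info` is filled in by the loader when a dict is passed:

meta = {}
train_ds = DataManager.load_data(
    _DataSignature.LabeledFolders.signature,
    category='train',
    meta_info=meta,
    path='data/labeled_flowers',  # hypothetical folder-per-label layout
    decode_x=Params(resize_w=224, resize_h=224, normalize=True))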
Example #10
    def _slot_Clipboard_OnChanged(self):
        try:
            # --- Only One Instance can be run
            if self.IsInCallback and self.IsBlockInCallback:
                return
            self.IsInCallback = True

            # --- Check Captured Data
            tMimeData: QtCore.QMimeData = self.QtClipboard.mimeData()

            # [A] Urls ---
            if len(tMimeData.urls()) > 0:
                # --- Get List
                tPathList = []
                for tUrl in tMimeData.urls():
                    tUrl: QtCore.QUrl = tUrl
                    tPath: str = tUrl.toLocalFile()
                    # tPath = tPath.replace("/", "\\")  # os compatibility
                    # r'\/'.replace(os.sep, '') leaves the non-native separator,
                    # which is then normalized to os.sep
                    tPath = tPath.replace(r'\/'.replace(os.sep, ''), os.sep)

                    # Check exist
                    if (not os.path.isfile(tPath)) and (not os.path.isdir(tPath)):
                        continue
                    tPathList.append(tPath)

                # --- Distribute Message
                if "Path_File" in self.Mode:
                    for iPath in tPathList:
                        if os.path.isfile(iPath):
                            self.CallResults = self.Callback("Path_File", iPath)

                if "PathList" in self.Mode:
                    self.CallResults = self.Callback("PathList", tPathList)

                if "Path" in self.Mode:
                    for tPath in tPathList:
                        self.CallResults = self.Callback("Path", tPath)

                if "Image" in self.Mode:
                    # --- CV Import
                    from .. import OpenCV as LuPy_Cv2
                    for tPath in tPathList:
                        tExt: str = os.path.splitext(tPath)[-1]
                        if tExt.lower() in [".bmp", ".jpg", ".png"]:
                            tMat = LuPy_Cv2.safe_imread(tPath, -1)
                            self.CallResults = self.Callback("Image", tMat)

            # [B] Image Data
            if tMimeData.hasImage():
                tQImage = tMimeData.imageData()  # RGB32 with 0xffRRGGBB ===> BGRA in np
                tPtr = tQImage.constBits()
                tPtr.setsize(tQImage.byteCount())
                tMat = np.ndarray(buffer=tPtr, shape=[tQImage.height(), tQImage.width(), 4], dtype=np.uint8)

                # --- XOR
                if "Screen" in self.Mode:
                    self.CallResults = self.Callback("Image", tMat)

                elif "Image" in self.Mode:
                    self.CallResults = self.Callback("Image", tMat)

        except Exception as e:
            WARN(f'Exception during clipboard event handling: {e}')
        finally:
            self.IsInCallback = False
            if self.IsRunOnce:
                self.Stop()
        pass
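A usage sketch of the monitor driving this slot, matching the call seen in Example #9 (`ClipboardMonitor([monitor_type]).run(_process, onetime=True)`); the callback receives `(event_type, payload)`:

def on_clipboard(event_type, payload):
    # e.g. event_type == "PathList" and payload == [path1, path2, ...]
    print(event_type, payload)

results = ClipboardMonitor(["PathList"]).run(on_clipboard, onetime=True)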
Example #11
    def _slot_Clipboard_OnChanged(self):
        try:
            # --- Only One Instance can be run
            if self.IsInCallback and self.IsBlockInCallback:
                return
            self.IsInCallback = True

            # --- Check Captured Data
            tMimeData: QtCore.QMimeData = self.QtClipboard.mimeData()

            # [A] Urls ---
            if len(tMimeData.urls()) > 0:
                # --- Get List
                tPathList = []
                for tUrl in tMimeData.urls():
                    tUrl: QtCore.QUrl = tUrl
                    tPath: str = tUrl.toLocalFile()
                    # normalize the non-native path separator ('\' or '/') to os.sep
                    tPath = tPath.replace(r'\/'.replace(os.sep, ''), os.sep)

                    # Check exist
                    if (not os.path.isfile(tPath)) and (
                            not os.path.isdir(tPath)):
                        continue
                    tPathList.append(tPath)

                # --- Distribute Message
                if len(tPathList) > 0:
                    if "Path_File" in self.Mode:
                        for tPath in tPathList:
                            if os.path.isfile(tPath):
                                self.CallResults = self.Callback(
                                    "Path_File", tPath)

                    if "PathList" in self.Mode:
                        self.CallResults = self.Callback("PathList", tPathList)

                    if "Path" in self.Mode:
                        for tPath in tPathList:
                            self.CallResults = self.Callback("Path", tPath)

            # [B] Image Data
            if tMimeData.hasImage():
                tQImage = tMimeData.imageData()  # RGB32 with 0xffRRGGBB ===> BGRA in np
                tPtr = tQImage.constBits()
                tPtr.setsize(tQImage.byteCount())
                tMat = np.ndarray(buffer=tPtr,
                                  shape=[tQImage.height(),
                                         tQImage.width(), 4],
                                  dtype=np.uint8)

                if "Image" in self.Mode:
                    self.CallResults = self.Callback("Image", tMat)

        except Exception as e:
            WARN(f'Exception during clipboard event handling: {e}')
        finally:
            self.IsInCallback = False
            if self.IsRunOnce and self.CallResults is not self.__class__.FLAG_NO_RESULT:
                self.stop()  # only stop after a successful handling
        pass
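The stop condition compares against a `FLAG_NO_RESULT` sentinel that is not shown here; a minimal sketch of the assumed convention:

class ClipboardMonitor:
    FLAG_NO_RESULT = object()  # sentinel meaning "no callback has produced a result yet"
    # CallResults would be reset to this sentinel before each run,
    # so `self.CallResults is not FLAG_NO_RESULT` signals a successful handling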
Example #12
    def model_predict(model: object, data: object, **params) -> object:
        params_predict = Params(decode_prediction=Params({}), show_result=Params({})).update_to(params)
        predictions = None
        x, y = ModelManager._validate_input(data)

        import numpy as np
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # wrapper for different model types
        def _predict(inputs):
            # NOTE: core API for prediction
            if isinstance(model, tf.keras.Model):
                # NOTE: if x is ndarray, result will be ndarray too
                return model.predict(inputs)
            elif callable(model):
                # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
                input_spec = Params(input_num=None).left_join(params_predict)
                params = {}
                # IMPROVE: judge the base class of model, to append required params
                if model.__module__.startswith("modules.models.tensorlayer"):
                    params.update({'is_train': False})
                if isinstance(inputs, tf.data.Dataset):
                    # TODO: specify InputSpec (inputs element_spec) for prediction
                    if input_spec.input_num is None:
                        pass
                    elif isinstance(input_spec.input_num, int):
                        assert input_spec.input_num > 0, "input_num must be > 0"
                        # TODO: more test cases needed
                        # inputs.batch(input_spec.input_num)
                        # result = []
                        # assert iterable(inputs)
                        # for batch in inputs:
                        #     inputs_list = [_ for _ in batch]
                        #     inputs_list = inputs_list[0] if len(inputs_list) == 1 else inputs_list
                        #     result.append(model(inputs_list, **params))
                        # # return tf.stack(result)
                        # return result[0] if len(result) == 1 else None if len(result) == 0 else result
                        if input_spec.input_num > 1:
                            inputs = inputs.batch(
                                input_spec.input_num)  # NOTE: headed with a `batch_size` dim by this step
                            # inputs = inputs.unbatch()
                    else:
                        raise ValueError(f'cannot handle input_spec.input_num={input_spec.input_num}')
                    # NOTE: callable model might not support batch feeding. so it's up to caller to constrain the size.
                    result = []
                    for inputs_ in inputs.as_numpy_iterator():
                        result.append(model(inputs_, **params))  # NOTE: if input_num > 1
                    return result[0] if len(result) == 1 else None if len(result) == 0 else result
                else:
                    result = model(inputs, **params)
                    return result
            else:
                raise TypeError(f"Unsupported model type: {type(model)}")

        predictions = _predict(x)
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank.")
            return predictions  # None

        if params_predict.decode_prediction.is_defined():
            if params_predict.decode_prediction.name == 'logits_to_index':
                # one-hot array -> index
                if isinstance(predictions, np.ndarray):
                    predictions = np.argmax(predictions, axis=-1)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.argmax(predictions, axis=-1)
                else:
                    raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
            elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
                # for retrain, prediction should be a probs array and need to be sorted by `top_k`
                # NOTE: length of each prediction must be equivalent.
                top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
                # returns: top_values(=probs), top_idxs
                if isinstance(predictions, np.ndarray):
                    predictions = np_top_k(predictions, top_k)
                elif isinstance(predictions, tf.Tensor):
                    predictions = tf.math.top_k(input=predictions, k=top_k)
                else:
                    raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
            elif params_predict.decode_prediction.name == 'image_denormalize':
                from modules.data.data_manager import DataManager
                predictions = DataManager.denormalize(predictions)
            else:
                raise ValueError(
                    f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

        if params_predict.show_result.is_defined():
            if isinstance(predictions, np.ndarray):
                # IMPROVE: support `show_result.inputs_type/outputs_type` e.g.'images''features''label_indexes'
                x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
                if params_predict.show_result.only_difference:
                    if hasattr(y_show, '__len__'):
                        if p_show.__len__() == y_show.__len__():
                            differences = p_show != y_show
                            x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                        else:
                            WARN(f"Cannot dump differences: len of targets is not same as predictions"
                                 f"({y_show.__len__()} vs {p_show.__len__()})")
                    else:
                        WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
                    INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
                if params_predict.show_result.get('top_k', None) is not None:
                    top_k = params_predict.show_result.top_k
                    # TODO: sorting? 1.use tf.math.top_k  2.diff algorithm need to be specified
                    x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
                if len(p_show) > 0:
                    dumps = []
                    for i, p in enumerate(p_show):
                        if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                            dumps.append(f"{p}")
                        else:
                            dumps.append(f"({p} vs {y_show[i]})")
                    need_to_show = params_predict.show_result.plotter.__len__() > 0
                    need_to_save = params_predict.show_result.save_path.__len__() > 0
                    only_save = params_predict.show_result.only_save
                    if need_to_show or need_to_save:
                        # IMPROVE: use signature to match normalize and `denormalize` routines
                        from modules.data.data_manager import DataManager
                        if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
                            x_show = DataManager.denormalize(x_show)
                        elif hasattr(x_show, "element_spec") and \
                            hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                            x_show = x_show.map(DataManager.denormalize)
                    save_dir, save_paths = None, None
                    if need_to_save:
                        save_dir = path_possibly_formatted(params_predict.show_result.save_path)
                        # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                    if params_predict.show_result.plotter == "matplot":
                        onlysave_path = None
                        if only_save:
                            if need_to_save:
                                from helpers.util import tmp_filename_by_time
                                onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                                need_to_save = False
                            else:
                                WARN('only_save is True, but save_path is not specified; ignored')
                        show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
                    else:
                        INFO(f"Predictions{'(only diff)' if 'differences' in vars() else ''}: " + ", ".join(dumps))
                    # if need_to_save:
                    #     save_image_mats(x_show, save_paths)
            else:
                top_k = params_predict.show_result.top_k
                INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")

        return predictions
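`np_top_k` is referenced above but not shown; a minimal NumPy sketch consistent with the `(top_values, top_idxs)` pair the decoding branch expects (mirroring `tf.math.top_k`):

import numpy as np

def np_top_k(arr, k):
    # indices of the k largest entries along the last axis, sorted descending
    idxs = np.argsort(arr, axis=-1)[..., ::-1][..., :k]
    values = np.take_along_axis(arr, idxs, axis=-1)
    return values, idxs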