Ejemplo n.º 1
0
            async def coro_consume_files(abspath_or_list, cbs):
                """
                Decode the given file path(s) into a dataset and deliver the
                result through the `on_done` callback.

                :param abspath_or_list: one absolute path or a list of paths
                :param cbs: callback tuple amended by `amend_blank_cbs`
                :return: dict with 'data' (decoded dataset); may carry 'error'
                """
                # nonlocal this_task
                # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'

                # NOTE(review): both imports below appear unused in this body --
                # possibly kept for import side effects; confirm before removing.
                import modules.data.decode_tf as decode_tf
                import tensorflow as tf

                DEBUG(f'[coro_consume_inputs]: {locals()}')
                on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                    cbs)
                # normalize the argument to a list of file paths
                filepaths = abspath_or_list if isinstance(
                    abspath_or_list, list) else [abspath_or_list]
                result = {
                }  # data: tf.data.Dataset::{image_t}, error: optional(str)

                # from helpers.tf_helper import image_example
                # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
                data = DataManager._process_files(filepaths, **params_decode)

                result.update({'data': data})
                # # if show inputs
                # try:
                #     asynctask = async_show_image_mats(image_mats)
                #     result.update({'asynctask_id': asynctask.id})
                # except Exception as e:
                #     result.update({'error': e.__repr__()})
                on_done(result)
                # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
                return result  # == this_task.set_result(result)
Ejemplo n.º 2
0
 def dispatch_handlers(self,
                       event_name: str,
                       *args,
                       namespace=None,
                       **kwargs):
     """
     Invoke every registered handler matching `event_name` (and `namespace`
     when given), collecting their return values.

     :param event_name: event to dispatch
     :param args: positional arguments forwarded to each handler
     :param namespace: if None means needn't to compare namespace
     :param kwargs: keyword arguments forwarded to each handler
     :return: list of the values returned by the invoked handlers
     """
     results = []
     expired = set()
     for entry in self.handlers:
         ev_name, ns, handler, is_onetime = entry
         if ev_name != event_name:
             continue
         if namespace is not None and ns != namespace:
             continue
         try:
             DEBUG(
                 f"[{ev_name}{'@'+(ns or '')}] dispatch({args}, {kwargs})"
             )
             # one-shot handlers are unregistered after this dispatch,
             # even when the call below raises
             if is_onetime:
                 expired.add(entry)
             results.append(handler(*args, **kwargs))
         except Exception as e:
             WARN(
                 f"Registered handler caused exception ({ev_name}@{ns}, "
                 f"which should have been caught in handler side): {e}")
     self.handlers -= expired
     return results
Ejemplo n.º 3
0
def async_preload_gpu_devices():
    """
    Kick off GPU-device preloading on the DataProcess async loop so that it
    overlaps with user input or other waiting periods.

    :return: the AsyncManager task wrapping the preload coroutine
    """
    # IMPROVE: an asyncio loop hosted in a new thread is overkill here;
    # a plain worker thread would suffice.
    from async_ import AsyncLoop, AsyncManager

    async def coro_simple_run():
        preload_gpu_devices()

    data_loop = AsyncManager.get_loop(AsyncLoop.DataProcess)
    DEBUG(f"[tensorflow] preload gpu devices in another thread...")
    return AsyncManager.run_task(coro_simple_run(), loop=data_loop)
Ejemplo n.º 4
0
    def async_run(self, **params):
        """
        Launch this web app on its own asyncio loop/thread so it does not
        block ML operations on the caller's thread.

        :param params: forwarded to `run()` (e.g. `port`)
        :return: the AsyncManager task hosting the web app
        """
        # IMPROVE: a dedicated plain thread would suffice; the asyncio loop
        # is not strictly required for hosting the web app.
        from async_ import AsyncLoop, AsyncManager

        app = self

        async def coro_webapp_run():
            app.run(**params)

        loop = AsyncManager.get_loop(AsyncLoop.WebApp)
        task = AsyncManager.run_task(coro_webapp_run(), loop=loop)
        DEBUG(
            f"[webapp_loop] listening to port {params.get('port', '<unknown>')} ..."
        )
        return task
Ejemplo n.º 5
0
    def load_model(model_signature: str, **params) -> object:
        """
        Load a model specified by `model_signature`, dispatching to the
        matching backend loader (TF SavedModel, TF-Hub KerasLayer, preset
        Keras Sequential models, keras.models.load_model, or a frozen
        TF GraphDef).

        NOTE: common Keras pitfall: TF and Theano convolution kernels have
        the same shape, so a loaded model must be validated with test
        samples -- Keras itself cannot tell them apart.

        :param model_signature: one of the `_ModelSignature` signatures
        :param params: signature-specific options (path, format, tags, ...)
        :return: the loaded model object (exact type depends on the backend)
        :raises ValueError: for an unsupported signature or preset name
        """
        model = None
        inputs, outputs = {}, {}  # {name: shape} dicts
        if model_signature == _ModelSignature.TFSavedModel.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
            path = ModelManager._validate_path(params.get('path', None))
            model = tf.saved_model.load(path, params.get('tags',
                                                         None))  # == core ==
            if params.get('signature_', None) is not None:
                model = model.signatures[params['signature_']]
            # TODO: append inputs, outputs spec to model object? so that predict() can adapt the fed inputs
            # FIX: the original tested the misspelled attribute name
            # 'structured_outpus', so this branch could never run.
            if hasattr(model, 'inputs') and hasattr(model,
                                                    'structured_outputs'):
                inputs = {model.inputs[0].name: model.inputs[0].shape}
                outputs = {
                    'default': model.structured_outputs['default']
                }  # IMPROVE: iterate
        elif model_signature == _ModelSignature.TFHub_KerasLayer.signature:
            import tensorflow_hub as tf_hub
            # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
            path = ModelManager._validate_path(params.get('path', None))
            params_model = Params(input_shape=None,
                                  trainable=False).update_to(params)
            # a 4-D shape includes the batch dimension -- strip it
            if params_model.input_shape.__len__() == 4:
                params_model.input_shape = params_model.input_shape[1:]
            # NOTE: it will be delayed-build pattern when `input_shape` is None. no weights info available until build.
            model = tf_hub.KerasLayer(path,
                                      input_shape=params_model.input_shape)
            model.trainable = params_model.trainable
        elif model_signature == _ModelSignature.KerasSequential.signature:
            # IMPROVE: check availability of ml backends
            from tensorflow.keras import Sequential, layers
            name = params['name']
            # IMPROVE: parse name -> layers, or use structural config for iteration
            if name == '{conv-pool}*2-flat-dense-drop-dense':
                # NOTE: only for _test_\TF_1x_to_2x_3, output is len=10 logits
                model = Sequential([
                    # NOTE: 1. TF2.x no longer requires fixing the Input layer
                    #          dims; consecutive layers connect automatically.
                    #       2. Conv layers need not know the previous layer's
                    #          (h, w); only filter count, kernel size and
                    #          padding (keeping h, w) are set.
                    #       3. But without `input_shape` the Optimizer cannot
                    #          load previously saved parameters and can only
                    #          re-initialize.
                    layers.Conv2D(32, (5, 5),
                                  strides=(1, 1),
                                  padding='same',
                                  activation='relu'),
                    layers.MaxPooling2D(pool_size=(2, 2),
                                        strides=(2, 2),
                                        padding='same'),
                    layers.Conv2D(64, (5, 5),
                                  strides=(1, 1),
                                  padding='same',
                                  activation='relu'),
                    layers.MaxPooling2D(pool_size=(2, 2),
                                        strides=(2, 2),
                                        padding='same'),
                    layers.Flatten(),  # the dense net below expects 1-D data
                    layers.Dense(1024, activation='relu'),
                    layers.Dropout(0.5),  # TODO: disable Dropout @ evaluate, predict
                    layers.Dense(10, activation='softmax')
                ])
            elif name == 'dense-dense_softmax':
                params_model = Params(embedding_size=1024,
                                      class_count=None).update_to(params)
                if params_model.class_count is None:
                    raise ValueError('class_count must be specified')
                model = Sequential([
                    layers.Dense(params_model.embedding_size,
                                 activation='relu'),
                    layers.Dense(params_model.class_count,
                                 activation='softmax')
                ])
                # TODO: need to return intermediate tf.Tensor required by embedding, loss calculation and evaluation.
            else:
                raise ValueError(f"Undefined model: {name}")
        elif model_signature == _ModelSignature.KerasModels_LoadModel.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            # called for its validation side effect (return value was unused)
            ModelManager._validate_format(
                params['format'], _ModelSignature.KerasModels_LoadModel)
            params_model = Params(path='', path_formatted='').update_to(params)
            path = ModelManager._validate_path(params_model.path)
            model = tf.keras.models.load_model(path)  # == core ==
        elif model_signature == _ModelSignature.TF_ImportGraphDef.signature:
            import tensorflow as tf  # IMPROVE: check availability of ml backends
            # called for its validation side effect (return value was unused)
            ModelManager._validate_format(
                params['format'], _ModelSignature.TF_ImportGraphDef)
            params_model = Params(inputs='', outputs='').update_to(params)
            path = ModelManager._validate_path(params_model.path)

            # import PB model (frozen) in TF2.x. ref:https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            # ref:https://www.tensorflow.org/api_docs/python/tf/compat/v1/wrap_function
            def wrap_frozen_graph(pb_path, inputs, outputs, prefix=""):
                """Wrap a frozen GraphDef (.pb) as a pruned ConcreteFunction."""
                def _imports_graph_def():
                    tf.compat.v1.import_graph_def(
                        graph_def,
                        name=prefix)  # turn off the default prefix "import/"

                graph_def = tf.compat.v1.GraphDef()
                # FIX: close the file deterministically (the original used a
                # bare open() whose handle was never closed)
                with open(pb_path, 'rb') as pb_file:
                    graph_def.ParseFromString(pb_file.read())  # == core ==
                wrapped_import = tf.compat.v1.wrap_function(
                    _imports_graph_def, [])  # == core ==
                import_graph = wrapped_import.graph
                return wrapped_import.prune(
                    tf.nest.map_structure(import_graph.as_graph_element,
                                          inputs),
                    tf.nest.map_structure(import_graph.as_graph_element,
                                          outputs))

            model = wrap_frozen_graph(path,
                                      inputs=params_model.inputs,
                                      outputs=params_model.outputs)
            test_img = tf.ones(
                [1, 224, 224, 3],
                dtype=tf.float32)  # fixed shape is for test ONLY
            DEBUG(f"wrap_func test result: {model(test_img).shape}")
        else:
            raise ValueError(f"Unsupported model signature: {model_signature}")
        INFO(f"type of loaded model={type(model)}")
        INFO(f"  inputs={inputs}, outputs={outputs}")
        return model
Ejemplo n.º 6
0
    def load_data(data_signature: str,
                  category="all",
                  meta_info=None,
                  **params) -> object:
        """
        Load a dataset chosen by `data_signature`: labeled folders, a Keras
        built-in dataset, a single file, clipboard-pasted paths, or files
        uploaded through the web app (the last two block until input arrives).

        :param data_signature: one of the `_DataSignature` signatures
        :param category: 'train', 'test' or 'all'
        :param meta_info: if given as a dict, caller may get meta info of the dataset through it
        :param params: signature-specific options (path, decode_x, decode_y, ...)
        :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
        """
        data = None
        # Common defaults; decode_x/decode_y describe how raw samples are
        # decoded (colormode, resize, normalization, reshape).
        params_data = Params(timeout=0,
                             need_shuffle=False,
                             shuffle_seed=None,
                             test_split=0.2,
                             decode_x=Params(colormode=None,
                                             resize_w=None,
                                             resize_h=None,
                                             preserve_aspect_ratio=True,
                                             normalize=True,
                                             reshape=None),
                             decode_y=Params()).update_to(params)
        if data_signature == _DataSignature.LabeledFolders.signature:
            # -- directory tree where each sub-folder is one label --
            params_data = Params(
                file_exts=['jpg'],
                labels_ordered_in_train=None).update_to(params_data)
            import modules.data.dataset_labeled_folders as dataset_labeled_folders
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
            path = DataManager._validate_path(params_data.path)
            ds = dataset_labeled_folders.dataset(path,
                                                 category=category,
                                                 meta_info=meta_info,
                                                 **params_data)
            DEBUG(f"loaded tf.data.Dataset: {ds}")
            data = ds
        elif data_signature == _DataSignature.TFKerasDataset.signature:
            # -- built-in keras dataset addressed by module name --
            # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
            from importlib import import_module
            # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
            lib_dataset = import_module(
                f"tensorflow.keras.datasets.{params_data.name}")
            (x_train, y_train), (x_test,
                                 y_test) = lib_dataset.load_data()  # Tensors
            WARN(
                f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio"
            )
            if params_data.decode_x.normalize:
                # scale uint8 pixel values into [0, 1]
                x_train, x_test = x_train / 255.0, x_test / 255.0
            # NOTE(review): assumes decode_x.reshape supports __len__ even when
            # unset (blank Params?); a plain None would raise here -- confirm.
            if params_data.decode_x.reshape.__len__() > 0:
                # TODO: decode_x reshape means image reshape, not matrix reshape
                x_train = x_train.reshape(params_data.decode_x.reshape)
                x_test = x_test.reshape(params_data.decode_x.reshape)
            DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
            if category == 'all':
                data = ((x_train, y_train), (x_test, y_test))
            elif category == 'train':
                data = (x_train, y_train)
            elif category == 'test':
                data = (x_test, y_test)
            else:
                raise ValueError(f"Unknown category: {category}")
            # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
        elif data_signature == _DataSignature.SingleFile.signature:
            # -- decode one local file synchronously --
            path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = DataManager._process_files(path, **params_decode)
        elif data_signature == _DataSignature.UI_Copy_Files.signature:
            # -- wait for file path(s) copied to the clipboard, then decode --
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)

            def _process(event_type, abspath_or_list):
                # clipboard callback: decode the pasted path(s) and publish
                # the result through the enclosing `data` variable
                nonlocal data
                INFO(f"clipboard event: path={abspath_or_list}")
                data = DataManager._process_files(abspath_or_list,
                                                  **params_decode)

            from helpers.qt_helper import ClipboardMonitor
            monitor_type = "Path_File" if params_data.format == "Path" else "PathList"

            # NOTE: use AsyncTask to impl async clipboard monitoring loop.
            # data = ClipboardMonitor([monitor_type]).run(_process, True)  #<- will get blank result on a fault copy
            from async_ import AsyncLoop, AsyncManager

            async def coro_clipboard_monitor():
                ClipboardMonitor([monitor_type]).run(_process, onetime=True)

            task = AsyncManager.run_task(coro_clipboard_monitor(),
                                         loop=None)  # block current loop
            DEBUG(
                f"[input_loop] monitoring clipboard with type {monitor_type} ..."
            )

            # wait until task done TODO: impl a context_manager for simple await
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                # poll until `_process` has assigned `data`
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))

        elif data_signature == _DataSignature.UI_Web_Files.signature:
            # -- wait for files uploaded via the web app, then decode --
            # path = DataManager._validate_path(params_data.path)
            params_decode = Params(encoding='jpg',
                                   colormode=None,
                                   reshape=None,
                                   preserve_aspect_ratio=True,
                                   color_transform=None,
                                   normalize=True).left_join(
                                       params_data.decode_x)
            data = None

            webapp = ensure_web_app(
            )  # will load config from Path.DeployConfigAbs
            INFO(
                f'waiting for data input from web app {webapp.host}:{webapp.port}'
            )  # IMPROVE: hint upload url
            from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
            from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
            import asyncio
            this_task: asyncio.Task or None = None

            @track_entry_and_exit.coro()
            async def coro_consume_files(abspath_or_list, cbs):
                # Decode the uploaded file(s) and deliver the result through
                # the `on_done` callback (and the coroutine's return value).
                # nonlocal this_task
                # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'

                import modules.data.decode_tf as decode_tf
                import tensorflow as tf

                DEBUG(f'[coro_consume_inputs]: {locals()}')
                on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                    cbs)
                filepaths = abspath_or_list if isinstance(
                    abspath_or_list, list) else [abspath_or_list]
                result = {
                }  # data: tf.data.Dataset::{image_t}, error: optional(str)

                # from helpers.tf_helper import image_example
                # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
                data = DataManager._process_files(filepaths, **params_decode)

                result.update({'data': data})
                # # if show inputs
                # try:
                #     asynctask = async_show_image_mats(image_mats)
                #     result.update({'asynctask_id': asynctask.id})
                # except Exception as e:
                #     result.update({'error': e.__repr__()})
                on_done(result)
                # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
                return result  # == this_task.set_result(result)

            def on_done_consume_inputs(result):
                """
                If using task.set_result, set_exception etc and wait for task instead of data,
                callbacks will be optional.
                """
                nonlocal data
                INFO(f'on_done_consume_inputs: {result}')
                data = result.get('data', None)

            @webapp.on_uploads(namespace="data_manager::ui_web_files",
                               onetime=True)
            def handle_ui_web_files(abspath_or_list):
                # upload handler: schedule the consume coroutine and hand the
                # async task id back to the web layer
                nonlocal this_task
                this_task = AsyncManager.run_task(
                    coro_consume_files(abspath_or_list,
                                       (on_done_consume_inputs, )))
                handler_result = {'asynctask_id': this_task.id}
                return handler_result

            # wait until get data uploaded
            import asyncio
            loop = asyncio.get_event_loop()  # block current loop

            async def coro_simple_wait(timeout=None):
                # poll until `on_done_consume_inputs` has assigned `data`
                while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                    await asyncio.sleep(1)

            loop.run_until_complete(coro_simple_wait(timeout=None))
            pass
        else:
            raise ValueError(f"Unsupported data signature: {data_signature}")
        # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
        #   data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
        # tf = safe_import_module("tensorflow")
        # if tf and isinstance(data, tf.data.Dataset):
        #     if params_data.shuffle.fixed_seed:
        #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
        return data
Ejemplo n.º 7
0
 # Done-callback for an async task: logs the finished future's result and
 # exception for debugging.
 def inner_done_cb(fut):  # TODO: fut == task? make sure
     # NOTE(review): `{id}` presumably refers to a task id captured from the
     # enclosing scope (not visible here); if no such variable exists this
     # would format the builtin `id` function instead -- confirm.
     DEBUG(
         f'[inner_done_callback] task={id}, result={fut.result()}, except={fut.exception()}'
     )