async def coro_consume_files(abspath_or_list, cbs):
    """
    Decode the given uploaded file path(s) into a dataset and report via callbacks.

    :param abspath_or_list: a single absolute path or a list of absolute paths
    :param cbs: callback tuple; missing entries are filled by `amend_blank_cbs`
    :return: result dict -- data: tf.data.Dataset::{image_t}, error: optional(str)
    """
    # nonlocal this_task
    # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'
    import modules.data.decode_tf as decode_tf
    import tensorflow as tf
    DEBUG(f'[coro_consume_inputs]: {locals()}')
    on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(cbs)
    if isinstance(abspath_or_list, list):
        filepaths = abspath_or_list
    else:
        filepaths = [abspath_or_list]
    # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
    decoded = DataManager._process_files(filepaths, **params_decode)
    result = {'data': decoded}
    on_done(result)
    # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
    return result  # == this_task.set_result(result)
def dispatch_handlers(self, event_name: str, *args, namespace=None, **kwargs):
    """
    Invoke every registered handler matching `event_name` (and `namespace`, when given).

    One-time handlers are removed from `self.handlers` after dispatch. Exceptions
    raised by a handler are logged and swallowed so remaining handlers still run.

    :param event_name: event to dispatch
    :param args: positional arguments forwarded to each handler
    :param namespace: if None means needn't to compare namespace
    :param kwargs: keyword arguments forwarded to each handler
    :return: list of each invoked handler's return value
    """
    results = []
    expired = set()
    for entry in self.handlers:
        _event_name, _namespace, _handler, _is_onetime = entry
        # guard clauses: skip non-matching event / namespace
        if _event_name != event_name:
            continue
        if namespace is not None and _namespace != namespace:
            continue
        try:
            # import inspect
            # DEBUG(f"_handler signature: ({[param.kind.description for param in inspect.signature(_handler).parameters.values()]})")
            DEBUG(
                f"[{_event_name}{'@'+(_namespace or '')}] dispatch({args}, {kwargs})"
            )
            if _is_onetime:
                expired.add(entry)
            results.append(_handler(*args, **kwargs))
        except Exception as e:
            WARN(
                f"Registered handler caused exception ({_event_name}@{_namespace}, "
                f"which should have been caught in handler side): {e}")
    self.handlers -= expired
    return results
def async_preload_gpu_devices():
    """
    Kick off GPU device preloading on the DataProcess loop/thread.

    Hopefully called during a wait for user input or another idle period, so the
    (slow) device initialization overlaps with it.

    :return: the task handle returned by `AsyncManager.run_task`
    """
    # IMPROVE: needn't to run in an asyncio loop (host in a new thread); a plain new thread is enough.
    from async_ import AsyncLoop, AsyncManager

    async def coro_simple_run():
        preload_gpu_devices()

    worker_loop = AsyncManager.get_loop(AsyncLoop.DataProcess)
    DEBUG(f"[tensorflow] preload gpu devices in another thread...")
    return AsyncManager.run_task(coro_simple_run(), loop=worker_loop)
def async_run(self, **params):
    """
    Launch the web app on its own asyncio loop/thread so it doesn't block ML operations.

    :param params: forwarded to `self.run` (e.g. `port`)
    :return: the task handle returned by `AsyncManager.run_task`
    """
    # IMPROVE: web app need not run in an asyncio loop (hosted in a new thread); a plain new thread is enough.
    from async_ import AsyncLoop, AsyncManager
    app = self  # bind for the coroutine closure below

    async def coro_webapp_run():
        app.run(**params)

    loop = AsyncManager.get_loop(AsyncLoop.WebApp)
    task = AsyncManager.run_task(coro_webapp_run(), loop=loop)
    DEBUG(
        f"[webapp_loop] listening to port {params.get('port', '<unknown>')} ..."
    )
    return task
def load_model(model_signature: str, **params) -> object:
    """
    Load a model according to its signature key.

    NOTE: common Keras pitfall: TF and Theano convolution kernels share the same
    shape, so a loaded model must be validated against test samples -- Keras
    cannot tell the two apart.

    :param model_signature: signature key, matched against `_ModelSignature.*.signature`
    :param params: signature-specific options (e.g. path, format, tags, name, inputs, outputs)
    :return: the loaded model object (concrete type depends on the signature)
    :raises ValueError: for an unsupported signature or an undefined model name
    """
    model = None
    inputs, outputs = {}, {}  # {name: shape} dicts
    if model_signature == _ModelSignature.TFSavedModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        model = tf.saved_model.load(path, params.get('tags', None))  # == core ==
        if params.get('signature_', None) is not None:
            model = model.signatures[params['signature_']]
        # TODO: append inputs, outputs spec to model object? so that predict() can adapt the fed inputs
        # FIX: was `hasattr(model, 'structured_outpus')` (typo) -- this branch was unreachable
        if hasattr(model, 'inputs') and hasattr(model, 'structured_outputs'):
            inputs = {model.inputs[0].name: model.inputs[0].shape}
            outputs = {'default': model.structured_outputs['default']}  # IMPROVE: iterate
    elif model_signature == _ModelSignature.TFHub_KerasLayer.signature:
        import tensorflow_hub as tf_hub
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        params_model = Params(input_shape=None, trainable=False).update_to(params)
        if params_model.input_shape.__len__() == 4:
            # drop the batch dimension: KerasLayer expects a per-sample input shape
            params_model.input_shape = params_model.input_shape[1:]
        # NOTE: delayed-build pattern when `input_shape` is None -- no weights info
        # is available until the layer is built.
        model = tf_hub.KerasLayer(path, input_shape=params_model.input_shape)
        model.trainable = params_model.trainable
    elif model_signature == _ModelSignature.KerasSequential.signature:
        # IMPROVE: check availability of ml backends
        from tensorflow.keras import Sequential, layers
        name = params['name']
        # IMPROVE: parse name -> layers, or use structural config for iteration
        if name == '{conv-pool}*2-flat-dense-drop-dense':
            # NOTE: only for _test_\TF_1x_to_2x_3, output is len=10 logits
            # NOTE: 1. TF 2.x no longer requires fixing the Input layer's dimensions;
            #          consecutive layers connect automatically.
            #       2. Conv layers need no (h, w) of the previous layer -- only filter
            #          count, kernel size and padding (to keep h, w) must be set.
            #       3. But without `input_shape` the Optimizer cannot load its previously
            #          saved parameters and can only be re-initialized.
            model = Sequential([
                layers.Conv2D(32, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Flatten(),  # the dense layers below require 1-D data
                layers.Dense(1024, activation='relu'),
                layers.Dropout(0.5),  # TODO: disable Dropout during evaluate/predict
                layers.Dense(10, activation='softmax')
            ])
        elif name == 'dense-dense_softmax':
            params_model = Params(embedding_size=1024, class_count=None).update_to(params)
            if params_model.class_count is None:
                raise ValueError('class_count must be specified')
            model = Sequential([
                layers.Dense(params_model.embedding_size, activation='relu'),
                layers.Dense(params_model.class_count, activation='softmax')
            ])
            # TODO: need to return intermediate tf.Tensor required by embedding,
            #       loss calculation and evaluation.
        else:
            raise ValueError(f"Undefined model: {name}")
    elif model_signature == _ModelSignature.KerasModels_LoadModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        format_ = ModelManager._validate_format(
            params['format'], _ModelSignature.KerasModels_LoadModel)
        params_model = Params(path='', path_formatted='').update_to(params)
        path = ModelManager._validate_path(params_model.path)
        model = tf.keras.models.load_model(path)  # == core ==
    elif model_signature == _ModelSignature.TF_ImportGraphDef.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        format_ = ModelManager._validate_format(
            params['format'], _ModelSignature.TF_ImportGraphDef)
        params_model = Params(inputs='', outputs='').update_to(params)
        path = ModelManager._validate_path(params_model.path)

        # import PB model (frozen) in TF2.x. ref: https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
        # ref: https://www.tensorflow.org/api_docs/python/tf/compat/v1/wrap_function
        def wrap_frozen_graph(pb_path, inputs, outputs, prefix=""):
            """Wrap a frozen GraphDef as a callable, pruned ConcreteFunction."""
            def _imports_graph_def():
                tf.compat.v1.import_graph_def(
                    graph_def, name=prefix)  # turn off the default prefix "import/"

            graph_def = tf.compat.v1.GraphDef()
            # FIX: close the .pb file deterministically (was an unclosed `open(...).read()`)
            with open(pb_path, 'rb') as f:
                graph_def.ParseFromString(f.read())  # == core ==
            wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])  # == core ==
            import_graph = wrapped_import.graph
            return wrapped_import.prune(
                tf.nest.map_structure(import_graph.as_graph_element, inputs),
                tf.nest.map_structure(import_graph.as_graph_element, outputs))

        model = wrap_frozen_graph(path,
                                  inputs=params_model.inputs,
                                  outputs=params_model.outputs)
        test_img = tf.ones([1, 224, 224, 3],
                           dtype=tf.float32)  # fixed shape is for test ONLY
        DEBUG(f"wrap_func test result: {model(test_img).shape}")
    else:
        raise ValueError(f"Unsupported model signature: {model_signature}")
    INFO(f"type of loaded model={type(model)}")
    INFO(f" inputs={inputs}, outputs={outputs}")
    return model
def load_data(data_signature: str, category="all", meta_info=None, **params) -> object:
    """
    Load a dataset according to its signature key (labeled folders, Keras dataset,
    single file, clipboard input, or web upload).

    :param data_signature: signature key, matched against `_DataSignature.*.signature`
    :param category: 'train', 'test' or 'all'
    :param meta_info: if given as a dict, caller may get meta info of the dataset through it
    :param params: signature-specific options merged into `params_data` defaults below
    :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
    :raises ValueError: for an unknown category or unsupported data signature
    """
    data = None
    # defaults; `update_to` overlays caller-supplied params
    params_data = Params(timeout=0,
                         need_shuffle=False,
                         shuffle_seed=None,
                         test_split=0.2,
                         decode_x=Params(colormode=None,
                                         resize_w=None,
                                         resize_h=None,
                                         preserve_aspect_ratio=True,
                                         normalize=True,
                                         reshape=None),
                         decode_y=Params()).update_to(params)
    if data_signature == _DataSignature.LabeledFolders.signature:
        params_data = Params(
            file_exts=['jpg'],
            labels_ordered_in_train=None).update_to(params_data)
        import modules.data.dataset_labeled_folders as dataset_labeled_folders
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
        path = DataManager._validate_path(params_data.path)
        ds = dataset_labeled_folders.dataset(path,
                                             category=category,
                                             meta_info=meta_info,
                                             **params_data)
        DEBUG(f"loaded tf.data.Dataset: {ds}")
        data = ds
    elif data_signature == _DataSignature.TFKerasDataset.signature:
        # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
        from importlib import import_module
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
        lib_dataset = import_module(
            f"tensorflow.keras.datasets.{params_data.name}")
        (x_train, y_train), (x_test, y_test) = lib_dataset.load_data()  # Tensors
        WARN(
            f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio"
        )
        if params_data.decode_x.normalize:
            # scale uint8 pixel values into [0, 1]
            x_train, x_test = x_train / 255.0, x_test / 255.0
        # NOTE(review): `.__len__()` raises AttributeError when `reshape` is None (the
        # declared default above) -- presumably Params substitutes a sized blank; verify.
        if params_data.decode_x.reshape.__len__() > 0:
            # TODO: decode_x reshape means image reshape, not matrix reshape
            x_train = x_train.reshape(params_data.decode_x.reshape)
            x_test = x_test.reshape(params_data.decode_x.reshape)
        DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
        if category == 'all':
            data = ((x_train, y_train), (x_test, y_test))
        elif category == 'train':
            data = (x_train, y_train)
        elif category == 'test':
            data = (x_test, y_test)
        else:
            raise ValueError(f"Unknown category: {category}")
        # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
    elif data_signature == _DataSignature.SingleFile.signature:
        path = DataManager._validate_path(params_data.path)
        # `left_join` keeps only the decode keys listed here, overlaid by decode_x
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = DataManager._process_files(path, **params_decode)
    elif data_signature == _DataSignature.UI_Copy_Files.signature:
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)

        def _process(event_type, abspath_or_list):
            # clipboard callback: decode the copied path(s) and release the wait loop below
            nonlocal data
            INFO(f"clipboard event: path={abspath_or_list}")
            data = DataManager._process_files(abspath_or_list, **params_decode)

        from helpers.qt_helper import ClipboardMonitor
        monitor_type = "Path_File" if params_data.format == "Path" else "PathList"
        # NOTE: use AsyncTask to impl async clipboard monitoring loop.
        # data = ClipboardMonitor([monitor_type]).run(_process, True) #<- will get blank result on a fault copy
        from async_ import AsyncLoop, AsyncManager

        async def coro_clipboard_monitor():
            ClipboardMonitor([monitor_type]).run(_process, onetime=True)

        task = AsyncManager.run_task(coro_clipboard_monitor(),
                                     loop=None)  # block current loop
        DEBUG(
            f"[input_loop] monitoring clipboard with type {monitor_type} ..."
        )
        # wait until task done TODO: impl a context_manager for simple await
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # poll until `_process` has assigned `data`
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        # NOTE(review): run_until_complete raises if this thread's loop is already
        # running -- assumes an idle loop on the calling thread; confirm.
        loop.run_until_complete(coro_simple_wait(timeout=None))
    elif data_signature == _DataSignature.UI_Web_Files.signature:
        # path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = None
        webapp = ensure_web_app()  # will load config from Path.DeployConfigAbs
        INFO(
            f'waiting for data input from web app {webapp.host}:{webapp.port}'
        )  # IMPROVE: hint upload url
        from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
        from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
        import asyncio
        this_task: asyncio.Task or None = None

        @track_entry_and_exit.coro()
        async def coro_consume_files(abspath_or_list, cbs):
            # Decode uploaded file path(s) into a dataset and report via callbacks.
            # nonlocal this_task
            # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'
            import modules.data.decode_tf as decode_tf
            import tensorflow as tf
            DEBUG(f'[coro_consume_inputs]: {locals()}')
            on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                cbs)
            filepaths = abspath_or_list if isinstance(
                abspath_or_list, list) else [abspath_or_list]
            result = {
            }  # data: tf.data.Dataset::{image_t}, error: optional(str)
            # from helpers.tf_helper import image_example
            # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
            data = DataManager._process_files(filepaths, **params_decode)
            result.update({'data': data})
            # # if show inputs
            # try:
            #     asynctask = async_show_image_mats(image_mats)
            #     result.update({'asynctask_id': asynctask.id})
            # except Exception as e:
            #     result.update({'error': e.__repr__()})
            on_done(result)
            # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
            return result  # == this_task.set_result(result)

        def on_done_consume_inputs(result):
            """
            If using task.set_result, set_exception etc and wait for task instead of data,
            callbacks will be optional.
            """
            # releases the wait loop below by assigning the outer `data`
            nonlocal data
            INFO(f'on_done_consume_inputs: {result}')
            data = result.get('data', None)

        @webapp.on_uploads(namespace="data_manager::ui_web_files", onetime=True)
        def handle_ui_web_files(abspath_or_list):
            # upload handler: schedule async decoding of the uploaded path(s)
            nonlocal this_task
            this_task = AsyncManager.run_task(
                coro_consume_files(abspath_or_list,
                                   (on_done_consume_inputs, )))
            # NOTE(review): relies on AsyncManager's task wrapper exposing `.id`
            # (a plain asyncio.Task has none); verify.
            handler_result = {'asynctask_id': this_task.id}
            return handler_result

        # wait until get data uploaded
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # poll until `on_done_consume_inputs` has assigned `data`
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        # NOTE(review): same assumption as above -- the calling thread's loop must
        # not already be running.
        loop.run_until_complete(coro_simple_wait(timeout=None))
        pass
    else:
        raise ValueError(f"Unsupported data signature: {data_signature}")
    # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
    # data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
    # tf = safe_import_module("tensorflow")
    # if tf and isinstance(data, tf.data.Dataset):
    #     if params_data.shuffle.fixed_seed:
    #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
    return data
def inner_done_cb(fut):
    """
    Done-callback for an async task: log the finished future's result and exception.

    :param fut: the completed future/task handed over by the event loop
    """
    # TODO: fut == task? make sure
    # NOTE(review): `{id}` resolves to the builtin `id` function unless this def is
    # nested in a scope that binds `id` -- presumably meant to be a task id; verify.
    # NOTE(review): `fut.result()` re-raises the task's exception on failure and
    # raises CancelledError if cancelled, so this DEBUG call can itself throw --
    # confirm tasks reaching this callback always complete successfully.
    DEBUG(
        f'[inner_done_callback] task={id}, result={fut.result()}, except={fut.exception()}'
    )