def handle_ui_web_files(abspath_or_list): nonlocal this_task this_task = AsyncManager.run_task( coro_consume_files(abspath_or_list, (on_done_consume_inputs, ))) handler_result = {'asynctask_id': this_task.id} return handler_result
def async_show_image_mat(image_mat, text=None, title=None, cell_size: tuple = None, image_name=None):
    """Schedule a non-blocking display of one image matrix on the UI thread.

    :return: an async task object
    """
    from async_ import AsyncLoop, AsyncManager
    ui_loop = AsyncManager.get_loop(AsyncLoop.UIThread)
    show_coro = coro_show_image_mat(image_mat, text=text, title=title,
                                    cell_size=cell_size, block=False,
                                    image_name=image_name)
    pending = AsyncManager.create_task(show_coro, loop=ui_loop)
    # possibly only one task in this batch
    return AsyncManager.run_task(pending, loop=ui_loop)
def async_preload_gpu_devices():
    """
    Preload in another loop/thread, hopefully call this during waiting for user inputs
    or other waiting period.
    """
    # IMPROVE: an asyncio loop is overkill here; hosting in a plain new thread would suffice.
    from async_ import AsyncLoop, AsyncManager

    async def _preload():
        preload_gpu_devices()

    worker_loop = AsyncManager.get_loop(AsyncLoop.DataProcess)
    DEBUG(f"[tensorflow] preload gpu devices in another thread...")
    return AsyncManager.run_task(_preload(), loop=worker_loop)
def async_run(self, **params):
    """
    Generally we need to launch web app in another loop/thread, to not block ML operations.
    """
    # IMPROVE: the web app does not need an asyncio loop; a plain new thread is enough.
    from async_ import AsyncLoop, AsyncManager
    app = self

    async def _serve():
        app.run(**params)

    serving_loop = AsyncManager.get_loop(AsyncLoop.WebApp)
    task = AsyncManager.run_task(_serve(), loop=serving_loop)
    DEBUG(
        f"[webapp_loop] listening to port {params.get('port', '<unknown>')} ..."
    )
    return task
def load_data(data_signature: str, category="all", meta_info=None, **params) -> object:
    """Load a dataset selected by `data_signature`, dispatching to one of several
    sources: labeled folders, a tf.keras built-in dataset, a single file, clipboard
    input (UI copy), or files uploaded through the web app.

    :param data_signature: one of the `_DataSignature.*.signature` values
    :param category: 'train', 'test' or 'all'
    :param meta_info: if given as a dict, caller may get meta info of the dataset through it
    :param params: source-specific overrides merged into the default `Params` below
    :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
    """
    data = None
    # Defaults for all sources; caller-supplied `params` override them.
    params_data = Params(timeout=0,
                         need_shuffle=False,
                         shuffle_seed=None,
                         test_split=0.2,
                         decode_x=Params(colormode=None,
                                         resize_w=None,
                                         resize_h=None,
                                         preserve_aspect_ratio=True,
                                         normalize=True,
                                         reshape=None),
                         decode_y=Params()).update_to(params)
    if data_signature == _DataSignature.LabeledFolders.signature:
        # Folder-per-label dataset on disk.
        params_data = Params(file_exts=['jpg'],
                             labels_ordered_in_train=None).update_to(params_data)
        import modules.data.dataset_labeled_folders as dataset_labeled_folders
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
        path = DataManager._validate_path(params_data.path)
        ds = dataset_labeled_folders.dataset(path, category=category,
                                             meta_info=meta_info, **params_data)
        DEBUG(f"loaded tf.data.Dataset: {ds}")
        data = ds
    elif data_signature == _DataSignature.TFKerasDataset.signature:
        # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
        from importlib import import_module
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
        # Dynamically import e.g. tensorflow.keras.datasets.mnist by name.
        lib_dataset = import_module(
            f"tensorflow.keras.datasets.{params_data.name}")
        (x_train, y_train), (x_test, y_test) = lib_dataset.load_data()  # Tensors
        WARN(f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio")
        if params_data.decode_x.normalize:
            # presumably uint8 pixel data scaled to [0, 1] — TODO confirm dtype
            x_train, x_test = x_train / 255.0, x_test / 255.0
        # NOTE(review): `reshape` defaults to None above, and None has no __len__ —
        # presumably Params.update_to yields a len-able value here; verify.
        if params_data.decode_x.reshape.__len__() > 0:
            # TODO: decode_x reshape means image reshape, not matrix reshape
            x_train = x_train.reshape(params_data.decode_x.reshape)
            x_test = x_test.reshape(params_data.decode_x.reshape)
        DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
        if category == 'all':
            data = ((x_train, y_train), (x_test, y_test))
        elif category == 'train':
            data = (x_train, y_train)
        elif category == 'test':
            data = (x_test, y_test)
        else:
            raise ValueError(f"Unknown category: {category}")
        # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
    elif data_signature == _DataSignature.SingleFile.signature:
        # One file on disk, decoded with the decode_x settings.
        path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = DataManager._process_files(path, **params_decode)
    elif data_signature == _DataSignature.UI_Copy_Files.signature:
        # Paths arrive via the OS clipboard; block until something is copied.
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)

        def _process(event_type, abspath_or_list):
            # Clipboard callback: decode the copied path(s) into `data`,
            # which also releases the wait loop below.
            nonlocal data
            INFO(f"clipboard event: path={abspath_or_list}")
            data = DataManager._process_files(abspath_or_list, **params_decode)

        from helpers.qt_helper import ClipboardMonitor
        monitor_type = "Path_File" if params_data.format == "Path" else "PathList"
        # NOTE: use AsyncTask to impl async clipboard monitoring loop.
        # data = ClipboardMonitor([monitor_type]).run(_process, True) #<- will get blank result on a fault copy
        from async_ import AsyncLoop, AsyncManager

        async def coro_clipboard_monitor():
            ClipboardMonitor([monitor_type]).run(_process, onetime=True)

        task = AsyncManager.run_task(coro_clipboard_monitor(), loop=None)  # block current loop
        DEBUG(f"[input_loop] monitoring clipboard with type {monitor_type} ...")
        # wait until task done TODO: impl a context_manager for simple await
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # Poll until the clipboard callback assigns `data`.
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        # NOTE(review): run_until_complete raises if this thread's loop is
        # already running — presumably load_data is called from sync code; verify.
        loop.run_until_complete(coro_simple_wait(timeout=None))
    elif data_signature == _DataSignature.UI_Web_Files.signature:
        # Files arrive via web-app upload; block until the upload handler fires.
        # path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = None
        webapp = ensure_web_app()  # will load config from Path.DeployConfigAbs
        INFO(f'waiting for data input from web app {webapp.host}:{webapp.port}')
        # IMPROVE: hint upload url
        from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
        from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
        import asyncio
        this_task: asyncio.Task or None = None

        @track_entry_and_exit.coro()
        async def coro_consume_files(abspath_or_list, cbs):
            # nonlocal this_task
            # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'
            import modules.data.decode_tf as decode_tf
            import tensorflow as tf
            DEBUG(f'[coro_consume_inputs]: {locals()}')
            on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(cbs)
            filepaths = abspath_or_list if isinstance(
                abspath_or_list, list) else [abspath_or_list]
            result = {}  # data: tf.data.Dataset::{image_t}, error: optional(str)
            # from helpers.tf_helper import image_example
            # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
            # NOTE: local `data` here shadows the enclosing `data` on purpose;
            # the outer one is set via the on_done callback below.
            data = DataManager._process_files(filepaths, **params_decode)
            result.update({'data': data})
            # # if show inputs
            # try:
            #     asynctask = async_show_image_mats(image_mats)
            #     result.update({'asynctask_id': asynctask.id})
            # except Exception as e:
            #     result.update({'error': e.__repr__()})
            on_done(result)
            # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
            return result  # == this_task.set_result(result)

        def on_done_consume_inputs(result):
            """
            If using task.set_result, set_exception etc and wait for task instead of data,
            callbacks will be optional.
            """
            nonlocal data
            INFO(f'on_done_consume_inputs: {result}')
            data = result.get('data', None)

        @webapp.on_uploads(namespace="data_manager::ui_web_files", onetime=True)
        def handle_ui_web_files(abspath_or_list):
            # Upload handler: launch consumption and report the async task id.
            nonlocal this_task
            this_task = AsyncManager.run_task(
                coro_consume_files(abspath_or_list, (on_done_consume_inputs, )))
            handler_result = {'asynctask_id': this_task.id}
            return handler_result

        # wait until get data uploaded
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # Poll until the upload callback assigns `data`.
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        loop.run_until_complete(coro_simple_wait(timeout=None))
        pass
    else:
        raise ValueError(f"Unsupported data signature: {data_signature}")
    # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
    # data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
    # tf = safe_import_module("tensorflow")
    # if tf and isinstance(data, tf.data.Dataset):
    #     if params_data.shuffle.fixed_seed:
    #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
    return data