def run_task(cls, task_or_coro, new_thread=False, loop=None):
    """
    Run a coroutine object or an existing task and return the task.

    :param task_or_coro: a coroutine object is wrapped into a task through `cls.create_task()`
        and then run. An `asyncio.Task` is used as is, unless a new thread or a different loop
        is requested and the task exposes a `coro` attribute; in that case a new task is created
        from that coroutine.
    :param new_thread: forwarded to `cls.create_task()`. For a coroutine without a given loop,
        `True` makes the loop come from `cls.append_new_loop()` instead of the current loop.
    :param loop: an existing loop in which to create (and run) the task.
    :return: the task that was created or passed in.
    :raises TypeError: if `task_or_coro` is neither a coroutine object nor an `asyncio.Task`.
    """
    cls.__ensure_init__()
    with cls.__mutex__:
        if new_thread and loop is not None:
            WARN(
                f'Task run in a new thread will have a new loop. (arg loop ignored: {loop})'
            )
        if asyncio.iscoroutine(task_or_coro):
            if loop is None:
                loop = cls.__instance__.current_loop if not new_thread else cls.append_new_loop()
            task = cls.create_task(task_or_coro, new_thread=new_thread, loop=loop)
        elif isinstance(task_or_coro, asyncio.Task):
            task = task_or_coro
            # `loop` and `coro` are not standard asyncio.Task attributes, hence the getattr()
            # defaults. NOTE(review): presumably attached by this class when it creates a
            # task -- confirm.
            task_loop = getattr(task, 'loop', None)
            task_coro = getattr(task, 'coro', None)  # Py3.8 implemented task.get_coro()
            if task_coro is not None and (new_thread or
                                          (loop is not None and task_loop is not None
                                           and loop != task_loop)):
                # A task cannot be moved: build a new one from the remembered coroutine.
                task = cls.create_task(task_coro, new_thread=new_thread, loop=loop)
                WARN(
                    'Task is requested to run in a new thread or loop. a new task will be created.'
                )
                loop = task.loop
            else:
                # Keep the task where it is; fall back to the current loop if it has none.
                loop = task_loop or cls.__instance__.current_loop
                # if new_thread:
                #     WARN('Task is always bound with a loop and cannot be run in new thread. (arg ignored)')
                # if loop is not None and loop != task_or_coro.loop:
                #     WARN(f'Task is always bound with an existing loop. (arg loop ignored: {loop})')
        else:
            raise TypeError(
                f'Only accept coro object or task, while get a {type(task_or_coro)}'
            )
    # loop.create_task() after loop.run_forever() will not be run, unless activate a `call_soon` for a new batch.
    if getattr(loop, 'id', None) == AsyncLoop.Main:
        # loop.call_soon(task)  # will be pending unless main_loop is running
        # The main loop is driven right here, so this call blocks until the task completes.
        loop.run_until_complete(task)
    else:
        # A no-op callback scheduled thread-safely, only to wake up the other loop.
        loop.call_soon_threadsafe(lambda: {})
    return task
def dispatch_handlers(self, event_name: str, *args, namespace=None, **kwargs):
    """
    Call every registered handler that matches `event_name` (and `namespace`).

    :param event_name: name of the event being dispatched
    :param args: positional arguments forwarded to each matching handler
    :param namespace: only handlers registered with this namespace are called;
        `None` means the namespace is not compared
    :param kwargs: keyword arguments forwarded to each matching handler
    :return: list of the return values of the handlers that finished without raising
    """
    handler_results = []
    to_delete = set()
    # Iterate over a snapshot: a handler may register or remove handlers while it runs,
    # and mutating `self.handlers` during iteration would raise outside the try block below.
    for _event_name, _namespace, _handler, _is_onetime in tuple(self.handlers):
        if _event_name != event_name:
            continue
        if namespace is not None and _namespace != namespace:
            continue
        try:
            DEBUG(
                f"[{_event_name}{'@'+(_namespace or '')}] dispatch({args}, {kwargs})"
            )
            # One-time handlers are marked before the call, so they are removed
            # even if the call raises.
            if _is_onetime:
                to_delete.add((_event_name, _namespace, _handler, _is_onetime))
            handler_result = _handler(*args, **kwargs)
            handler_results.append(handler_result)
        except Exception as e:
            # Best effort: one failing handler must not prevent the others from running.
            WARN(
                f"Registered handler caused exception ({_event_name}@{_namespace}, "
                f"which should have been caught in handler side): {e}")
    self.handlers -= to_delete
    return handler_results
def preload_gpu_devices(active_indexes: list = None, memory_limit: int = None):
    """
    Configure TensorFlow GPU visibility and memory policy once per process.

    Later calls return immediately: the module-level flag `__preloaded_gpu___`
    is set on the first call, even if the configuration itself fails.

    :param active_indexes: indexes (into the list of physical GPUs) of the devices
        to make visible. `None` or an empty list leaves visibility untouched.
    :param memory_limit: if given, each configured GPU gets one virtual device with
        this memory limit; otherwise memory growth is turned on.
    :raises IndexError: if an index in `active_indexes` is out of range.
    """
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if not gpus:
        WARN("No physical GPU available.")
        return
    try:
        if active_indexes:
            # set_visible_devices() replaces the whole visible set, so all
            # requested devices must be passed in a single call.
            visible_gpus = [gpus[index] for index in active_indexes]
            tf.config.experimental.set_visible_devices(visible_gpus, 'GPU')
        else:
            visible_gpus = list(gpus)
        # Memory settings must be applied before the runtime is initialized
        # (listing logical devices below initializes it) and must be consistent
        # across all visible GPUs.
        for gpu in visible_gpus:
            if memory_limit is None:
                tf.config.experimental.set_memory_growth(gpu, True)
            else:
                tf.config.experimental.set_virtual_device_configuration(gpu, [
                    tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
        if memory_limit is None:
            INFO("Physical GPU Memory Growth is turned ON.")
        else:
            INFO(f"Physical GPU Memory Growth is limited under: {memory_limit}")
        if active_indexes is not None:
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(f"Num of Physical GPU vs Logical ones: {len(gpus)} vs {len(logical_gpus)}, "
                 f"{len(gpus)-len(logical_gpus)} disabled")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        ERROR(f"Exception during preload_gpu_devices: {e}")
def gather_task(cls, *task_or_coros, given_id=None, new_thread=False,
                loop: asyncio.AbstractEventLoop = None):
    """
    Gather several coroutines or tasks into one task that waits for all of them.

    :param task_or_coros: coroutine objects or tasks; all items must have the same type,
        and tasks must all carry the same `loop` attribute.
    :param given_id: task_id will be generally retrieved after task creation. if want to use
        task_id in coro, however, caller may get `new_id()` and send it to `create_task()`.
    :param new_thread: when no loop is given, `True` makes the loop come from
        `cls.append_new_loop()` instead of the current loop.
    :param loop: you may choose to create (and run) the task in an existing loop. It is
        replaced (with a warning) by the loop of the given tasks if they differ.
    :return: the task created by `cls.create_task()` for the wrapping coroutine.
    :raises TypeError: if the items do not all have the same type.
    :raises ValueError: if the given tasks do not share one loop (or have none).
    """
    cls.__ensure_init__()
    with cls.__mutex__:
        if loop is None:
            loop = cls.__instance__.current_loop if not new_thread else cls.append_new_loop()
        last_type = None
        for task_or_coro in task_or_coros:
            last_type = last_type or type(task_or_coro)
            if last_type != type(task_or_coro):
                raise TypeError(
                    'All items in task_or_coros must have same type.')
        if last_type is asyncio.Task:
            last_task_loop = None
            for task in task_or_coros:
                last_task_loop = last_task_loop or getattr(task, 'loop', None)
                if last_task_loop is None or last_task_loop != getattr(task, 'loop', None):
                    raise ValueError('All tasks must have same loop.')
            if loop != last_task_loop:
                loop = last_task_loop
                WARN(
                    "Given loop is not same with loop of the tasks and is ignored."
                )
        task_id = cls.new_id(prefix=getattr(loop, 'id', None)) if given_id is None else given_id

        # wrap future as a task. ensure_future() cannot do this.
        # gather() is called inside the coroutine, i.e. once it runs in its loop:
        # the `loop` argument of asyncio.gather() no longer exists on Python 3.10+,
        # and this also avoids scheduling the children from the calling thread.
        async def coro_wait_future():
            await asyncio.gather(*task_or_coros)

        coroutine = coro_wait_future()
        task = cls.create_task(coroutine, given_id=task_id, new_thread=False, loop=loop)
        cls.__instance__.all_tasks[task_id] = task
        cls.hack_task(task, task_id, coroutine, loop)
        return task
def denormalize(obj, max=255, type_=int):
    """
    Scale normalized data by `max` and cast it, without modifying the input.

    :param obj: data to scale. Objects with an `astype` method are cast through it
        (np.ndarray); anything else is cast with `tf.cast` (tf.Tensor).
    :param max: scale factor applied to `obj`.
    :param type_: target type. On the `tf.cast` path, `int` maps to `tf.uint8`
        and anything else to `tf.float32`.
    :return: the scaled and cast data, or `obj` itself, untouched, when `obj.max()`
        exceeds 1 (a warning is logged in that case).
    """
    if hasmethod(obj, 'max'):
        if obj.max() > 1:
            WARN(
                f"obj.max exceeds 1.0, no denormalize (nor type cast) will be done."
            )
            return obj
    # Build a new object instead of `obj *= max`: the in-place form would scale
    # the caller's array as a side effect.
    scaled = obj * max
    if hasattr(scaled, 'astype'):
        return scaled.astype(type_)  # np.ndarray
    import tensorflow as tf
    return tf.cast(scaled, tf.uint8 if type_ is int else tf.float32)  # tf.Tensor
def preload_gpu_devices():
    """
    Configure the first physical GPU for TensorFlow, once per process.

    Turns memory growth on for the first GPU and makes it the only visible one.
    The module-level flag `__preloaded_gpu___` is set on the first call, so later
    calls return immediately.
    """
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    physical_gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if not physical_gpus:
        WARN("No physical GPU available.")
        return
    # Restrict TensorFlow to only use the first GPU
    first_gpu = physical_gpus[0]
    try:
        tf.config.experimental.set_memory_growth(first_gpu, True)
        INFO("Physical GPU Memory Growth is turned ON.")
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        INFO(
            f"Num of Physical GPUs: {len(physical_gpus)}, Num of Logical GPU: {len(logical_gpus)}"
        )
    except RuntimeError as err:
        # Visible devices must be set before GPUs have been initialized
        ERROR(f"Exception during preload_gpu_devices: {err}")
def model_predict(model: object, data: object, **params) -> object:
    """
    Run a prediction with `model` on `data`, optionally decode and show the result.

    :param model: a `tf.keras.Model` (its `predict()` is used) or any callable.
    :param data: passed to `ModelManager._validate_input()`, which yields `x` and `y`.
    :param params: overrides for the defaults below (`decode_prediction`, `show_result`).
    :return: the (possibly decoded) predictions, or `None` when they are blank.
    :raises TypeError: for an unsupported model type or prediction type.
    :raises ValueError: for an unknown `decode_prediction.name`.
    """
    params_predict = Params(
        decode_prediction=Params(name='logits_to_index'),
        show_result=Params(top_k=20, only_difference=True)).update_to(params)
    predictions = None
    x, y = ModelManager._validate_input(data)
    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        elif callable(model):
            # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
            if isinstance(inputs, tf.data.Dataset):
                # IMPROVE: stack result as a tensor
                # The callable is fed one dataset element at a time.
                result = []
                for t in inputs:
                    result.append(model(t))
                return tf.stack(result)
            else:
                return model(inputs)
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return None

    if params_predict.decode_prediction.is_defined():
        if params_predict.decode_prediction.name == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(
                    f"Unsupported type for logits_to_index: {type(predictions)}"
                )
        elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get(
                'top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(
                    f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}"
                )
        else:
            raise ValueError(
                f"Unsupported result decoding: {params_predict.decode_prediction.name}"
            )
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    if params_predict.show_result.is_defined() and isinstance(
            predictions, np.ndarray):
        x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
        if params_predict.show_result.only_difference:
            if hasattr(y_show, '__len__'):
                if p_show.__len__() == y_show.__len__():
                    # Boolean mask of the positions where prediction and target differ.
                    differences = p_show != y_show
                    x_show, p_show, y_show = x_show[differences], p_show[
                        differences], y_show[differences]
                else:
                    WARN(
                        f"Cannot dump differences: len of targets is not same as predictions"
                        f"({y_show.__len__()} vs {p_show.__len__()})")
            else:
                WARN(
                    f"Cannot dump differences: unsupported y type(={type(y_show)})"
                )
            # NOTE(review): this is logged even when the differences could not be computed
            # above, in which case len(p_show) is the number of all predictions.
            INFO(
                f"Number of mismatch between prediction and truth: {len(p_show)}"
            )
        if params_predict.show_result.get('top_k', None) is not None:
            top_k = params_predict.show_result.top_k
            # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
            x_show, p_show, y_show = (safe_slice(_, end=top_k)
                                      for _ in (x_show, p_show, y_show))
        if len(p_show) > 0:
            # One text per shown prediction: "p" alone, or "(p vs y)" when a label exists.
            dumps = []
            for i, p in enumerate(p_show):
                if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                    dumps.append(f"{p}")
                else:
                    dumps.append(f"({p} vs {y_show[i]})")
            need_to_show = params_predict.show_result.plotter.__len__() > 0
            need_to_save = params_predict.show_result.save_path.__len__() > 0
            only_save = params_predict.show_result.only_save
            if need_to_show or need_to_save:

                def denormalize(x):
                    # Scale by 255 and cast to int32.
                    x = x * 255
                    if hasattr(x, 'astype'):  # np.ndarray
                        return x.astype(np.int32)
                    else:
                        return tf.cast(x, tf.int32)  # tf.Tensor

                # IMPROVE: use signature to match normalize and `un-normalize` routines
                # Float inputs are denormalized before being shown.
                # assumes float inputs were normalized to [0, 1] -- TODO confirm
                if hasattr(
                        x_show,
                        "dtype") and x_show.dtype.name.startswith('float'):
                    x_show = denormalize(x_show)
                elif hasattr(x_show, "element_spec") and \
                        hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                    x_show = x_show.map(denormalize)
                save_dir, save_paths = None, None
                if need_to_save:
                    save_dir = path_possibly_formatted(
                        params_predict.show_result.save_path)
                    # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                if params_predict.show_result.plotter == "matplot":
                    onlysave_path = None
                    if only_save:
                        if need_to_save:
                            from helpers.util import tmp_filename_by_time
                            onlysave_path = osp.join(
                                save_dir, tmp_filename_by_time('jpg'))
                            need_to_save = False
                        else:
                            WARN(
                                'only_save is true, but save_path is not specified. ignored'
                            )
                    show_image_mats(x_show,
                                    texts=dumps,
                                    title="Predictions",
                                    onlysave_path=onlysave_path)
            else:
                # `'differences' in vars()` checks whether the local `differences`
                # was assigned above, i.e. whether the diff filter was applied.
                INFO(
                    f"Predictions{'(only diff)' if 'differences' in vars() else ''}: "
                    + ", ".join(dumps))
            # if need_to_save:
            #     save_image_mats(x_show, save_paths)
    else:
        # show_result is not defined, or predictions is not an np.ndarray.
        top_k = params_predict.show_result.top_k
        INFO(
            f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}"
        )
    return predictions
def model_train(model: object, data: object, **params):
    """
    Compile and fit a `tf.keras.Model`, with optional checkpoints and history output.

    NOTE: common Keras pitfall: 1. Keras applies validation_split before shuffling, so if
    the negative samples are all at the end of `data`, shuffle it yourself first.

    :param model: the model to train; only `tf.keras.Model` is supported.
    :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
    :param params: overrides for the default training params below.
    :return: the given model, after training.
    :raises TypeError: if `model` is not a `tf.keras.Model`.
    """
    # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
    params_train = Params(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['acc'],
                          validation_split=0.1,
                          epochs=5,
                          batch_size=None,
                          checkpoint=Params(load_weights="latest",
                                            save_weights=Params(
                                                frequency="epoch",
                                                max_to_keep=5)),
                          show_result=Params()).update_to(params)
    x_train, y_train = ModelManager._validate_input(data)
    import tensorflow as tf  # IMPROVE: check availability of ml backends
    if isinstance(model, tf.keras.Model):
        # 1.compile and load variables from checkpoint
        model.compile(
            **params_train.fromkeys(['optimizer', 'loss', 'metrics']))
        # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
        ckpt_dir, ckpt_path_to_load = None, None
        if params_train.checkpoint.format == "CKPT_dir":
            from config import __abspath__
            ckpt_dir = path_possibly_formatted(
                params_train.checkpoint.path)
            ckpt_dir = __abspath__(
                ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
            ensure_dir_exists(ckpt_dir)
            ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
        # NOTE: in delayed-build mode, the weights are only determined after calling
        # build(batch_input_shape) or compile()+fit(x,y,batch_size)
        # ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
        if params_train.checkpoint.load_weights == "latest" \
                and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
                and ckpt_path_to_load is not None:
            model.load_weights(ckpt_path_to_load)

        # 2.prepare callbacks
        callbacks = []
        # callback :: save medium CKPT
        if params_train.checkpoint.save_weights.is_defined(
        ) and ckpt_dir is not None:
            ckpt_path_to_save = osp.join(
                ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
            # NOTE: if save_freq is not equal to 'epoch', which means num of steps, it's will be less reliable
            _params = Params(save_freq='epoch').left_join(
                params_train.checkpoint.save_weights,
                key_map={"save_freq": "frequency"})
            _callback = tf.keras.callbacks.ModelCheckpoint(
                ckpt_path_to_save,  # not checkpoint_dir
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
                **_params)
            callbacks.append(_callback)
        # callback :: early stop
        if params_train.early_stop.is_defined():
            _params = Params(monitor='val_loss', patience=10).left_join(
                params_train.early_stop)
            _callback = tf.keras.callbacks.EarlyStopping(**_params)
            callbacks.append(_callback)
        # callback :: progress indicator / verbose
        # IMPROVE: use config for training verbose / progress indicator
        # _callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps', stateful_metrics=None)  # no real-time output under PyTest
        # NOTE(review): if 'loss' or 'acc' is missing from `logs`, formatting the None
        # default with `.4f` raises TypeError -- confirm both keys are always present.
        _callback = tf.keras.callbacks.LambdaCallback(
            on_batch_end=lambda batch, logs: INFO(
                f"batch{batch:05d}: loss={logs.get('loss',None):.4f},acc={logs.get('acc',None):.4f}"
            ))
        callbacks.append(_callback)
        cb_batch_stats = None
        if params_train.collect_batch_stats:
            # when only train several epochs, may collect stats of each batch instead of the epoch average.
            class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
                # Records loss and acc of every training batch, numbered across epochs.
                def __init__(self):
                    self.current_batch = 0
                    self.batch = []
                    self.loss = []
                    self.acc = []

                def on_train_batch_end(self, batch, logs=None):
                    self.batch.append(self.current_batch)
                    self.loss.append(logs['loss'])
                    self.acc.append(logs['acc'])
                    # Metrics are reset after every batch, so the logged values are
                    # per batch rather than accumulated.
                    self.model.reset_metrics()
                    self.current_batch += 1

            cb_batch_stats = CallbackCollectBatchStats(
            )  # TODO: can plot batch_lsses and batch_acc using this
            callbacks.append(cb_batch_stats)
        if len(callbacks) == 0:
            callbacks = None

        # 3.train the model, and save checkpoints if configured
        # TODO: use model.fit_generator() for batch feeding. `steps_per_epoch` = np.ceil(samples / param.batch_size)
        # NOTE: core API for model training
        params_train_fit = params_train.fromkeys(
            ['validation_split', 'batch_size', 'epochs'])
        INFO(f"Beginning to train: {params_train_fit}")
        history = model.fit(x_train,
                            y_train,
                            **params_train_fit,
                            callbacks=callbacks)  # == core ==
        if cb_batch_stats is not None:
            history.history[
                'batch'] = cb_batch_stats.batch  # accumulated batch number through epoches
            history.history['batch_loss'] = cb_batch_stats.loss
            history.history['batch_acc'] = cb_batch_stats.acc

        # 4.save checkpiont at last
        if params_train.save_model.is_defined() and ckpt_dir is not None:
            _params = Params(format="SavedModel").left_join(
                params_train.save_model)
            save_format, ckpt_path_to_save = None, None
            if _params.format == "HDF5":
                save_format = _ext = "h5"
                ckpt_path_to_save = osp.join(ckpt_dir,
                                             f"model_trained.{_ext}")
            else:  # default=SavedModel
                save_format = "tf"
                ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained")
                ensure_dir_exists(ckpt_path_to_save)
            # IMPROVE: consider using tf.saved_model.save()
            model.save(ckpt_path_to_save, save_format=save_format
                       )  # by default, TF2 saves as 'tf' (SavedModel)

        # Optional: output history
        if params_train.show_result.is_defined():
            plot_history = None
            if params_train.show_result.plotter == 'matplot':
                from helpers.plt_helper import plot_history_by_metrics as plot_history
            if params_train.show_result.plotter.__len__(
            ) > 0 and plot_history is None:
                WARN(
                    f"Unsupported history plotter: {params_train.show_result.plotter}"
                )
            if plot_history is not None:
                plot_history(history,
                             params_train.show_result.get('metrics', None))
            else:
                # TODO: check this section
                # NOTE(review): the keys read here ('accuracy', 'mse', ...) do not match the
                # default metrics=['acc'] above; presumably a KeyError unless such metrics
                # are configured -- confirm.
                hist = history.history
                INFO(
                    f"Last epoch: "
                    f"ACC(train,val)=({hist['accuracy'][-1]}, {hist['val_accuracy'][-1]}), "
                    f"MSE(train,val)=({hist['mse'][-1]}, {hist['val_mse'][-1]})"
                )
    else:
        raise TypeError(f"Unsupported model type: {type(model)}")
    return model
def load_data(data_signature: str,
              category="all",
              meta_info=None,
              **params) -> object:
    """
    Load data from the source selected by `data_signature`.

    :param data_signature: one of the `_DataSignature.*.signature` values handled below
    :param category: 'train', 'test' or 'all'
    :param meta_info: if given as a dict, caller may get meta info of the dataset through it
    :param params: overrides for the default data params below
    :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
    :raises ValueError: for an unsupported `data_signature`, or an unknown `category`
        when loading a Keras dataset
    """
    data = None
    params_data = Params(timeout=0,
                         need_shuffle=False,
                         shuffle_seed=None,
                         test_split=0.2,
                         decode_x=Params(colormode=None,
                                         resize_w=None,
                                         resize_h=None,
                                         preserve_aspect_ratio=True,
                                         normalize=True,
                                         reshape=None),
                         decode_y=Params()).update_to(params)
    if data_signature == _DataSignature.LabeledFolders.signature:
        params_data = Params(
            file_exts=['jpg'],
            labels_ordered_in_train=None).update_to(params_data)
        import modules.data.dataset_labeled_folders as dataset_labeled_folders
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
        path = DataManager._validate_path(params_data.path)
        ds = dataset_labeled_folders.dataset(path,
                                             category=category,
                                             meta_info=meta_info,
                                             **params_data)
        DEBUG(f"loaded tf.data.Dataset: {ds}")
        data = ds
    elif data_signature == _DataSignature.TFKerasDataset.signature:
        # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
        from importlib import import_module
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
        lib_dataset = import_module(
            f"tensorflow.keras.datasets.{params_data.name}")
        (x_train, y_train), (x_test, y_test) = lib_dataset.load_data()  # Tensors
        WARN(
            f"Keras dataset {params_data.name} loaded as is. Ignored configs: colormode, resize_w/h, preserve_aspect_ratio"
        )
        if params_data.decode_x.normalize:
            x_train, x_test = x_train / 255.0, x_test / 255.0
        if params_data.decode_x.reshape.__len__() > 0:
            # TODO: decode_x reshape means image reshape, not matrix reshape
            x_train = x_train.reshape(params_data.decode_x.reshape)
            x_test = x_test.reshape(params_data.decode_x.reshape)
        DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
        if category == 'all':
            data = ((x_train, y_train), (x_test, y_test))
        elif category == 'train':
            data = (x_train, y_train)
        elif category == 'test':
            data = (x_test, y_test)
        else:
            raise ValueError(f"Unknown category: {category}")
        # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
    elif data_signature == _DataSignature.SingleFile.signature:
        path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(
                                   params_data.decode_x)
        data = DataManager._process_files(path, **params_decode)
    elif data_signature == _DataSignature.UI_Copy_Files.signature:
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(
                                   params_data.decode_x)

        def _process(event_type, abspath_or_list):
            # Clipboard callback: stores the processed files into the enclosing `data`,
            # which ends the wait loop below.
            nonlocal data
            INFO(f"clipboard event: path={abspath_or_list}")
            data = DataManager._process_files(abspath_or_list,
                                              **params_decode)

        from helpers.qt_helper import ClipboardMonitor
        monitor_type = "Path_File" if params_data.format == "Path" else "PathList"
        # NOTE: use AsyncTask to impl async clipboard monitoring loop.
        # data = ClipboardMonitor([monitor_type]).run(_process, True)  #<- will get blank result on a fault copy
        from async_ import AsyncLoop, AsyncManager

        async def coro_clipboard_monitor():
            ClipboardMonitor([monitor_type]).run(_process, onetime=True)

        task = AsyncManager.run_task(coro_clipboard_monitor(),
                                     loop=None)  # block current loop
        DEBUG(
            f"[input_loop] monitoring clipboard with type {monitor_type} ..."
        )
        # wait until task done TODO: impl a context_manager for simple await
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # Polls once per second until `_process` has set `data`; `timeout` is unused.
            while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        loop.run_until_complete(coro_simple_wait(timeout=None))
    elif data_signature == _DataSignature.UI_Web_Files.signature:
        # path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg',
                               colormode=None,
                               reshape=None,
                               preserve_aspect_ratio=True,
                               color_transform=None,
                               normalize=True).left_join(
                                   params_data.decode_x)
        data = None
        webapp = ensure_web_app(
        )  # will load config from Path.DeployConfigAbs
        INFO(
            f'waiting for data input from web app {webapp.host}:{webapp.port}'
        )  # IMPROVE: hint upload url
        from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
        from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
        import asyncio
        this_task: asyncio.Task or None = None

        @track_entry_and_exit.coro()
        async def coro_consume_files(abspath_or_list, cbs):
            # Processes the uploaded file(s) and reports the result through `on_done`.
            # nonlocal this_task
            # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'
            import modules.data.decode_tf as decode_tf
            import tensorflow as tf
            DEBUG(f'[coro_consume_inputs]: {locals()}')
            on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(
                cbs)
            filepaths = abspath_or_list if isinstance(
                abspath_or_list, list) else [abspath_or_list]
            result = {
            }  # data: tf.data.Dataset::{image_t}, error: optional(str)
            # from helpers.tf_helper import image_example
            # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
            # `data` here is local to this coroutine; the enclosing `data` is only
            # set by `on_done_consume_inputs` below.
            data = DataManager._process_files(filepaths, **params_decode)
            result.update({'data': data})
            # # if show inputs
            # try:
            #     asynctask = async_show_image_mats(image_mats)
            #     result.update({'asynctask_id': asynctask.id})
            # except Exception as e:
            #     result.update({'error': e.__repr__()})
            on_done(result)
            # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
            return result  # == this_task.set_result(result)

        def on_done_consume_inputs(result):
            """
            If using task.set_result, set_exception etc and wait for task instead of data,
            callbacks will be optional.
            """
            nonlocal data
            INFO(f'on_done_consume_inputs: {result}')
            data = result.get('data', None)

        @webapp.on_uploads(namespace="data_manager::ui_web_files",
                           onetime=True)
        def handle_ui_web_files(abspath_or_list):
            # Upload handler: starts the consuming task and returns its id.
            nonlocal this_task
            this_task = AsyncManager.run_task(
                coro_consume_files(abspath_or_list,
                                   (on_done_consume_inputs, )))
            handler_result = {'asynctask_id': this_task.id}
            return handler_result

        # wait until get data uploaded
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            # Polls once per second until the upload callback has set `data`; `timeout` is unused.
            while data is None:  # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        loop.run_until_complete(coro_simple_wait(timeout=None))
        pass
    else:
        raise ValueError(f"Unsupported data signature: {data_signature}")
    # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
    #   data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
    # tf = safe_import_module("tensorflow")
    # if tf and isinstance(data, tf.data.Dataset):
    #     if params_data.shuffle.fixed_seed:
    #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
    return data
def _slot_Clipboard_OnChanged(self):
    """
    Handle a clipboard change: forward copied paths and images to `self.Callback`.

    Depending on the entries of `self.Mode`, the callback is invoked with
    "Path_File", "PathList", "Path" or "Image" as event type; its return value
    is stored in `self.CallResults`. Exceptions raised while handling are logged,
    not propagated. When `self.IsRunOnce` is set, `self.Stop()` is called after
    the event has been handled.
    """
    # --- Only One Instance can be run
    # The guard is evaluated before the try/finally on purpose: a rejected
    # re-entrant call must neither clear the flag owned by the running call
    # nor stop the monitor.
    if self.IsInCallback and self.IsBlockInCallback:
        return
    try:
        self.IsInCallback = True
        # --- Check Captured Data
        tMimeData: QtCore.QMimeData = self.QtClipboard.mimeData()
        # [A] Urls ---
        if len(tMimeData.urls()) > 0:
            # --- Get List
            tPathList = []
            for tUrl in tMimeData.urls():
                tPath: str = tUrl.toLocalFile()
                # os compatibility: replace the non-native separator with os.sep
                tPath = tPath.replace(r'\/'.replace(os.sep, ''), os.sep)
                # Check exist
                if (not os.path.isfile(tPath)) and (not os.path.isdir(tPath)):
                    continue
                tPathList.append(tPath)
            # --- Distribute Message
            if "Path_File" in self.Mode:
                for iPath in tPathList:
                    if os.path.isfile(iPath):
                        self.CallResults = self.Callback("Path_File", iPath)
            if "PathList" in self.Mode:
                self.CallResults = self.Callback("PathList", tPathList)
            if "Path" in self.Mode:
                for tPath in tPathList:
                    self.CallResults = self.Callback("Path", tPath)
            if "Image" in self.Mode:
                # --- CV Import
                from .. import OpenCV as LuPy_Cv2
                for tPath in tPathList:
                    tExt: str = os.path.splitext(tPath)[-1]
                    if tExt.lower() in [".bmp", ".jpg", ".png"]:
                        tMat = LuPy_Cv2.safe_imread(tPath, -1)
                        self.CallResults = self.Callback("Image", tMat)
        # [B] Image Data
        if tMimeData.hasImage():
            tQImage = tMimeData.imageData()
            # RGB32 with 0xffRRGGBB ===> BGRA in np
            tPtr = tQImage.constBits()
            tPtr.setsize(tQImage.byteCount())
            tMat = np.ndarray(buffer=tPtr,
                              shape=[tQImage.height(), tQImage.width(), 4],
                              dtype=np.uint8)
            # --- XOR
            if "Screen" in self.Mode:
                self.CallResults = self.Callback("Image", tMat)
            elif "Image" in self.Mode:
                self.CallResults = self.Callback("Image", tMat)
    except Exception as e:
        WARN(f'Exception during clipboard event handling: {e}')
    finally:
        self.IsInCallback = False
        if self.IsRunOnce:
            self.Stop()
def _slot_Clipboard_OnChanged(self):
    """
    Handle a clipboard change: forward copied paths and images to `self.Callback`.

    Depending on the entries of `self.Mode`, the callback is invoked with
    "Path_File", "PathList", "Path" or "Image" as event type; its return value
    is stored in `self.CallResults`. Exceptions raised while handling are logged,
    not propagated. When `self.IsRunOnce` is set, `self.stop()` is called once
    `self.CallResults` is no longer the class-level `FLAG_NO_RESULT`.
    """
    # --- Only One Instance can be run
    # The guard is evaluated before the try/finally on purpose: a rejected
    # re-entrant call must neither clear the flag owned by the running call
    # nor stop the monitor.
    if self.IsInCallback and self.IsBlockInCallback:
        return
    try:
        self.IsInCallback = True
        # --- Check Captured Data
        tMimeData: QtCore.QMimeData = self.QtClipboard.mimeData()
        # [A] Urls ---
        if len(tMimeData.urls()) > 0:
            # --- Get List
            tPathList = []
            for tUrl in tMimeData.urls():
                tPath: str = tUrl.toLocalFile()
                # os compatibility: replace the non-native separator with os.sep
                tPath = tPath.replace(r'\/'.replace(os.sep, ''), os.sep)
                # Check exist
                if (not os.path.isfile(tPath)) and (not os.path.isdir(tPath)):
                    continue
                tPathList.append(tPath)
            # --- Distribute Message
            if len(tPathList) > 0:
                if "Path_File" in self.Mode:
                    for tPath in tPathList:
                        if os.path.isfile(tPath):
                            self.CallResults = self.Callback("Path_File", tPath)
                if "PathList" in self.Mode:
                    self.CallResults = self.Callback("PathList", tPathList)
                if "Path" in self.Mode:
                    for tPath in tPathList:
                        self.CallResults = self.Callback("Path", tPath)
        # [B] Image Data
        if tMimeData.hasImage():
            tQImage = tMimeData.imageData()
            # RGB32 with 0xffRRGGBB ===> BGRA in np
            tPtr = tQImage.constBits()
            tPtr.setsize(tQImage.byteCount())
            tMat = np.ndarray(buffer=tPtr,
                              shape=[tQImage.height(), tQImage.width(), 4],
                              dtype=np.uint8)
            if "Image" in self.Mode:
                self.CallResults = self.Callback("Image", tMat)
    except Exception as e:
        WARN(f'Exception during clipboard event handling: {e}')
    finally:
        self.IsInCallback = False
        if self.IsRunOnce and self.CallResults is not self.__class__.FLAG_NO_RESULT:
            self.stop()  # only stop after a successful handling
def model_predict(model: object, data: object, **params) -> object:
    """
    Run a prediction with `model` on `data`, optionally decode and show the result.

    :param model: a `tf.keras.Model` (its `predict()` is used) or any callable.
    :param data: passed to `ModelManager._validate_input()`, which yields `x` and `y`.
    :param params: prediction params (`decode_prediction`, `show_result`, `input_num`).
    :return: the (possibly decoded) predictions; the blank predictions as they are when
        the model returned nothing, or `None` when they are blank after decoding.
    :raises TypeError: for an unsupported model type or prediction type.
    :raises ValueError: for an unknown `decode_prediction.name` or an `input_num` that
        is neither `None` nor an int.
    """
    params_predict = Params(decode_prediction=Params({}),
                            show_result=Params({})).update_to(params)
    predictions = None
    x, y = ModelManager._validate_input(data)
    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        elif callable(model):
            # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
            input_spec = Params(input_num=None).left_join(params_predict)
            # Extra keyword arguments for the model call (local; shadows the outer `params`).
            params = {}
            # IMPROVE: judge the base class of model, to append required params
            if model.__module__.startswith("modules.models.tensorlayer"):
                params.update({'is_train': False})
            if isinstance(inputs, tf.data.Dataset):
                # TODO: specify InputSpec (inputs element_spec) for prediction
                if input_spec.input_num is None:
                    pass
                elif isinstance(input_spec.input_num, int):
                    assert input_spec.input_num > 0, "input_num must > 0"
                    # TODO: more test cases needed
                    # inputs.batch(input_spec.input_num)
                    # result = []
                    # assert iterable(inputs)
                    # for batch in inputs:
                    #     inputs_list = [_ for _ in batch]
                    #     inputs_list = inputs_list[0] if len(inputs_list) == 1 else inputs_list
                    #     result.append(model(inputs_list, **params))
                    # # return tf.stack(result)
                    # return result[0] if len(result) == 1 else None if len(result) == 0 else result
                    if input_spec.input_num > 1:
                        inputs = inputs.batch(
                            input_spec.input_num)  # NOTE: headed with a `batch_size` dim by this step
                        # inputs = inputs.unbatch()
                else:
                    raise ValueError(f'cannot handle input_spec.input_num={input_spec.input_num}')
                # NOTE: callable model might not support batch feeding. so it's up to caller to constrain the size.
                result = []
                for inputs_ in inputs.as_numpy_iterator():
                    result.append(model(inputs_, **params))
                # NOTE: if input_num > 1
                # A single result is unwrapped, no result yields None, otherwise the list.
                return result[0] if len(result) == 1 else None if len(result) == 0 else result
            else:
                result = model(inputs, **params)
                return result
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return predictions  # None

    if params_predict.decode_prediction.is_defined():
        if params_predict.decode_prediction.name == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
        elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
        elif params_predict.decode_prediction.name == 'image_denormalize':
            from modules.data.data_manager import DataManager
            predictions = DataManager.denormalize(predictions)
        else:
            raise ValueError(
                f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    if params_predict.show_result.is_defined():
        if isinstance(predictions, np.ndarray):
            # IMPROVE: support `show_result.inputs_type/outputs_type` e.g.'images''features''label_indexes'
            x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
            if params_predict.show_result.only_difference:
                if hasattr(y_show, '__len__'):
                    if p_show.__len__() == y_show.__len__():
                        # Boolean mask of the positions where prediction and target differ.
                        differences = p_show != y_show
                        x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                    else:
                        WARN(f"Cannot dump differences: len of targets is not same as predictions"
                             f"({y_show.__len__()} vs {p_show.__len__()})")
                else:
                    WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
                # NOTE(review): this is logged even when the differences could not be computed
                # above, in which case len(p_show) is the number of all predictions.
                INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
            if params_predict.show_result.get('top_k', None) is not None:
                top_k = params_predict.show_result.top_k
                # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
                x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
            if len(p_show) > 0:
                # One text per shown prediction: "p" alone, or "(p vs y)" when a label exists.
                dumps = []
                for i, p in enumerate(p_show):
                    if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                        dumps.append(f"{p}")
                    else:
                        dumps.append(f"({p} vs {y_show[i]})")
                need_to_show = params_predict.show_result.plotter.__len__() > 0
                need_to_save = params_predict.show_result.save_path.__len__() > 0
                only_save = params_predict.show_result.only_save
                if need_to_show or need_to_save:
                    # IMPROVE: use signature to match normalize and `denormalize` routines
                    from modules.data.data_manager import DataManager
                    # Float inputs are denormalized before being shown.
                    if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
                        x_show = DataManager.denormalize(x_show)
                    elif hasattr(x_show, "element_spec") and \
                            hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                        x_show = x_show.map(DataManager.denormalize)
                    save_dir, save_paths = None, None
                    if need_to_save:
                        save_dir = path_possibly_formatted(params_predict.show_result.save_path)
                        # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                    if params_predict.show_result.plotter == "matplot":
                        onlysave_path = None
                        if only_save:
                            if need_to_save:
                                from helpers.util import tmp_filename_by_time
                                onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                                need_to_save = False
                            else:
                                WARN('only_save is true, but save_path is not specified. ignored')
                        show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
                else:
                    # `'differences' in vars()` checks whether the local `differences`
                    # was assigned above, i.e. whether the diff filter was applied.
                    INFO(f"Predictions{'(only diff)' if 'differences' in vars() else ''}: " + ", ".join(dumps))
                # if need_to_save:
                #     save_image_mats(x_show, save_paths)
        else:
            # Predictions that are not an np.ndarray are only logged, sliced to top_k.
            top_k = params_predict.show_result.top_k
            INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")
    return predictions