def preload_gpu_devices(active_indexes: list = None, memory_limit: int = None):
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if gpus:
        try:
            if active_indexes is not None:
                # NOTE: pass all selected devices in one call -- calling set_visible_devices
                #   once per index would overwrite the previous selection.
                tf.config.experimental.set_visible_devices(
                    [gpus[index] for index in active_indexes], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(f"Num of Physical GPU vs Logical ones: {len(gpus)} vs {len(logical_gpus)}, "
                 f"{len(gpus) - len(logical_gpus)} disabled")
            if memory_limit is None:
                tf.config.experimental.set_memory_growth(gpus[0], True)
                INFO("Physical GPU Memory Growth is turned ON.")
            else:
                tf.config.experimental.set_virtual_device_configuration(gpus[0], [
                    tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
                INFO(f"Physical GPU Memory Growth is limited under: {memory_limit}")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
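# Usage sketch (hypothetical call site; assumes the module-level `__preloaded_gpu___` flag and the
# INFO/WARN/ERROR loggers used above are already defined in this module):
#   preload_gpu_devices(active_indexes=[0], memory_limit=2048)  # keep GPU:0 visible, cap it at ~2 GB
#   preload_gpu_devices()  # a second call is a no-op thanks to the preload flag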
def on_done_consume_inputs(result):
    """
    If using task.set_result, set_exception etc and wait for task instead of data,
    callbacks will be optional.
    """
    nonlocal data
    INFO(f'on_done_consume_inputs: {result}')
    data = result.get('data', None)
def run(self, pCallback: callable, pIsRunOnce: bool = False):
    """
    :param pCallback: func like (pMode:str, pVal:any). pMode is defined in __init__()
    :param pIsRunOnce: bool, True: run only once
    """
    self.Callback = pCallback
    self.IsRunOnce = pIsRunOnce
    # --- Connect & Run
    INFO("")
    INFO("/////////////////////////////////////")
    INFO("// --- Begin Monitor Clipboard --- //")
    INFO("/////////////////////////////////////")
    # --- Run Monitor App
    self.QtApp = QtWidgets.QApplication([]) if self.QtApp is None else self.QtApp
    self.QtClipboard = self.QtApp.clipboard()
    self.QtClipboard.dataChanged.connect(self._slot_Clipboard_OnChanged)
    self.QtApp.exec()
    return self.CallResults
def run(self, cb: callable, onetime: bool = False):
    """
    :param cb: func like (mode:str, value:any). mode is defined in __init__()
    :param onetime: bool, True: run only once
    """
    self.Callback = cb
    self.IsRunOnce = onetime
    # --- Connect & Run
    INFO("")
    INFO("/////////////////////////////////////")
    INFO("// --- Begin Monitor Clipboard --- //")
    INFO("/////////////////////////////////////")
    # --- Run Monitor App
    self.QtApp = QtWidgets.QApplication([]) if self.QtApp is None else self.QtApp
    self.QtClipboard = self.QtApp.clipboard()
    self.QtClipboard.dataChanged.connect(self._slot_Clipboard_OnChanged)
    self.QtApp.exec()
    return self.CallResults if self.CallResults is not self.__class__.FLAG_NO_RESULT else None
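# Usage sketch (hypothetical; assumes a ClipboardMonitor class exposing this run() method and
# constructed with the monitor modes defined in its __init__, as used elsewhere in this repo):
#   def on_clipboard(mode, value):
#       INFO(f"clipboard {mode}: {value}")
#   results = ClipboardMonitor(["Path_File"]).run(on_clipboard, onetime=True)  # blocks until Qt app exits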
def preload_gpu_devices():
    global __preloaded_gpu___
    if __preloaded_gpu___:
        return
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    __preloaded_gpu___ = True
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
            INFO("Physical GPU Memory Growth is turned ON.")
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            INFO(f"Num of Physical GPUs: {len(gpus)}, Num of Logical GPU: {len(logical_gpus)}")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            ERROR(f"Exception during preload_gpu_devices: {e}")
    else:
        WARN("No physical GPU available.")
def model_evaluate(model: object, data: object, **params) -> object:
    eval_metrics = None
    x_test, y_test = ModelManager._validate_input(data)
    import tensorflow as tf  # IMPROVE: check availability of ml backends
    if isinstance(model, tf.keras.Model):
        # NOTE: core API for model evaluation
        eval_metrics = model.evaluate(x_test, y_test)
        dumps = [f"{name}={value:8.4}" for name, value in zip(model.metrics_names, eval_metrics)]
        INFO("Evaluation: " + ", ".join(dumps))
    else:
        raise TypeError(f"Unsupported model type: {type(model)}")
    return eval_metrics
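# Usage sketch (hypothetical; assumes this function is exposed on ModelManager like the
# _validate_input helper it calls, and that `data` is an (x_test, y_test) pair it accepts):
#   metrics = ModelManager.model_evaluate(compiled_keras_model, (x_test, y_test))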
def model_predict(model: object, data: object, **params) -> object:
    params_predict = Params(decode_prediction=Params(name='logits_to_index'),
                            show_result=Params(top_k=20, only_difference=True)).update_to(params)
    predictions = None
    x, y = ModelManager._validate_input(data)
    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        elif callable(model):
            # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
            if isinstance(inputs, tf.data.Dataset):
                # IMPROVE: stack result as a tensor
                result = []
                for t in inputs:
                    result.append(model(t))
                return tf.stack(result)
            else:
                return model(inputs)
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return None

    if params_predict.decode_prediction.is_defined():
        if params_predict.decode_prediction.name == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
        elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
        else:
            raise ValueError(f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    if params_predict.show_result.is_defined() and isinstance(predictions, np.ndarray):
        x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
        if params_predict.show_result.only_difference:
            if hasattr(y_show, '__len__'):
                if p_show.__len__() == y_show.__len__():
                    differences = p_show != y_show
                    x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                else:
                    WARN(f"Cannot dump differences: len of targets is not same as predictions"
                         f"({y_show.__len__()} vs {p_show.__len__()})")
            else:
                WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
            INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
        if params_predict.show_result.get('top_k', None) is not None:
            top_k = params_predict.show_result.top_k
            # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
            x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
        if len(p_show) > 0:
            dumps = []
            for i, p in enumerate(p_show):
                if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                    dumps.append(f"{p}")
                else:
                    dumps.append(f"({p} vs {y_show[i]})")
            need_to_show = params_predict.show_result.plotter.__len__() > 0
            need_to_save = params_predict.show_result.save_path.__len__() > 0
            only_save = params_predict.show_result.only_save
            if need_to_show or need_to_save:
                def denormalize(x):
                    x = x * 255
                    if hasattr(x, 'astype'):  # np.ndarray
                        return x.astype(np.int32)
                    else:
                        return tf.cast(x, tf.int32)  # tf.Tensor

                # IMPROVE: use signature to match normalize and `un-normalize` routines
                if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
                    x_show = denormalize(x_show)
                elif hasattr(x_show, "element_spec") and \
                        hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                    x_show = x_show.map(denormalize)
                save_dir, save_paths = None, None
                if need_to_save:
                    save_dir = path_possibly_formatted(params_predict.show_result.save_path)
                    # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                if params_predict.show_result.plotter == "matplot":
                    onlysave_path = None
                    if only_save:
                        if need_to_save:
                            from helpers.util import tmp_filename_by_time
                            onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                            need_to_save = False
                        else:
                            WARN('only_save is true, but save_path is not specified. ignored')
                    show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
            else:
                INFO(f"Predictions{'(only diff)' if 'differences' in vars() else ''}: " + ", ".join(dumps))
            # if need_to_save:
            #     save_image_mats(x_show, save_paths)
    else:
        top_k = params_predict.show_result.top_k
        INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")
    return predictions
def model_train(model: object, data: object, **params):
    """
    NOTE: common Keras pitfall: Keras applies `validation_split` before shuffling, so if the
      negative samples are grouped at the end of `data`, shuffle it yourself beforehand.
    :param model:
    :param data: accept `np.ndarray`, `tf.data.Dataset` or `tf.Tensor`, or a pair of such data if y is available.
    """
    # TODO: confirm if params for tf.Model.compile can be combined with those for tf.Model.fit
    params_train = Params(optimizer='adam',
                          loss='sparse_categorical_crossentropy',
                          metrics=['acc'],
                          validation_split=0.1,
                          epochs=5,
                          batch_size=None,
                          checkpoint=Params(load_weights="latest",
                                            save_weights=Params(frequency="epoch", max_to_keep=5)),
                          show_result=Params()).update_to(params)
    x_train, y_train = ModelManager._validate_input(data)
    import tensorflow as tf  # IMPROVE: check availability of ml backends
    if isinstance(model, tf.keras.Model):
        # 1.compile and load variables from checkpoint
        model.compile(**params_train.fromkeys(['optimizer', 'loss', 'metrics']))
        # CKPT signatures: "tf.train.Checkpoint.restore", "tf.keras.Model.load_weights"
        ckpt_dir, ckpt_path_to_load = None, None
        if params_train.checkpoint.format == "CKPT_dir":
            from config import __abspath__
            ckpt_dir = path_possibly_formatted(params_train.checkpoint.path)
            ckpt_dir = __abspath__(ckpt_dir) if not osp.isabs(ckpt_dir) else ckpt_dir
            ensure_dir_exists(ckpt_dir)
            ckpt_path_to_load = tf.train.latest_checkpoint(ckpt_dir)
        # NOTE: in delayed-build mode, weights are only determined after calling build(batch_input_shape)
        #   or compile() + fit(x, y, batch_size)
        # ref:https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
        if params_train.checkpoint.load_weights == "latest" \
                and params_train.checkpoint.signature == "tf.keras.Model.load_weights" \
                and ckpt_path_to_load is not None:
            model.load_weights(ckpt_path_to_load)

        # 2.prepare callbacks
        callbacks = []
        # callback :: save medium CKPT
        if params_train.checkpoint.save_weights.is_defined() and ckpt_dir is not None:
            ckpt_path_to_save = osp.join(ckpt_dir, "ckpt.{epoch:02d}-{val_loss:.2f}")
            # NOTE: if save_freq is not 'epoch' (i.e. a number of steps), it will be less reliable
            _params = Params(save_freq='epoch').left_join(params_train.checkpoint.save_weights,
                                                          key_map={"save_freq": "frequency"})
            _callback = tf.keras.callbacks.ModelCheckpoint(ckpt_path_to_save,  # not checkpoint_dir
                                                           save_weights_only=True,
                                                           save_best_only=True,
                                                           verbose=1, **_params)
            callbacks.append(_callback)
        # callback :: early stop
        if params_train.early_stop.is_defined():
            _params = Params(monitor='val_loss', patience=10).left_join(params_train.early_stop)
            _callback = tf.keras.callbacks.EarlyStopping(**_params)
            callbacks.append(_callback)
        # callback :: progress indicator / verbose
        # IMPROVE: use config for training verbose / progress indicator
        # _callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps', stateful_metrics=None)  # no real-time output under PyTest
        _callback = tf.keras.callbacks.LambdaCallback(
            on_batch_end=lambda batch, logs: INFO(
                f"batch{batch:05d}: loss={logs.get('loss', None):.4f},acc={logs.get('acc', None):.4f}"))
        callbacks.append(_callback)
        cb_batch_stats = None
        if params_train.collect_batch_stats:
            # when training only a few epochs, may collect stats of each batch instead of the epoch average.
            class CallbackCollectBatchStats(tf.keras.callbacks.Callback):
                def __init__(self):
                    self.current_batch = 0
                    self.batch = []
                    self.loss = []
                    self.acc = []

                def on_train_batch_end(self, batch, logs=None):
                    self.batch.append(self.current_batch)
                    self.loss.append(logs['loss'])
                    self.acc.append(logs['acc'])
                    self.model.reset_metrics()
                    self.current_batch += 1

            cb_batch_stats = CallbackCollectBatchStats()  # TODO: can plot batch_losses and batch_acc using this
            callbacks.append(cb_batch_stats)
        if len(callbacks) == 0:
            callbacks = None

        # 3.train the model, and save checkpoints if configured
        # TODO: use model.fit_generator() for batch feeding. `steps_per_epoch` = np.ceil(samples / param.batch_size)
        # NOTE: core API for model training
        params_train_fit = params_train.fromkeys(['validation_split', 'batch_size', 'epochs'])
        INFO(f"Beginning to train: {params_train_fit}")
        history = model.fit(x_train, y_train, **params_train_fit, callbacks=callbacks)  # == core ==
        if cb_batch_stats is not None:
            history.history['batch'] = cb_batch_stats.batch  # accumulated batch number through epochs
            history.history['batch_loss'] = cb_batch_stats.loss
            history.history['batch_acc'] = cb_batch_stats.acc

        # 4.save checkpoint at last
        if params_train.save_model.is_defined() and ckpt_dir is not None:
            _params = Params(format="SavedModel").left_join(params_train.save_model)
            save_format, ckpt_path_to_save = None, None
            if _params.format == "HDF5":
                save_format = _ext = "h5"
                ckpt_path_to_save = osp.join(ckpt_dir, f"model_trained.{_ext}")
            else:  # default=SavedModel
                save_format = "tf"
                ckpt_path_to_save = osp.join(ckpt_dir, "model_trained")
                ensure_dir_exists(ckpt_path_to_save)
            # IMPROVE: consider using tf.saved_model.save()
            model.save(ckpt_path_to_save, save_format=save_format)  # by default, TF2 saves as 'tf' (SavedModel)

        # Optional: output history
        if params_train.show_result.is_defined():
            plot_history = None
            if params_train.show_result.plotter == 'matplot':
                from helpers.plt_helper import plot_history_by_metrics as plot_history
            if params_train.show_result.plotter.__len__() > 0 and plot_history is None:
                WARN(f"Unsupported history plotter: {params_train.show_result.plotter}")
            if plot_history is not None:
                plot_history(history, params_train.show_result.get('metrics', None))
            else:
                # TODO: check this section
                hist = history.history
                INFO(f"Last epoch: "
                     f"ACC(train,val)=({hist['accuracy'][-1]}, {hist['val_accuracy'][-1]}), "
                     f"MSE(train,val)=({hist['mse'][-1]}, {hist['val_mse'][-1]})")
    else:
        raise TypeError(f"Unsupported model type: {type(model)}")
    return model
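# Usage sketch (hypothetical values; assumed to be exposed on ModelManager like _validate_input.
# The top-level keys mirror the Params defaults declared above; whether nested dicts are accepted
# depends on how Params.update_to merges them):
#   model = ModelManager.model_train(
#       model, (x_train, y_train),
#       optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'],
#       epochs=10, validation_split=0.1,
#       checkpoint={'format': 'CKPT_dir', 'path': 'ckpt/my_model',
#                   'load_weights': 'latest', 'signature': 'tf.keras.Model.load_weights'})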
def load_model(model_signature: str, **params) -> object:
    """
    NOTE: common Keras pitfall: TF and Theano convolution kernels share the same shape, so verify a
      loaded model's behavior with test samples -- Keras cannot tell the two apart.
    :param model_signature:
    :param params:
    """
    model = None
    inputs, outputs = {}, {}  # {name: shape} dicts
    if model_signature == _ModelSignature.TFSavedModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        model = tf.saved_model.load(path, params.get('tags', None))  # == core ==
        if params.get('signature_', None) is not None:
            model = model.signatures[params['signature_']]
        # TODO: append inputs, outputs spec to model object? so that predict() can adapt the fed inputs
        if hasattr(model, 'inputs') and hasattr(model, 'structured_outputs'):
            inputs = {model.inputs[0].name: model.inputs[0].shape}
            outputs = {'default': model.structured_outputs['default']}  # IMPROVE: iterate
        pass
    elif model_signature == _ModelSignature.TFHub_KerasLayer.signature:
        import tensorflow_hub as tf_hub
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        params_model = Params(input_shape=None, trainable=False).update_to(params)
        if params_model.input_shape.__len__() == 4:
            params_model.input_shape = params_model.input_shape[1:]
        # NOTE: it will be delayed-build pattern when `input_shape` is None. no weights info available until build.
        model = tf_hub.KerasLayer(path, input_shape=params_model.input_shape)
        model.trainable = params_model.trainable
        pass
    elif model_signature == _ModelSignature.KerasSequential.signature:
        # IMPROVE: check availability of ml backends
        from tensorflow.keras import Sequential, layers
        name = params['name']
        # IMPROVE: parse name -> layers, or use structural config for iteration
        if name == '{conv-pool}*2-flat-dense-drop-dense':
            # NOTE: only for _test_\TF_1x_to_2x_3, output is len=10 logits
            model = Sequential([
                # NOTE: 1.in TF2.x there is no need to fix the Input layer's dimensions; layers connect automatically
                #   2.a Conv layer does not need the previous layer's (h,w); only the filter count, kernel size
                #     and padding (to preserve h,w) need to be set
                #   3.however, without `input_shape` the Optimizer cannot load its previously saved parameters
                #     and can only re-initialize
                layers.Conv2D(32, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Flatten(),  # the dense layers below require 1-D data
                layers.Dense(1024, activation='relu'),
                layers.Dropout(0.5),  # TODO: disable Dropout during evaluate/predict
                layers.Dense(10, activation='softmax')
            ])
        elif name == 'dense-dense_softmax':
            params_model = Params(embedding_size=1024, class_count=None).update_to(params)
            if params_model.class_count is None:
                raise ValueError('class_count must be specified')
            model = Sequential([
                layers.Dense(params_model.embedding_size, activation='relu'),
                layers.Dense(params_model.class_count, activation='softmax')
            ])
            # TODO: need to return intermediate tf.Tensor required by embedding, loss calculation and evaluation.
        else:
            raise ValueError(f"Undefined model: {name}")
        pass
    elif model_signature == _ModelSignature.KerasModels_LoadModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        format_ = ModelManager._validate_format(params['format'], _ModelSignature.KerasModels_LoadModel)
        params_model = Params(path='', path_formatted='').update_to(params)
        path = ModelManager._validate_path(params_model.path)
        model = tf.keras.models.load_model(path)  # == core ==
    elif model_signature == _ModelSignature.TF_ImportGraphDef.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        format_ = ModelManager._validate_format(params['format'], _ModelSignature.TF_ImportGraphDef)
        params_model = Params(inputs='', outputs='').update_to(params)
        path = ModelManager._validate_path(params_model.path)

        # import PB model (frozen) in TF2.x. ref:https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
        # ref:https://www.tensorflow.org/api_docs/python/tf/compat/v1/wrap_function
        def wrap_frozen_graph(pb_path, inputs, outputs, prefix=""):
            def _imports_graph_def():
                tf.compat.v1.import_graph_def(graph_def, name=prefix)  # turn off the default prefix "import/"

            graph_def = tf.compat.v1.GraphDef()
            loaded = graph_def.ParseFromString(open(pb_path, 'rb').read())  # == core ==
            wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])  # == core ==
            import_graph = wrapped_import.graph
            return wrapped_import.prune(
                tf.nest.map_structure(import_graph.as_graph_element, inputs),
                tf.nest.map_structure(import_graph.as_graph_element, outputs))

        model = wrap_frozen_graph(path, inputs=params_model.inputs, outputs=params_model.outputs)
        test_img = tf.ones([1, 224, 224, 3], dtype=tf.float32)  # fixed shape is for test ONLY
        DEBUG(f"wrap_func test result: {model(test_img).shape}")
    else:
        raise ValueError(f"Unsupported model signature: {model_signature}")
    INFO(f"type of loaded model={type(model)}")
    INFO(f"  inputs={inputs}, outputs={outputs}")
    return model
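# Usage sketch (hypothetical; assumed to be exposed on ModelManager. The signature strings come
# from _ModelSignature as referenced above; the path and signature_ values are illustrative placeholders):
#   seq_model = ModelManager.load_model(_ModelSignature.KerasSequential.signature,
#                                       name='{conv-pool}*2-flat-dense-drop-dense')
#   saved = ModelManager.load_model(_ModelSignature.TFSavedModel.signature,
#                                   path='models/my_saved_model', signature_='serving_default')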
def load_data(data_signature: str, category="all", meta_info=None, **params) -> object:
    """
    :param data_signature:
    :param category: 'train', 'test' or 'all'
    :param meta_info: if given as a dict, caller may get meta info of the dataset through it
    :param params:
    :return: if `category`='all', 'train' and 'test' dataset will be returned as a tuple
    """
    data = None
    params_data = Params(timeout=0,
                         need_shuffle=False,
                         shuffle_seed=None,
                         test_split=0.2,
                         decode_x=Params(colormode=None,
                                         resize_w=None, resize_h=None,
                                         preserve_aspect_ratio=True,
                                         normalize=True,
                                         reshape=None),
                         decode_y=Params()).update_to(params)
    if data_signature == _DataSignature.LabeledFolders.signature:
        params_data = Params(file_exts=['jpg'], labels_ordered_in_train=None).update_to(params_data)
        import modules.data.dataset_labeled_folders as dataset_labeled_folders
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.LabeledFolders)
        path = DataManager._validate_path(params_data.path)
        ds = dataset_labeled_folders.dataset(path, category=category, meta_info=meta_info, **params_data)
        DEBUG(f"loaded tf.data.Dataset: {ds}")
        data = ds
    elif data_signature == _DataSignature.TFKerasDataset.signature:
        # TODO: extract as modules.data.dataset_tf_keras_dataset :: dataset(name, **params)
        from importlib import import_module
        # format_ = DataManager._validate_format(kwargs['format'], _DataSignature.TFKerasDataset)
        lib_dataset = import_module(f"tensorflow.keras.datasets.{params_data.name}")
        (x_train, y_train), (x_test, y_test) = lib_dataset.load_data()  # Tensors
        WARN(f"Keras dataset {params_data.name} loaded as is. "
             f"Ignored configs: colormode, resize_w/h, preserve_aspect_ratio")
        if params_data.decode_x.normalize:
            x_train, x_test = x_train / 255.0, x_test / 255.0
        if params_data.decode_x.reshape.__len__() > 0:
            # TODO: decode_x reshape means image reshape, not matrix reshape
            x_train = x_train.reshape(params_data.decode_x.reshape)
            x_test = x_test.reshape(params_data.decode_x.reshape)
        DEBUG(f"loaded data: y_train={y_train}, y_test={y_test}")
        if category == 'all':
            data = ((x_train, y_train), (x_test, y_test))
        elif category == 'train':
            data = (x_train, y_train)
        elif category == 'test':
            data = (x_test, y_test)
        else:
            raise ValueError(f"Unknown category: {category}")
        # IGNORED: meta_info returns no value. test_split has no use. fixed_seed not used.
    elif data_signature == _DataSignature.SingleFile.signature:
        path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg', colormode=None, reshape=None,
                               preserve_aspect_ratio=True, color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = DataManager._process_files(path, **params_decode)
    elif data_signature == _DataSignature.UI_Copy_Files.signature:
        params_decode = Params(encoding='jpg', colormode=None, reshape=None,
                               preserve_aspect_ratio=True, color_transform=None,
                               normalize=True).left_join(params_data.decode_x)

        def _process(event_type, abspath_or_list):
            nonlocal data
            INFO(f"clipboard event: path={abspath_or_list}")
            data = DataManager._process_files(abspath_or_list, **params_decode)

        from helpers.qt_helper import ClipboardMonitor
        monitor_type = "Path_File" if params_data.format == "Path" else "PathList"
        # NOTE: use AsyncTask to impl async clipboard monitoring loop.
        # data = ClipboardMonitor([monitor_type]).run(_process, True)  # <- will get blank result on a fault copy
        from async_ import AsyncLoop, AsyncManager

        async def coro_clipboard_monitor():
            ClipboardMonitor([monitor_type]).run(_process, onetime=True)

        task = AsyncManager.run_task(coro_clipboard_monitor(), loop=None)  # block current loop
        DEBUG(f"[input_loop] monitoring clipboard with type {monitor_type} ...")

        # wait until task done  TODO: impl a context_manager for simple await
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        loop.run_until_complete(coro_simple_wait(timeout=None))
    elif data_signature == _DataSignature.UI_Web_Files.signature:
        # path = DataManager._validate_path(params_data.path)
        params_decode = Params(encoding='jpg', colormode=None, reshape=None,
                               preserve_aspect_ratio=True, color_transform=None,
                               normalize=True).left_join(params_data.decode_x)
        data = None
        webapp = ensure_web_app()  # will load config from Path.DeployConfigAbs
        INFO(f'waiting for data input from web app {webapp.host}:{webapp.port}')  # IMPROVE: hint upload url
        from async_ import AsyncLoop, AsyncManager, amend_blank_cbs
        from helpers.util import track_entry_and_exit, load_image_mat, async_show_image_mats
        import asyncio
        this_task: asyncio.Task or None = None

        @track_entry_and_exit.coro()
        async def coro_consume_files(abspath_or_list, cbs):
            # nonlocal this_task
            # assert this_task is not None, '`this_task` should have been assigned before entering related coro.'
            import modules.data.decode_tf as decode_tf
            import tensorflow as tf
            DEBUG(f'[coro_consume_inputs]: {locals()}')
            on_done, on_succeeded, on_failed, on_progress = amend_blank_cbs(cbs)
            filepaths = abspath_or_list if isinstance(abspath_or_list, list) else [abspath_or_list]
            result = {}  # data: tf.data.Dataset::{image_t}, error: optional(str)
            # from helpers.tf_helper import image_example
            # IMPROVE: try to use TFRecordDataset.from_tensors([tf_example])
            data = DataManager._process_files(filepaths, **params_decode)
            result.update({'data': data})
            # # if show inputs
            # try:
            #     asynctask = async_show_image_mats(image_mats)
            #     result.update({'asynctask_id': asynctask.id})
            # except Exception as e:
            #     result.update({'error': e.__repr__()})
            on_done(result)
            # TODO: how to link to the next task (e.g. model.predict) so user can monitor process.
            return result  # == this_task.set_result(result)

        def on_done_consume_inputs(result):
            """
            If using task.set_result, set_exception etc and wait for task instead of data,
            callbacks will be optional.
            """
            nonlocal data
            INFO(f'on_done_consume_inputs: {result}')
            data = result.get('data', None)

        @webapp.on_uploads(namespace="data_manager::ui_web_files", onetime=True)
        def handle_ui_web_files(abspath_or_list):
            nonlocal this_task
            this_task = AsyncManager.run_task(coro_consume_files(abspath_or_list, (on_done_consume_inputs,)))
            handler_result = {'asynctask_id': this_task.id}
            return handler_result

        # wait until get data uploaded
        import asyncio
        loop = asyncio.get_event_loop()  # block current loop

        async def coro_simple_wait(timeout=None):
            while data is None:
                # IMPROVE: implement timeout. maybe wait_for(this_task)
                await asyncio.sleep(1)

        loop.run_until_complete(coro_simple_wait(timeout=None))
        pass
    else:
        raise ValueError(f"Unsupported data signature: {data_signature}")

    # TODO: consider shuffle, repeat(epoch), batch(batch_size), prefetch(1) for train/predict, use tf.data.Database
    #   data can be tf.Dataset, np.ndarray, or tuple of them. Do this job in each signature handler.
    # tf = safe_import_module("tensorflow")
    # if tf and isinstance(data, tf.data.Dataset):
    #     if params_data.shuffle.fixed_seed:
    #         data.shuffle(buffer_size=10000, seed=params_data.shuffle.fixed_seed)
    return data
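# Usage sketch (hypothetical; assumed to be exposed on DataManager. The signature strings come
# from _DataSignature as referenced above; the folder path and dataset name are illustrative):
#   meta = {}
#   train_ds = DataManager.load_data(_DataSignature.LabeledFolders.signature, category='train',
#                                    meta_info=meta, path='data/my_labeled_folders', file_exts=['jpg'])
#   (x_train, y_train), (x_test, y_test) = DataManager.load_data(
#       _DataSignature.TFKerasDataset.signature, category='all', name='mnist')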
def _process(event_type, abspath_or_list):
    nonlocal data
    INFO(f"clipboard event: path={abspath_or_list}")
    data = DataManager._process_files(abspath_or_list, **params_decode)
def upload_create_or_update(filename_to_update=None, key="file"):
    """
    :return: json object. contains relative filename on success, and error message on failure.
    """
    # redirect_url = request.values.get('redirect', default=request.url, type=str)
    # 1. request -> files(data) -> local uploads folder + json response(error+filename)
    # Accept multiple files
    # file = request.files[key]
    files = request.files.getlist(key)
    if files is None or len(files) == 0:
        ret = {'error': 'no file part found in multipart/form-data'}
        return str(json.dumps(ret)), 400, RESPONSE_JSON_ACCESSCONTROL
    # NOTE: use [] * len(..) carefully -- it only makes a shallow copy. use `for` instead.
    ret = [{} for _ in range(len(files))]  # [{filename: str, error: optional(str)}]
    dispatch_arg = []
    error_count = 0
    for idx, file in enumerate(files):
        if file.filename == "":
            ret[idx].update({'error': "no file name is given or no file selected for uploading"})
            error_count += 1
            continue  # bypass to the next one
        if file and osp.splitext(file.filename)[1].lower() in ALLOWED_EXTENSIONS:
            if filename_to_update is None:
                # TODO: handle Chinese filename. str.encode('utf-8')?
                filepath = secure_filename(file.filename)
                filepath = get_new_name_if_exists(osp.join(app.config['UPLOAD_FOLDER'], filepath))
            else:
                filepath = osp.join(app.config['UPLOAD_FOLDER'], filename_to_update)
            if not osp.isabs(filepath):
                filepath = osp.join(app.root_path, filepath)
            try:
                file.save(filepath)  # NOTE: overwrite existed one
            except Exception as e:
                ret[idx].update({'error': f"Failed to upload file to {filepath}"})
                error_count += 1
                continue  # bypass to the next one
            INFO('file uploaded to: ' + filepath)
            dispatch_arg.append(filepath)
            ret[idx].update({'filename': osp.basename(filepath)})
        else:
            ret[idx].update({'error': f"only accept these image types: {ALLOWED_EXTENSIONS}"})
            error_count += 1
            continue  # bypass to the next one
    ret = {'uploaded': ret}
    # 2. dispatch to subscribers of `on_uploads` event
    if error_count < len(files):  # error_count == 0:
        dispatch_results = app.dispatch_handlers(app.__class__.EventUploads,
                                                 dispatch_arg if len(dispatch_arg) > 1 else dispatch_arg[0])
        # NOTE: multiple inputs can be consumed at once, so results num can be less than inputs num.
        ret.update({'dispatched': dispatch_results})
    return str(json.dumps(ret)), 200 if error_count < len(files) else 400, RESPONSE_JSON_ACCESSCONTROL
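# Client-side usage sketch (hypothetical; assumes this handler is registered on a Flask route such
# as '/upload' that accepts multipart/form-data under the `file` key -- URL and route are illustrative):
#   import requests
#   with open('sample.jpg', 'rb') as f:
#       resp = requests.post('http://localhost:5000/upload', files={'file': f})
#   print(resp.status_code, resp.json())  # e.g. {'uploaded': [{'filename': 'sample.jpg'}], 'dispatched': ...}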
def _process(event_type, abspath_or_list):
    INFO(f"clipboard event: path={abspath_or_list}")
    return DataManager._process_files(abspath_or_list, **params_decode)
def model_predict(model: object, data: object, **params) -> object:
    params_predict = Params(decode_prediction=Params({}), show_result=Params({})).update_to(params)
    predictions = None
    x, y = ModelManager._validate_input(data)
    import numpy as np
    import tensorflow as tf  # IMPROVE: check availability of ml backends

    # wrapper for different model types
    def _predict(inputs):
        # NOTE: core API for prediction
        if isinstance(model, tf.keras.Model):
            # NOTE: if x is ndarray, result will be ndarray too
            return model.predict(inputs)
        elif callable(model):
            # type(model).__name__ == "tensorflow.python.eager.wrap_function.WrappedFunction"
            input_spec = Params(input_num=None).left_join(params_predict)
            params = {}
            # IMPROVE: judge the base class of model, to append required params
            if model.__module__.startswith("modules.models.tensorlayer"):
                params.update({'is_train': False})
            if isinstance(inputs, tf.data.Dataset):
                # TODO: specify InputSpec (inputs element_spec) for prediction
                if input_spec.input_num is None:
                    pass
                elif isinstance(input_spec.input_num, int):
                    assert input_spec.input_num > 0, "input_num must > 0"
                    # TODO: more test cases needed
                    # inputs.batch(input_spec.input_num)
                    # result = []
                    # assert iterable(inputs)
                    # for batch in inputs:
                    #     inputs_list = [_ for _ in batch]
                    #     inputs_list = inputs_list[0] if len(inputs_list) == 1 else inputs_list
                    #     result.append(model(inputs_list, **params))
                    # # return tf.stack(result)
                    # return result[0] if len(result) == 1 else None if len(result) == 0 else result
                    if input_spec.input_num > 1:
                        inputs = inputs.batch(input_spec.input_num)  # NOTE: headed with a `batch_size` dim by this step
                        # inputs = inputs.unbatch()
                else:
                    raise ValueError(f'cannot handle input_spec.input_num={input_spec.input_num}')
                # NOTE: callable model might not support batch feeding. so it's up to caller to constrain the size.
                result = []
                for inputs_ in inputs.as_numpy_iterator():
                    result.append(model(inputs_, **params))  # NOTE: if input_num > 1
                return result[0] if len(result) == 1 else None if len(result) == 0 else result
            else:
                result = model(inputs, **params)
            return result
        else:
            raise TypeError(f"Unsupported model type: {type(model)}")

    predictions = _predict(x)
    if predictions is None or safe_get_len(predictions) == 0:
        WARN("Predictions is blank.")
        return predictions  # None

    if params_predict.decode_prediction.is_defined():
        if params_predict.decode_prediction.name == 'logits_to_index':
            # one-hot array -> index
            if isinstance(predictions, np.ndarray):
                predictions = np.argmax(predictions, axis=-1)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.argmax(predictions, axis=-1)
            else:
                raise TypeError(f"Unsupported type for logits_to_index: {type(predictions)}")
        elif params_predict.decode_prediction.name == 'logits_to_indices_and_probs':
            # for retrain, prediction should be a probs array and need to be sorted by `top_k`
            # NOTE: length of each prediction must be equivalent.
            top_k = params_predict.decode_prediction.get('top_k', safe_get_len(predictions[0]))
            # returns: top_values(=probs), top_idxs
            if isinstance(predictions, np.ndarray):
                predictions = np_top_k(predictions, top_k)
            elif isinstance(predictions, tf.Tensor):
                predictions = tf.math.top_k(input=predictions, k=top_k)
            else:
                raise TypeError(f"Unsupported type for logits_to_indices_and_probs: {type(predictions)}")
        elif params_predict.decode_prediction.name == 'image_denormalize':
            from modules.data.data_manager import DataManager
            predictions = DataManager.denormalize(predictions)
        else:
            raise ValueError(f"Unsupported result decoding: {params_predict.decode_prediction.name}")
        if predictions is None or safe_get_len(predictions) == 0:
            WARN("Predictions is blank (after decoding).")
            return None

    if params_predict.show_result.is_defined():
        if isinstance(predictions, np.ndarray):
            # IMPROVE: support `show_result.inputs_type/outputs_type` e.g. 'images', 'features', 'label_indexes'
            x_show, p_show, y_show = x, predictions, y  # NOTE: y(=label) is optional (default:None)
            if params_predict.show_result.only_difference:
                if hasattr(y_show, '__len__'):
                    if p_show.__len__() == y_show.__len__():
                        differences = p_show != y_show
                        x_show, p_show, y_show = x_show[differences], p_show[differences], y_show[differences]
                    else:
                        WARN(f"Cannot dump differences: len of targets is not same as predictions"
                             f"({y_show.__len__()} vs {p_show.__len__()})")
                else:
                    WARN(f"Cannot dump differences: unsupported y type(={type(y_show)})")
                INFO(f"Number of mismatch between prediction and truth: {len(p_show)}")
            if params_predict.show_result.get('top_k', None) is not None:
                top_k = params_predict.show_result.top_k
                # TODO: sorting? 1.use tf.math.top_k 2.diff algorithm need to be specified
                x_show, p_show, y_show = (safe_slice(_, end=top_k) for _ in (x_show, p_show, y_show))
            if len(p_show) > 0:
                dumps = []
                for i, p in enumerate(p_show):
                    if not hasattr(y_show, '__len__') or y_show.__len__() <= i:
                        dumps.append(f"{p}")
                    else:
                        dumps.append(f"({p} vs {y_show[i]})")
                need_to_show = params_predict.show_result.plotter.__len__() > 0
                need_to_save = params_predict.show_result.save_path.__len__() > 0
                only_save = params_predict.show_result.only_save
                if need_to_show or need_to_save:
                    # IMPROVE: use signature to match normalize and `denormalize` routines
                    from modules.data.data_manager import DataManager
                    if hasattr(x_show, "dtype") and x_show.dtype.name.startswith('float'):
                        x_show = DataManager.denormalize(x_show)
                    elif hasattr(x_show, "element_spec") and \
                            hasattr(x_show.element_spec, "dtype") and x_show.element_spec.dtype.name.startswith('float'):
                        x_show = x_show.map(DataManager.denormalize)
                    save_dir, save_paths = None, None
                    if need_to_save:
                        save_dir = path_possibly_formatted(params_predict.show_result.save_path)
                        # save_paths = [osp.join(save_dir, _+'.jpg') for _ in dumps]
                    if params_predict.show_result.plotter == "matplot":
                        onlysave_path = None
                        if only_save:
                            if need_to_save:
                                from helpers.util import tmp_filename_by_time
                                onlysave_path = osp.join(save_dir, tmp_filename_by_time('jpg'))
                                need_to_save = False
                            else:
                                WARN('only_save is true, but save_path is not specified. ignored')
                        show_image_mats(x_show, texts=dumps, title="Predictions", onlysave_path=onlysave_path)
                else:
                    INFO(f"Predictions{'(only diff)' if 'differences' in vars() else ''}: " + ", ".join(dumps))
                # if need_to_save:
                #     save_image_mats(x_show, save_paths)
        else:
            top_k = params_predict.show_result.top_k
            INFO(f"Predictions(top{top_k}): {safe_slice(predictions, end=top_k)}")
    return predictions
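# Usage sketch (hypothetical values; assumed to be exposed on ModelManager. The keys under
# decode_prediction/show_result mirror the names this function reads above; whether nested dicts
# are accepted depends on how Params.update_to merges them):
#   preds = ModelManager.model_predict(
#       model, (x_test, y_test),
#       decode_prediction={'name': 'logits_to_index'},
#       show_result={'top_k': 20, 'only_difference': True, 'plotter': 'matplot'})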