def replace_graph_def_of_saved_model(input_model, output_model, graph_def):
    """Write a copy of a SavedModel whose first meta graph uses ``graph_def``.

    The variables of ``input_model`` (if any) are re-saved under
    ``<output_model>/variables`` and a ``saved_model.pb`` is written whose
    first meta graph carries ``graph_def``.

    Args:
        input_model (str): directory of the source SavedModel.
        output_model (str): directory to write to; created when missing.
        graph_def: GraphDef message copied into the first meta graph.
    """
    export_variables_dir = os.path.join(output_model, 'variables')
    export_saved_model = os.path.join(output_model, 'saved_model.pb')
    checkpoint_file = os.path.join(export_variables_dir, 'checkpoint')

    # Creates output_model as well, since it is the parent directory.
    os.makedirs(export_variables_dir, exist_ok=True)

    with open(checkpoint_file, 'w') as f:
        f.write("model_checkpoint_path: \"variables\"\n")

    from tensorflow.python.saved_model import loader_impl
    saved_model = loader_impl.parse_saved_model(input_model)
    meta_graph = saved_model.meta_graphs[0]

    # not all saved model have variables
    try:
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            tf.compat.v1.saved_model.loader.load(sess, ["serve"], input_model)
            saver = tf.compat.v1.train.Saver()
            saver.save(sess,
                       os.path.join(export_variables_dir, 'variables'),
                       write_meta_graph=False,
                       write_state=False)
    except Exception:
        # Best effort: a failure here is treated as "nothing to save".
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        logger.info('no variables in the saved model')

    meta_graph.graph_def.CopyFrom(graph_def)
    from tensorflow.python.lib.io import file_io
    file_io.write_string_to_file(export_saved_model,
                                 saved_model.SerializeToString())
def save(self, root=None):
    """Export the graph held by ``self.sess`` as a SavedModel directory.

    Args:
        root (str, optional): target directory; ``cfg.default_workspace``
            is used when it is empty. An existing directory at that
            location is removed first.
    """
    target_dir = os.path.abspath(
        os.path.expanduser(root or cfg.default_workspace))
    if os.path.exists(target_dir):
        import shutil
        shutil.rmtree(target_dir)
    os.makedirs(target_dir, exist_ok=True)

    from tensorflow.python.saved_model import signature_constants
    from tensorflow.python.saved_model import tag_constants
    from lpot.adaptor.tf_utils.util import get_tensor_by_name

    builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(target_dir)
    signature_map = {}
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        tf.import_graph_def(self.sess.graph.as_graph_def(), name="")
        graph = tf.compat.v1.get_default_graph()
        # Map each configured tensor name to the tensor in the new graph.
        input_map = {}
        for tensor_name in self._input_tensor_names:
            input_map[tensor_name] = get_tensor_by_name(graph, tensor_name)
        output_map = {}
        for tensor_name in self._output_tensor_names:
            output_map[tensor_name] = get_tensor_by_name(graph, tensor_name)

        predict_signature = \
            tf.compat.v1.saved_model.signature_def_utils.predict_signature_def(
                input_map, output_map)
        signature_map[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = predict_signature
        builder.add_meta_graph_and_variables(sess,
                                             [tag_constants.SERVING],
                                             signature_def_map=signature_map)
    builder.save()
    logger.info("Save quantized model at %s" % target_dir)
def get_estimator_graph(estimator, input_fn):
    """Build a frozen ``tf.Graph`` from an estimator in PREDICT mode.

    Args:
        estimator: estimator object providing ``_get_features_from_input_fn``,
            ``_call_model_fn`` and ``config``.
        input_fn: input function handed to the estimator.

    Returns:
        tf.Graph: a new graph holding the constant-folded graph_def.
    """
    predict_mode = tf.estimator.ModeKeys.PREDICT
    with tf.Graph().as_default() as g:
        features, _ = estimator._get_features_from_input_fn(
            input_fn, predict_mode)
        spec = estimator._call_model_fn(
            features, None, predict_mode, estimator.config)

        predictions = spec.predictions
        if isinstance(predictions, dict):
            outputs = [tensor.name for tensor in predictions.values()]
        else:
            outputs = [predictions.name]
        logger.info('estimator output tensor names is {}'.format(outputs))

        with tf.compat.v1.Session(graph=g) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            # Freezing needs node names, i.e. tensor names without the
            # ':<index>' suffix, de-duplicated.
            output_nodes = list({name.split(':')[0] for name in outputs})
            # A model using an Iterator also needs its 'MakeIterator' op
            # (TF default name) kept as an output node.
            uses_iterator = any(
                node.op == 'MakeIterator' for node in g.as_graph_def().node)
            if uses_iterator:
                output_nodes.append('MakeIterator')

            graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, g.as_graph_def(), output_nodes)

        frozen_graph = tf.Graph()
        with frozen_graph.as_default():
            tf.import_graph_def(graph_def, name='')
        return frozen_graph
def save(self, path):
    """Dump ``self.tune_cfg`` to ``best_configure.json`` under ``path``.

    Args:
        path (str): target directory (``~`` is expanded); created if missing.

    An ``IOError`` raised while writing is logged, not propagated.
    """
    out_dir = os.path.expanduser(path)
    os.makedirs(out_dir, exist_ok=True)
    config_file = os.path.join(out_dir, "best_configure.json")
    try:
        with open(config_file, 'w') as stream:
            json.dump(self.tune_cfg, stream)
        logger.info("save config file of quantized model to path %s" % out_dir)
    except IOError as err:
        logger.error("Unable to save configure file and weights. %s" % err)
def validate_graph_node(graph_def, node_names):
    """Check that every name in ``node_names`` is a node of ``graph_def``.

    Args:
        graph_def: object exposing a ``node`` iterable whose items have a
            ``name`` attribute.
        node_names (list of string): node names to be validated.

    Returns:
        bool: False when ``node_names`` is empty or any name is missing
        from the graph, True otherwise.
    """
    if not node_names:
        return False

    # A set makes each membership test O(1) instead of scanning a list.
    known_names = {node.name for node in graph_def.node}
    for user_input_name in node_names:
        if user_input_name not in known_names:
            # Adjacent literals instead of a backslash inside the string, so
            # no source indentation leaks into the logged message.
            logger.info(
                "Input node name {} doesn't exist in the model, "
                "please check the yaml.".format(user_input_name))
            return False
    return True
def save(self, root=None):
    """Serialize ``self.graph_def`` to a ``.pb`` file.

    Args:
        root (str, optional): target file path. Defaults to
            ``cfg.default_workspace + '/save.pb'``. ``.pb`` is appended
            when the file name does not already end with it.
    """
    if not root:
        root = cfg.default_workspace + '/save.pb'
    root = os.path.abspath(os.path.expanduser(root))
    os.makedirs(os.path.dirname(root), exist_ok=True)

    # if not have suffix, default append .pb
    if os.path.split(root)[-1].endswith('.pb'):
        pb_file = root
    else:
        pb_file = root + '.pb'

    # The context manager closes the handle on exit (also when the write
    # raises); the previous code never closed it.
    with tf.io.gfile.GFile(pb_file, 'wb') as f:
        f.write(self.graph_def.SerializeToString())
    logger.info("Save quantized model at %s" % pb_file)
def save(self, root=None):
    """Dump ``self.tune_cfg`` to ``best_configure.json`` under ``root``.

    Args:
        root (str, optional): target directory; ``cfg.default_workspace``
            when empty. Created if missing.

    An ``IOError`` raised while writing is logged, not propagated.
    """
    out_dir = os.path.abspath(
        os.path.expanduser(root or cfg.default_workspace))
    os.makedirs(out_dir, exist_ok=True)
    config_file = os.path.join(out_dir, "best_configure.json")
    try:
        with open(config_file, 'w') as stream:
            json.dump(self.tune_cfg, stream)
        logger.info("Save config file of quantized model at %s" % out_dir)
    except IOError as err:
        logger.error("Unable to save configure file and weights. %s" % err)
def save(self, root=None):
    """Move every entry of the directory ``self._model`` into ``root``.

    Args:
        root (str, optional): destination directory;
            ``cfg.default_workspace`` when empty. Created if missing and
            must differ from ``self._model``.
    """
    destination = os.path.abspath(
        os.path.expanduser(root or cfg.default_workspace))
    os.makedirs(destination, exist_ok=True)
    assert destination != self._model, \
        'saved location should be different with original saved ' \
        'model path'

    import shutil
    # The listing is taken once, before anything is moved.
    for entry in os.listdir(self._model):
        shutil.move(os.path.join(self._model, entry), destination)
    logger.info("Save quantized model at %s" % destination)
def LoadAnnotations(self, annotations):
    """Load a list of detection results into a new COCO data structure.

    See http://mscoco.org/dataset/#format for a description of the
    annotations format. This mirrors the default API behavior without
    writing to external storage.

    Note that the dicts in ``annotations`` are updated in place with
    'area', 'id', 'iscrowd' (and 'bbox' for segmentation results).

    Args:
      annotations: python list holding object detection results where each
        detection is encoded as a dict with required keys ['image_id',
        'category_id', 'score'] and one of ['bbox', 'segmentation'] based
        on `detection_type`.

    Returns:
      a coco.COCO datastructure holding object detection annotations results

    Raises:
      ValueError: if annotations is not a list
      ValueError: if annotations do not correspond to the images contained
        in self.
    """
    results = coco.COCO()
    results.dataset['images'] = list(self.dataset['images'])

    logger.info('Loading and preparing annotation results...')
    start_time = time.time()

    if not isinstance(annotations, list):
        raise ValueError('annotations is not a list of objects')
    # Every referenced image id must be known to this dataset.
    referenced_ids = set([ann['image_id'] for ann in annotations])
    if not referenced_ids.issubset(set(self.getImgIds())):
        raise ValueError('Results do not correspond to current coco set')

    results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

    detection_type = self._detection_type
    if detection_type == 'bbox':
        for ann_id, ann in enumerate(annotations, start=1):
            box = ann['bbox']
            ann['area'] = box[2] * box[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif detection_type == 'segmentation':
        for ann_id, ann in enumerate(annotations, start=1):
            ann['area'] = mask.area(ann['segmentation'])
            ann['bbox'] = mask.toBbox(ann['segmentation'])
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    logger.info('DONE (t=%0.2fs)', (time.time() - start_time))

    results.dataset['annotations'] = annotations
    results.createIndex()
    return results
def save(self, path):
    """Write the tuning config and the model weights under ``path``.

    ``self.tune_cfg`` goes to ``best_configure.yaml`` and
    ``self._model.state_dict()`` to ``best_model_weights.pt``.

    Args:
        path (str): target directory (``~`` is expanded); created if missing.

    An ``IOError`` raised while writing is logged, not propagated.
    """
    out_dir = os.path.expanduser(path)
    os.makedirs(out_dir, exist_ok=True)
    config_file = os.path.join(out_dir, "best_configure.yaml")
    try:
        with open(config_file, 'w') as stream:
            yaml.dump(self.tune_cfg, stream, default_flow_style=False)
        weights_file = os.path.join(out_dir, "best_model_weights.pt")
        torch.save(self._model.state_dict(), weights_file)
        logger.info(
            "save config file and weights of quantized model to path %s" % out_dir)
    except IOError as err:
        logger.error("Unable to save configure file and weights. %s" % err)
def save(self, root=None):
    """Write the tuning config and the model weights under ``root``.

    ``self.tune_cfg`` goes to ``best_configure.yaml`` and
    ``self._model.state_dict()`` to ``best_model_weights.pt``.

    Args:
        root (str, optional): target directory; ``cfg.default_workspace``
            when empty. Created if missing.

    An ``IOError`` raised while writing is logged, not propagated.
    """
    out_dir = os.path.abspath(
        os.path.expanduser(root or cfg.default_workspace))
    os.makedirs(out_dir, exist_ok=True)
    config_file = os.path.join(out_dir, "best_configure.yaml")
    try:
        with open(config_file, 'w') as stream:
            yaml.dump(self.tune_cfg, stream, default_flow_style=False)
        weights_file = os.path.join(out_dir, "best_model_weights.pt")
        torch.save(self._model.state_dict(), weights_file)
        logger.info(
            "Save config file and weights of quantized model at %s" % out_dir)
    except IOError as err:
        logger.error("Unable to save configure file and weights. %s" % err)
def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs):
    """Build session with estimator model

    The estimator is called in PREDICT mode, its graph is frozen and the
    result is handed to ``graph_def_session``.

    Args:
        model (tf.estimator.Estimator): tf.estimator.Estimator object
        input_tensor_names (list of string): input_tensor_names of model
        output_tensor_names (list of string): output_tensor_names of model;
            when empty, the names of the estimator predictions are used
        kwargs (dict): other required parameters, like input_fn

    Returns:
        sess (tf.compat.v1.Session): tf.compat.v1.Session object
        input_tensor_names (list of string): validated input_tensor_names
        output_tensor_names (list of string): validated output_tensor_names
    """
    assert 'input_fn' in kwargs, 'input func should be supplied for estimator session....'
    predict_mode = tf.estimator.ModeKeys.PREDICT
    with tf.Graph().as_default() as g:
        features, _ = model._get_features_from_input_fn(
            kwargs['input_fn'], predict_mode)
        spec = model._call_model_fn(features, None, predict_mode, model.config)

        if len(output_tensor_names) != 0:
            outputs = output_tensor_names
        elif isinstance(spec.predictions, dict):
            outputs = [tensor.name for tensor in spec.predictions.values()]
        else:
            outputs = [spec.predictions.name]
        logger.info('estimator output tensor names is {}'.format(outputs))

        with tf.compat.v1.Session(graph=g) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            # Freezing needs node names, i.e. tensor names without the
            # ':<index>' suffix, de-duplicated.
            output_nodes = list({name.split(':')[0] for name in outputs})
            # A model using an Iterator also needs its 'MakeIterator' op
            # (TF default name) kept as an output node.
            uses_iterator = any(
                node.op == 'MakeIterator' for node in g.as_graph_def().node)
            if uses_iterator:
                output_nodes.append('MakeIterator')

            graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, g.as_graph_def(), output_nodes)

        return graph_def_session(graph_def, input_tensor_names, outputs)
def __call__(self):
    """Apply each registered algorithm in turn and return the quantized model.

    Returns ``self._q_model`` untouched when no algorithm is registered;
    otherwise each algorithm's return value replaces ``self._q_model``.
    """
    assert self._q_model, 'set q_model for algorithm'
    if len(self.algorithms) == 0:
        return self._q_model

    # The remaining prerequisites only matter when something will run.
    prerequisites = (
        ('_origin_model', 'set origin model for algorithm'),
        ('_dataloader', 'set dataloader for algorithm'),
        ('_adaptor', 'set adaptor for algorithm'),
        ('_calib_iter', 'set calibration iteration for algorithm'),
    )
    for attr_name, message in prerequisites:
        assert getattr(self, attr_name), message

    logger.info('Start to pass algorithm to model...')
    for algorithm in self.algorithms:
        self._q_model = algorithm(self._origin_model,
                                  self._q_model,
                                  self._adaptor,
                                  self._dataloader,
                                  self._calib_iter)
    return self._q_model
def _update_best_result(self, best_result_file):
    """Pick the best trial so far and record it if it beats the stored one.

    Args:
        best_result_file (str): CSV path the winning trial row is written to
            whenever ``self.best_result`` is updated.

    Raises:
        Exception: if ``self.hpopt_trials`` is empty.
    """
    if not self.hpopt_trials:
        raise Exception('No trials loaded to get best result')

    acc_th = self.loss_function_config['acc_th']
    all_trials = pd.DataFrame(self.hpopt_trials.results)
    within_threshold = all_trials[all_trials.acc_loss <= acc_th]
    if within_threshold.empty:
        # If accuracy threshold is not reached, choose based on loss function
        ranked = all_trials.sort_values('loss', ascending=True)
    else:
        # If accuracy threshold reached, choose best latency
        ranked = within_threshold.reset_index(drop=True).sort_values(
            by=['lat_diff', 'acc_loss'], ascending=[False, True])
    candidate = ranked.reset_index(drop=True).loc[0]

    stored = self.best_result
    candidate_meets_th = candidate['acc_loss'] <= acc_th
    if not stored['best_loss']:
        # Nothing stored yet.
        should_update = True
    elif stored['best_acc_loss'] <= acc_th:
        # Stored result already meets the threshold: require a better lat_diff.
        should_update = candidate_meets_th and \
            candidate['lat_diff'] > stored['best_lat_diff']
    else:
        should_update = candidate_meets_th or \
            candidate['loss'] < stored['best_loss']

    if should_update:
        candidate.to_csv(best_result_file, header=False)
        stored['best_loss'] = candidate['loss']
        stored['best_acc_loss'] = candidate['acc_loss']
        stored['best_lat_diff'] = candidate['lat_diff']
        stored['quantization_ratio'] = candidate['quantization_ratio']

    logger.info(
        'Trial iteration end: {} / {} best loss: {} acc_loss: {} lat_diff: {} '
        'quantization_ratio: {}'.format(
            len(self.hpopt_trials.trials), self.max_trials,
            self.best_result['best_loss'],
            self.best_result['best_acc_loss'],
            self.best_result['best_lat_diff'],
            self.best_result['quantization_ratio']))
def save(self, root=None):
    """Save the quantized MXNet model.

    A ``HybridBlock`` is exported via ``export(root)``. Otherwise
    ``self._model`` is unpacked as ``(symbol, arg_params, aux_params)`` and
    written to ``root + '-symbol.json'`` and ``root + '-0000.params'``.

    Args:
        root (str, optional): output location; ``cfg.default_workspace``
            when empty.
    """
    target = os.path.abspath(
        os.path.expanduser(root or cfg.default_workspace))
    # NOTE(review): `target` is created as a directory and also used as a
    # file-name prefix below, so the files land next to it - confirm intent.
    os.makedirs(target, exist_ok=True)

    if isinstance(self._model, mx.gluon.HybridBlock):
        self._model.export(target)
        logger.info('Save quantized hybrid block model at %s' % target)
        return

    symbol, arg_params, aux_params = self._model
    symbol.save(target + '-symbol.json')
    # All parameters are copied to CPU context and stored in one dict under
    # 'arg:' / 'aux:' prefixed keys.
    params = {}
    for key_format, group in (('arg:%s', arg_params), ('aux:%s', aux_params)):
        for key, value in group.items():
            params[key_format % key] = value.as_in_context(mx.cpu())
    mx.nd.save(target + '-0000.params', params)
    logger.info('Save quantized symbol model at %s' % target)
def validate_graph_node(graph_def, node_names):
    """Validate nodes exist in the graph_def

    Args:
        graph_def (tf.compat.v1.GraphDef): tf.compat.v1.GraphDef object
        node_names (list of string): node names to be validated

    Returns:
        bool: False when ``node_names`` is empty or any name is missing
        from the graph, True otherwise.
    """
    if not node_names:
        return False

    # A set makes each membership test O(1) instead of scanning a list.
    known_names = {node.name for node in graph_def.node}
    for user_input_name in node_names:
        if user_input_name not in known_names:
            logger.info(
                "Input node name {} doesn't exist in the model, "
                "please check the yaml.".format(user_input_name))
            return False
    return True
def object_evaluation(self, tune_cfg, model):
    """Evaluate one tuning configuration and return its metrics.

    A configuration found in the tuning history is not re-run: its stored
    result is returned and ``self.cfg_evaluated`` is set. Otherwise the
    model is quantized, evaluated, and the outcome is added to the history.

    Args:
        tune_cfg (dict): per-parameter configuration for this trial.
        model: not used by this method; ``self.model`` is quantized.

    Returns:
        The stored ``history['result']`` or the freshly computed result
        (tagged with ``result['source'] = 'tpe'``).
    """
    op_cfgs = {
        'calib_iteration': int(self.calib_iter[0]),
        'op': {param: configs for param, configs in tune_cfg.items()},
    }

    # check if config was already evaluated
    history = self._find_history(op_cfgs)
    if history:
        self.last_tune_result = history['tune_result']
        self.last_qmodel = None
        self.cfg_evaluated = True
        logger.info('This tuning config was evaluated!')
        return history['result']

    self.last_qmodel = self.adaptor.quantize(op_cfgs, self.model,
                                             self.calib_dataloader)
    self.last_tune_result = self._evaluate(self.last_qmodel)
    logger.info('last_tune_result: {}'.format(self.last_tune_result))

    # Snapshot both objects before they are stored in the history.
    cfg_snapshot = copy.deepcopy(op_cfgs)
    result_snapshot = copy.deepcopy(self.last_tune_result)

    # prepare result
    tuned_first, tuned_second = self.last_tune_result[0], self.last_tune_result[1]
    result = self._compute_metrics(tune_cfg, tuned_first, tuned_second)
    result['source'] = 'tpe'
    self._add_tuning_history(cfg_snapshot, result_snapshot, result=result)
    logger.info(
        'Current iteration loss: {} acc_loss: {} lat_diff: {} quantization_ratio: {}'
        .format(result['loss'], result['acc_loss'], result['lat_diff'],
                result['quantization_ratio']))
    return result
def stop(self, timeout, trials_count):
    """Check if need to stop traversing the tuning space, either accuracy
    goal is met or timeout is reach.

    When the last config was actually evaluated, the best result / model is
    refreshed first (and saved through the adaptor), or the last quantized
    model is dropped.

    Args:
        timeout: stop as soon as a best result exists when this equals 0.
        trials_count (int): number compared against the ``max_trials``
            exit policy.

    Returns:
        bool: True if need stop, otherwise False
    """
    def describe(tune_result):
        # Render an (accuracy, measured value) pair, or 'n/a' when missing.
        if not tune_result:
            return 'n/a'
        return '[accuracy: {:.4f}, {}: {:.4f}]'.format(
            tune_result[0], str(self.objective.measurer), tune_result[1])

    if not self.cfg_evaluated:
        if self.objective.compare(self.best_tune_result, self.baseline):
            del self.best_tune_result
            del self.best_qmodel
            self.best_tune_result = self.last_tune_result
            self.best_qmodel = self.last_qmodel
            self.adaptor.save(self.best_qmodel,
                              os.path.dirname(self.deploy_path))
        else:
            del self.last_qmodel

    last_tune_msg = describe(self.last_tune_result)
    best_tune_msg = describe(self.best_tune_result)
    logger.info('Tune {} result is: {}, Best tune result is: {}'.format(
        trials_count, last_tune_msg, best_tune_msg))

    if timeout == 0 and self.best_tune_result:
        return True
    if trials_count >= self.cfg.tuning.exit_policy.max_trials:
        return True
    return False
def save(self, root):
    """Save the quantized MXNet model using ``root`` as the path prefix.

    A ``HybridBlock`` is exported via ``export(root)``. Otherwise
    ``self._model`` is unpacked as ``(symbol, arg_params, aux_params)`` and
    written to ``root + '-symbol.json'`` and ``root + '-0000.params'``.

    Args:
        root (str): path prefix for the written files.

    Raises:
        ValueError: if ``root`` has a directory part that does not exist.
    """
    parent_dir = os.path.split(root)[0]
    if parent_dir != '' and not os.path.exists(parent_dir):
        raise ValueError('"root" directory does not exists.')

    if isinstance(self._model, mx.gluon.HybridBlock):
        logger.info("Save MXNet HybridBlock quantization model!")
        self._model.export(root)
        logger.info('Saving quantized model at %s' % root)
        return

    symbol, arg_params, aux_params = self._model
    symbol.save(root + '-symbol.json')
    # All parameters are copied to CPU context and stored in one dict under
    # 'arg:' / 'aux:' prefixed keys.
    params = {}
    for key_format, group in (('arg:%s', arg_params), ('aux:%s', aux_params)):
        for key, value in group.items():
            params[key_format % key] = value.as_in_context(mx.cpu())
    mx.nd.save(root + '-0000.params', params)
    logger.info('Saving symbol into file at %s' % root)
import copy import os from pathlib import Path from functools import partial import numpy as np import hyperopt as hpo from hyperopt import fmin, hp, STATUS_OK, Trials from lpot.utils import logger from lpot.strategy.strategy import strategy_registry, TuneStrategy try: import pandas as pd except ImportError: pd = None logger.info( 'Pandas package is required for best result and CSV files generation.') @strategy_registry class TpeTuneStrategy(TuneStrategy): """The tuning strategy using tpe search in tuning space. Args: model (object): The FP32 model specified for low precision tuning. conf (Conf): The Conf class instance initialized from user yaml config file. q_dataloader (generator): Data loader for calibration, mandatory for post-training quantization. It is iterable and should yield a tuple (input, label) for calibration dataset containing label, or yield (input, _) for label-free calibration
def _weight_empirical(self):
    """Compute per-channel correction statistics for QuantizedConv2D filters.

    Indexes the fp32 graph nodes by name, then, for every node in
    ``self.node_mapping`` whose op contains 'QuantizedConv2D', compares the
    int8 filter constant with its fp32 counterpart channel by channel and
    rewrites the int8 constant when the computed difference is non-zero.

    Returns:
        ``self.input_graph``.
    """
    # Build the name -> node index of the fp32 graph; the first occurrence
    # of a name wins and later duplicates are only reported.
    for node in self.fp32_graph.node:
        if node.name not in self.fp32_node_mapping:
            self.fp32_node_mapping[node.name] = node
        else:
            self.logger.warning('Duplicate node name {}'.format(node.name))
    for node_name in self.node_mapping:
        node = self.node_mapping[node_name]
        node_op = node.op
        if 'QuantizedConv2D' not in node_op:
            continue
        # input[1] of the conv node is looked up as the int8 filter constant.
        int8_filter = self.node_mapping[self.get_node_name_from_input(
            node.input[1])]
        int8_value = tensor_util.MakeNdarray(
            int8_filter.attr['value'].tensor)
        # Move the last axis to the front so that indexing with [i] below
        # selects one slice along it.
        # presumably the last axis is the output channel - TODO confirm
        tr_int8_value = int8_value.transpose([3, 0, 1, 2])
        # The fp32 filter name is the int8 name up to '_qint8_const'.
        fp32_filter_name = self.get_node_name_from_input(
            node.input[1]).split('_qint8_const')[0]
        fp32_filter = self.fp32_node_mapping[fp32_filter_name]
        fp32_value = tensor_util.MakeNdarray(
            fp32_filter.attr['value'].tensor)
        tr_fp32_value = fp32_value.transpose([3, 0, 1, 2])
        # if bias fused, then offset to min/max filter should be 5
        offset = 5 if 'Bias' in node_op else 4
        min_filter_node = self.node_mapping[
            node.input[offset]]
        max_filter_node = self.node_mapping[
            node.input[offset + 1]]
        # A min tensor without dims is treated as a single channel; otherwise
        # its first dim size is the number of channels.
        channel_size = 1 if not min_filter_node.attr[
            'value'].tensor.tensor_shape.dim else min_filter_node.attr[
                'value'].tensor.tensor_shape.dim[0].size
        if channel_size == 1:
            # Single value: read it from float_val and wrap it in a list so
            # that [i] indexing works in the loop below.
            max_filter_tensor = []
            min_filter_tensor = []
            max_filter_tensor.append(
                (max_filter_node.attr['value'].tensor.float_val)[0])
            min_filter_tensor.append(
                (min_filter_node.attr['value'].tensor.float_val)[0])
        else:
            max_filter_tensor = tensor_util.MakeNdarray(
                max_filter_node.attr['value'].tensor)
            min_filter_tensor = tensor_util.MakeNdarray(
                min_filter_node.attr['value'].tensor)
        tr_quantized_fp32_value = np.zeros_like(tr_fp32_value)
        tr_corrected_int8_value = np.zeros_like(tr_int8_value)
        for i in range(channel_size):
            # NOTE(review): scale is 0 when both min and max are 0, which
            # would divide by zero further down - confirm it cannot happen.
            scale = max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])) / 127
            # De-quantized view of the int8 slice.
            tr_quantized_fp32_value[i] = tr_int8_value[i].astype(np.float64) * scale
            delta_mean = np.mean((tr_fp32_value[i] - tr_quantized_fp32_value[i]).flatten())
            # Parsed as `(std_fp32 / std_quantized) if std_quantized != 0
            # else 1`: the conditional expression binds looser than `/`.
            var_ratio = np.std(tr_fp32_value[i].flatten()) / \
                np.std(tr_quantized_fp32_value[i].flatten()) if \
                np.std(tr_quantized_fp32_value[i].flatten()) != 0 else 1
            tr_corrected_int8_value[i] = (var_ratio / scale) * (tr_fp32_value[i] + delta_mean)
        # NOTE(review): this transposes tr_int8_value, not
        # tr_corrected_int8_value, so the values computed in the loop above
        # are never read and `bias` below is the difference of the filter
        # with itself. Looks unintended - confirm before changing.
        correct_int8_value = tr_int8_value.transpose([1, 2, 3, 0])
        assert int8_value.shape == correct_int8_value.shape, \
            'correct filter shape should equal with origin filter shape'
        bias = int8_value.astype(np.float32) - correct_int8_value.astype(np.float32)
        if np.sum(bias) != 0 :
            logger.info('Correct int8 weight....')
            # Replace the int8 filter constant in place with the new values.
            int8_filter.attr['value'].CopyFrom(
                attr_value_pb2.AttrValue(
                    tensor=tensor_util.make_tensor_proto(
                        correct_int8_value, dtypes.qint8, int8_value.shape)))
    return self.input_graph
def traverse(self):
    """Tpe traverse logic.

    Unless warm-started, the hyperopt search space is prepared either from
    an existing tuning history (running one first iteration seeded with its
    best config) or from ``self.opwise_tune_cfgs``. Trials then run one
    ``fmin`` evaluation at a time until ``self.stop`` returns True.
    """
    logger.info('Start tpe strategy')
    # prepare log file
    log_dir = os.path.dirname(self.history_path)
    trials_file = os.path.join(log_dir, 'tpe_trials.csv')
    best_result_file = os.path.join(log_dir, 'tpe_best_result.csv')
    logger.debug('trials_file: {} '.format(trials_file) + \
                 'best_result_file:{}'.format(best_result_file))
    if Path(trials_file).exists():
        os.remove(trials_file)

    def run_fmin(max_evals):
        # One hyperopt call; max_evals bounds the total size of the trials.
        fmin(partial(self.object_evaluation, model=self.model),
             space=self.hpopt_search_space,
             algo=self._algo,
             max_evals=max_evals,
             trials=self.hpopt_trials,
             show_progressbar=False)

    def record_trials():
        # CSV output and best-result tracking need pandas.
        if pd is not None:
            self._save_trials(trials_file)
            self._update_best_result(best_result_file)

    status = True
    tuning_history = self._find_self_tuning_history()
    if tuning_history and not self.warm_start:
        # prepare loss function scaling (best result from basic can be used)
        best_lat, worse_acc_loss = 0, 0
        for history in tuning_history['history']:
            tuned = history['tune_result']
            acc_loss, lat_diff = self._calculate_acc_lat_diff(tuned[0], tuned[1])
            best_lat = max(best_lat, lat_diff)
            worse_acc_loss = max(worse_acc_loss, acc_loss)
        self._calculate_loss_function_scaling_components(
            worse_acc_loss, best_lat, self.loss_function_config)
        first_run_cfg = self.add_loss_to_tuned_history_and_find_best(
            tuning_history['history'])
        # Prepare hpopt config with best cfg from history
        self._configure_hpopt_search_space_and_params(first_run_cfg)
        # Run first iteration with best result from history
        trials_count = len(self.hpopt_trials.trials) + 1
        logger.info('First iteration start.')
        run_fmin(trials_count)
        record_trials()
        # Prepare full hpopt search space
        new_tune_cfgs = self._prepare_final_searchspace(
            first_run_cfg, self.opwise_tune_cfgs)
        status = self._configure_hpopt_search_space_and_params(new_tune_cfgs)
    elif not self.warm_start:
        self._calculate_loss_function_scaling_components(
            0.01, 2, self.loss_function_config)
        status = self._configure_hpopt_search_space_and_params(
            self.opwise_tune_cfgs)

    if not status:
        logger.info('Can\'t create search space for input model!')
        return

    trials_count = len(self.hpopt_trials.trials) + 1
    # get fp32 model baseline
    if self.baseline is None:
        logger.info('Getting FP32 model baseline...')
        self.baseline = self._evaluate(self.model)
        self._add_tuning_history()
    if self.baseline:
        baseline_text = '[{:.4f}, {:.4f}]'.format(*self.baseline)
    else:
        baseline_text = 'None'
    logger.info('FP32 baseline is: ' + baseline_text)
    if not self.objective.relative:
        self.loss_function_config['acc_th'] = \
            (self.baseline[0] - self.objective.acc_goal) / self.baseline[0]

    # start trials
    while True:
        self.cfg_evaluated = False
        logger.info('Trial iteration start: {} / {}'.format(
            trials_count, self.max_trials))
        run_fmin(trials_count)
        trials_count += 1
        record_trials()
        self._save()
        if self.stop(self.cfg.tuning.exit_policy.timeout, trials_count):
            break