def __init__(self, xgraph, inputs_func, work_dir=os.path.join(os.getcwd(), 'work')):
    """Initialize the external quantizer state.

    xgraph      -- the XGraph model to be quantized
    inputs_func -- callable that yields calibration input batches
    work_dir    -- directory where intermediate artifacts are written
    """
    super(ExternalQuantizer, self).__init__(xgraph, inputs_func, work_dir)
    # Quantization results are collected per named partition.
    self.q_output = QuantizerOutput(name=xgraph.get_name())
    self.res = {}
    self.partition_graphs = {}
    # Generator used to serialize subgraphs to frozen TF graphs.
    self.gen = TfGenerator()
def __init__(self, xgraph, inputs_func, work_dir=os.path.join(os.getcwd(), 'work'), quant_iter=1, **kwargs):
    """Initialize the DECENT quantizer state.

    xgraph      -- the XGraph model to be quantized
    inputs_func -- callable that yields calibration input batches
    work_dir    -- directory where intermediate artifacts are written
    quant_iter  -- number of calibration iterations for decent_q
    kwargs      -- extra options forwarded to the TF graph generator
    """
    super(DECENTQuantizer, self).__init__(xgraph, inputs_func, work_dir)
    self.kwargs = kwargs
    self.quant_iter = quant_iter
    # Quantization results are collected per named partition.
    self.q_output = QuantizerOutput(name=xgraph.get_name())
    self.res = {}
    self.partition_graphs = {}
    # Generator used to serialize subgraphs to frozen TF graphs.
    self.gen = TfGenerator()
class ExternalQuantizer(XGraphBaseSubgraphQuantizer, ABC):
    """Base class for quantizers that delegate to an external tool.

    Serializes the DPU-compatible partition of the XGraph to a frozen
    TensorFlow graph and records the resulting quantization artifacts in
    the XGraph's meta attributes.
    """

    xgraph_factory = XGraphFactory()
    xgraph_partitioner = XGraphPartitioner()

    def __init__(self, xgraph, inputs_func, work_dir=os.path.join(os.getcwd(), 'work')):
        """Set up quantizer state for the given XGraph.

        xgraph      -- the XGraph model to be quantized
        inputs_func -- callable that yields calibration input batches
        work_dir    -- directory where intermediate artifacts are written
        """
        super(ExternalQuantizer, self).__init__(xgraph, inputs_func, work_dir)
        self.gen = TfGenerator()
        self.partition_graphs = {}
        self.res = {}
        self.q_output = QuantizerOutput(name=xgraph.get_name())

    def _propagate_quant_info(self, xgraph):
        """Propagate per-layer quantization info through the graph.

        Layers without 'vai_quant' attributes get empty placeholders;
        then known output quantization info is pushed forward to
        consumers ('vai_quant_in') and known input quantization info is
        pushed backward to producers ('vai_quant_out').
        """
        # Set up empty vai_quant_in/out for every layer w/o vai_quant.
        for layer in xgraph.get_layers():
            if 'vai_quant' not in layer.attrs:
                layer.attrs['vai_quant'] = ['vai_quant_in', 'vai_quant_out']
                layer.attrs['vai_quant_in'] = ''
                layer.attrs['vai_quant_out'] = ''
        for layer in xgraph.get_layers():
            # If the layer has non-empty output quant info, propagate it
            # to its consumers. Entries come in pairs per top layer.
            if layer.attrs['vai_quant_out'] != '':
                l_vqo = layer.attrs['vai_quant_out']
                for t_idx, t_name in enumerate(layer.tops):
                    t_layer = xgraph.get(t_name)
                    # Only fill in consumers that have no input quant yet.
                    if t_layer.attrs['vai_quant_in'] == '':
                        t_vqi = [l_vqo[2 * t_idx], l_vqo[2 * t_idx + 1]]
                        t_layer.attrs['vai_quant_in'] = t_vqi
            # If the layer has non-empty input quant info, propagate it
            # to its producers. Entries come in pairs per bottom layer.
            if layer.attrs['vai_quant_in'] != '':
                l_vqi = layer.attrs['vai_quant_in']
                for b_idx, b_name in enumerate(layer.bottoms):
                    b_layer = xgraph.get(b_name)
                    if b_layer.attrs['vai_quant_out'] == '':
                        b_vqo = [l_vqi[2 * b_idx], l_vqi[2 * b_idx + 1]]
                        b_layer.attrs['vai_quant_out'] = b_vqo

    def quantize(self):
        """Serialize the DPU partition, run base quantization and record
        quantization artifacts in the XGraph meta attributes.

        Returns the quantized XGraph.
        Raises ValueError if the model has more than one partition.
        """
        # NOTE For Conv2DTranspose layers we need the specific batch size
        # in tensorflow 1.13
        batch_size = list(self.inputs_func(0).values())[0].shape[0]
        fs = self.gen.generate(
            self.xgraph,
            'graph',
            subgraphs_only=True,
            layout='NHWC',
            batch_size=batch_size)
        # Raise instead of assert: asserts are stripped under `python -O`
        # and this is genuine input validation (consistent with
        # DECENTQuantizer.quantize).
        if len(fs) != 1:
            raise ValueError("Too many partitions: {}".format(len(fs)))
        partition_key = list(fs.keys())[0]
        pb_path = list(fs.values())[0]
        self.partition_graphs[partition_key] = pb_path
        q_xgraph = super(ExternalQuantizer, self).quantize()
        self.xgraph.meta_attrs["is_quantized"] = True
        for qkey in self.q_output.keys():
            if 'quant_keys' not in self.xgraph.meta_attrs:
                self.xgraph.meta_attrs['quant_keys'] = [qkey]
            else:
                self.xgraph.meta_attrs['quant_keys'].append(qkey)
            quant_file = self.q_output.get_q_file(qkey)
            quant_info_file = self.q_output.get_q_info(qkey)
            quant_orig_pb = self.q_output.get_orig_pb(qkey)
            self.xgraph.meta_attrs[qkey] = {
                'q_file': quant_file,
                'q_info': quant_info_file,
                'orig_pb': quant_orig_pb}
        return q_xgraph
class DECENTQuantizer(XGraphBaseSubgraphQuantizer):
    """Quantizer that uses the Vitis-AI decent_q tool (tf.contrib.decent_q)
    to quantize the DPU-compatible partition of an XGraph."""

    try:
        import tensorflow as tf
        if hasattr(tf, 'contrib') and hasattr(tf.contrib, 'decent_q'):
            from tensorflow.contrib import decent_q
        else:
            warnings.warn("Could not import decent_q module. Please check"
                          " if installed.")
    except ImportError:
        warnings.warn("Could not import decent_q module. Please check"
                      " if installed.")

    xgraph_factory = XGraphFactory()
    xgraph_partitioner = XGraphPartitioner()

    def __init__(self, xgraph, inputs_func, work_dir=os.path.join(os.getcwd(), 'work'), quant_iter=1, **kwargs):
        """Set up quantizer state.

        xgraph      -- the XGraph model to be quantized
        inputs_func -- callable that yields calibration input batches
        work_dir    -- directory where intermediate artifacts are written
        quant_iter  -- number of calibration iterations for decent_q
        kwargs      -- extra options forwarded to the TF graph generator
        """
        super(DECENTQuantizer, self).__init__(xgraph, inputs_func, work_dir)
        self.quant_iter = quant_iter
        self.gen = TfGenerator()
        self.partition_graphs = {}
        self.res = {}
        self.kwargs = kwargs
        self.q_output = QuantizerOutput(name=xgraph.get_name())

    def quantize_subgraph(self, xgraph, inputs, input_names, output_names):
        # type: (XGraph, Dict[str, numpy.ndarray]) -> None
        """Quantize one subgraph partition with the given calibration inputs."""
        # Import Tensorflow only when needed to avoid strict dependency
        import tensorflow as tf
        frozen_graph = self.partition_graphs[xgraph.get_name()]
        logger.info("Load frozen graph from: {}".format(frozen_graph))
        input_graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(frozen_graph, "rb") as f:
            input_graph_def.ParseFromString(f.read())
        logger.info("Quantization input: {} and output names: {}".format(
            input_names, output_names))
        input_shapes = [X.shapes.tolist() for X in xgraph.get_input_layers()]

        # decent_q calls this once per calibration iteration; every
        # iteration feeds the same captured inputs. (Renamed parameter:
        # the original shadowed the builtin `iter`; the unused numpy
        # import and needless `nonlocal` were dropped.)
        def inputs_func(it):
            return inputs

        logger.info("START decent quantization for graph partition: {}".format(
            xgraph.get_name()))
        q_config = self.decent_q.QuantizeConfig(input_nodes=input_names,
                                                output_nodes=output_names,
                                                input_shapes=input_shapes,
                                                output_dir=self.work_dir,
                                                method='1',
                                                calib_iter=self.quant_iter)
        self.decent_q.quantize_frozen(input_graph_def, inputs_func, q_config)
        netcfg = os.path.join(self.work_dir, "deploy_model.pb")
        q_eval_file = os.path.join(self.work_dir, "quantize_eval_model.pb")
        quant_info_file = os.path.join(
            self.work_dir, 'quant_info_{}.txt'.format(xgraph.get_name()))
        self._save_quant_info(netcfg, quant_info_file)
        self.q_output.add(xgraph.get_name(), netcfg, quant_info_file,
                          frozen_graph, q_eval_file)
        # TODO
        # Add quantization info to corresponding XLayers
        self._add_quant_info_to_xgraph(netcfg)

    def quantize(self) -> None:
        """Quantize the XGraph model using the decent_q quantizer"""
        # NOTE For Conv2Dtranspose layers we need the specific batch size in
        # tensorflow 1.13
        batch_size = list(self.inputs_func(0).values())[0].shape[0]
        fs = self.gen.generate(self.xgraph, 'graph', subgraphs_only=True,
                               layout='NHWC', batch_size=batch_size,
                               out_dir=self.work_dir, **self.kwargs)
        if len(fs) != 1:
            raise ValueError("DECENT quantization currently only supports"
                             " models with one DPU compatible partition,"
                             " but got: {}".format(len(fs)))
        partition_key = list(fs.keys())[0]
        pb_path = list(fs.values())[0]
        self.partition_graphs[partition_key] = pb_path
        q_xgraph = super(DECENTQuantizer, self).quantize()
        self.xgraph.meta_attrs["is_quantized"] = True
        # Record the artifact locations for every quantized partition.
        for qkey in self.q_output.keys():
            if 'quant_keys' not in self.xgraph.meta_attrs:
                self.xgraph.meta_attrs['quant_keys'] = [qkey]
            else:
                self.xgraph.meta_attrs['quant_keys'].append(qkey)
            quant_file = self.q_output.get_q_file(qkey)
            quant_info_file = self.q_output.get_q_info(qkey)
            quant_orig_pb = self.q_output.get_orig_pb(qkey)
            quant_eval_file = self.q_output.get_q_eval(qkey)
            self.xgraph.meta_attrs[qkey] = {
                'q_file': quant_file,
                'q_info': quant_info_file,
                'orig_pb': quant_orig_pb,
                'q_eval': quant_eval_file
            }
        self.xgraph.set_quantizer_output(self.q_output)
        return q_xgraph

    def _add_quant_info_to_xgraph(self, deploy_frozen_graph: str) -> None:
        """Retrieve the quantization info from the provided quantized model
        and add the information to the corresponding XLayers."""
        # Import tensorflow only when needed to avoid strict dependency
        import tensorflow as tf
        input_graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(deploy_frozen_graph, "rb") as f:
            input_graph_def.ParseFromString(f.read())
        for idx, node in enumerate(input_graph_def.node):
            if node.name in self.xgraph:
                X = self.xgraph.get(node.name)
                X.attrs['vai_quant_idx'] = idx + 1
                # ipos/opos/wpos/bpos hold input/output/weight/bias
                # quantization positions respectively.
                if 'ipos' in node.attr.keys():
                    X.attrs['vai_quant'] = ['vai_quant_in']
                    X.attrs['vai_quant_in'] = \
                        [int(v) for v in node.attr['ipos'].list.i]
                if 'opos' in node.attr.keys():
                    X.attrs['vai_quant'].append('vai_quant_out')
                    X.attrs['vai_quant_out'] = \
                        [int(v) for v in node.attr['opos'].list.i]
                if 'wpos' in node.attr.keys():
                    X.attrs['vai_quant'].append('vai_quant_weights')
                    X.attrs['vai_quant_weights'] = \
                        [int(v) for v in node.attr['wpos'].list.i]
                if 'bpos' in node.attr.keys():
                    X.attrs['vai_quant'].append('vai_quant_biases')
                    X.attrs['vai_quant_biases'] = \
                        [int(v) for v in node.attr['bpos'].list.i]

    def _save_quant_info(self, deploy_frozen_graph, filename):
        # type: (str, str) -> None
        """Write the quantization info of the provided quantized model to
        `filename` as one space-separated line per quantized op."""
        quant_info = self._get_quant_info(deploy_frozen_graph)
        lines = [[q_op['idx']] + [q_op['name']] +
                 [str(i) for i in q_op['ipos']] +
                 [str(i) for i in q_op['opos']] +
                 [str(i) for i in q_op['wpos']] +
                 [str(i) for i in q_op['bpos']]
                 for q_op in quant_info]
        s = '\n'.join([' '.join(line) for line in lines])
        with open(filename, 'w') as f:
            f.write(s)

    def _get_quant_info(self, deploy_frozen_graph):
        # type: (str) -> List[dict]
        """Retrieve the quantization info from the provided quantized model."""
        # import tensorflow only when needed to avoid strict dependency
        import tensorflow as tf
        quant_info = []
        input_graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(deploy_frozen_graph, "rb") as f:
            input_graph_def.ParseFromString(f.read())
        for idx, node in enumerate(input_graph_def.node):
            q_op = {
                'idx': str(idx + 1),
                'name': node.name,
                'ipos': [],
                'opos': [],
                'wpos': [],
                'bpos': []
            }
            if 'ipos' in node.attr.keys():
                q_op['ipos'].extend(
                    [int(v) for v in node.attr['ipos'].list.i])
            if 'opos' in node.attr.keys():
                q_op['opos'].extend(
                    [int(v) for v in node.attr['opos'].list.i])
            if 'wpos' in node.attr.keys():
                q_op['wpos'].extend(
                    [int(v) for v in node.attr['wpos'].list.i])
            if 'bpos' in node.attr.keys():
                q_op['bpos'].extend(
                    [int(v) for v in node.attr['bpos'].list.i])
            quant_info.append(q_op)
        return quant_info

    def eval(self, val_dir, gold_file, synset_words, batch_size,
             nb_batches, class_num=1000, gpu=0):
        """Evaluate top-1/top-5 accuracy of the quantized eval model on a
        labeled validation set (TF 1.x graph-mode API)."""
        # Import locally like the other methods do: the original body used
        # `tf` without any import in scope here.
        import tensorflow as tf
        input_fn_data = {
            "prep_key": self.data_prep_key,
            "dir": val_dir,
            "batch": batch_size,
            "inputs": self.xgraph.get_input_names()
        }
        with open(os.path.join(FILE_PATH, 'calibration.json'), 'w') as f:
            json.dump(input_fn_data, f)
        with open(gold_file) as f:
            val_set = [line.strip('\n').split(' ') for line in f.readlines()]
        frozen_graph_file = os.path.join(self.output_dir,
                                         "quantize_eval_model.pb")
        # TODO: multi-input / multi-output evaluation is not supported.
        assert (len(self.xgraph.get_input_names()) == 1)
        assert (len(self.xgraph.get_output_names()) == 1)
        input_node = self.xgraph.get_input_names()[0]
        output_node = self.xgraph.get_output_names()[0]
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
        input_graph_def = tf.Graph().as_graph_def()
        input_graph_def.ParseFromString(
            tf.gfile.FastGFile(frozen_graph_file, "rb").read())
        tf.import_graph_def(input_graph_def, name='')
        # Get input tensors
        input_tensor = tf.get_default_graph()\
            .get_tensor_by_name(input_node + ':0')
        # Calculate accuracy
        output = tf.get_default_graph().get_tensor_by_name(output_node + ':0')
        prediction = tf.reshape(output, [batch_size, class_num])
        # Start evaluation
        logger.info("Start Evaluation for {} Batches...".format(nb_batches))
        with tf.Session() as sess:
            progress = ProgressBar()
            top1_sum_acc = 0
            top5_sum_acc = 0
            for batch_idx in progress(range(0, nb_batches)):
                input_data = decent_prepfn.input_fn(batch_idx)
                images = input_data[input_node]
                # Fixed broken logging call: the original passed `images`
                # without a format placeholder.
                logger.debug("IMAGES: %s", images)
                # Gold labels are the second column of the gold file.
                labels = [
                    elem[1] for elem in
                    val_set[batch_idx * batch_size:(batch_idx + 1) * batch_size]
                ]
                feed_dict = {input_tensor: images}
                raw_predictions = sess.run(prediction, feed_dict)
                logger.debug(raw_predictions)
                top_1 = classification.get_top_k_accuracy(
                    raw_predictions, synset_words, 1, labels)
                top_5 = classification.get_top_k_accuracy(
                    raw_predictions, synset_words, 5, labels)
                top1_sum_acc += top_1
                top5_sum_acc += top_5
                logger.debug("int: {}, {}".format(top_1, top_5))
        final_top1_acc = top1_sum_acc / nb_batches
        final_top5_acc = top5_sum_acc / nb_batches
        print("Accuracy: Top1: {}, Top5: {}".format(final_top1_acc,
                                                    final_top5_acc))

    def dump(self, img_dir, input_names, max_dump_batches=1, dump_float=0):
        """Dump quantized layer outputs with the `decent_q dump` CLI.

        TODO: input_names
        """
        input_fn_data = {
            "prep_key": self.data_prep_key,
            "dir": img_dir,
            "batch": 1,
            "inputs": input_names
        }
        with open(os.path.join(FILE_PATH, 'calibration.json'), 'w') as f:
            json.dump(input_fn_data, f)
        frozen_graph = os.path.join(self.output_dir, 'quantize_eval_model.pb')
        command = """
        decent_q dump \
            --input_frozen_graph {} \
            --input_fn decent_prepfn.input_fn \
            --max_dump_batches {} \
            --dump_float {} \
            --output_dir {}
        """.format(frozen_graph, max_dump_batches, dump_float,
                   self.output_dir)
        print("COMMAND", command)
        process = subprocess.Popen(command.split(),
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()
        print(output, error)
class DPUCompiler(XGraphBaseCompiler):
    """Compile the quantized DPU partition of an XGraph with the
    `vai_c_tensorflow` compiler and record the compiler outputs."""

    xgraph_partitioner = XGraphPartitioner()
    xgraph_factory = XGraphFactory()
    tf_generator = TfGenerator()

    def __init__(self, xgraph, target, arch,
                 work_dir=os.path.join(os.getcwd(), 'work'),
                 build_dir=None, mode='debug'):
        """Set up compiler state.

        xgraph    -- quantized XGraph (must carry a quantizer output)
        target    -- compilation target identifier
        arch      -- path to the DPU arch JSON file (must exist)
        work_dir  -- directory for intermediate files
        build_dir -- output directory (defaults to work_dir)
        mode      -- 'debug' or release-style mode string

        Raises ValueError if the arch file does not exist or if the model
        has more than one network configuration.
        """
        super(DPUCompiler, self).__init__(xgraph)
        if not os.path.isfile(arch):
            raise ValueError("Arch file: {} does not exist".format(arch))
        warnings.warn("This compilation only works with one network"
                      " configuration at the moment!!")
        q_output = self.xgraph.get_quantizer_output()
        self.netcfgs = {
            q_key: q_output.get_orig_pb(q_key)
            for q_key in q_output.keys()
        }
        self.quant_info = {
            q_key: q_output.get_q_info(q_key)
            for q_key in q_output.keys()
        }
        # Raise instead of assert: asserts are stripped under `python -O`
        # and this is genuine input validation.
        if len(self.netcfgs) != 1:
            raise ValueError("DPUCompiler only supports one network"
                             " configuration, but got: {}"
                             .format(len(self.netcfgs)))
        self.work_dir = work_dir
        self.build_dir = build_dir if build_dir is not None else work_dir
        self.target = target
        self.arch = arch
        self.mode = mode
        self.c_output = CompilerOutput(name=xgraph.get_name())

    def Getopts(self, input_shapes):
        """Return the compiler option dictionary for the given
        placeholder input shapes.

        NOTE: the original dict literal contained duplicate keys
        ("maximumasrelu" twice, "pipelineconvmaxpool" with both False and
        True); Python keeps the last occurrence, so the effective values
        preserved here are maximumasrelu=True, pipelineconvmaxpool=True.
        """
        return {
            "maximumasrelu": True,
            "pipelineconvmaxpool": True,
            "bytesperpixels": 1,
            "dsp": 96,
            "memory": 9,
            "ddr": "256",
            "cpulayermustgo": True,
            "forceweightsfullyconnected": True,
            "mixmemorystrategy": True,
            'bridges': ['bytype', 'Concat'],
            "usedeephi": True,
            'placeholdershape': input_shapes
        }

    def compile(self):
        # type: () -> None
        """Run layout transformation, serialize the DPU subgraph, invoke
        vai_c_tensorflow and attach the compiler output to the XGraph."""
        layout_transform_pass = \
            XGraphLayoutTransformationPass('NHWC', target=self.target)
        self.xgraph = layout_transform_pass.execute(self.xgraph,
                                                    subgraphs_only=False)
        quant_info_file = list(self.quant_info.values())[0]  # quant info file
        subxg_layers = DPUCompiler.xgraph_partitioner\
            .get_subgraphs(self.xgraph)[0].subgraph_data
        xgraph = DPUCompiler.xgraph_factory.build_from_xlayer(subxg_layers)
        net_name = list(self.netcfgs.keys())[0]
        fs = DPUCompiler.tf_generator.generate(xgraph, 'graph',
                                               subgraphs_only=True,
                                               layout='NHWC',
                                               batch_size=1,
                                               placeholder=True,
                                               out_dir=self.work_dir)
        netcfg = list(fs.values())[0]
        input_names = xgraph.get_input_names()
        input_shapes = [
            xgraph.get(in_name).shapes.tolist()[:]
            for in_name in input_names
        ]
        output_names = xgraph.get_output_names()
        if len(input_names) > 1:
            raise NotImplementedError(
                "DPUCompiler only handles models with"
                " one input at the moment but found: {}".format(
                    len(input_names)))
        # Replace dynamic (-1) dimensions with 1 for the placeholder shape.
        opt_input_shapes = {
            in_name: [e if e != -1 else 1 for e in input_shape]
            for in_name, input_shape in zip(input_names, input_shapes)
        }
        opts = self.Getopts(opt_input_shapes)
        if not os.path.isfile(quant_info_file):
            raise ValueError(
                "quant file: {} does not exist".format(quant_info_file))
        opts['quant_cfgfile'] = quant_info_file
        opts = str(opts)
        command = """
        vai_c_tensorflow \
            --frozen_pb {} \
            --arch {} \
            --output_dir {} \
            --net_name {}\
            --options "{}"
        """.format(netcfg, self.arch, self.build_dir, 'compiler', opts)
        logger.info("command: {}".format(command))
        process = subprocess.Popen(command,
                                   shell=True,
                                   cwd=FILE_PATH,
                                   stdout=subprocess.PIPE)
        output, error = process.communicate()
        if output is not None:
            output = output.decode('utf-8')
            if 'SUCCESSFUL COMPILATION' not in output:
                logger.info(output)
                raise ValueError('Compilation failed. Please see the log for'
                                 ' more details')
        if error is not None:
            error = error.decode('utf-8')
        logger.debug("Output: {}".format(output))
        logger.debug("Error: {}".format(error))
        compiler_json_file = self.build_dir + '/compiler.json'
        with open(compiler_json_file) as json_file:
            json_graph = json.load(json_file)
        graph_inputs = json_graph["inputs"]
        graph_outputs = json_graph["outputs"]
        logger.debug("{} {}".format(input_names, graph_inputs))
        logger.debug("{} {}".format(output_names, graph_outputs))
        in_map = {in_name: in_name for in_name in input_names}
        out_nodes = [
            graph_output['previous_layers'][0]
            for graph_output in graph_outputs
        ]
        # NOTE(review): out_node_merged is computed but never consumed
        # (out_map is built from out_nodes below) — kept because the
        # list-comprehension lookup also validates that every out node
        # exists in the compiled network description.
        out_node_merged = []
        for out_node in out_nodes:
            out_node_merged.append([
                layer['merged'][-1] for layer in json_graph['network']
                if layer['name'] == out_node
            ][0])
        out_map = {out_name: t for out_name, t in zip(output_names, out_nodes)}
        self.c_output.add(net_name, ['dpuv1lib.so'], in_map, out_map)
        self.xgraph.set_compiler_output(self.c_output)
        # TODO
        self.xgraph.meta_attrs['compiled'] = True
        self.xgraph.meta_attrs['compiler_libs'] = ['dpuv1lib.so']
        self.xgraph.meta_attrs['compiler_in_map'] = in_map
        self.xgraph.meta_attrs['compiler_out_map'] = out_map
        return self.xgraph