def execute(self, in_tensors: Sequence[np.ndarray], step_idx_limit=None, only_yield_step=False, qmode: QuantizationMode = None, all_details=None, yield_fusions=False, silent=False): if qmode is None: qmode = QuantizationMode.none() if qmode.is_step_all: iterator = [(qoutput, qdetails, fnode) for _, _, _, _, qoutput, qdetails, fnode in self.execute_qnoq_iterator(in_tensors, yield_fusions=yield_fusions, step_idx_limit=step_idx_limit, silent=silent)] else: iterator = [(output_tensors, details, fnode) for _, _, fnode, output_tensors, details in self.execute_iterator(in_tensors, step_idx_limit=step_idx_limit, qmode=qmode, yield_fusions=yield_fusions, only_yield_step=only_yield_step, yield_details=all_details is not None, silent=silent)] outputs = [] if yield_fusions: fusion_outputs = [] if all_details is not None: fusion_details = [] for output_tensors, details, fnode in iterator: if yield_fusions: if fnode: fusion_outputs.append([output_tensor.copy() for output_tensor in output_tensors]) if all_details is not None: fusion_details.append(details) else: outputs.append({ 'outputs': outputs.append([output_tensor.copy() for output_tensor in output_tensors]), 'fusion_outputs': fusion_outputs.copy(), }) fusion_outputs.clear() if all_details is not None: all_details.append({ 'details': details, 'fusion_details': fusion_details.copy() }) fusion_details.clear() else: outputs.append([output_tensor.copy() for output_tensor in output_tensors]) if all_details is not None: all_details.append(details) return outputs
def test_validate_mn1_quantized1(mn1q_graph, mn1f_graph): tfi = TfliteImporter() Gf = tfi.create_graph(mn1f_graph, {'load_tensors': True}) Gf.add_dimensions() Gf.adjust_order() matcher = get_pow2_match_group() matcher.match(Gf) Gf.add_dimensions() tfi = TfliteImporter() G = tfi.create_graph(mn1q_graph, { 'load_tensors': True, 'load_quantization': True }) G.add_dimensions() G.adjust_order() matcher = get_pow2_match_group() matcher.match(G) G.add_dimensions() fpnode = Gf.graph_state.steps[2]['node'] fpcnode = fpnode.contained_filters()[0] qpnode = G.graph_state.steps[2]['node'] qpcnode = qpnode.contained_filters()[0] nid = NodeId(qpnode, qpcnode) qrec = G.quantization[nid] dqbiases = qrec.biases_q.get_dequantized(qpcnode.biases) assert np.max(np.abs(fpcnode.biases - dqbiases)) < 0.1 input_tensor = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy') input_tensor = input_tensor.reshape((224, 224, 3)).transpose((2, 0, 1)) executer = GraphExecuter(Gf) foutput_tensors = executer.execute([input_tensor]) foutput_tensor = np.load( 'tests/mobv1_valid/output_COCO_val2014_000000362331_0_float.npy') assert np.max(np.abs(foutput_tensors[-1][0] - foutput_tensor[0])) < 0.0001 executer = GraphExecuter(G, qrecs=G.quantization) qfroutput_tensors = executer.execute([input_tensor], qmode=QuantizationMode.none()) assert np.max(np.abs(qfroutput_tensors[-1][0] - foutput_tensor[0])) < 0.2 executer = GraphExecuter(G, qrecs=G.quantization) qroutput_tensors = executer.execute( [input_tensor], qmode=QuantizationMode.all_dequantize()) output_tensor = np.load( 'tests/mobv1_valid/output_COCO_val2014_000000362331_0_quant.npy') # assert np.max(np.abs(qroutput_tensors[-1][0] - output_tensor[0])) < 0.16 assert np.max(np.abs(qroutput_tensors[-1][0] - output_tensor[0])) < 0.28
def get_base_inputs(self, nodes, progress, quantize): if self._base_inputs is None: base_inputs = self._input_files for node in nodes: node.use_compressed = False progress( f"validation without compression {'quantized: ' if quantize else ': '}", False) base_inputs, good_margin, bad_inputs, bad_margin = self.validate( QuantizationMode.all_dequantize() if quantize else QuantizationMode.none(), inputs=self._input_files, progress=lambda pred: progress('+' if pred else '-', False)) progress('', True) progress( f'good {len(base_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})', True) self._base_inputs = base_inputs else: base_inputs = self._base_inputs return base_inputs
def test_validate_mn1_dequant_quantfloat(mn1q_graph): # load dequantized graph same results as quant graph and float execution tfi = TfliteImporter() G = tfi.create_graph(mn1q_graph, { 'load_tensors': True, 'load_quantization': True }) G.add_dimensions() G.adjust_order() matcher = get_pow2_match_group() matcher.match(G) G.add_dimensions() Gdq = tfi.create_graph(mn1q_graph, { 'load_tensors': True, 'load_dequantized': True }) Gdq.add_dimensions() Gdq.adjust_order() matcher = get_pow2_match_group() matcher.match(Gdq) Gdq.add_dimensions() input_tensor = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy') input_tensor = input_tensor.reshape((224, 224, 3)).transpose((2, 0, 1)) executer = GraphExecuter(G, qrecs=G.quantization) qfoutput_tensors = executer.execute([input_tensor], qmode=QuantizationMode.none()) executer = GraphExecuter(Gdq) dfoutput_tensors = executer.execute([input_tensor]) diff_list = [ np.abs(df[0] - qf[0]) for df, qf in zip(dfoutput_tensors, qfoutput_tensors) ] max_diff = [np.max(elem) for elem in diff_list] assert max(max_diff) < 0.003
def do_validate(self, args: argparse.Namespace): """ Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images folder). Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of digits must be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits must be 3, "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0, 'file1':label1, ...}) and given to the function with --label_json """ self._check_graph() if args.quantize: self._check_quantized() qmode = QuantizationMode.all_dequantize() else: qmode = QuantizationMode.none() LOG.info("quantization mode - %s", qmode) input_args = self._get_input_args(args) good_predictions = [] good_margin = 0 bad_margin = 0 number_samples = sum(1 for _ in glob_input_files(args.input_files)) if args.vww_instances_file: validation = ValidateFromVWWInstances( args.vww_instances_file, class_thr=args.class_thr, binary_classification=args.binary_classification) elif args.label_json: validation = ValidateFromJSON( args.label_json, class_thr=args.class_thr, binary_classification=args.binary_classification) elif args.class_number is not None: validation = ValidateFromClass( args.class_number, class_thr=args.class_thr, binary_classification=args.binary_classification) else: validation = ValidateFromName( class_thr=args.class_thr, binary_classification=args.binary_classification) try: ExecutionProgress.start() for i, file_per_input in enumerate( glob_input_files(args.input_files, self.G.num_inputs)): if not args.silent: LOG.info("input file %s", file_per_input) data = [ import_data(input_file, **input_args) for input_file in file_per_input ] executer = GraphExecuter(self.G, qrecs=self.G.quantization) outputs = executer.execute(data, qmode=qmode, silent=args.silent) predicted_values = np.asarray( outputs[args.prediction_step_idx]) good_prediction, class_predicted, real_class, margin = validation.validate( file_per_input[0], predicted_values) good_predictions.append(good_prediction) if good_prediction: good_margin += margin else: bad_margin += margin if not args.silent: LOG.info( 'Prediction is %s predicted %s correct %s margin %s', good_prediction, class_predicted, real_class, margin) if not i % args.progress_every and i > 0: LOG.info( 'ACCURACY: %.3f %%', 100 * sum(good_predictions) / len(good_predictions)) ExecutionProgress.progress(i, number_samples) ExecutionProgress.end() except (KeyboardInterrupt, SystemExit): pass self.py_locals['labels'] = validation.labels self.py_locals['predictions'] = validation.predictions cnt = len(good_predictions) if cnt: ngood = sum(good_predictions) nbad = cnt - ngood if nbad: LOG.info( "%s out of %s predicted falsly with %s average margin", nbad, cnt, bad_margin / nbad) if ngood: LOG.info( "%s out of %s predicted correctly with %s average margin", ngood, cnt, good_margin / ngood) accuracy_rate = 100 * sum(good_predictions) / len(good_predictions) LOG.info('Total accuracy: %.3f %%', accuracy_rate)
def do_dump(self, args: argparse.Namespace): """ Dump the activations resulting from running an input file through the graph. You can use the current quantization settings and can also just quantify one specific step of the graph.""" self._check_graph() dequantize = args.dequantize if args.dequantize is not None\ else not (args.pickle or args.save) if args.quantize or args.quantize_step or args.quantize_all_steps: self._check_quantized() if args.quantize: if dequantize: qmode = QuantizationMode.all_dequantize() else: qmode = QuantizationMode.all() elif args.quantize_all_steps: qmode = QuantizationMode.step_all() dequantize = True else: qmode = QuantizationMode.step(args.quantize_step) elif args.quantize_and_dequantize: qmode = QuantizationMode.all_float_quantize_dequantize() else: qmode = QuantizationMode.none() if args.step is not None: step = args.step num_steps = len(self.G.graph_state.steps) if step < 0: step = num_steps + step if step < 0 or step > num_steps: self.perror("step must be from {} to {}".format( -num_steps, num_steps)) return else: step = None input_args = self._get_input_args(args) pickles = [] for file_per_input in glob_input_files(args.input_files, self.G.num_inputs): LOG.info("input file %s", file_per_input) data = [ import_data(input_file, **input_args) for input_file in file_per_input ] executer = GraphExecuter(self.G, qrecs=self.G.quantization) outputs = executer.execute(data, step_idx_limit=step, qmode=qmode) if args.pickle or self._in_py or args.save: pickles.append(outputs) else: self.G.print_intermediates(outputs, limit=step, width=args.number_width, precision=args.precision, channel=args.channel, order=['c', 'h', 'w'], checksum=args.checksum) if args.visualize_detection: img_in = Image.open(file_per_input[0]).convert('RGBA') height = img_in.size[1] if input_args[ 'height'] == -1 else input_args['height'] width = img_in.size[0] if input_args[ 'width'] == -1 else input_args['width'] img_in = img_in.resize((width, height)) if self.G.has_ssd_postprocess: bboxes, classes, scores, _ = [ outputs[graph_out.step_idx][0] for graph_out in self.G.outputs() ] draw = ImageDraw.Draw(img_in, 'RGBA') for box, score, class_id in zip(bboxes, scores, classes): if args.quantize and not args.dequantize: ssd_node = [ node for node in self.G.nodes() if isinstance(node, SSDDetectorParameters) ][0] ssd_qrec = self.G.quantization[NodeId(ssd_node)] x0, x1 = int(box[1] * width * ssd_qrec.out_qs[0].scale), int( box[3] * width * ssd_qrec.out_qs[0].scale) y0, y1 = int(box[0] * height * ssd_qrec.out_qs[0].scale), int( box[2] * height * ssd_qrec.out_qs[0].scale) score = score * ssd_qrec.out_qs[2].scale else: x0, x1 = int(box[1] * width), int(box[3] * width) y0, y1 = int(box[0] * height), int(box[2] * height) rect_points = (x0, y0), (x1, y0), (x1, y1), (x0, y1), (x0, y0) draw.line(rect_points, fill='red', width=2) txt = '{}@{}%'.format(class_id, int(score * 100)) draw.text([x0, y0 - 10], txt, fill=(0, 255, 0)) img_in.show() if args.pickle or args.save or self._in_py: if not pickles: self.perror("no input files found") return if len(args.input_files) == self.G.num_inputs: pickles = pickles[0] if args.pickle: with open(args.pickle, 'wb') as pickle_fp: pickle.dump(pickles, pickle_fp) if args.save: if len(args.input_files) != self.G.num_inputs: self.perror( "can only save dumps on one input to tensor store") return self.tensor_store[args.save] = pickles if self._in_py: self.last_result = pickles
def execute_iterator(self, in_tensors: Sequence[np.ndarray], step_idx_limit: Optional[int] = None, start_node: Optional[Parameters] = None, qmode: Optional[QuantizationMode] = None, yield_fusions=True, yield_details=True, only_yield_step=False, record_inputs: Optional[Mapping] = None, silent=False): if qmode is None: qmode = QuantizationMode.none() saved_outputs = {} if not silent: LOG.info("execute uncached: quantization mode %s", qmode) ExecutionProgress.start() for step_idx, step in enumerate(self._G.graph_state.steps): if step_idx_limit is not None and step_idx > step_idx_limit: break node = step['node'] if start_node and start_node != node: continue # collect outputs from previous nodes # InputNode is already set above output_tensors = self.collect_outputs(saved_outputs, node) if not silent: ExecutionProgress.progress(step_idx, node.name) nid = NodeId(node, None) if record_inputs is not None: if output_tensors is None: record_inputs[nid] = output_tensors else: record_inputs[nid] = [ np.copy(output_tensor) for output_tensor in output_tensors ] qrec = self._qrecs[nid] if self._qrecs is not None else None if qmode.get_quantized(node, step_idx): switch = self._quantized_kernel_switch if qmode.is_step and output_tensors: output_tensors = [ qrec.in_qs[i].quantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] else: switch = self._kernel_switch details = {} if yield_details and ( not only_yield_step or step_idx == step_idx_limit) else None if isinstance(node, (ConvFusionParameters, ActivationFusion)): for fusion_node in node.contained_nodes(): fnid = NodeId(node, fusion_node) fqrec = None if not qrec else self._qrecs[fnid] if record_inputs is not None: record_inputs[nid] = [ np.copy(output_tensor) for output_tensor in output_tensors ] details = {} if yield_fusions and yield_details else None output_tensors = switch.execute(fusion_node, output_tensors, fqrec, details) if yield_fusions: if qmode.dequantize: qoutput_tensors = [ fqrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] yield step_idx, node, fusion_node, qoutput_tensors, details elif qmode.is_float_q_deq: qoutput_tensors = [ fqrec.out_qs[i].dequantize( fqrec.out_qs[i].quantize(output_tensor)) for i, output_tensor in enumerate( output_tensors) ] yield step_idx, node, fusion_node, qoutput_tensors, details else: yield step_idx, node, fusion_node, output_tensors, details elif isinstance(node, InputParameters): output_tensors = switch.execute(node, in_tensors, qrec, details) else: output_tensors = switch.execute(node, output_tensors, qrec, details) if qmode.dequantize: qoutput_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] if not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, qoutput_tensors, details if qmode.is_step and qmode.get_quantized(node, step_idx): output_tensors = qoutput_tensors elif qmode.is_float_q_deq: if qmode.is_step and qmode.get_quantized(node, step_idx): output_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] qoutput_tensors = [ qrec.out_qs[i].dequantize( qrec.out_qs[i].quantize(output_tensor)) for i, output_tensor in enumerate(output_tensors) ] if not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, qoutput_tensors, details else: if qmode.is_step and qmode.get_quantized(node, step_idx): output_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] if not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, output_tensors, details self.save_output(saved_outputs, node, output_tensors) if not silent: ExecutionProgress.end()
def execute_iterator(self, in_tensors: Sequence[np.ndarray], step_idx_limit: Optional[int] = None, start_node: Optional[Parameters] = None, qmode: Optional[QuantizationMode] = None, yield_fusions=True, yield_details=True, only_yield_step=False, record_inputs: Optional[Mapping] = None, silent=False, parent_node=None, parent_step_idx=None, saved_outputs=None, G=None): if qmode is None: qmode = QuantizationMode.none() if G is None: G = self._G saved_outputs = {} if not silent: LOG.info("execute uncached: quantization mode %s", qmode) ExecutionProgress.start() for node in G.dfs(): step_idx = node.step_idx if step_idx_limit is not None and step_idx > step_idx_limit: break if start_node and start_node != node: continue # collect outputs from previous nodes # InputNode is already set above output_tensors = self.collect_outputs(G, saved_outputs, node) if not silent: ExecutionProgress.progress(step_idx, node.name) if parent_node: nid = NodeId(parent_node, node) else: nid = NodeId(node, None) if record_inputs is not None: if output_tensors is None: record_inputs[nid] = output_tensors else: record_inputs[nid] = [ np.copy(output_tensor) for output_tensor in output_tensors ] if isinstance(node, (FusionInputParameters, FusionOutputParameters)): qrec = None else: if self._qrecs and qmode.get_quantized(node, step_idx): if nid not in self._qrecs: LOG.warning("no quantization parameters on %s", node.name) qrec = None else: qrec = self._qrecs[nid] if qmode.is_step and output_tensors: output_tensors = [ qrec.in_qs[i].quantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] else: qrec = None details = {} if yield_details and ( not only_yield_step or step_idx == step_idx_limit) else None if isinstance( node, (FilterFusionBase, ActivationFusionBase, PaddedAddFusionParameters, MatMulOpFusionParameters)): for f_step_idx, f_pnode, f_node, f_output_tensors, f_details in self.execute_iterator( output_tensors, qmode=qmode, yield_fusions=yield_fusions, yield_details=yield_details, silent=True, parent_node=node, parent_step_idx=step_idx, saved_outputs=saved_outputs, G=node.subgraph): if yield_fusions and not isinstance( f_node, (FusionInputParameters, FusionOutputParameters)): yield f_step_idx, f_pnode, f_node, f_output_tensors, f_details f_outputs = node.subgraph.outputs() num_outputs = max(f_output.idx for f_output in f_outputs) + 1 output_tensors = [None] * num_outputs for f_output in f_outputs: output_tensors[f_output.idx] = saved_outputs[f_output][0] elif isinstance(node, (InputParameters, FusionInputParameters)): output_tensors = KernelExecuter.execute( node, in_tensors, qrec, details) else: output_tensors = KernelExecuter.execute( node, output_tensors, qrec, details) if qmode.dequantize and qrec: qoutput_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] if parent_node: yield parent_step_idx, parent_node, node, qoutput_tensors, details elif not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, qoutput_tensors, details if qmode.is_step and qmode.get_quantized(node, step_idx): output_tensors = qoutput_tensors elif qmode.is_float_q_deq and qrec: if qmode.is_step and qmode.get_quantized(node, step_idx): output_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] qoutput_tensors = [ qrec.out_qs[i].dequantize( qrec.out_qs[i].quantize(output_tensor)) for i, output_tensor in enumerate(output_tensors) ] if parent_node: yield parent_step_idx, parent_node, node, qoutput_tensors, details elif not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, qoutput_tensors, details else: if qmode.is_step and qmode.get_quantized(node, step_idx) and qrec: output_tensors = [ qrec.out_qs[i].dequantize(output_tensor) for i, output_tensor in enumerate(output_tensors) ] if parent_node: yield parent_step_idx, parent_node, node, output_tensors, details elif not only_yield_step or step_idx == step_idx_limit: yield step_idx, node, None, output_tensors, details self.save_output(saved_outputs, node, output_tensors) if not silent: ExecutionProgress.end()
def tune_all(self, nodes, progress, quantize=False): base_inputs = self.get_base_inputs(nodes, progress, quantize) def opt_func(qsnr, state): progress('compressing: ', False) compression = self.tune_qsnr( nodes, qsnr, progress=lambda _, comp: progress('+' if comp else '-', False)) if not compression or ('best_compression' in state and state['best_compression'] > compression): if qsnr == 0: raise CompressionError("could not compress graph") return None state['best_compression'] = compression progress('', True) progress('validating: ', False) good_inputs, good_margin, bad_inputs, bad_margin = self.validate( state['qmode'], inputs=state['cur_inputs'], progress=lambda pred: progress('+' if pred else '-', False)) progress('', True) progress( f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})', True) if bad_inputs: if not state['final']: state['cur_inputs'] = bad_inputs del state['best_compression'] return None return compression qmode = QuantizationMode.none() dir_start = 'down' opt_state = { 'cur_inputs': base_inputs.copy(), 'final': False, 'qmode': qmode } start_qsnr = 30 start_step = 15 maximizer = Maximizer(opt_func, 0, 120) while True: res = maximizer.run( start_qsnr, opt_state, progress=lambda cur, step, direct: progress( f'QSNR {cur} step {step} direction {direct}', True), start_step=start_step, dir_start=dir_start) if quantize and opt_state['qmode'] == QuantizationMode.none(): progress('analysing quantized', True) opt_state['qmode'] = QuantizationMode.all_dequantize() elif opt_state['cur_inputs'] != base_inputs: progress('check with all inputs', True) opt_state['final'] = True else: break opt_state['cur_inputs'] = base_inputs.copy() start_qsnr = res[1] start_step = 0.5 dir_start = 'up' progress(f'tune QSNR to best {res[1]} compressed by {res[0]} bytes', True) self.tune_qsnr( nodes, res[1], progress=lambda _, comp: progress('+' if comp else '-', False)) progress('', True) return res[1]
def finetune(self, nodes, progress, quantize=False): sizes = [(node, node.compressed_value) for node in nodes if node.compressed_value and node.use_compressed] nodes = [size[0] for size in sizes] base_inputs = self.get_base_inputs(nodes, progress, quantize) for node in nodes: if node.compressed_value: node.use_compressed = True def opt_func(bits, threshold, sparse, node, state): progress('compressing: ', False) compression = self.tune_bits( [node], bits, threshold=threshold, sparse=sparse, progress=lambda _, comp: progress('+' if comp else '-', False)) if not compression or ('best_compression' in state and state['best_compression'] > compression): if bits == 8 and sparse: raise CompressionError("could not compress graph") return None state['best_compression'] = compression progress('', True) progress('validating: ', False) good_inputs, good_margin, bad_inputs, bad_margin = self.validate( state['qmode'], inputs=state['cur_inputs'], break_on_error=state['final'], progress=lambda pred: progress('+' if pred else '-', False)) progress('', True) progress( f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})', True) if bad_inputs: state['cur_inputs'] = bad_inputs del state['best_compression'] return None return compression maximizer = Maximizer(opt_func, 2, 8, int_step=True) while sizes: sizes.sort(key=lambda x: x[1].size) tune_idx = -1 node = None while node is None and abs(tune_idx) <= len(sizes): node, comp_val = sizes[tune_idx] cur_bits = comp_val.bits if cur_bits > 2: cur_step = max(cur_bits // 2, 1) cur_bits = max(cur_bits - cur_step, 2) else: tune_idx -= 1 node = None if node is None: break progress(f'finetuning {node.name}', True) qmode = QuantizationMode.none() dir_start = 'down' opt_state = { 'cur_inputs': base_inputs.copy(), 'final': False, 'qmode': qmode } while True: res = maximizer.run( cur_bits, None, False, node, opt_state, progress=lambda cur, step, direct: progress( f'bits {cur} step {step} direction {direct}', True), start_step=cur_step, dir_start=dir_start) del sizes[tune_idx] if res is None: break if quantize and opt_state['qmode'] == QuantizationMode.none(): progress('analysing quantized', True) opt_state['qmode'] = QuantizationMode.all_dequantize() elif opt_state['cur_inputs'] != base_inputs: progress('check with all inputs', True) else: break opt_state['final'] = True opt_state['cur_inputs'] = base_inputs.copy() cur_bits = res[1] cur_step = 1 dir_start = 'up' if res is None: progress(f'{node.name} cannot be further optimised', True) self.tune_bits( [node], comp_val.bits, progress=lambda _, comp: progress('+' if comp else '-', False)) else: progress( f'{node.name} tune bits to {res[1]} compressed by {res[0]} bytes', True) self.tune_bits( [node], res[1], progress=lambda _, comp: progress('+' if comp else '-', False)) progress('', True)
def do_dump(self, args: argparse.Namespace): """ Dump the activations resulting from running an input file through the graph. You can use the current quantization settings and can also just quantify one specific step of the graph.""" self._check_graph() dequantize = args.dequantize if args.dequantize is not None\ else not (args.pickle or args.save) if args.quantize or args.quantize_step or args.quantize_all_steps: self._check_quantized() if args.quantize: if dequantize: qmode = QuantizationMode.all_dequantize() else: qmode = QuantizationMode.all() elif args.quantize_all_steps: qmode = QuantizationMode.step_all() dequantize = True else: qmode = QuantizationMode.step(args.quantize_step) elif args.quantize_and_dequantize: qmode = QuantizationMode.all_float_quantize_dequantize() else: qmode = QuantizationMode.none() if args.step is not None: step = args.step num_steps = len(self.G.graph_state.steps) if step < 0: step = num_steps + step if step < 0 or step > num_steps: self.perror("step must be from {} to {}".format(-num_steps, num_steps)) return else: step = None input_args = self._get_input_args(args) pickles = [] for file_per_input in glob_input_files(args.input_files, self.G.num_inputs): LOG.info("input file %s", file_per_input) data = [import_data(input_file, **input_args) for input_file in file_per_input] executer = GraphExecuter(self.G, qrecs=self.G.quantization) outputs = executer.execute(data, step_idx_limit=step, qmode=qmode) if args.pickle or self._in_py or args.save: pickles.append(format_dump_file(self.G, outputs, not qmode.is_none, args.dequantize, args.quantize_step)) else: self.G.print_intermediates(outputs, limit=step, width=args.number_width, precision=args.precision, channel=args.channel, order=['c', 'h', 'w']) if args.pickle or args.save or self._in_py: if not pickles: self.perror("no input files found") return if len(args.input_files) == 1: pickles = pickles[0] if args.pickle: with open(args.pickle, 'wb') as pickle_fp: pickle.dump(pickles, pickle_fp) if args.save: self.tensor_store[args.save] = pickles if self._in_py: self.last_result = pickles