def __init__(self, args, nntool_workdir, *rest, **kwargs):
    self._nntool_workdir = nntool_workdir
    self._graph_idx = 0
    self._graphs = []
    self._settings = []
    self._tensor_store = {}
    super(NNToolShellBase, self).__init__(*rest, **kwargs)
    self.py_locals['tensors'] = self._tensor_store

    if args and args.log_level is not None:
        self.settings['log_level'] = args.log_level.upper()

    self._graph_idx = 0

    # settings override graph file
    graph_file = self.settings['graph_file']
    tensor_file = self.settings['tensor_file']

    # command line overrides that
    if args:
        if args.graph_file:
            graph_file = args.graph_file
        if args.tensor_file:
            tensor_file = args.tensor_file
        if args.template_file:
            self.settings['template_file'] = args.template_file
        if args.tf_quant:
            self.settings['load_quantization'] = args.tf_quant
        if args.dequant_tf:
            self.settings['load_dequantized'] = args.dequant_tf

    if 'log_level' not in self.settings:
        self.settings['log_level'] = "INFO"

    if graph_file:
        self._graphs = []
        self._startup_commands.append(
            self.__build_open_graph(
                graph_file,
                tensor_file,
                self.load_quantization,
                load_dequantized=self.settings.get('load_dequantized')))
    else:
        self._graphs = [NO_GRAPH.copy()]

    ExecutionProgress().listen(progress)
    LOG.propagate = False
    handler = NNToolShellLogHandler(self)
    formatter = logging.Formatter('%(module)s - %(message)s')
    handler.setFormatter(formatter)
    LOG.addHandler(handler)
    LOG.setLevel(self.settings['log_level'])
def __init__(self, *args, **kwargs):
    rest = args[1:]
    args = args[0] if args else None
    self._graph_idx = 0
    self._graphs = [NO_GRAPH.copy()]
    self._cmd_history = [[]]
    self._history_stats = []
    self._first_graph_open = False
    self._replaying_history = False
    self._settings = []
    self._tensor_store = {}
    super(NNToolShellBase, self).__init__(*rest, **kwargs)
    self.feedback_to_output = True
    self.register_postcmd_hook(self._record_history)
    self.py_locals['tensors'] = self._tensor_store

    if args and args.log_level is not None:
        self._startup_commands.append(
            f'set log_level {args.log_level.upper()}')
    else:
        self._startup_commands.append('set log_level INFO')

    if args and args.anonymise:
        self._startup_commands.append('set anonymise true')

    self._graph_idx = 0

    # settings override graph file
    graph_file = self.settings['graph_file']

    # command line overrides that
    if args:
        if args.graph_file:
            graph_file = args.graph_file
        if args.template_file:
            self._startup_commands.append(
                f'set template_file {args.template_file}')

    if graph_file:
        self._startup_commands.append(
            self.__build_open_graph(graph_file, args))

    if not self.LOG_HANDLER_SET:
        ExecutionProgress().listen(progress)
        LOG.propagate = False
        handler = NNToolShellLogHandler(self, LOG)
        formatter = logging.Formatter('%(module)s - %(message)s')
        handler.setFormatter(formatter)
        NNToolShellBase.LOG_HANDLER_SET = True

    self.py_locals['graphs'] = self._graphs
def do_validate(self, args: argparse.Namespace):
    """
    Validate the model (quantized [-q] or not) in terms of prediction accuracy on a given
    dataset (images folder). Ground-truth labels can be embedded in the file names
    ("filename_03.[png, ppm, pgm]"; the number of digits must be consistent with the number of
    network outputs, e.g. in a 1000-class problem the label must use 3 digits, so
    "file_45.png" will raise an error) or can be written in a .json object (example:
    {'file0': label0, 'file1': label1, ...}) and given to the function with --label_json
    """
    self._check_graph()
    if args.quantize:
        self._check_quantized()
        qmode = QuantizationMode.all_dequantize()
    else:
        qmode = QuantizationMode.none()
    LOG.info("quantization mode - %s", qmode)
    input_args = self._get_input_args(args)
    good_predictions = []
    good_margin = 0
    bad_margin = 0

    number_samples = sum(1 for _ in glob_input_files(args.input_files))

    if args.vww_instances_file:
        validation = ValidateFromVWWInstances(
            args.vww_instances_file,
            class_thr=args.class_thr,
            binary_classification=args.binary_classification)
    elif args.label_json:
        validation = ValidateFromJSON(
            args.label_json,
            class_thr=args.class_thr,
            binary_classification=args.binary_classification)
    elif args.class_number is not None:
        validation = ValidateFromClass(
            args.class_number,
            class_thr=args.class_thr,
            binary_classification=args.binary_classification)
    else:
        validation = ValidateFromName(
            class_thr=args.class_thr,
            binary_classification=args.binary_classification)

    try:
        ExecutionProgress.start()
        for i, file_per_input in enumerate(
                glob_input_files(args.input_files, self.G.num_inputs)):
            if not args.silent:
                LOG.info("input file %s", file_per_input)
            data = [import_data(input_file, **input_args)
                    for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, qmode=qmode, silent=args.silent)
            predicted_values = np.asarray(outputs[args.prediction_step_idx])
            good_prediction, class_predicted, real_class, margin = validation.validate(
                file_per_input[0], predicted_values)
            good_predictions.append(good_prediction)
            if good_prediction:
                good_margin += margin
            else:
                bad_margin += margin
            if not args.silent:
                LOG.info('Prediction is %s predicted %s correct %s margin %s',
                         good_prediction, class_predicted, real_class, margin)
            if not i % args.progress_every and i > 0:
                LOG.info('ACCURACY: %.3f %%',
                         100 * sum(good_predictions) / len(good_predictions))
            ExecutionProgress.progress(i, number_samples)
        ExecutionProgress.end()
    except (KeyboardInterrupt, SystemExit):
        pass

    self.py_locals['labels'] = validation.labels
    self.py_locals['predictions'] = validation.predictions
    cnt = len(good_predictions)
    if cnt:
        ngood = sum(good_predictions)
        nbad = cnt - ngood
        if nbad:
            LOG.info("%s out of %s predicted falsely with %s average margin",
                     nbad, cnt, bad_margin / nbad)
        if ngood:
            LOG.info("%s out of %s predicted correctly with %s average margin",
                     ngood, cnt, good_margin / ngood)
        accuracy_rate = 100 * sum(good_predictions) / len(good_predictions)
        LOG.info('Total accuracy: %.3f %%', accuracy_rate)
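# Illustrative sketch only, not nntool's ValidateFromName implementation: how a
# ground-truth label could be recovered from a file name under the convention the
# do_validate docstring describes. The helper name and the num_classes parameter
# are hypothetical.
import os
import re


def label_from_filename(path, num_classes=1000):
    # strip directory and extension, then look for trailing digits after '_'
    stem = os.path.splitext(os.path.basename(path))[0]
    match = re.search(r'_(\d+)$', stem)
    if match is None:
        raise ValueError(f"no label embedded in {path}")
    digits = match.group(1)
    # with 1000 classes the label must use exactly 3 digits, so "file_45.png" is rejected
    expected_digits = len(str(num_classes - 1))
    if len(digits) != expected_digits:
        raise ValueError(f"expected {expected_digits} label digits in {path}")
    return int(digits)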
def execute_qnoq_iterator(self, in_tensors, step_idx_limit=None,
                          silent=False, yield_fusions=True):
    if not silent:
        LOG.info("execute quantization comparison")
        ExecutionProgress.start()
    saved_outputs = {}
    for step_idx, step in enumerate(self._G.graph_state.steps):
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break
        node = step['node']
        if not silent:
            ExecutionProgress.progress(step_idx, node.name)

        output = self.collect_outputs(saved_outputs, node)
        nid = NodeId(node, None)
        qrec = self._qrecs[nid]
        if isinstance(node, (ConvFusionParameters, ActivationFusion)):
            # fused nodes: run each contained node with both the float and quantized kernels
            for fusion_node in node.contained_nodes():
                fnid = NodeId(node, fusion_node)
                fqrec = self._qrecs[fnid]
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(fqrec.in_qs[val_idx].quantize(val))
                details = {}
                output = self._kernel_switch.execute(
                    fusion_node, output,
                    fqrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    fusion_node, qoutput, fqrec, details=qdetails)
                # dequantize so the quantized result is directly comparable with the float one
                qoutput = [fqrec.out_qs[i].dequantize(out)
                           for i, out in enumerate(qoutput)]
                if yield_fusions:
                    yield step_idx, node, output, details, qoutput, qdetails, fusion_node
        else:
            if isinstance(node, (InputParameters, ConstantInputParameters)):
                details = {}
                output = self._kernel_switch.execute(
                    node, in_tensors,
                    qrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    node, in_tensors, qrec, details=qdetails)
            else:
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(qrec.in_qs[val_idx].quantize(val))
                details = {}
                output = self._kernel_switch.execute(
                    node, output,
                    qrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    node, qoutput, qrec, details=qdetails)
            qoutput = [qrec.out_qs[i].dequantize(out)
                       for i, out in enumerate(qoutput)]
            yield step_idx, node, output, details, qoutput, qdetails, None
        self.save_output(saved_outputs, node, output)
    if not silent:
        ExecutionProgress.end()
def execute_iterator(self,
                     in_tensors: Sequence[np.ndarray],
                     step_idx_limit: Optional[int] = None,
                     start_node: Optional[Parameters] = None,
                     qmode: Optional[QuantizationMode] = None,
                     yield_fusions=True,
                     yield_details=True,
                     only_yield_step=False,
                     record_inputs: Optional[Mapping] = None,
                     silent=False):
    if qmode is None:
        qmode = QuantizationMode.none()
    saved_outputs = {}
    if not silent:
        LOG.info("execute uncached: quantization mode %s", qmode)
        ExecutionProgress.start()
    for step_idx, step in enumerate(self._G.graph_state.steps):
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break
        node = step['node']
        if start_node and start_node != node:
            continue
        # collect outputs from previous nodes
        # InputNode is already set above
        output_tensors = self.collect_outputs(saved_outputs, node)
        if not silent:
            ExecutionProgress.progress(step_idx, node.name)
        nid = NodeId(node, None)
        if record_inputs is not None:
            if output_tensors is None:
                record_inputs[nid] = output_tensors
            else:
                record_inputs[nid] = [np.copy(output_tensor)
                                      for output_tensor in output_tensors]
        qrec = self._qrecs[nid] if self._qrecs is not None else None
        if qmode.get_quantized(node, step_idx):
            switch = self._quantized_kernel_switch
            if qmode.is_step and output_tensors:
                output_tensors = [qrec.in_qs[i].quantize(output_tensor)
                                  for i, output_tensor in enumerate(output_tensors)]
        else:
            switch = self._kernel_switch
        details = {} if yield_details and (
            not only_yield_step or step_idx == step_idx_limit) else None
        if isinstance(node, (ConvFusionParameters, ActivationFusion)):
            for fusion_node in node.contained_nodes():
                fnid = NodeId(node, fusion_node)
                fqrec = None if not qrec else self._qrecs[fnid]
                if record_inputs is not None:
                    record_inputs[nid] = [np.copy(output_tensor)
                                          for output_tensor in output_tensors]
                details = {} if yield_fusions and yield_details else None
                output_tensors = switch.execute(fusion_node, output_tensors, fqrec, details)
                if yield_fusions:
                    if qmode.dequantize:
                        qoutput_tensors = [fqrec.out_qs[i].dequantize(output_tensor)
                                           for i, output_tensor in enumerate(output_tensors)]
                        yield step_idx, node, fusion_node, qoutput_tensors, details
                    elif qmode.is_float_q_deq:
                        qoutput_tensors = [
                            fqrec.out_qs[i].dequantize(fqrec.out_qs[i].quantize(output_tensor))
                            for i, output_tensor in enumerate(output_tensors)]
                        yield step_idx, node, fusion_node, qoutput_tensors, details
                    else:
                        yield step_idx, node, fusion_node, output_tensors, details
        elif isinstance(node, InputParameters):
            output_tensors = switch.execute(node, in_tensors, qrec, details)
        else:
            output_tensors = switch.execute(node, output_tensors, qrec, details)

        if qmode.dequantize:
            qoutput_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                               for i, output_tensor in enumerate(output_tensors)]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = qoutput_tensors
        elif qmode.is_float_q_deq:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                                  for i, output_tensor in enumerate(output_tensors)]
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(qrec.out_qs[i].quantize(output_tensor))
                for i, output_tensor in enumerate(output_tensors)]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
        else:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                                  for i, output_tensor in enumerate(output_tensors)]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, output_tensors, details

        self.save_output(saved_outputs, node, output_tensors)
    if not silent:
        ExecutionProgress.end()
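# Minimal usage sketch of the iterator above, following the pattern already used
# in do_validate (GraphExecuter built from a graph and its quantization records).
# `G` and `data` are placeholders for a loaded graph and its per-input tensors.
executer = GraphExecuter(G, qrecs=G.quantization)
for step_idx, node, fusion_node, output_tensors, details in executer.execute_iterator(
        data, qmode=QuantizationMode.all_dequantize(), yield_fusions=False):
    # fusion_node is None for the step-level tuples since yield_fusions=False
    print(step_idx, node.name, [t.shape for t in output_tensors])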
def execute_qnoq_iterator(self, in_tensors, step_idx_limit=None, silent=False,
                          yield_fusions=True, parent_node=None,
                          parent_step_idx=None, saved_outputs=None, G=None):
    if not silent:
        LOG.info("execute quantization comparison")
        ExecutionProgress.start()
    if G is None:
        G = self._G
        saved_outputs = {}
    for node in G.dfs():
        step_idx = node.step_idx
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break
        if not silent:
            ExecutionProgress.progress(step_idx, node.name)

        output = self.collect_outputs(G, saved_outputs, node)
        if parent_node:
            nid = NodeId(parent_node, node)
        else:
            nid = NodeId(node, None)
        if isinstance(node, (FusionInputParameters, FusionOutputParameters)):
            qrec = None
        else:
            qrec = self._qrecs[nid]
        if isinstance(node, (FilterFusionBase, ActivationFusionBase,
                             PaddedAddFusionParameters)):
            # fusions are expanded by recursing into their subgraph
            for (f_step_idx, f_pnode, f_output, f_details,
                 f_qoutput, f_qdetails, f_node) in self.execute_qnoq_iterator(
                     output,
                     yield_fusions=yield_fusions,
                     silent=silent,
                     parent_node=node,
                     parent_step_idx=step_idx,
                     saved_outputs=saved_outputs,
                     G=node.subgraph):
                if yield_fusions and not isinstance(
                        f_node, (FusionInputParameters, FusionOutputParameters)):
                    yield f_step_idx, f_pnode, f_output, f_details, f_qoutput, f_qdetails, f_node
            # gather the float outputs of the fusion subgraph as this node's outputs
            f_outputs = node.subgraph.outputs()
            num_outputs = max(f_out.idx for f_out in f_outputs) + 1
            output = [None] * num_outputs
            for f_out in f_outputs:
                output[f_out.idx] = saved_outputs[f_out][0]
            qoutput = []
        else:
            if isinstance(node, (InputParameters, ConstantInputParameters)):
                details = {}
                output = KernelExecuter.execute(node, in_tensors, None, details=details)
                qdetails = {}
                qoutput = KernelExecuter.execute(node, in_tensors, qrec, details=qdetails)
            else:
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(qrec.in_qs[val_idx].quantize(val))
                details = {}
                output = KernelExecuter.execute(node, output, None, details=details)
                qdetails = {}
                qoutput = KernelExecuter.execute(node, qoutput, qrec, details=qdetails)
            # dequantize so the quantized result is directly comparable with the float one
            qoutput = [qrec.out_qs[i].dequantize(out)
                       for i, out in enumerate(qoutput)]
            yield step_idx, node, output, details, qoutput, qdetails, None
        self.save_output(saved_outputs, node, output)
    if not silent:
        ExecutionProgress.end()
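# Hypothetical sketch of consuming the 7-tuples yielded above to compare float and
# quantized execution per step. `executer` and `data` are placeholders as in the
# earlier usage sketch; qoutput is already dequantized, so it can be compared to
# the float output directly.
for step_idx, node, output, details, qoutput, qdetails, fusion_node in \
        executer.execute_qnoq_iterator(data, silent=True, yield_fusions=False):
    max_err = max(float(np.max(np.abs(o - q))) for o, q in zip(output, qoutput))
    print(f"step {step_idx:3d} {node.name:30s} max abs error {max_err:.6f}")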
def execute_iterator(self,
                     in_tensors: Sequence[np.ndarray],
                     step_idx_limit: Optional[int] = None,
                     start_node: Optional[Parameters] = None,
                     qmode: Optional[QuantizationMode] = None,
                     yield_fusions=True,
                     yield_details=True,
                     only_yield_step=False,
                     record_inputs: Optional[Mapping] = None,
                     silent=False,
                     parent_node=None,
                     parent_step_idx=None,
                     saved_outputs=None,
                     G=None):
    if qmode is None:
        qmode = QuantizationMode.none()
    if G is None:
        G = self._G
        saved_outputs = {}
    if not silent:
        LOG.info("execute uncached: quantization mode %s", qmode)
        ExecutionProgress.start()
    for node in G.dfs():
        step_idx = node.step_idx
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break
        if start_node and start_node != node:
            continue
        # collect outputs from previous nodes
        # InputNode is already set above
        output_tensors = self.collect_outputs(G, saved_outputs, node)
        if not silent:
            ExecutionProgress.progress(step_idx, node.name)
        if parent_node:
            nid = NodeId(parent_node, node)
        else:
            nid = NodeId(node, None)
        if record_inputs is not None:
            if output_tensors is None:
                record_inputs[nid] = output_tensors
            else:
                record_inputs[nid] = [np.copy(output_tensor)
                                      for output_tensor in output_tensors]
        if isinstance(node, (FusionInputParameters, FusionOutputParameters)):
            qrec = None
        else:
            if self._qrecs and qmode.get_quantized(node, step_idx):
                if nid not in self._qrecs:
                    LOG.warning("no quantization parameters on %s", node.name)
                    qrec = None
                else:
                    qrec = self._qrecs[nid]
                    if qmode.is_step and output_tensors:
                        output_tensors = [qrec.in_qs[i].quantize(output_tensor)
                                          for i, output_tensor in enumerate(output_tensors)]
            else:
                qrec = None
        details = {} if yield_details and (
            not only_yield_step or step_idx == step_idx_limit) else None
        if isinstance(node, (FilterFusionBase, ActivationFusionBase,
                             PaddedAddFusionParameters, MatMulOpFusionParameters)):
            for f_step_idx, f_pnode, f_node, f_output_tensors, f_details in self.execute_iterator(
                    output_tensors,
                    qmode=qmode,
                    yield_fusions=yield_fusions,
                    yield_details=yield_details,
                    silent=True,
                    parent_node=node,
                    parent_step_idx=step_idx,
                    saved_outputs=saved_outputs,
                    G=node.subgraph):
                if yield_fusions and not isinstance(
                        f_node, (FusionInputParameters, FusionOutputParameters)):
                    yield f_step_idx, f_pnode, f_node, f_output_tensors, f_details
            f_outputs = node.subgraph.outputs()
            num_outputs = max(f_output.idx for f_output in f_outputs) + 1
            output_tensors = [None] * num_outputs
            for f_output in f_outputs:
                output_tensors[f_output.idx] = saved_outputs[f_output][0]
        elif isinstance(node, (InputParameters, FusionInputParameters)):
            output_tensors = KernelExecuter.execute(node, in_tensors, qrec, details)
        else:
            output_tensors = KernelExecuter.execute(node, output_tensors, qrec, details)

        if qmode.dequantize and qrec:
            qoutput_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                               for i, output_tensor in enumerate(output_tensors)]
            if parent_node:
                yield parent_step_idx, parent_node, node, qoutput_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = qoutput_tensors
        elif qmode.is_float_q_deq and qrec:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                                  for i, output_tensor in enumerate(output_tensors)]
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(qrec.out_qs[i].quantize(output_tensor))
                for i, output_tensor in enumerate(output_tensors)]
            if parent_node:
                yield parent_step_idx, parent_node, node, qoutput_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
        else:
            if qmode.is_step and qmode.get_quantized(node, step_idx) and qrec:
                output_tensors = [qrec.out_qs[i].dequantize(output_tensor)
                                  for i, output_tensor in enumerate(output_tensors)]
            if parent_node:
                yield parent_step_idx, parent_node, node, output_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, output_tensors, details

        self.save_output(saved_outputs, node, output_tensors)
    if not silent:
        ExecutionProgress.end()
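# Hypothetical sketch: with yield_fusions=True the iterator above also surfaces nodes
# executed inside fusion subgraphs; for those tuples the third element is the inner
# node while step_idx and node refer to the enclosing fusion. `executer` and `data`
# remain placeholders.
for step_idx, node, fusion_node, output_tensors, details in executer.execute_iterator(
        data, qmode=QuantizationMode.none(), yield_fusions=True):
    where = f"{node.name}/{fusion_node.name}" if fusion_node is not None else node.name
    print(f"step {step_idx:3d} {where}")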