def do_validate(self, args: argparse.Namespace):
    """
    Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
    folder). Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of
    digits must be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits
    must be 3, "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
    'file1':label1, ...}) and given to the function with --label_json
    """
    # Fields read from ``args``: quantize, input_files, vww_instances_file,
    # label_json, class_number, class_thr, binary_classification, silent,
    # prediction_step_idx and progress_every.
    # Side effects: logs per-sample and summary results through LOG and stores
    # the validator's ``labels`` and ``predictions`` in ``self.py_locals``.
    self._check_graph()
    if args.quantize:
        self._check_quantized()
        qmode = QuantizationMode.all_dequantize()
    else:
        qmode = QuantizationMode.none()
    LOG.info("quantization mode - %s", qmode)
    input_args = self._get_input_args(args)

    # Glob exactly once, with the same arguments the execution loop uses, so
    # that the total handed to the progress reporter always matches the number
    # of iterations actually performed.
    input_groups = list(
        glob_input_files(args.input_files, self.G.num_inputs))
    number_samples = len(input_groups)

    # Options shared by every ground-truth provider.
    validator_options = {
        'class_thr': args.class_thr,
        'binary_classification': args.binary_classification,
    }
    if args.vww_instances_file:
        validation = ValidateFromVWWInstances(
            args.vww_instances_file, **validator_options)
    elif args.label_json:
        validation = ValidateFromJSON(
            args.label_json, **validator_options)
    elif args.class_number is not None:
        validation = ValidateFromClass(
            args.class_number, **validator_options)
    else:
        validation = ValidateFromName(**validator_options)

    good_predictions = []
    good_margin = 0
    bad_margin = 0

    ExecutionProgress.start()
    try:
        for i, file_per_input in enumerate(input_groups):
            if not args.silent:
                LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]

            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data,
                                       qmode=qmode,
                                       silent=args.silent)

            predicted_values = np.asarray(
                outputs[args.prediction_step_idx])
            # The first file of the group is the one handed to the validator
            # to look up the expected class.
            good_prediction, class_predicted, real_class, margin = \
                validation.validate(file_per_input[0], predicted_values)
            good_predictions.append(good_prediction)
            if good_prediction:
                good_margin += margin
            else:
                bad_margin += margin

            if not args.silent:
                LOG.info(
                    'Prediction is %s predicted %s correct %s margin %s',
                    good_prediction, class_predicted, real_class, margin)
            # Periodic running accuracy, skipped on the very first sample.
            if not i % args.progress_every and i > 0:
                LOG.info(
                    'ACCURACY: %.3f %%',
                    100 * sum(good_predictions) / len(good_predictions))

            ExecutionProgress.progress(i, number_samples)
    except (KeyboardInterrupt, SystemExit):
        # Deliberate: an interrupted run still reports the statistics
        # gathered so far instead of propagating the interruption.
        pass
    finally:
        # Always close the progress report, including on interruption or on
        # an unexpected error inside the loop.
        ExecutionProgress.end()

    self.py_locals['labels'] = validation.labels
    self.py_locals['predictions'] = validation.predictions

    cnt = len(good_predictions)
    if not cnt:
        LOG.warning("no input files were validated")
        return

    ngood = sum(good_predictions)
    nbad = cnt - ngood
    if nbad:
        LOG.info(
            "%s out of %s predicted falsly with %s average margin",
            nbad, cnt, bad_margin / nbad)
    if ngood:
        LOG.info(
            "%s out of %s predicted correctly with %s average margin",
            ngood, cnt, good_margin / ngood)
    LOG.info('Total accuracy: %.3f %%', 100 * ngood / cnt)
def execute_qnoq_iterator(self,
                          in_tensors,
                          step_idx_limit=None,
                          silent=False,
                          yield_fusions=True):
    """Generator that executes each graph step twice - through
    ``self._kernel_switch`` and through ``self._quantized_kernel_switch`` -
    so the two results can be compared.

    Args:
        in_tensors: passed as the input of nodes that are instances of
            ``InputParameters`` or ``ConstantInputParameters``.
        step_idx_limit: when not None, iteration stops at the first step
            whose index is greater than this value.
        silent: when true, neither the log message nor the
            ``ExecutionProgress`` calls are made.
        yield_fusions: when true, a tuple is also yielded for every node
            returned by ``contained_nodes()`` of a fusion node.

    Yields:
        Tuples ``(step_idx, node, output, details, qoutput, qdetails,
        fusion_node)``. ``fusion_node`` is None for the per-step tuple.
        ``qoutput`` holds the quantized kernel outputs after ``dequantize``
        has been applied with the matching ``out_qs`` entry.

    NOTE(review): the nesting of this body was reconstructed from a source
    whose indentation was lost - confirm against the upstream file.
    """
    if not silent:
        LOG.info("execute quantization comparison")
        ExecutionProgress.start()
    # Outputs saved by save_output() for the steps executed so far. Only the
    # first kernel's ``output`` is saved, never ``qoutput``.
    saved_outputs = {}
    for step_idx, step in enumerate(self._G.graph_state.steps):

        if step_idx_limit is not None and step_idx > step_idx_limit:
            break

        node = step['node']

        if not silent:
            ExecutionProgress.progress(step_idx, node.name)

        output = self.collect_outputs(saved_outputs, node)
        nid = NodeId(node, None)
        qrec = self._qrecs[nid]

        if isinstance(node, (ConvFusionParameters, ActivationFusion)):
            # Fusion: run the contained nodes one after the other, feeding
            # each one with the output of the previous one.
            for fusion_node in node.contained_nodes():
                fnid = NodeId(node, fusion_node)
                fqrec = self._qrecs[fnid]

                # The quantized kernel input is re-quantized from the first
                # kernel's current ``output`` rather than carried over from
                # the previous quantized result.
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(fqrec.in_qs[val_idx].quantize(val))

                details = {}
                # The first kernel only receives the quantization record
                # when the graph reports has_quantized_parameters.
                output = self._kernel_switch.execute(
                    fusion_node, output,
                    fqrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    fusion_node, qoutput, fqrec, details=qdetails)
                qoutput = [
                    fqrec.out_qs[i].dequantize(out)
                    for i, out in enumerate(qoutput)
                ]
                if yield_fusions:
                    yield step_idx, node, output, details, qoutput, qdetails, fusion_node
        else:
            if isinstance(node, (InputParameters, ConstantInputParameters)):
                # These node types are executed on in_tensors by both kernels.
                details = {}
                output = self._kernel_switch.execute(
                    node, in_tensors,
                    qrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    node, in_tensors, qrec, details=qdetails)
            else:
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(qrec.in_qs[val_idx].quantize(val))
                details = {}
                output = self._kernel_switch.execute(
                    node, output,
                    qrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                qoutput = self._quantized_kernel_switch.execute(
                    node, qoutput, qrec, details=qdetails)

            qoutput = [
                qrec.out_qs[i].dequantize(out)
                for i, out in enumerate(qoutput)
            ]

        # Per-step tuple. For a fusion node the values are those left by the
        # last contained node of the loop above.
        yield step_idx, node, output, details, qoutput, qdetails, None
        self.save_output(saved_outputs, node, output)

    if not silent:
        ExecutionProgress.end()
def execute_qnoq_iterator(self,
                          in_tensors,
                          step_idx_limit=None,
                          silent=False,
                          yield_fusions=True,
                          parent_node=None,
                          parent_step_idx=None,
                          saved_outputs=None,
                          G=None):
    """Generator that executes every node of a graph twice with
    ``KernelExecuter`` - once with no quantization record and once with the
    node's record - so the two results can be compared. Fusion nodes are
    handled by calling this method again on ``node.subgraph``.

    Args:
        in_tensors: passed as the input of nodes that are instances of
            ``InputParameters`` or ``ConstantInputParameters``.
        step_idx_limit: when not None, iteration stops at the first node
            whose ``step_idx`` is greater than this value.
        silent: when true, neither the log message nor the
            ``ExecutionProgress`` calls are made.
        yield_fusions: when true, tuples produced by the nested call on a
            fusion subgraph are re-yielded.
        parent_node: set by the nested call; when truthy the quantization
            record is looked up with ``NodeId(parent_node, node)``.
        parent_step_idx: set by the nested call; not read in this body.
        saved_outputs: mapping shared with the nested call so the parent can
            read the subgraph outputs; replaced by a new dict when ``G`` is
            None.
        G: graph to iterate with ``dfs()``; defaults to ``self._G``.

    Yields:
        Tuples ``(step_idx, node, output, details, qoutput, qdetails,
        fusion_node)``; the last element is None in the tuple yielded at the
        end of each iteration.

    NOTE(review): the nesting of this body was reconstructed from a source
    whose indentation was lost - confirm against the upstream file.
    """
    if not silent:
        LOG.info("execute quantization comparison")
        ExecutionProgress.start()
    if G is None:
        # Top-level call: use the executer's own graph and a new output store.
        G = self._G
        saved_outputs = {}
    for node in G.dfs():
        step_idx = node.step_idx
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break

        if not silent:
            ExecutionProgress.progress(step_idx, node.name)

        output = self.collect_outputs(G, saved_outputs, node)
        if parent_node:
            nid = NodeId(parent_node, node)
        else:
            nid = NodeId(node, None)

        # No quantization record is looked up for fusion input/output nodes.
        # NOTE(review): the non-fusion branch below dereferences qrec.in_qs
        # and qrec.out_qs when there are values to convert - confirm how
        # these node types are expected to pass through it with qrec = None.
        if isinstance(node, (FusionInputParameters, FusionOutputParameters)):
            qrec = None
        else:
            qrec = self._qrecs[nid]

        if isinstance(node, (FilterFusionBase, ActivationFusionBase,
                             PaddedAddFusionParameters)):
            # NOTE(review): ``silent`` is forwarded unchanged, so when it is
            # false the nested call logs again and calls
            # ExecutionProgress.start()/end() itself - confirm this is
            # intended.
            for (f_step_idx, f_pnode, f_output, f_details, f_qoutput,
                 f_qdetails, f_node) in self.execute_qnoq_iterator(
                     output,
                     yield_fusions=yield_fusions,
                     silent=silent,
                     parent_node=node,
                     parent_step_idx=step_idx,
                     saved_outputs=saved_outputs,
                     G=node.subgraph):
                if yield_fusions and not isinstance(
                        f_node,
                        (FusionInputParameters, FusionOutputParameters)):
                    yield f_step_idx, f_pnode, f_output, f_details, f_qoutput, f_qdetails, f_node
            # Rebuild this node's output list from what the subgraph output
            # nodes saved, placed according to each output node's idx.
            f_outputs = node.subgraph.outputs()
            num_outputs = max(f_out.idx for f_out in f_outputs) + 1
            output = [None] * num_outputs
            for f_out in f_outputs:
                output[f_out.idx] = saved_outputs[f_out][0]
            # NOTE(review): no quantized output is assembled for the fusion
            # node itself, and ``details``/``qdetails`` are not assigned in
            # this branch, so the yield below reuses whatever an earlier
            # iteration left in them - confirm this is intended.
            qoutput = []
        else:
            if isinstance(node, (InputParameters, ConstantInputParameters)):
                details = {}
                output = KernelExecuter.execute(node,
                                                in_tensors,
                                                None,
                                                details=details)
                qdetails = {}
                qoutput = KernelExecuter.execute(node,
                                                 in_tensors,
                                                 qrec,
                                                 details=qdetails)
            else:
                # The quantized input is produced from the unquantized run's
                # ``output`` with the record's in_qs.
                qoutput = []
                for val_idx, val in enumerate(output):
                    qoutput.append(qrec.in_qs[val_idx].quantize(val))
                details = {}
                output = KernelExecuter.execute(node,
                                                output,
                                                None,
                                                details=details)
                qdetails = {}
                qoutput = KernelExecuter.execute(node,
                                                 qoutput,
                                                 qrec,
                                                 details=qdetails)

            qoutput = [
                qrec.out_qs[i].dequantize(out)
                for i, out in enumerate(qoutput)
            ]

        yield step_idx, node, output, details, qoutput, qdetails, None
        # Only the unquantized ``output`` is stored for downstream nodes.
        self.save_output(saved_outputs, node, output)

    if not silent:
        ExecutionProgress.end()
def execute_iterator(self,
                     in_tensors: Sequence[np.ndarray],
                     step_idx_limit: Optional[int] = None,
                     start_node: Optional[Parameters] = None,
                     qmode: Optional[QuantizationMode] = None,
                     yield_fusions=True,
                     yield_details=True,
                     only_yield_step=False,
                     record_inputs: Optional[Mapping] = None,
                     silent=False):
    """Generator that executes the steps of ``self._G.graph_state.steps`` in
    order and yields the output tensors of each one.

    Args:
        in_tensors: passed as the input of ``InputParameters`` nodes.
        step_idx_limit: when not None, iteration stops at the first step
            whose index is greater than this value.
        start_node: when truthy, every step whose node differs from it is
            skipped.
        qmode: quantization mode; ``QuantizationMode.none()`` when None.
        yield_fusions: when true, a tuple is also yielded for every node
            returned by ``contained_nodes()`` of a fusion node.
        yield_details: when true, a dict is passed to the kernels as
            ``details`` (subject to ``only_yield_step``); otherwise None.
        only_yield_step: when true, the per-step tuple is only yielded for
            the step whose index equals ``step_idx_limit``.
        record_inputs: when not None, receives copies of the tensors
            collected as each node's input, keyed by ``NodeId``.
        silent: when true, neither the log message nor the
            ``ExecutionProgress`` calls are made.

    Yields:
        Tuples ``(step_idx, node, fusion_node, output_tensors, details)``;
        ``fusion_node`` is None for the per-step tuple.

    NOTE(review): the nesting of this body was reconstructed from a source
    whose indentation was lost - confirm against the upstream file.
    """
    if qmode is None:
        qmode = QuantizationMode.none()

    saved_outputs = {}

    if not silent:
        LOG.info("execute uncached: quantization mode %s", qmode)
        ExecutionProgress.start()
    for step_idx, step in enumerate(self._G.graph_state.steps):

        if step_idx_limit is not None and step_idx > step_idx_limit:
            break

        node = step['node']

        if start_node and start_node != node:
            continue

        # collect outputs from previous nodes
        # InputNode is already set above
        output_tensors = self.collect_outputs(saved_outputs, node)

        if not silent:
            ExecutionProgress.progress(step_idx, node.name)
        nid = NodeId(node, None)
        if record_inputs is not None:
            if output_tensors is None:
                record_inputs[nid] = output_tensors
            else:
                # Copies are stored rather than the collected tensors
                # themselves.
                record_inputs[nid] = [
                    np.copy(output_tensor)
                    for output_tensor in output_tensors
                ]

        qrec = self._qrecs[nid] if self._qrecs is not None else None
        # Pick the kernel set for this node according to the mode.
        if qmode.get_quantized(node, step_idx):
            switch = self._quantized_kernel_switch
            if qmode.is_step and output_tensors:
                # In step mode the collected inputs are quantized here with
                # the record's in_qs before the node is executed.
                output_tensors = [
                    qrec.in_qs[i].quantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
        else:
            switch = self._kernel_switch

        details = {} if yield_details and (
            not only_yield_step or step_idx == step_idx_limit) else None
        if isinstance(node, (ConvFusionParameters, ActivationFusion)):
            for fusion_node in node.contained_nodes():
                fnid = NodeId(node, fusion_node)
                fqrec = None if not qrec else self._qrecs[fnid]
                # NOTE(review): this writes under ``nid`` (the fusion node),
                # not ``fnid``, so each contained node overwrites the entry
                # recorded before it - confirm this is intended.
                if record_inputs is not None:
                    record_inputs[nid] = [
                        np.copy(output_tensor)
                        for output_tensor in output_tensors
                    ]
                details = {} if yield_fusions and yield_details else None
                output_tensors = switch.execute(fusion_node,
                                                output_tensors, fqrec,
                                                details)
                if yield_fusions:
                    if qmode.dequantize:
                        qoutput_tensors = [
                            fqrec.out_qs[i].dequantize(output_tensor)
                            for i, output_tensor in enumerate(output_tensors)
                        ]
                        yield step_idx, node, fusion_node, qoutput_tensors, details
                    elif qmode.is_float_q_deq:
                        # Quantize then dequantize with the same out_qs entry.
                        qoutput_tensors = [
                            fqrec.out_qs[i].dequantize(
                                fqrec.out_qs[i].quantize(output_tensor))
                            for i, output_tensor in enumerate(
                                output_tensors)
                        ]
                        yield step_idx, node, fusion_node, qoutput_tensors, details
                    else:
                        yield step_idx, node, fusion_node, output_tensors, details
        elif isinstance(node, InputParameters):
            output_tensors = switch.execute(node, in_tensors, qrec, details)
        else:
            output_tensors = switch.execute(node, output_tensors, qrec,
                                            details)

        # Per-step yield; the yielded tensors depend on the mode.
        if qmode.dequantize:
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(output_tensor)
                for i, output_tensor in enumerate(output_tensors)
            ]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
            # In step mode the dequantized tensors are what gets saved for
            # the following nodes.
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = qoutput_tensors
        elif qmode.is_float_q_deq:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(
                    qrec.out_qs[i].quantize(output_tensor))
                for i, output_tensor in enumerate(output_tensors)
            ]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
        else:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
            if not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, output_tensors, details

        self.save_output(saved_outputs, node, output_tensors)

    if not silent:
        ExecutionProgress.end()
def execute_iterator(self,
                     in_tensors: Sequence[np.ndarray],
                     step_idx_limit: Optional[int] = None,
                     start_node: Optional[Parameters] = None,
                     qmode: Optional[QuantizationMode] = None,
                     yield_fusions=True,
                     yield_details=True,
                     only_yield_step=False,
                     record_inputs: Optional[Mapping] = None,
                     silent=False,
                     parent_node=None,
                     parent_step_idx=None,
                     saved_outputs=None,
                     G=None):
    """Generator that executes the nodes of a graph in ``dfs()`` order with
    ``KernelExecuter`` and yields the output tensors of each one. Fusion
    nodes are handled by calling this method again on ``node.subgraph``.

    Args:
        in_tensors: passed as the input of ``InputParameters`` and
            ``FusionInputParameters`` nodes.
        step_idx_limit: when not None, iteration stops at the first node
            whose ``step_idx`` is greater than this value.
        start_node: when truthy, every node that differs from it is skipped.
        qmode: quantization mode; ``QuantizationMode.none()`` when None.
        yield_fusions: when true, tuples produced by the nested call on a
            fusion subgraph are re-yielded (except those for fusion
            input/output nodes).
        yield_details: when true, a dict is passed to the kernels as
            ``details`` (subject to ``only_yield_step``); otherwise None.
        only_yield_step: when true and there is no ``parent_node``, a tuple
            is only yielded for the node whose ``step_idx`` equals
            ``step_idx_limit``.
        record_inputs: when not None, receives copies of the tensors
            collected as each node's input, keyed by ``NodeId``.
        silent: when true, neither the log message nor the
            ``ExecutionProgress`` calls are made.
        parent_node: set by the nested call; when truthy, records are keyed
            by ``NodeId(parent_node, node)`` and yielded tuples carry the
            parent as their second element.
        parent_step_idx: set by the nested call; first element of the tuples
            yielded when ``parent_node`` is truthy.
        saved_outputs: mapping shared with the nested call so the parent can
            read the subgraph outputs; replaced by a new dict when ``G`` is
            None.
        G: graph to iterate; defaults to ``self._G``.

    Yields:
        ``(step_idx, node, None, tensors, details)`` for a top-level node, or
        ``(parent_step_idx, parent_node, node, tensors, details)`` for a node
        executed inside a fusion subgraph.

    NOTE(review): the nesting of this body was reconstructed from a source
    whose indentation was lost - confirm against the upstream file.
    """
    if qmode is None:
        qmode = QuantizationMode.none()

    if G is None:
        # Top-level call: use the executer's own graph and a new output store.
        G = self._G
        saved_outputs = {}

    if not silent:
        LOG.info("execute uncached: quantization mode %s", qmode)
        ExecutionProgress.start()
    for node in G.dfs():
        step_idx = node.step_idx
        if step_idx_limit is not None and step_idx > step_idx_limit:
            break

        if start_node and start_node != node:
            continue

        # collect outputs from previous nodes
        # InputNode is already set above
        output_tensors = self.collect_outputs(G, saved_outputs, node)

        if not silent:
            ExecutionProgress.progress(step_idx, node.name)
        if parent_node:
            nid = NodeId(parent_node, node)
        else:
            nid = NodeId(node, None)

        if record_inputs is not None:
            if output_tensors is None:
                record_inputs[nid] = output_tensors
            else:
                # Copies are stored rather than the collected tensors
                # themselves.
                record_inputs[nid] = [
                    np.copy(output_tensor)
                    for output_tensor in output_tensors
                ]
        # qrec stays None for fusion input/output nodes, when there are no
        # records, or when the mode does not quantize this node. Every
        # quantize/dequantize branch further down is guarded by ``qrec``.
        if isinstance(node, (FusionInputParameters, FusionOutputParameters)):
            qrec = None
        else:
            if self._qrecs and qmode.get_quantized(node, step_idx):
                if nid not in self._qrecs:
                    LOG.warning("no quantization parameters on %s",
                                node.name)
                    qrec = None
                else:
                    qrec = self._qrecs[nid]
                # NOTE(review): as reconstructed, this also runs after the
                # warning above with qrec = None, where qrec.in_qs would
                # fail - confirm whether it belongs inside the ``else``.
                if qmode.is_step and output_tensors:
                    output_tensors = [
                        qrec.in_qs[i].quantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
            else:
                qrec = None

        details = {} if yield_details and (
            not only_yield_step or step_idx == step_idx_limit) else None
        if isinstance(
                node,
                (FilterFusionBase, ActivationFusionBase,
                 PaddedAddFusionParameters, MatMulOpFusionParameters)):
            # The nested call is always silent. step_idx_limit, start_node,
            # only_yield_step and record_inputs are not forwarded to it.
            for f_step_idx, f_pnode, f_node, f_output_tensors, f_details in self.execute_iterator(
                    output_tensors,
                    qmode=qmode,
                    yield_fusions=yield_fusions,
                    yield_details=yield_details,
                    silent=True,
                    parent_node=node,
                    parent_step_idx=step_idx,
                    saved_outputs=saved_outputs,
                    G=node.subgraph):
                if yield_fusions and not isinstance(
                        f_node,
                        (FusionInputParameters, FusionOutputParameters)):
                    yield f_step_idx, f_pnode, f_node, f_output_tensors, f_details
            # Rebuild this node's output list from what the subgraph output
            # nodes saved, placed according to each output node's idx.
            f_outputs = node.subgraph.outputs()
            num_outputs = max(f_output.idx for f_output in f_outputs) + 1
            output_tensors = [None] * num_outputs
            for f_output in f_outputs:
                output_tensors[f_output.idx] = saved_outputs[f_output][0]
        elif isinstance(node, (InputParameters, FusionInputParameters)):
            output_tensors = KernelExecuter.execute(
                node, in_tensors, qrec, details)
        else:
            output_tensors = KernelExecuter.execute(
                node, output_tensors, qrec, details)

        # Yield for this node; the yielded tensors depend on the mode. Nodes
        # inside a fusion subgraph are reported against their parent.
        if qmode.dequantize and qrec:
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(output_tensor)
                for i, output_tensor in enumerate(output_tensors)
            ]
            if parent_node:
                yield parent_step_idx, parent_node, node, qoutput_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
            # In step mode the dequantized tensors are what gets saved for
            # the following nodes.
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = qoutput_tensors
        elif qmode.is_float_q_deq and qrec:
            if qmode.is_step and qmode.get_quantized(node, step_idx):
                output_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
            # Quantize then dequantize with the same out_qs entry.
            qoutput_tensors = [
                qrec.out_qs[i].dequantize(
                    qrec.out_qs[i].quantize(output_tensor))
                for i, output_tensor in enumerate(output_tensors)
            ]
            if parent_node:
                yield parent_step_idx, parent_node, node, qoutput_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, qoutput_tensors, details
        else:
            if qmode.is_step and qmode.get_quantized(node,
                                                     step_idx) and qrec:
                output_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
            if parent_node:
                yield parent_step_idx, parent_node, node, output_tensors, details
            elif not only_yield_step or step_idx == step_idx_limit:
                yield step_idx, node, None, output_tensors, details

        self.save_output(saved_outputs, node, output_tensors)

    if not silent:
        ExecutionProgress.end()