Beispiel #1
0
    def do_validate(self, args: argparse.Namespace):
        """
Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
folder). Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of
digits must be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits
must be 3, "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
'file1':label1, ...}) and given to the function with --label_json
"""
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        number_samples = sum(1 for _ in glob_input_files(args.input_files))

        if args.vww_instances_file:
            validation = ValidateFromVWWInstances(
                args.vww_instances_file,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.label_json:
            validation = ValidateFromJSON(
                args.label_json,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.class_number is not None:
            validation = ValidateFromClass(
                args.class_number,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        else:
            validation = ValidateFromName(
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = validation.validate(
                    file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                if not i % args.progress_every and i > 0:
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * sum(good_predictions) / len(good_predictions)
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
    def execute_qnoq_iterator(self,
                              in_tensors,
                              step_idx_limit=None,
                              silent=False,
                              yield_fusions=True):
        """Execute each graph step with both kernel switches and yield the results.

        For every step the node is run through ``self._kernel_switch`` and
        through ``self._quantized_kernel_switch``. The inputs of the quantized
        run are the first run's inputs quantized with the node's ``in_qs``
        (input nodes receive ``in_tensors`` unchanged in both runs), and its
        outputs are dequantized with the node's ``out_qs``. Only the outputs
        of the first run are saved and fed to the following steps.

        Args:
            in_tensors: tensors handed to InputParameters and
                ConstantInputParameters nodes.
            step_idx_limit: when not None, iteration stops at the first step
                whose index is greater than this value.
            silent: when true, no log message or progress report is emitted.
            yield_fusions: when true, an extra item is yielded for every node
                contained in a fusion.

        Yields:
            Tuples ``(step_idx, node, output, details, qoutput, qdetails,
            fusion_node)``. ``fusion_node`` is None for the per-step item and
            the contained node for items produced inside a fusion.
        """
        report_progress = not silent
        if report_progress:
            LOG.info("execute quantization comparison")
            ExecutionProgress.start()

        results_by_node = {}
        for step_idx, step in enumerate(self._G.graph_state.steps):
            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            node = step['node']
            if report_progress:
                ExecutionProgress.progress(step_idx, node.name)

            float_out = self.collect_outputs(results_by_node, node)
            qrec = self._qrecs[NodeId(node, None)]

            if isinstance(node, (ConvFusionParameters, ActivationFusion)):
                # Chain the contained nodes: each one consumes the output of
                # the previous one, re-quantized for the quantized run.
                for inner in node.contained_nodes():
                    inner_qrec = self._qrecs[NodeId(node, inner)]
                    quant_in = [
                        inner_qrec.in_qs[pos].quantize(tensor)
                        for pos, tensor in enumerate(float_out)
                    ]

                    details = {}
                    float_out = self._kernel_switch.execute(
                        inner,
                        float_out,
                        inner_qrec if self._G.has_quantized_parameters else None,
                        details=details)
                    qdetails = {}
                    quant_raw = self._quantized_kernel_switch.execute(
                        inner, quant_in, inner_qrec, details=qdetails)
                    quant_out = [
                        inner_qrec.out_qs[pos].dequantize(tensor)
                        for pos, tensor in enumerate(quant_raw)
                    ]
                    if yield_fusions:
                        yield step_idx, node, float_out, details, quant_out, qdetails, inner
            else:
                if isinstance(node,
                              (InputParameters, ConstantInputParameters)):
                    # Input nodes take the caller's tensors as-is in both runs.
                    float_in = quant_in = in_tensors
                else:
                    float_in = float_out
                    quant_in = [
                        qrec.in_qs[pos].quantize(tensor)
                        for pos, tensor in enumerate(float_out)
                    ]

                details = {}
                float_out = self._kernel_switch.execute(
                    node,
                    float_in,
                    qrec if self._G.has_quantized_parameters else None,
                    details=details)
                qdetails = {}
                quant_raw = self._quantized_kernel_switch.execute(
                    node, quant_in, qrec, details=qdetails)
                quant_out = [
                    qrec.out_qs[pos].dequantize(tensor)
                    for pos, tensor in enumerate(quant_raw)
                ]

            # For a fusion this reports the values left by its last contained
            # node.
            yield step_idx, node, float_out, details, quant_out, qdetails, None
            self.save_output(results_by_node, node, float_out)

        if report_progress:
            ExecutionProgress.end()
    def execute_qnoq_iterator(self,
                              in_tensors,
                              step_idx_limit=None,
                              silent=False,
                              yield_fusions=True,
                              parent_node=None,
                              parent_step_idx=None,
                              saved_outputs=None,
                              G=None):
        """Execute each node unquantized and quantized and yield both results.

        Nodes are visited in ``G.dfs()`` order. Every non-fusion node is run
        twice through ``KernelExecuter.execute``: once without a quantization
        record and once with the node's record, on inputs quantized with the
        record's ``in_qs`` (input nodes receive ``in_tensors`` unchanged in
        both runs). The quantized outputs are dequantized with ``out_qs``.
        Fusion nodes are handled by recursing into ``node.subgraph``. Only the
        unquantized outputs are saved and fed to later nodes.

        Args:
            in_tensors: tensors handed to InputParameters and
                ConstantInputParameters nodes.
            step_idx_limit: when not None, iteration stops at the first node
                whose ``step_idx`` is greater than this value.
            silent: when true, no log message or progress report is emitted.
            yield_fusions: when true, items produced inside a fusion subgraph
                are re-yielded.
            parent_node: the fusion node being expanded (recursive calls
                only); used to build the NodeId of contained nodes.
            parent_step_idx: step index of ``parent_node``; accepted but not
                read by this method.
            saved_outputs: shared output store for recursive calls; replaced
                by a fresh dict when ``G`` is None.
            G: graph to iterate; defaults to ``self._G``.

        Yields:
            Tuples ``(step_idx, node, output, details, qoutput, qdetails,
            fusion_node)``. Items produced by this call always carry None in
            the last position.
        """
        if not silent:
            LOG.info("execute quantization comparison")
            ExecutionProgress.start()
        if G is None:
            # Top-level call: start from the main graph with an empty store.
            G = self._G
            saved_outputs = {}

        for node in G.dfs():
            step_idx = node.step_idx
            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            if not silent:
                ExecutionProgress.progress(step_idx, node.name)

            output = self.collect_outputs(G, saved_outputs, node)
            if parent_node:
                nid = NodeId(parent_node, node)
            else:
                nid = NodeId(node, None)

            # Fusion boundary nodes get no quantization record.
            # NOTE(review): with qrec None the non-fusion branch below
            # dereferences qrec.in_qs / qrec.out_qs whenever there is at
            # least one tensor to convert - confirm how these nodes are
            # expected to be executed here.
            if isinstance(node,
                          (FusionInputParameters, FusionOutputParameters)):
                qrec = None
            else:
                qrec = self._qrecs[nid]

            if isinstance(node, (FilterFusionBase, ActivationFusionBase,
                                 PaddedAddFusionParameters)):
                # NOTE(review): `silent` is forwarded unchanged, so a
                # non-silent run logs and restarts/ends the progress report
                # for every fusion - confirm this is intended.
                for (f_step_idx, f_pnode, f_output, f_details, f_qoutput,
                     f_qdetails, f_node) in self.execute_qnoq_iterator(
                         output,
                         yield_fusions=yield_fusions,
                         silent=silent,
                         parent_node=node,
                         parent_step_idx=step_idx,
                         saved_outputs=saved_outputs,
                         G=node.subgraph):
                    if yield_fusions and not isinstance(
                            f_node,
                        (FusionInputParameters, FusionOutputParameters)):
                        yield f_step_idx, f_pnode, f_output, f_details, f_qoutput, f_qdetails, f_node

                # The fusion's outputs are whatever the subgraph's output
                # nodes stored, placed at each output node's idx.
                f_outputs = node.subgraph.outputs()
                num_outputs = max(f_out.idx for f_out in f_outputs) + 1

                output = [None] * num_outputs
                for f_out in f_outputs:
                    output[f_out.idx] = saved_outputs[f_out][0]
                qoutput = []
                # Fresh dicts for the fusion's own item: without this the
                # yield below would raise UnboundLocalError or report the
                # previous node's details.
                details = {}
                qdetails = {}
            else:
                if isinstance(node,
                              (InputParameters, ConstantInputParameters)):
                    # Input nodes take the caller's tensors as-is in both runs.
                    details = {}
                    output = KernelExecuter.execute(node,
                                                    in_tensors,
                                                    None,
                                                    details=details)
                    qdetails = {}
                    qoutput = KernelExecuter.execute(node,
                                                     in_tensors,
                                                     qrec,
                                                     details=qdetails)
                else:
                    # Quantize the unquantized inputs before `output` is
                    # overwritten by the unquantized run.
                    qoutput = []
                    for val_idx, val in enumerate(output):
                        qoutput.append(qrec.in_qs[val_idx].quantize(val))
                    details = {}
                    output = KernelExecuter.execute(node,
                                                    output,
                                                    None,
                                                    details=details)
                    qdetails = {}
                    qoutput = KernelExecuter.execute(node,
                                                     qoutput,
                                                     qrec,
                                                     details=qdetails)

                qoutput = [
                    qrec.out_qs[i].dequantize(out)
                    for i, out in enumerate(qoutput)
                ]

            yield step_idx, node, output, details, qoutput, qdetails, None
            self.save_output(saved_outputs, node, output)

        if not silent:
            ExecutionProgress.end()
    def execute_iterator(self,
                         in_tensors: Sequence[np.ndarray],
                         step_idx_limit: Optional[int] = None,
                         start_node: Optional[Parameters] = None,
                         qmode: Optional[QuantizationMode] = None,
                         yield_fusions=True,
                         yield_details=True,
                         only_yield_step=False,
                         record_inputs: Optional[Mapping] = None,
                         silent=False):
        """Execute the graph step by step, yielding the result of each step.

        Args:
            in_tensors: tensors handed to InputParameters nodes.
            step_idx_limit: when not None, iteration stops at the first step
                whose index is greater than this value.
            start_node: when set, every step whose node differs from it is
                skipped.
            qmode: quantization mode; ``QuantizationMode.none()`` when None.
            yield_fusions: when true, an item is also yielded for every node
                contained in a fusion.
            yield_details: when true, a dict is passed to the kernels to
                collect details; otherwise None is passed.
            only_yield_step: when true, per-step items are only yielded for
                the step equal to ``step_idx_limit``.
            record_inputs: optional mapping that receives copies of each
                node's input tensors, keyed by NodeId.
            silent: when true, no log message or progress report is emitted.

        Yields:
            Tuples ``(step_idx, node, fusion_node, output_tensors, details)``.
            ``fusion_node`` is None for the per-step item.
        """
        if qmode is None:
            qmode = QuantizationMode.none()

        saved_outputs = {}

        if not silent:
            LOG.info("execute uncached: quantization mode %s", qmode)
            ExecutionProgress.start()
        for step_idx, step in enumerate(self._G.graph_state.steps):

            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            node = step['node']

            if start_node and start_node != node:
                continue

            # collect outputs from previous nodes
            # InputNode is already set above
            output_tensors = self.collect_outputs(saved_outputs, node)

            if not silent:
                ExecutionProgress.progress(step_idx, node.name)
            nid = NodeId(node, None)
            if record_inputs is not None:
                # Copies are stored so later processing cannot alter the
                # recorded inputs.
                if output_tensors is None:
                    record_inputs[nid] = output_tensors
                else:
                    record_inputs[nid] = [
                        np.copy(output_tensor)
                        for output_tensor in output_tensors
                    ]

            qrec = self._qrecs[nid] if self._qrecs is not None else None
            if qmode.get_quantized(node, step_idx):
                switch = self._quantized_kernel_switch
                # In step mode the incoming tensors are quantized here with
                # the node's input quantization before the quantized kernel
                # runs.
                if qmode.is_step and output_tensors:
                    output_tensors = [
                        qrec.in_qs[i].quantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
            else:
                switch = self._kernel_switch

            details = {} if yield_details and (
                not only_yield_step or step_idx == step_idx_limit) else None
            if isinstance(node, (ConvFusionParameters, ActivationFusion)):
                # Chain the contained nodes: each consumes the previous
                # node's output.
                for fusion_node in node.contained_nodes():
                    fnid = NodeId(node, fusion_node)
                    fqrec = None if not qrec else self._qrecs[fnid]
                    if record_inputs is not None:
                        # Record under the contained node's own id so the
                        # fusion's entry (stored above under nid) is not
                        # overwritten on every iteration.
                        record_inputs[fnid] = [
                            np.copy(output_tensor)
                            for output_tensor in output_tensors
                        ]
                    details = {} if yield_fusions and yield_details else None
                    output_tensors = switch.execute(fusion_node,
                                                    output_tensors, fqrec,
                                                    details)
                    if yield_fusions:
                        if qmode.dequantize:
                            qoutput_tensors = [
                                fqrec.out_qs[i].dequantize(output_tensor) for
                                i, output_tensor in enumerate(output_tensors)
                            ]
                            yield step_idx, node, fusion_node, qoutput_tensors, details
                        elif qmode.is_float_q_deq:
                            # Round-trip through the output quantization.
                            qoutput_tensors = [
                                fqrec.out_qs[i].dequantize(
                                    fqrec.out_qs[i].quantize(output_tensor))
                                for i, output_tensor in enumerate(
                                    output_tensors)
                            ]
                            yield step_idx, node, fusion_node, qoutput_tensors, details
                        else:
                            yield step_idx, node, fusion_node, output_tensors, details
            elif isinstance(node, InputParameters):
                output_tensors = switch.execute(node, in_tensors, qrec,
                                                details)
            else:
                output_tensors = switch.execute(node, output_tensors, qrec,
                                                details)

            # NOTE(review): the two quantization branches below dereference
            # qrec, which is None when self._qrecs is None - confirm those
            # modes are never requested without quantization records.
            if qmode.dequantize:
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
                # In step mode the dequantized values are what gets saved for
                # the following steps.
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = qoutput_tensors
            elif qmode.is_float_q_deq:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                # Yield the outputs round-tripped through the output
                # quantization; the saved tensors are not round-tripped.
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(
                        qrec.out_qs[i].quantize(output_tensor))
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
            else:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, output_tensors, details

            self.save_output(saved_outputs, node, output_tensors)

        if not silent:
            ExecutionProgress.end()
    def execute_iterator(self,
                         in_tensors: Sequence[np.ndarray],
                         step_idx_limit: Optional[int] = None,
                         start_node: Optional[Parameters] = None,
                         qmode: Optional[QuantizationMode] = None,
                         yield_fusions=True,
                         yield_details=True,
                         only_yield_step=False,
                         record_inputs: Optional[Mapping] = None,
                         silent=False,
                         parent_node=None,
                         parent_step_idx=None,
                         saved_outputs=None,
                         G=None):
        """Execute a graph node by node, yielding the result of each node.

        Nodes are visited in ``G.dfs()`` order. Fusion nodes are executed by
        recursing into ``node.subgraph`` with the fusion's input tensors.

        Args:
            in_tensors: tensors handed to InputParameters and
                FusionInputParameters nodes.
            step_idx_limit: when not None, iteration stops at the first node
                whose ``step_idx`` is greater than this value.
            start_node: when set, every node that differs from it is skipped.
            qmode: quantization mode; ``QuantizationMode.none()`` when None.
            yield_fusions: when true, items produced inside a fusion subgraph
                are re-yielded (fusion input/output nodes excluded).
            yield_details: when true, a dict is passed to the kernels to
                collect details; otherwise None is passed.
            only_yield_step: when true, top-level items are only yielded for
                the node whose ``step_idx`` equals ``step_idx_limit``.
            record_inputs: optional mapping that receives copies of each
                node's input tensors, keyed by NodeId.
            silent: when true, no log message or progress report is emitted.
            parent_node: the fusion node being expanded (recursive calls
                only).
            parent_step_idx: step index of ``parent_node`` (recursive calls
                only).
            saved_outputs: shared output store for recursive calls; replaced
                by a fresh dict when ``G`` is None.
            G: graph to iterate; defaults to ``self._G``.

        Yields:
            Tuples ``(step_idx, node, fusion_node, output_tensors, details)``.
            For a top-level call ``fusion_node`` is None; inside a fusion the
            tuple is ``(parent_step_idx, parent_node, node, ...)``.
        """
        if qmode is None:
            qmode = QuantizationMode.none()

        if G is None:
            # Top-level call: start from the main graph with an empty store.
            G = self._G
            saved_outputs = {}

        if not silent:
            LOG.info("execute uncached: quantization mode %s", qmode)
            ExecutionProgress.start()
        for node in G.dfs():
            step_idx = node.step_idx
            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            if start_node and start_node != node:
                continue

            # collect outputs from previous nodes
            # InputNode is already set above
            output_tensors = self.collect_outputs(G, saved_outputs, node)

            if not silent:
                ExecutionProgress.progress(step_idx, node.name)
            if parent_node:
                nid = NodeId(parent_node, node)
            else:
                nid = NodeId(node, None)
            if record_inputs is not None:
                # Copies are stored so later processing cannot alter the
                # recorded inputs.
                if output_tensors is None:
                    record_inputs[nid] = output_tensors
                else:
                    record_inputs[nid] = [
                        np.copy(output_tensor)
                        for output_tensor in output_tensors
                    ]
            # Fusion boundary nodes never get a quantization record.
            if isinstance(node,
                          (FusionInputParameters, FusionOutputParameters)):
                qrec = None
            else:
                if self._qrecs and qmode.get_quantized(node, step_idx):
                    if nid not in self._qrecs:
                        LOG.warning("no quantization parameters on %s",
                                    node.name)
                        qrec = None
                    else:
                        qrec = self._qrecs[nid]
                    # Inputs can only be quantized when a record exists;
                    # after the warning above the node runs on its
                    # unquantized inputs instead of failing on a None record.
                    if qrec and qmode.is_step and output_tensors:
                        output_tensors = [
                            qrec.in_qs[i].quantize(output_tensor)
                            for i, output_tensor in enumerate(output_tensors)
                        ]
                else:
                    qrec = None

            details = {} if yield_details and (
                not only_yield_step or step_idx == step_idx_limit) else None
            if isinstance(
                    node,
                (FilterFusionBase, ActivationFusionBase,
                 PaddedAddFusionParameters, MatMulOpFusionParameters)):

                # The nested run is always silent so the outer progress
                # report is not restarted.
                for f_step_idx, f_pnode, f_node, f_output_tensors, f_details in self.execute_iterator(
                        output_tensors,
                        qmode=qmode,
                        yield_fusions=yield_fusions,
                        yield_details=yield_details,
                        silent=True,
                        parent_node=node,
                        parent_step_idx=step_idx,
                        saved_outputs=saved_outputs,
                        G=node.subgraph):
                    if yield_fusions and not isinstance(
                            f_node,
                        (FusionInputParameters, FusionOutputParameters)):
                        yield f_step_idx, f_pnode, f_node, f_output_tensors, f_details
                # The fusion's outputs are whatever the subgraph's output
                # nodes stored, placed at each output node's idx.
                f_outputs = node.subgraph.outputs()
                num_outputs = max(f_output.idx for f_output in f_outputs) + 1
                output_tensors = [None] * num_outputs
                for f_output in f_outputs:
                    output_tensors[f_output.idx] = saved_outputs[f_output][0]

            elif isinstance(node, (InputParameters, FusionInputParameters)):
                output_tensors = KernelExecuter.execute(
                    node, in_tensors, qrec, details)
            else:
                output_tensors = KernelExecuter.execute(
                    node, output_tensors, qrec, details)

            if qmode.dequantize and qrec:
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, qoutput_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
                # In step mode the dequantized values are what gets saved for
                # the following nodes.
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = qoutput_tensors
            elif qmode.is_float_q_deq and qrec:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                # Yield the outputs round-tripped through the output
                # quantization; the saved tensors are not round-tripped.
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(
                        qrec.out_qs[i].quantize(output_tensor))
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, qoutput_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
            else:
                if qmode.is_step and qmode.get_quantized(node,
                                                         step_idx) and qrec:
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, output_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, output_tensors, details

            self.save_output(saved_outputs, node, output_tensors)

        if not silent:
            ExecutionProgress.end()