Ejemplo n.º 1
0
    def _collect(self, G, input_tensors, step_idx) -> Mapping[NodeId, Mapping]:
        """Gather per-node statistics from two executions of the graph.

        The graph is run twice through ``self._collect_execution``: once with
        the graph's quantization records only if ``G.has_quantized_parameters``
        is set (otherwise with no records), and once with ``G.quantization`` in
        ``QuantizationMode.all_dequantize()``. Entries of both result
        sequences are paired by position and handed to ``self._collect_one``.

        Args:
            G: graph to execute.
            input_tensors: inputs forwarded unchanged to both executions.
            step_idx: not used by this method.

        Returns:
            An ``OrderedDict`` keyed by ``NodeId``. For each executed node the
            entries of its fusion outputs (if any) are inserted before the
            entry for the node itself.
        """
        LOG.debug("gather quantization statistics")

        # First pass: only hand over quantization records when the graph
        # reports quantized parameters.
        first_qrecs = G.quantization if G.has_quantized_parameters else None
        first_executer = GraphExecuter(G, qrecs=first_qrecs)
        first_pass = self._collect_execution(first_executer, input_tensors, first_qrecs)

        # Second pass: always uses the graph's quantization records.
        second_executer = GraphExecuter(G, qrecs=G.quantization)
        second_pass = self._collect_execution(
            second_executer,
            input_tensors,
            G.quantization,
            qmode=QuantizationMode.all_dequantize())

        collected = OrderedDict()
        for position, first_stat in enumerate(first_pass):
            second_stat = second_pass[position]
            outer_node = first_stat['node']

            if first_stat['fusion_outputs']:
                # Nodes inside a fusion are keyed by (fusion node, inner node).
                for inner_pos, inner_first in enumerate(first_stat['fusion_outputs']):
                    inner_id = NodeId(outer_node, inner_first['node'])
                    collected[inner_id] = self._collect_one(
                        inner_first,
                        second_stat['fusion_outputs'][inner_pos],
                        G.quantization[inner_id],
                        quant_compare=self._quant_compare)

            outer_id = NodeId(outer_node, None)
            collected[outer_id] = self._collect_one(
                first_stat,
                second_stat,
                G.quantization[outer_id],
                quant_compare=self._quant_compare)

        return collected
Ejemplo n.º 2
0
def test_validate_mn1_quantized1(mn1q_graph, mn1f_graph):
    """Compare the float and quantized imports of the same model.

    Checks, in order: dequantized biases of the filter in step 2 against the
    float graph's biases, the float graph output against a stored reference,
    the quantized graph executed with ``QuantizationMode.none()`` against the
    float reference, and the quantized graph executed with
    ``QuantizationMode.all_dequantize()`` against a stored quantized reference.
    """

    def import_and_fuse(model_file, import_opts):
        # Import, add dimensions, adjust order, apply the pow2 match group and
        # add dimensions again - the same sequence for both graphs.
        graph = TfliteImporter().create_graph(model_file, import_opts)
        graph.add_dimensions()
        graph.adjust_order()
        get_pow2_match_group().match(graph)
        graph.add_dimensions()
        return graph

    Gf = import_and_fuse(mn1f_graph, {'load_tensors': True})
    G = import_and_fuse(mn1q_graph, {
        'load_tensors': True,
        'load_quantization': True
    })

    # Bias check on the first contained filter of step 2 in each graph.
    float_fusion = Gf.graph_state.steps[2]['node']
    float_filter = float_fusion.contained_filters()[0]
    quant_fusion = G.graph_state.steps[2]['node']
    quant_filter = quant_fusion.contained_filters()[0]
    qrec = G.quantization[NodeId(quant_fusion, quant_filter)]
    dequantized_biases = qrec.biases_q.get_dequantized(quant_filter.biases)
    assert np.max(np.abs(float_filter.biases - dequantized_biases)) < 0.1

    # Stored input is reshaped to (224, 224, 3) and moved to channel-first.
    input_tensor = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    input_tensor = input_tensor.reshape((224, 224, 3)).transpose((2, 0, 1))

    float_run = GraphExecuter(Gf).execute([input_tensor])
    float_reference = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_float.npy')
    assert np.max(np.abs(float_run[-1][0] - float_reference[0])) < 0.0001

    unquantized_run = GraphExecuter(G, qrecs=G.quantization).execute(
        [input_tensor], qmode=QuantizationMode.none())
    assert np.max(np.abs(unquantized_run[-1][0] - float_reference[0])) < 0.2

    dequantized_run = GraphExecuter(G, qrecs=G.quantization).execute(
        [input_tensor], qmode=QuantizationMode.all_dequantize())
    quant_reference = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_quant.npy')
    # A tighter bound of 0.16 was previously used here and has been relaxed.
    assert np.max(np.abs(dequantized_run[-1][0] - quant_reference[0])) < 0.28
Ejemplo n.º 3
0
def test_external_biases_sq8(qvww_graph):
    """Run a quantized model in dequantized and default mode and compare.

    The model ends with an external biases layer expressed as a constant add.
    The test passes when the largest per-step difference between the two
    executions stays below 2.2.
    """
    G = TfliteImporter().create_graph(
        qvww_graph, {"load_quantization": True, "load_tensors": True})
    G.add_dimensions()
    get_scale8_match_group().match(G)
    G.add_dimensions()

    picture = Image.open('tests/vwwimages/COCO_val2014_000000174838_1.png')
    picture = picture.resize((238, 208))
    pixels = np.array(picture, dtype=np.uint8)
    # Map the 8 bit pixel values to floats centred on zero.
    input_tensor = (pixels.astype(np.float32) - 128) / 128

    executer = GraphExecuter(G, qrecs=G.quantization)
    # check if nntool can execute
    quantized_run = executer.execute(
        [input_tensor], qmode=QuantizationMode.all_dequantize())
    default_run = executer.execute([input_tensor], qmode=None)

    # NOTE(review): the difference is signed (no abs), so only positive
    # deviations are bounded by this check - confirm that this is intended.
    step_maxima = [
        np.max(quantized[0] - default[0])
        for quantized, default in zip(quantized_run, default_run)
    ]
    assert max(step_maxima) < 2.2
Ejemplo n.º 4
0
 def get_base_inputs(self, nodes, progress, quantize):
     """Return the inputs that validate correctly without compression.

     The result is cached in ``self._base_inputs``; when a cached value is
     present it is returned immediately and nothing else happens.

     Otherwise ``use_compressed`` is cleared on every node in ``nodes``,
     ``self.validate`` is run over ``self._input_files`` (in
     ``all_dequantize`` mode when ``quantize`` is truthy, ``none`` mode
     otherwise), the outcome is reported through ``progress`` and the first
     element returned by ``validate`` is cached and returned.

     Args:
         nodes: nodes whose ``use_compressed`` flag is cleared before validating.
         progress: callable invoked as ``progress(message, flag)``.
         quantize: selects the quantization mode used for validation.
     """
     if self._base_inputs is not None:
         return self._base_inputs

     for node in nodes:
         node.use_compressed = False
     progress(
         f"validation without compression {'quantized: ' if quantize else ': '}",
         False)

     if quantize:
         qmode = QuantizationMode.all_dequantize()
     else:
         qmode = QuantizationMode.none()

     def report_prediction(pred):
         # one '+' or '-' per validated input
         return progress('+' if pred else '-', False)

     validated = self.validate(
         qmode,
         inputs=self._input_files,
         progress=report_prediction)
     base_inputs, good_margin, bad_inputs, bad_margin = validated

     progress('', True)
     progress(
         f'good {len(base_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
         True)
     self._base_inputs = base_inputs
     return base_inputs
Ejemplo n.º 5
0
def test_graph_calc_quantized8(mnist_unfused_8bit_state, mnist_images):
    """Compare default and dequantized execution of a saved 8 bit graph.

    Both executions are limited to step index 7. The test passes when the
    absolute difference of the first output of step 7 stays below 9.
    """

    def load_first_image():
        # The image is loaded afresh for every execution, as in the original
        # test, so each run receives its own array.
        tensor = import_data(mnist_images[0],
                             height=28,
                             width=28,
                             offset=0,
                             divisor=255)
        return tensor.reshape((28, 28, 1))

    G = load_state(mnist_unfused_8bit_state)

    first_input = load_first_image()
    executer = GraphExecuter(G, qrecs=G.quantization)
    default_run = executer.execute([first_input], step_idx_limit=7)

    second_input = load_first_image()
    dequantized_run = executer.execute([second_input],
                                       qmode=QuantizationMode.all_dequantize(),
                                       step_idx_limit=7)

    # Differences are computed for all eight steps; only the last is asserted.
    step_diffs = [
        default_run[step][0] - dequantized_run[step][0] for step in range(8)
    ]
    assert np.max(np.abs(step_diffs[7])) < 9
Ejemplo n.º 6
0
    def do_validate(self, args: argparse.Namespace):
        """
Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
folder). Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of
digits must be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits
must be 3, "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
'file1':label1, ...}) and given to the function with --label_json
"""
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        number_samples = sum(1 for _ in glob_input_files(args.input_files))

        if args.vww_instances_file:
            validation = ValidateFromVWWInstances(
                args.vww_instances_file,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.label_json:
            validation = ValidateFromJSON(
                args.label_json,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.class_number is not None:
            validation = ValidateFromClass(
                args.class_number,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        else:
            validation = ValidateFromName(
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = validation.validate(
                    file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                if not i % args.progress_every and i > 0:
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * sum(good_predictions) / len(good_predictions)
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
Ejemplo n.º 7
0
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(
                    -num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(outputs)
            else:
                self.G.print_intermediates(outputs,
                                           limit=step,
                                           width=args.number_width,
                                           precision=args.precision,
                                           channel=args.channel,
                                           order=['c', 'h', 'w'],
                                           checksum=args.checksum)

            if args.visualize_detection:
                img_in = Image.open(file_per_input[0]).convert('RGBA')

                height = img_in.size[1] if input_args[
                    'height'] == -1 else input_args['height']
                width = img_in.size[0] if input_args[
                    'width'] == -1 else input_args['width']
                img_in = img_in.resize((width, height))

                if self.G.has_ssd_postprocess:
                    bboxes, classes, scores, _ = [
                        outputs[graph_out.step_idx][0]
                        for graph_out in self.G.outputs()
                    ]
                    draw = ImageDraw.Draw(img_in, 'RGBA')

                    for box, score, class_id in zip(bboxes, scores, classes):
                        if args.quantize and not args.dequantize:
                            ssd_node = [
                                node for node in self.G.nodes()
                                if isinstance(node, SSDDetectorParameters)
                            ][0]
                            ssd_qrec = self.G.quantization[NodeId(ssd_node)]
                            x0, x1 = int(box[1] * width *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[3] * width *
                                             ssd_qrec.out_qs[0].scale)
                            y0, y1 = int(box[0] * height *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[2] * height *
                                             ssd_qrec.out_qs[0].scale)
                            score = score * ssd_qrec.out_qs[2].scale
                        else:
                            x0, x1 = int(box[1] * width), int(box[3] * width)
                            y0, y1 = int(box[0] * height), int(box[2] * height)
                        rect_points = (x0, y0), (x1, y0), (x1, y1), (x0,
                                                                     y1), (x0,
                                                                           y0)
                        draw.line(rect_points, fill='red', width=2)
                        txt = '{}@{}%'.format(class_id, int(score * 100))
                        draw.text([x0, y0 - 10], txt, fill=(0, 255, 0))
                img_in.show()

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == self.G.num_inputs:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                if len(args.input_files) != self.G.num_inputs:
                    self.perror(
                        "can only save dumps on one input to tensor store")
                    return
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
Ejemplo n.º 8
0
    def tune_all(self, nodes, progress, quantize=False):
        """Search for the QSNR setting that gives the best validated compression.

        A ``Maximizer`` over the range 0..120 repeatedly calls an objective
        that compresses ``nodes`` with ``self.tune_qsnr`` and then validates
        the result with ``self.validate``. The search is re-run (starting from
        the previous result, with a smaller step and direction ``'up'``) when
        ``quantize`` is set and the quantized mode has not been tried yet, or
        when the set of inputs being validated differs from the base inputs.
        Finally the nodes are compressed once more with the selected QSNR.

        Args:
            nodes: nodes to compress.
            progress: callable invoked as ``progress(message, flag)``.
            quantize: when truthy a further pass validates in
                ``all_dequantize`` mode.

        Returns:
            The second element of the last ``Maximizer.run`` result (the
            selected QSNR).

        Raises:
            CompressionError: from the objective when nothing can be
                compressed at QSNR 0.
        """
        base_inputs = self.get_base_inputs(nodes, progress, quantize)

        def mark_compressed(_, comp):
            return progress('+' if comp else '-', False)

        def mark_prediction(pred):
            return progress('+' if pred else '-', False)

        def announce_search(cur, step, direct):
            return progress(f'QSNR {cur} step {step} direction {direct}', True)

        def opt_func(qsnr, state):
            progress('compressing: ', False)
            compression = self.tune_qsnr(nodes, qsnr, progress=mark_compressed)

            # Accept only a non-empty result that is not below the best value
            # recorded so far; the comparison is skipped when nothing was
            # compressed.
            acceptable = bool(compression) and not (
                'best_compression' in state
                and state['best_compression'] > compression)
            if not acceptable:
                if qsnr == 0:
                    raise CompressionError("could not compress graph")
                return None

            state['best_compression'] = compression
            progress('', True)
            progress('validating: ', False)
            good_inputs, good_margin, bad_inputs, bad_margin = self.validate(
                state['qmode'],
                inputs=state['cur_inputs'],
                progress=mark_prediction)
            progress('', True)
            progress(
                f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
                True)

            if not bad_inputs:
                return compression
            # Outside the final pass, continue with the failing inputs only.
            if not state['final']:
                state['cur_inputs'] = bad_inputs
            del state['best_compression']
            return None

        opt_state = {
            'cur_inputs': base_inputs.copy(),
            'final': False,
            'qmode': QuantizationMode.none()
        }
        next_qsnr = 30
        next_step = 15
        next_direction = 'down'
        maximizer = Maximizer(opt_func, 0, 120)
        while True:
            res = maximizer.run(
                next_qsnr,
                opt_state,
                progress=announce_search,
                start_step=next_step,
                dir_start=next_direction)
            if quantize and opt_state['qmode'] == QuantizationMode.none():
                progress('analysing quantized', True)
                opt_state['qmode'] = QuantizationMode.all_dequantize()
            elif opt_state['cur_inputs'] != base_inputs:
                progress('check with all inputs', True)
                opt_state['final'] = True
            else:
                break
            # Another pass is needed: restart from the previous result over
            # the full set of base inputs.
            opt_state['cur_inputs'] = base_inputs.copy()
            next_qsnr = res[1]
            next_step = 0.5
            next_direction = 'up'

        best_compression, best_qsnr = res[0], res[1]
        progress(f'tune QSNR to best {best_qsnr} compressed by {best_compression} bytes',
                 True)
        self.tune_qsnr(nodes, best_qsnr, progress=mark_compressed)
        progress('', True)

        return best_qsnr
Ejemplo n.º 9
0
    def finetune(self, nodes, progress, quantize=False):
        """Try to reduce the bit width of each compressed node, one at a time.

        Only nodes that have a ``compressed_value`` and currently use it are
        considered. On every round the candidate with the largest
        ``compressed_value.size`` that still uses more than 2 bits is picked
        and a ``Maximizer`` over 2..8 bits looks for the best setting that
        still validates. The search is repeated (from the previous result,
        step 1, direction ``'up'``) when ``quantize`` is set and the quantized
        mode has not been tried yet, or when the inputs being validated differ
        from the base inputs. If the search yields nothing the node is
        restored to its original bit width.

        Args:
            nodes: candidate nodes.
            progress: callable invoked as ``progress(message, flag)``.
            quantize: when truthy a further pass validates in
                ``all_dequantize`` mode.

        Raises:
            CompressionError: from the objective when nothing can be
                compressed at 8 bits with ``sparse`` set.
        """
        sizes = [(node, node.compressed_value) for node in nodes
                 if node.compressed_value and node.use_compressed]
        nodes = [size[0] for size in sizes]
        base_inputs = self.get_base_inputs(nodes, progress, quantize)
        # Make sure compression is switched on for every candidate before
        # tuning (presumably get_base_inputs may have switched it off).
        for node in nodes:
            if node.compressed_value:
                node.use_compressed = True

        def opt_func(bits, threshold, sparse, node, state):
            progress('compressing: ', False)
            compression = self.tune_bits(
                [node],
                bits,
                threshold=threshold,
                sparse=sparse,
                progress=lambda _, comp: progress('+' if comp else '-', False))
            # Reject an empty result or one below the best recorded so far.
            if not compression or ('best_compression' in state and
                                   state['best_compression'] > compression):
                if bits == 8 and sparse:
                    raise CompressionError("could not compress graph")
                return None
            state['best_compression'] = compression
            progress('', True)
            progress('validating: ', False)
            good_inputs, good_margin, bad_inputs, bad_margin = self.validate(
                state['qmode'],
                inputs=state['cur_inputs'],
                break_on_error=state['final'],
                progress=lambda pred: progress('+' if pred else '-', False))
            progress('', True)
            progress(
                f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
                True)
            if bad_inputs:
                # Continue with the failing inputs only.
                state['cur_inputs'] = bad_inputs
                del state['best_compression']
                return None
            return compression

        maximizer = Maximizer(opt_func, 2, 8, int_step=True)
        while sizes:
            # Ascending by size, so negative indexes walk from the largest.
            sizes.sort(key=lambda x: x[1].size)

            tune_idx = -1
            node = None
            while node is None and abs(tune_idx) <= len(sizes):
                node, comp_val = sizes[tune_idx]
                cur_bits = comp_val.bits
                if cur_bits > 2:
                    # Start the search roughly half way down, never below 2.
                    cur_step = max(cur_bits // 2, 1)
                    cur_bits = max(cur_bits - cur_step, 2)
                else:
                    # Already at the minimum width; try the next largest.
                    tune_idx -= 1
                    node = None

            if node is None:
                break

            # Drop the chosen candidate exactly once. Doing this inside the
            # re-run loop below would delete a further, untuned candidate (or
            # raise IndexError) on every additional pass.
            del sizes[tune_idx]

            progress(f'finetuning {node.name}', True)
            qmode = QuantizationMode.none()
            dir_start = 'down'
            opt_state = {
                'cur_inputs': base_inputs.copy(),
                'final': False,
                'qmode': qmode
            }
            while True:
                res = maximizer.run(
                    cur_bits,
                    None,
                    False,
                    node,
                    opt_state,
                    progress=lambda cur, step, direct: progress(
                        f'bits {cur} step {step} direction {direct}', True),
                    start_step=cur_step,
                    dir_start=dir_start)
                if res is None:
                    break
                if quantize and opt_state['qmode'] == QuantizationMode.none():
                    progress('analysing quantized', True)
                    opt_state['qmode'] = QuantizationMode.all_dequantize()
                elif opt_state['cur_inputs'] != base_inputs:
                    progress('check with all inputs', True)
                else:
                    break
                # Another pass is needed: restart from the previous result
                # over the full set of base inputs.
                opt_state['final'] = True
                opt_state['cur_inputs'] = base_inputs.copy()
                cur_bits = res[1]
                cur_step = 1
                dir_start = 'up'

            if res is None:
                progress(f'{node.name} cannot be further optimised', True)
                # Restore the bit width the node had before this round.
                self.tune_bits(
                    [node],
                    comp_val.bits,
                    progress=lambda _, comp: progress('+'
                                                      if comp else '-', False))
            else:
                progress(
                    f'{node.name} tune bits to {res[1]} compressed by {res[0]} bytes',
                    True)
                self.tune_bits(
                    [node],
                    res[1],
                    progress=lambda _, comp: progress('+'
                                                      if comp else '-', False))
                progress('', True)
Ejemplo n.º 10
0
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(-num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_per_input)            
            data = [import_data(input_file, **input_args) for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step,
                                       qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(format_dump_file(self.G, outputs, not qmode.is_none,
                                                args.dequantize, args.quantize_step))
            else:
                self.G.print_intermediates(outputs, limit=step, width=args.number_width,
                                           precision=args.precision, channel=args.channel,
                                           order=['c', 'h', 'w'])

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == 1:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles