Example #1
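The examples in this listing are excerpts from the nntool codebase and are not self-contained. As a minimal, hedged sketch, Example #1 appears to rely on imports along the following lines; the imported names are taken from the snippet itself, but the module paths are assumptions about the package layout:

import numpy as np

# NOTE: the module paths below are assumptions; only the imported names
# actually appear in the example code
from importer.tflite.tflite_importer import TfliteImporter
from graph.matches.matches import get_pow2_match_group
from execution.graph_executer import GraphExecuter
from execution.quantization_mode import QuantizationMode
from utils.node_id import NodeId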
def test_validate_mn1_quantized1(mn1q_graph, mn1f_graph):
    tfi = TfliteImporter()
    Gf = tfi.create_graph(mn1f_graph, {'load_tensors': True})
    Gf.add_dimensions()
    Gf.adjust_order()
    matcher = get_pow2_match_group()
    matcher.match(Gf)
    Gf.add_dimensions()

    tfi = TfliteImporter()
    G = tfi.create_graph(mn1q_graph, {
        'load_tensors': True,
        'load_quantization': True
    })
    G.add_dimensions()
    G.adjust_order()
    matcher = get_pow2_match_group()
    matcher.match(G)
    G.add_dimensions()

    fpnode = Gf.graph_state.steps[2]['node']
    fpcnode = fpnode.contained_filters()[0]
    qpnode = G.graph_state.steps[2]['node']
    qpcnode = qpnode.contained_filters()[0]
    nid = NodeId(qpnode, qpcnode)
    qrec = G.quantization[nid]
    dqbiases = qrec.biases_q.get_dequantized(qpcnode.biases)
    assert np.max(np.abs(fpcnode.biases - dqbiases)) < 0.1
    input_tensor = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    input_tensor = input_tensor.reshape((224, 224, 3)).transpose((2, 0, 1))

    executer = GraphExecuter(Gf)
    foutput_tensors = executer.execute([input_tensor])
    foutput_tensor = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_float.npy')
    assert np.max(np.abs(foutput_tensors[-1][0] - foutput_tensor[0])) < 0.0001

    executer = GraphExecuter(G, qrecs=G.quantization)
    qfroutput_tensors = executer.execute([input_tensor],
                                         qmode=QuantizationMode.none())
    assert np.max(np.abs(qfroutput_tensors[-1][0] - foutput_tensor[0])) < 0.2

    executer = GraphExecuter(G, qrecs=G.quantization)
    qroutput_tensors = executer.execute(
        [input_tensor], qmode=QuantizationMode.all_dequantize())

    output_tensor = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_quant.npy')
    # assert np.max(np.abs(qroutput_tensors[-1][0] - output_tensor[0])) < 0.16
    assert np.max(np.abs(qroutput_tensors[-1][0] - output_tensor[0])) < 0.28
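The three execute calls above form the comparison pattern used throughout this listing. A condensed, hedged sketch of the modes exercised; the comments describe behavior inferred from how the results are compared, not from the QuantizationMode source:

executer = GraphExecuter(G, qrecs=G.quantization)
# no quantization: float kernels throughout
float_outs = executer.execute([input_tensor], qmode=QuantizationMode.none())
# quantized kernels throughout, raw quantized outputs
quant_outs = executer.execute([input_tensor], qmode=QuantizationMode.all())
# quantized kernels with outputs dequantized, directly comparable to floats
deq_outs = executer.execute([input_tensor], qmode=QuantizationMode.all_dequantize())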
Example #2
    def _collect(self, G, input_tensors, step_idx) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        if G.has_quantized_parameters:
            quantization = G.quantization
        else:
            quantization = None
        executer = GraphExecuter(G, qrecs=quantization)
        foutputs = self._collect_execution(executer, input_tensors, quantization)
        executer = GraphExecuter(G, qrecs=G.quantization)
        qoutputs = self._collect_execution(executer,
                                           input_tensors,
                                           G.quantization,
                                           qmode=QuantizationMode.all_dequantize())
        stats = OrderedDict()
        for idx, fstat in enumerate(foutputs):
            qstat = qoutputs[idx]
            if fstat['fusion_outputs']:
                for jdx, ffstat in enumerate(fstat['fusion_outputs']):
                    nid = NodeId(fstat['node'], ffstat['node'])
                    stats[nid] =\
                        self._collect_one(ffstat,
                                          qstat['fusion_outputs'][jdx],
                                          G.quantization[nid],
                                          quant_compare=self._quant_compare)
            nid = NodeId(fstat['node'], None)
            stats[nid] = self._collect_one(fstat,
                                           qstat,
                                           G.quantization[nid],
                                           quant_compare=self._quant_compare)

        return stats
Example #3
    def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        output_ = execute(G, input_tensors, limit=self._limit)
        all_details = []
        qoutput_ = execute(G,
                           input_tensors,
                           limit=self._limit,
                           qrecs=G.quantization,
                           qmode=QuantizationMode.all(),
                           all_details=all_details)
        stats = OrderedDict()
        for idx, out in enumerate(output_):
            error_ = np.abs(out[0] - qoutput_[idx][0])
            step = G.graph_state.steps[idx]
            node = step['node']
            details = all_details[idx]
            if details:
                overflow_dot = details['overflow_dot']
                overflow_acc = details['overflow_acc']
            else:
                overflow_dot = overflow_acc = ""

            stats[NodeId(node, None)] = {
                'name': node.name,
                'op_name': node.op_name,
                'step': idx,
                'av_err': np.mean(error_),
                'max_err': np.max(error_),
                'min_err': np.min(error_),
                'qsnr': qsnr(out[0], qoutput_[idx][0]),
                'overflow_dot': overflow_dot,
                'overflow_acc': overflow_acc,
            }

        return stats
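The qsnr helper used above is not shown in this listing. For reference, a quantization signal-to-noise ratio can be sketched with the standard definition below; this is a generic formulation, not necessarily nntool's implementation:

import numpy as np

def qsnr_ref(ref: np.ndarray, quant: np.ndarray) -> float:
    # QSNR in dB: 10 * log10(signal power / quantization error power)
    ref64 = ref.astype(np.float64)
    err = np.sum((ref64 - quant.astype(np.float64)) ** 2)
    if err == 0:
        return float('inf')  # exact match
    return float(10 * np.log10(np.sum(ref64 ** 2) / err))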
Example #4
def test_graph_imu_auto_quant_and_execute_quant():
    G = create_graph("tests/graph/imu.tflite", opts={"load_tensors": True})
    G.add_dimensions()
    G.adjust_order()
    get_pow2_match_group().match(G)
    G.add_dimensions()
    stats_collector = ActivationStatsCollector()
    for input_file in ['tests/images/imu0.pgm']:
        input_tensor = import_data(input_file,
                                   offset=0,
                                   divisor=256,
                                   nptype='int16')
        stats_collector.collect_stats(G, [input_tensor])
    astats = stats_collector.reduce_stats()
    stats_collector = FilterStatsCollector()
    fstats = stats_collector.collect_stats(G)
    quantizer = SymmetricQuantizer(astats, fstats, force_width=16)
    qrecs = quantizer.quantize(G)
    G.quantization = qrecs
    executer = GraphExecuter(G, qrecs=qrecs)
    for input_file in ['tests/images/imu0.pgm']:
        input_tensor = import_data(input_file,
                                   offset=0,
                                   divisor=256,
                                   nptype='int16')
        output_ = executer.execute([input_tensor],
                                   qmode=QuantizationMode.all())
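The test above stops after executing the quantized graph. A hedged follow-up sketch of how the 16-bit run could be checked against float execution, using only calls that appear elsewhere in this listing:

foutput_ = executer.execute([input_tensor], qmode=QuantizationMode.none())
qoutput_ = executer.execute([input_tensor], qmode=QuantizationMode.all_dequantize())
# all_dequantize() returns float-comparable tensors
max_err = max(np.max(np.abs(f[0] - q[0])) for f, q in zip(foutput_, qoutput_))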
Example #5
    def execute(self,
                in_tensors: Sequence[np.ndarray],
                step_idx_limit=None,
                only_yield_step=False,
                qmode: QuantizationMode = None,
                all_details=None,
                yield_fusions=False,
                silent=False):

        if qmode is None:
            qmode = QuantizationMode.none()

        if qmode.is_step_all:
            iterator = [(qoutput, qdetails, fnode)
                        for _, _, _, _, qoutput, qdetails, fnode
                        in self.execute_qnoq_iterator(in_tensors,
                                                      yield_fusions=yield_fusions,
                                                      step_idx_limit=step_idx_limit,
                                                      silent=silent)]
        else:
            iterator = [(output_tensors, details, fnode)
                        for _, _, fnode, output_tensors, details
                        in self.execute_iterator(in_tensors, step_idx_limit=step_idx_limit,
                                                 qmode=qmode,
                                                 yield_fusions=yield_fusions,
                                                 only_yield_step=only_yield_step,
                                                 yield_details=all_details is not None,
                                                 silent=silent)]

        outputs = []
        if yield_fusions:
            fusion_outputs = []
            if all_details is not None:
                fusion_details = []

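        # walk the collected results, copying each tensor so the returned
        # snapshot cannot be mutated by later steps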
        for output_tensors, details, fnode in iterator:
            if yield_fusions:
                if fnode:
                    fusion_outputs.append([output_tensor.copy()
                                           for output_tensor in output_tensors])
                    if all_details is not None:
                        fusion_details.append(details)
                else:
                    outputs.append({
                        'outputs': [output_tensor.copy()
                                    for output_tensor in output_tensors],
                        'fusion_outputs': fusion_outputs.copy(),
                    })
                    fusion_outputs.clear()
                    if all_details is not None:
                        all_details.append({
                            'details': details,
                            'fusion_details': fusion_details.copy()
                        })
                        fusion_details.clear()
            else:
                outputs.append([output_tensor.copy() for output_tensor in output_tensors])
                if all_details is not None:
                    all_details.append(details)
        return outputs
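A hedged usage sketch for the method above; the keyword arguments come from its signature and the dict keys from the yield_fusions branch:

executer = GraphExecuter(G, qrecs=G.quantization)
details = []
results = executer.execute([input_tensor],
                           qmode=QuantizationMode.all_dequantize(),
                           yield_fusions=True,
                           all_details=details)
# with yield_fusions=True, each non-fusion step contributes a dict with
# 'outputs' and 'fusion_outputs'; per-step details accumulate in `details`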
Example #6
def test_graph_calc_quantize_one_2(value_cache, mnist_unfused_16bit_state, mnist_images):
    G = load_state(mnist_unfused_16bit_state, value_cache=value_cache)
    input_tensor = import_data(mnist_images[0], height=28, width=28, offset=0, divisor=255)
    input_tensor = input_tensor.reshape((28, 28, 1))
    output1 = execute(G, [input_tensor])
    input_tensor = import_data(mnist_images[0], height=28, width=28, offset=0, divisor=255)
    input_tensor = input_tensor.reshape((28, 28, 1))
    output2 = execute(G, [input_tensor], qmode=QuantizationMode.step(4), qrecs=G.quantization)
    diffs = []
    for i, out1 in enumerate(output1):
        diffs.append(out1[0] - output2[i][0])
    assert np.min(diffs[7]) > -2 and np.max(diffs[7]) < 2
Example #7
    def get_base_inputs(self, nodes, progress, quantize):
        if self._base_inputs is None:
            base_inputs = self._input_files
            for node in nodes:
                node.use_compressed = False
            progress(
                f"validation without compression {'quantized: ' if quantize else ': '}",
                False)
            base_inputs, good_margin, bad_inputs, bad_margin = self.validate(
                QuantizationMode.all_dequantize()
                if quantize else QuantizationMode.none(),
                inputs=self._input_files,
                progress=lambda pred: progress('+' if pred else '-', False))
            progress('', True)
            progress(
                f'good {len(base_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
                True)
            self._base_inputs = base_inputs
        else:
            base_inputs = self._base_inputs
        return base_inputs
Example #8
    def do_gen(self, args):
        """
Generate AutoTiler model C code and optionally dump tensors. If no destination file is
given, the generated code will be output to the screen. Check the 'set' command for
settings related to code generation."""
        self._check_graph()
        self._check_quantized()
        self._check_adjusted()
        if args.checksums:
            input_args = self._get_input_args(None)
            LOG.info("input file %s", args.checksums)
            data = import_data(args.checksums, **input_args)
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            executer.execute([data], qmode=QuantizationMode.all())
            self.settings['checksum_file'] = args.checksums
            self.settings['generate_checksums'] = True

        if args.tensor_directory:
            self.settings['tensor_directory'] = args.tensor_directory
        if args.model_directory:
            self.settings['model_directory'] = args.model_directory
        self.settings['basic_kernel_source_file'] = args.basic_kernel_source_file
        self.settings['basic_kernel_header_file'] = args.basic_kernel_header_file
        code_gen = CodeGenerator(self.G, DefaultNamingConvension(self.G), self.settings)

        if self.settings['template_file']:
            code_template = dynamic_template(self.settings['template_file'])
        else:
            code_template = default_template

        if args.model_file:
            with open(os.path.join(self.settings['model_directory'],
                                   args.model_file), "w") as output_fp:
                output_fp.write(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_source_file), "w") as output_fp:
                    output_fp.write(basic_kernel_source_template(self.G, code_generator=code_gen))
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_header_file), "w") as output_fp:
                    output_fp.write(basic_kernel_header_template(self.G, code_generator=code_gen))
        else:
            self.ppaged(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                self.ppaged(basic_kernel_source_template(self.G, code_generator=code_gen))
                self.ppaged(basic_kernel_header_template(self.G, code_generator=code_gen))
        if args.output_tensors:
            code_gen.write_constants()

        if args.header_file:
            with open(os.path.join(self.settings['model_directory'], args.header_file), "w") as output_fp:
                output_fp.write(header_template(self.G, code_generator=code_gen))
Example #9
    def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        foutputs = self._collect_execution(G, input_tensors)
        qoutputs = self._collect_execution(G,
                                           input_tensors,
                                           qrecs=G.quantization,
                                           qmode=QuantizationMode.all())
        stats = OrderedDict()
        for idx, fstat in enumerate(foutputs):
            qstat = qoutputs[idx]
            if fstat['fusion_outputs']:
                for jdx, ffstat in enumerate(fstat['fusion_outputs']):
                    stats[NodeId(fstat['node'], ffstat['node'])] =\
                        self._collect_one(ffstat, qstat['fusion_outputs'][jdx])
            stats[NodeId(fstat['node'],
                         None)] = self._collect_one(fstat, qstat)

        return stats
Example #10
def test_external_biases_sq8(qvww_graph):
    # this model ends with an external bias layer implemented as a constant add
    tfi = TfliteImporter()
    G = tfi.create_graph(qvww_graph, {"load_quantization": True, "load_tensors": True})
    G.add_dimensions()
    matcher = get_scale8_match_group()
    matcher.match(G)
    G.add_dimensions()
    image = 'tests/vwwimages/COCO_val2014_000000174838_1.png'
    img_in = Image.open(image)
    img_in = img_in.resize((238, 208))
    input_tensor = np.array(img_in, dtype=np.uint8)
    input_tensor = (input_tensor.astype(np.float32) - 128) / 128
    executer = GraphExecuter(G, qrecs=G.quantization)
    # check if nntool can execute
    qoutput_tensors = executer.execute([input_tensor], qmode=QuantizationMode.all_dequantize())
    foutput_tensors = executer.execute([input_tensor], qmode=None)
    diff = [q[0] - f[0] for q, f in zip(qoutput_tensors, foutput_tensors)]
    assert max([np.max(d) for d in diff]) < 2.2
Example #11
def test_validate_mn1_dequant_quantfloat(mn1q_graph):
    # the dequantized graph should give the same results as the quantized graph executed in float
    tfi = TfliteImporter()
    G = tfi.create_graph(mn1q_graph, {
        'load_tensors': True,
        'load_quantization': True
    })
    G.add_dimensions()
    G.adjust_order()
    matcher = get_pow2_match_group()
    matcher.match(G)
    G.add_dimensions()

    Gdq = tfi.create_graph(mn1q_graph, {
        'load_tensors': True,
        'load_dequantized': True
    })
    Gdq.add_dimensions()
    Gdq.adjust_order()
    matcher = get_pow2_match_group()
    matcher.match(Gdq)
    Gdq.add_dimensions()

    input_tensor = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    input_tensor = input_tensor.reshape((224, 224, 3)).transpose((2, 0, 1))

    executer = GraphExecuter(G, qrecs=G.quantization)
    qfoutput_tensors = executer.execute([input_tensor],
                                        qmode=QuantizationMode.none())

    executer = GraphExecuter(Gdq)
    dfoutput_tensors = executer.execute([input_tensor])

    diff_list = [
        np.abs(df[0] - qf[0])
        for df, qf in zip(dfoutput_tensors, qfoutput_tensors)
    ]
    max_diff = [np.max(elem) for elem in diff_list]
    assert max(max_diff) < 0.003
Example #12
def test_graph_calc_quantized8(mnist_unfused_8bit_state, mnist_images):
    G = load_state(mnist_unfused_8bit_state)
    input_tensor = import_data(mnist_images[0],
                               height=28,
                               width=28,
                               offset=0,
                               divisor=255)
    input_tensor = input_tensor.reshape((28, 28, 1))
    executer = GraphExecuter(G, qrecs=G.quantization)
    output1 = executer.execute([input_tensor], step_idx_limit=7)
    input_tensor = import_data(mnist_images[0],
                               height=28,
                               width=28,
                               offset=0,
                               divisor=255)
    input_tensor = input_tensor.reshape((28, 28, 1))
    output2 = executer.execute([input_tensor],
                               qmode=QuantizationMode.all_dequantize(),
                               step_idx_limit=7)
    diffs = []
    for i in range(8):
        diffs.append(output1[i][0] - output2[i][0])
    assert np.max(np.abs(diffs[7])) < 9
Example #13
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantize one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(
                    -num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(outputs)
            else:
                self.G.print_intermediates(outputs,
                                           limit=step,
                                           width=args.number_width,
                                           precision=args.precision,
                                           channel=args.channel,
                                           order=['c', 'h', 'w'],
                                           checksum=args.checksum)

            if args.visualize_detection:
                img_in = Image.open(file_per_input[0]).convert('RGBA')

                height = img_in.size[1] if input_args[
                    'height'] == -1 else input_args['height']
                width = img_in.size[0] if input_args[
                    'width'] == -1 else input_args['width']
                img_in = img_in.resize((width, height))

                if self.G.has_ssd_postprocess:
                    bboxes, classes, scores, _ = [
                        outputs[graph_out.step_idx][0]
                        for graph_out in self.G.outputs()
                    ]
                    draw = ImageDraw.Draw(img_in, 'RGBA')

                    for box, score, class_id in zip(bboxes, scores, classes):
                        if args.quantize and not args.dequantize:
                            ssd_node = [
                                node for node in self.G.nodes()
                                if isinstance(node, SSDDetectorParameters)
                            ][0]
                            ssd_qrec = self.G.quantization[NodeId(ssd_node)]
                            x0, x1 = int(box[1] * width *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[3] * width *
                                             ssd_qrec.out_qs[0].scale)
                            y0, y1 = int(box[0] * height *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[2] * height *
                                             ssd_qrec.out_qs[0].scale)
                            score = score * ssd_qrec.out_qs[2].scale
                        else:
                            x0, x1 = int(box[1] * width), int(box[3] * width)
                            y0, y1 = int(box[0] * height), int(box[2] * height)
                        rect_points = (x0, y0), (x1, y0), (x1, y1), (x0, y1), (x0, y0)
                        draw.line(rect_points, fill='red', width=2)
                        txt = '{}@{}%'.format(class_id, int(score * 100))
                        draw.text([x0, y0 - 10], txt, fill=(0, 255, 0))
                img_in.show()

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == self.G.num_inputs:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                if len(args.input_files) != self.G.num_inputs:
                    self.perror(
                        "can only save dumps on one input to tensor store")
                    return
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
Example #14
    def execute_iterator(self,
                         in_tensors: Sequence[np.ndarray],
                         step_idx_limit: Optional[int] = None,
                         start_node: Optional[Parameters] = None,
                         qmode: Optional[QuantizationMode] = None,
                         yield_fusions=True,
                         yield_details=True,
                         only_yield_step=False,
                         record_inputs: Optional[Mapping] = None,
                         silent=False):
        if qmode is None:
            qmode = QuantizationMode.none()

        saved_outputs = {}

        if not silent:
            LOG.info("execute uncached: quantization mode %s", qmode)
            ExecutionProgress.start()
        for step_idx, step in enumerate(self._G.graph_state.steps):

            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            node = step['node']

            if start_node and start_node != node:
                continue

            # collect outputs from previous nodes
            # InputNode is already set above
            output_tensors = self.collect_outputs(saved_outputs, node)

            if not silent:
                ExecutionProgress.progress(step_idx, node.name)
            nid = NodeId(node, None)
            if record_inputs is not None:
                if output_tensors is None:
                    record_inputs[nid] = output_tensors
                else:
                    record_inputs[nid] = [
                        np.copy(output_tensor)
                        for output_tensor in output_tensors
                    ]

            qrec = self._qrecs[nid] if self._qrecs is not None else None
            if qmode.get_quantized(node, step_idx):
                switch = self._quantized_kernel_switch
                if qmode.is_step and output_tensors:
                    output_tensors = [
                        qrec.in_qs[i].quantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
            else:
                switch = self._kernel_switch

            details = {} if yield_details and (
                not only_yield_step or step_idx == step_idx_limit) else None
            if isinstance(node, (ConvFusionParameters, ActivationFusion)):
                for fusion_node in node.contained_nodes():
                    fnid = NodeId(node, fusion_node)
                    fqrec = None if not qrec else self._qrecs[fnid]
                    if record_inputs is not None:
                        record_inputs[nid] = [
                            np.copy(output_tensor)
                            for output_tensor in output_tensors
                        ]
                    details = {} if yield_fusions and yield_details else None
                    output_tensors = switch.execute(fusion_node,
                                                    output_tensors, fqrec,
                                                    details)
                    if yield_fusions:
                        if qmode.dequantize:
                            qoutput_tensors = [
                                fqrec.out_qs[i].dequantize(output_tensor) for
                                i, output_tensor in enumerate(output_tensors)
                            ]
                            yield step_idx, node, fusion_node, qoutput_tensors, details
                        elif qmode.is_float_q_deq:
                            qoutput_tensors = [
                                fqrec.out_qs[i].dequantize(
                                    fqrec.out_qs[i].quantize(output_tensor))
                                for i, output_tensor in enumerate(
                                    output_tensors)
                            ]
                            yield step_idx, node, fusion_node, qoutput_tensors, details
                        else:
                            yield step_idx, node, fusion_node, output_tensors, details
            elif isinstance(node, InputParameters):
                output_tensors = switch.execute(node, in_tensors, qrec,
                                                details)
            else:
                output_tensors = switch.execute(node, output_tensors, qrec,
                                                details)

            if qmode.dequantize:
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = qoutput_tensors
            elif qmode.is_float_q_deq:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(
                        qrec.out_qs[i].quantize(output_tensor))
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
            else:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                if not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, output_tensors, details

            self.save_output(saved_outputs, node, output_tensors)

        if not silent:
            ExecutionProgress.end()
Example #15
    def execute_iterator(self,
                         in_tensors: Sequence[np.ndarray],
                         step_idx_limit: Optional[int] = None,
                         start_node: Optional[Parameters] = None,
                         qmode: Optional[QuantizationMode] = None,
                         yield_fusions=True,
                         yield_details=True,
                         only_yield_step=False,
                         record_inputs: Optional[Mapping] = None,
                         silent=False,
                         parent_node=None,
                         parent_step_idx=None,
                         saved_outputs=None,
                         G=None):
        if qmode is None:
            qmode = QuantizationMode.none()

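        # G and saved_outputs double as recursion state: fusion nodes re-enter
        # this iterator with G=node.subgraph and the shared saved_outputs dict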
        if G is None:
            G = self._G
            saved_outputs = {}

        if not silent:
            LOG.info("execute uncached: quantization mode %s", qmode)
            ExecutionProgress.start()
        for node in G.dfs():
            step_idx = node.step_idx
            if step_idx_limit is not None and step_idx > step_idx_limit:
                break

            if start_node and start_node != node:
                continue

            # collect outputs from previous nodes
            # InputNode is already set above
            output_tensors = self.collect_outputs(G, saved_outputs, node)

            if not silent:
                ExecutionProgress.progress(step_idx, node.name)
            if parent_node:
                nid = NodeId(parent_node, node)
            else:
                nid = NodeId(node, None)
            if record_inputs is not None:
                if output_tensors is None:
                    record_inputs[nid] = output_tensors
                else:
                    record_inputs[nid] = [
                        np.copy(output_tensor)
                        for output_tensor in output_tensors
                    ]
            if isinstance(node,
                          (FusionInputParameters, FusionOutputParameters)):
                qrec = None
            else:
                if self._qrecs and qmode.get_quantized(node, step_idx):
                    if nid not in self._qrecs:
                        LOG.warning("no quantization parameters on %s",
                                    node.name)
                        qrec = None
                    else:
                        qrec = self._qrecs[nid]
                    if qmode.is_step and output_tensors:
                        output_tensors = [
                            qrec.in_qs[i].quantize(output_tensor)
                            for i, output_tensor in enumerate(output_tensors)
                        ]
                else:
                    qrec = None

            details = {} if yield_details and (
                not only_yield_step or step_idx == step_idx_limit) else None
            if isinstance(
                    node,
                (FilterFusionBase, ActivationFusionBase,
                 PaddedAddFusionParameters, MatMulOpFusionParameters)):

                for f_step_idx, f_pnode, f_node, f_output_tensors, f_details in self.execute_iterator(
                        output_tensors,
                        qmode=qmode,
                        yield_fusions=yield_fusions,
                        yield_details=yield_details,
                        silent=True,
                        parent_node=node,
                        parent_step_idx=step_idx,
                        saved_outputs=saved_outputs,
                        G=node.subgraph):
                    if yield_fusions and not isinstance(
                            f_node,
                        (FusionInputParameters, FusionOutputParameters)):
                        yield f_step_idx, f_pnode, f_node, f_output_tensors, f_details
                f_outputs = node.subgraph.outputs()
                num_outputs = max(f_output.idx for f_output in f_outputs) + 1
                output_tensors = [None] * num_outputs
                for f_output in f_outputs:
                    output_tensors[f_output.idx] = saved_outputs[f_output][0]

            elif isinstance(node, (InputParameters, FusionInputParameters)):
                output_tensors = KernelExecuter.execute(
                    node, in_tensors, qrec, details)
            else:
                output_tensors = KernelExecuter.execute(
                    node, output_tensors, qrec, details)

            if qmode.dequantize and qrec:
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(output_tensor)
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, qoutput_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = qoutput_tensors
            elif qmode.is_float_q_deq and qrec:
                if qmode.is_step and qmode.get_quantized(node, step_idx):
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                qoutput_tensors = [
                    qrec.out_qs[i].dequantize(
                        qrec.out_qs[i].quantize(output_tensor))
                    for i, output_tensor in enumerate(output_tensors)
                ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, qoutput_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, qoutput_tensors, details
            else:
                if qmode.is_step and qmode.get_quantized(node,
                                                         step_idx) and qrec:
                    output_tensors = [
                        qrec.out_qs[i].dequantize(output_tensor)
                        for i, output_tensor in enumerate(output_tensors)
                    ]
                if parent_node:
                    yield parent_step_idx, parent_node, node, output_tensors, details
                elif not only_yield_step or step_idx == step_idx_limit:
                    yield step_idx, node, None, output_tensors, details

            self.save_output(saved_outputs, node, output_tensors)

        if not silent:
            ExecutionProgress.end()
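A hedged sketch of consuming the iterator above; the 5-tuple layout (step index, node, fusion node or None, output tensors, details) matches its yield statements:

executer = GraphExecuter(G, qrecs=G.quantization)
for step_idx, node, fnode, out_tensors, details in executer.execute_iterator(
        [input_tensor], qmode=QuantizationMode.all_dequantize(), silent=True):
    if fnode is None:  # skip fusion-internal yields
        print(step_idx, node.name, [t.shape for t in out_tensors])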
Example #16
    def tune_all(self, nodes, progress, quantize=False):
        base_inputs = self.get_base_inputs(nodes, progress, quantize)

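        # opt_func is the objective passed to Maximizer below: it returns the
        # compressed byte count when a QSNR target both compresses and
        # validates cleanly, or None to make the maximizer back off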
        def opt_func(qsnr, state):
            progress('compressing: ', False)
            compression = self.tune_qsnr(
                nodes,
                qsnr,
                progress=lambda _, comp: progress('+' if comp else '-', False))
            if not compression or ('best_compression' in state and
                                   state['best_compression'] > compression):
                if qsnr == 0:
                    raise CompressionError("could not compress graph")
                return None
            state['best_compression'] = compression
            progress('', True)
            progress('validating: ', False)
            good_inputs, good_margin, bad_inputs, bad_margin = self.validate(
                state['qmode'],
                inputs=state['cur_inputs'],
                progress=lambda pred: progress('+' if pred else '-', False))
            progress('', True)
            progress(
                f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
                True)
            if bad_inputs:
                if not state['final']:
                    state['cur_inputs'] = bad_inputs
                del state['best_compression']
                return None
            return compression

        qmode = QuantizationMode.none()
        dir_start = 'down'

        opt_state = {
            'cur_inputs': base_inputs.copy(),
            'final': False,
            'qmode': qmode
        }
        start_qsnr = 30
        start_step = 15
        maximizer = Maximizer(opt_func, 0, 120)
        while True:
            res = maximizer.run(
                start_qsnr,
                opt_state,
                progress=lambda cur, step, direct: progress(
                    f'QSNR {cur} step {step} direction {direct}', True),
                start_step=start_step,
                dir_start=dir_start)
            if quantize and opt_state['qmode'] == QuantizationMode.none():
                progress('analysing quantized', True)
                opt_state['qmode'] = QuantizationMode.all_dequantize()
            elif opt_state['cur_inputs'] != base_inputs:
                progress('check with all inputs', True)
                opt_state['final'] = True
            else:
                break
            opt_state['cur_inputs'] = base_inputs.copy()
            start_qsnr = res[1]
            start_step = 0.5
            dir_start = 'up'

        progress(f'tune QSNR to best {res[1]} compressed by {res[0]} bytes',
                 True)
        self.tune_qsnr(
            nodes,
            res[1],
            progress=lambda _, comp: progress('+' if comp else '-', False))
        progress('', True)

        return res[1]
Example #17
    def finetune(self, nodes, progress, quantize=False):
        sizes = [(node, node.compressed_value) for node in nodes
                 if node.compressed_value and node.use_compressed]
        nodes = [size[0] for size in sizes]
        base_inputs = self.get_base_inputs(nodes, progress, quantize)
        for node in nodes:
            if node.compressed_value:
                node.use_compressed = True

        def opt_func(bits, threshold, sparse, node, state):
            progress('compressing: ', False)
            compression = self.tune_bits(
                [node],
                bits,
                threshold=threshold,
                sparse=sparse,
                progress=lambda _, comp: progress('+' if comp else '-', False))
            if not compression or ('best_compression' in state and
                                   state['best_compression'] > compression):
                if bits == 8 and sparse:
                    raise CompressionError("could not compress graph")
                return None
            state['best_compression'] = compression
            progress('', True)
            progress('validating: ', False)
            good_inputs, good_margin, bad_inputs, bad_margin = self.validate(
                state['qmode'],
                inputs=state['cur_inputs'],
                break_on_error=state['final'],
                progress=lambda pred: progress('+' if pred else '-', False))
            progress('', True)
            progress(
                f'good {len(good_inputs)} ({good_margin:.2f}) bad {len(bad_inputs)} ({bad_margin:.2f})',
                True)
            if bad_inputs:
                state['cur_inputs'] = bad_inputs
                del state['best_compression']
                return None
            return compression

        maximizer = Maximizer(opt_func, 2, 8, int_step=True)
        while sizes:
            sizes.sort(key=lambda x: x[1].size)

            tune_idx = -1
            node = None
            while node is None and abs(tune_idx) <= len(sizes):
                node, comp_val = sizes[tune_idx]
                cur_bits = comp_val.bits
                if cur_bits > 2:
                    cur_step = max(cur_bits // 2, 1)
                    cur_bits = max(cur_bits - cur_step, 2)
                else:
                    tune_idx -= 1
                    node = None

            if node is None:
                break

            progress(f'finetuning {node.name}', True)
            qmode = QuantizationMode.none()
            dir_start = 'down'
            opt_state = {
                'cur_inputs': base_inputs.copy(),
                'final': False,
                'qmode': qmode
            }
            while True:
                res = maximizer.run(
                    cur_bits,
                    None,
                    False,
                    node,
                    opt_state,
                    progress=lambda cur, step, direct: progress(
                        f'bits {cur} step {step} direction {direct}', True),
                    start_step=cur_step,
                    dir_start=dir_start)
                del sizes[tune_idx]
                if res is None:
                    break
                if quantize and opt_state['qmode'] == QuantizationMode.none():
                    progress('analysing quantized', True)
                    opt_state['qmode'] = QuantizationMode.all_dequantize()
                elif opt_state['cur_inputs'] != base_inputs:
                    progress('check with all inputs', True)
                else:
                    break
                opt_state['final'] = True
                opt_state['cur_inputs'] = base_inputs.copy()
                cur_bits = res[1]
                cur_step = 1
                dir_start = 'up'

            if res is None:
                progress(f'{node.name} cannot be further optimised', True)
                self.tune_bits(
                    [node],
                    comp_val.bits,
                    progress=lambda _, comp: progress('+'
                                                      if comp else '-', False))
            else:
                progress(
                    f'{node.name} tune bits to {res[1]} compressed by {res[0]} bytes',
                    True)
                self.tune_bits(
                    [node],
                    res[1],
                    progress=lambda _, comp: progress('+'
                                                      if comp else '-', False))
                progress('', True)
Example #18
def gen_project(G,
                settings,
                project_folder,
                script_commands,
                overwrite=False,
                performance=False,
                quantized=False,
                test_results=False,
                save_inputs=False,
                input_file=None,
                input_args=None,
                gen_atproject=False,
                dump_tensors=False,
                input_tensors=None,
                tolerance=0.0):
    settings = deepcopy(settings)
    settings['graph_monitor_cycles'] = True
    settings['graph_produce_node_names'] = True
    settings['graph_produce_operinfos'] = True

    code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)

    if not os.path.exists(project_folder):
        os.mkdir(project_folder)

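    # when test_results is set, run the quantized graph on imported or random
    # inputs and capture its outputs so the generated main can embed them as
    # expected values checked against `tolerance`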
    qoutputs = None
    if test_results:
        np.random.seed(12345)
        finput_tensors = []
        input_tensors = []
        for i, node in enumerate(G.input_nodes()):
            out_q = G.quantization[NodeId(node)].out_qs[0]
            if input_file:
                file_per_input = glob_input_files(input_file, G.num_inputs)[0]
                finput = import_data(file_per_input[i], **input_args)
            else:
                min_val = out_q.min if not out_q.is_floating else -1.0
                max_val = out_q.max if not out_q.is_floating else 1.0
                finput = get_rand(node.out_dims[0].shape,
                                  low_high=(min_val, max_val))
            finput_tensors.append(finput)
        executer = GraphExecuter(G, qrecs=G.quantization)
        qoutput_tensors = executer.execute(finput_tensors.copy(),
                                           qmode=QuantizationMode.all())
        qoutputs = []
        for params in G.outputs():
            outp = qoutput_tensors[params.step_idx][0]
            qoutputs.append(outp)
        for i, params in enumerate(G.input_nodes()):
            inp = qoutput_tensors[params.step_idx][0]
            input_tensors.append(inp)
            if save_inputs:
                nodeq = G.quantization[NodeId(params, None)].out_qs[0]
                np.save(os.path.join(project_folder, f"fake_input_{i}.npy"),
                        nodeq.dequantize(inp))

    main = os.path.join(project_folder, f"{code_gen.project_name}")
    main_c = main + '.c'
    main_h = main + '.h'
    common_mk = os.path.join(project_folder, "common.mk")
    nntool_script = os.path.join(project_folder, "nntool_script")
    if overwrite or not os.path.exists(main_c):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.c"),
                  "w") as output_fp:
            output_fp.write(
                generate_main_appl_template(G, code_gen, input_tensors,
                                            qoutputs, tolerance))
    if overwrite or not os.path.exists(main_h):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.h"),
                  "w") as output_fp:
            output_fp.write(generate_main_appl_header(G, code_gen))
    if overwrite or not os.path.exists(common_mk):
        open_args = parse_last_open(script_commands)
        open_args = build_last_open_args(open_args) if open_args else ""
        with open(os.path.join(project_folder, "common.mk"), "w") as output_fp:
            if gen_atproject:
                output_fp.write(
                    generate_main_appl_make_atproject(G, code_gen, quantized,
                                                      'Model.c'))
            else:
                output_fp.write(
                    generate_main_appl_make(G,
                                            code_gen,
                                            quantized,
                                            open_args=open_args))
    if overwrite or not os.path.exists(nntool_script):
        with open(nntool_script, 'w') as fp:
            # NOTE - gen_template_project is excluded so that tests work. Normally it will not be in the
            # history.
            fp.writelines(process_script(script_commands))
            # always add performance since the main template uses it
            for setting in [
                    'set graph_produce_node_names true',
                    'set graph_produce_operinfos true',
                    'set graph_monitor_cycles true'
            ]:
                fp.write(f'{setting}\n')
            if dump_tensors:
                fp.write('set graph_dump_tensor 7\n')

            if script_commands[-1] != "save_state":
                fp.write('save_state\n')
    if gen_atproject:
        code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)
        with open(os.path.join(project_folder, 'Model.c'), "w") as output_fp:
            output_fp.write(default_template(G, code_generator=code_gen))
        if G.has_expressions:
            with open(os.path.join(project_folder, "Expression_Kernels.c"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_source_template(G, code_generator=code_gen))
            with open(os.path.join(project_folder, "Expression_Kernels.h"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_header_template(G, code_generator=code_gen))
        code_gen.write_constants(tensor_directory=project_folder)
    ignore_function = None if overwrite else skip_existing_files(
        project_folder)
    shutil.copytree(os.path.join(os.environ.get("NNTOOL_PATH"),
                                 'generation/project_template'),
                    project_folder,
                    dirs_exist_ok=True,
                    ignore=ignore_function)

    if not gen_atproject:
        try:
            shutil.copy(
                G.graph_identity.filename,
                os.path.join(project_folder,
                             os.path.split(G.graph_identity.filename)[1]))
        except shutil.SameFileError:
            pass
Example #19
    def do_validate(self, args: argparse.Namespace):
        """
Validate the model (quantized [-q] or not) in terms of prediction accuracy on a given dataset (folder of
images). Ground-truth labels can be embedded in the file names ("filename_03.[png, ppm, pgm]"; the number of
digits must be consistent with the number of network outputs, e.g. in a 1000-class problem the label must
use 3 digits, so "file_45.png" will raise an error) or written in a .json object (example: {'file0':label0,
'file1':label1, ...}) passed to the command with --label_json
"""
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        number_samples = sum(1 for _ in glob_input_files(args.input_files))

        if args.vww_instances_file:
            validation = ValidateFromVWWInstances(
                args.vww_instances_file,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.label_json:
            validation = ValidateFromJSON(
                args.label_json,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.class_number is not None:
            validation = ValidateFromClass(
                args.class_number,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        else:
            validation = ValidateFromName(
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = validation.validate(
                    file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                if not i % args.progress_every and i > 0:
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * sum(good_predictions) / len(good_predictions)
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
Example #20
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantize one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(-num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_per_input)            
            data = [import_data(input_file, **input_args) for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step,
                                       qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(format_dump_file(self.G, outputs, not qmode.is_none,
                                                args.dequantize, args.quantize_step))
            else:
                self.G.print_intermediates(outputs, limit=step, width=args.number_width,
                                           precision=args.precision, channel=args.channel,
                                           order=['c', 'h', 'w'])

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == 1:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles