예제 #1
0
    def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        output_ = execute(G, input_tensors, limit=self._limit)
        all_details = []
        qoutput_ = execute(G,
                           input_tensors,
                           limit=self._limit,
                           qrecs=G.quantization,
                           qmode=QuantizationMode.all(),
                           all_details=all_details)
        stats = OrderedDict()
        for idx, out in enumerate(output_):
            error_ = np.abs(out[0] - qoutput_[idx][0])
            step = G.graph_state.steps[idx]
            node = step['node']
            details = all_details[idx]
            if details:
                overflow_dot = details['overflow_dot']
                overflow_acc = details['overflow_acc']
            else:
                overflow_dot = overflow_acc = ""

            stats[NodeId(node, None)] = {
                'name': node.name,
                'op_name': node.op_name,
                'step': idx,
                'av_err': np.mean(error_),
                'max_err': np.max(error_),
                'min_err': np.min(error_),
                'qsnr': qsnr(out[0], qoutput_[idx][0]),
                'overflow_dot': overflow_dot,
                'overflow_acc': overflow_acc,
            }

        return stats
예제 #2
0
def test_graph_imu_auto_quant_and_execute_quant():
    G = create_graph("tests/graph/imu.tflite", opts={"load_tensors": True})
    G.add_dimensions()
    G.adjust_order()
    get_pow2_match_group().match(G)
    G.add_dimensions()
    stats_collector = ActivationStatsCollector()
    for input_file in ['tests/images/imu0.pgm']:
        input_tensor = import_data(input_file,
                                   offset=0,
                                   divisor=256,
                                   nptype='int16')
        stats_collector.collect_stats(G, [input_tensor])
    astats = stats_collector.reduce_stats()
    stats_collector = FilterStatsCollector()
    fstats = stats_collector.collect_stats(G)
    quantizer = SymmetricQuantizer(astats, fstats, force_width=16)
    qrecs = quantizer.quantize(G)
    G.quantization = qrecs
    executer = GraphExecuter(G, qrecs=qrecs)
    for input_file in ['tests/images/imu0.pgm']:
        input_tensor = import_data(input_file,
                                   offset=0,
                                   divisor=256,
                                   nptype='int16')
        output_ = executer.execute([input_tensor],
                                   qmode=QuantizationMode.all())
예제 #3
0
파일: gen.py 프로젝트: brupa9/gap_sdk
    def do_gen(self, args):
        """
Generate AutoTiler model C code and optionally dump tensors. If no destination file is
given the generated code will be outputed to the screen. Check the 'set' command for
settings related to code generation."""
        self._check_graph()
        self._check_quantized()
        self._check_adjusted()
        if args.checksums:
            input_args = self._get_input_args(None)
            LOG.info("input file %s", args.checksums)
            data = import_data(args.checksums, **input_args)
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            executer.execute([data], qmode=QuantizationMode.all())
            self.settings['checksum_file'] = args.checksums
            self.settings['generate_checksums'] = True

        if args.tensor_directory:
            self.settings['tensor_directory'] = args.tensor_directory
        if args.model_directory:
            self.settings['model_directory'] = args.model_directory
        self.settings['basic_kernel_source_file'] = args.basic_kernel_source_file
        self.settings['basic_kernel_header_file'] = args.basic_kernel_header_file
        code_gen = CodeGenerator(self.G, DefaultNamingConvension(self.G), self.settings)

        if self.settings['template_file']:
            code_template = dynamic_template(self.settings['template_file'])
        else:
            code_template = default_template

        if args.model_file:
            with open(os.path.join(self.settings['model_directory'],
                                   args.model_file), "w") as output_fp:
                output_fp.write(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_source_file), "w") as output_fp:
                    output_fp.write(basic_kernel_source_template(self.G, code_generator=code_gen))
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_header_file), "w") as output_fp:
                    output_fp.write(basic_kernel_header_template(self.G, code_generator=code_gen))
        else:
            self.ppaged(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                self.ppaged(basic_kernel_source_template(self.G, code_generator=code_gen))
                self.ppaged(basic_kernel_header_template(self.G, code_generator=code_gen))
        if args.output_tensors:
            code_gen.write_constants()

        if args.header_file:
            with open(os.path.join(self.settings['model_directory'], args.header_file), "w") as output_fp:
                output_fp.write(header_template(self.G, code_generator=code_gen))
예제 #4
0
    def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        foutputs = self._collect_execution(G, input_tensors)
        qoutputs = self._collect_execution(G,
                                           input_tensors,
                                           qrecs=G.quantization,
                                           qmode=QuantizationMode.all())
        stats = OrderedDict()
        for idx, fstat in enumerate(foutputs):
            qstat = qoutputs[idx]
            if fstat['fusion_outputs']:
                for jdx, ffstat in enumerate(fstat['fusion_outputs']):
                    stats[NodeId(fstat['node'], ffstat['node'])] =\
                        self._collect_one(ffstat, qstat['fusion_outputs'][jdx])
            stats[NodeId(fstat['node'],
                         None)] = self._collect_one(fstat, qstat)

        return stats
예제 #5
0
파일: dump.py 프로젝트: brupa9/gap_sdk
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(
                    -num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(outputs)
            else:
                self.G.print_intermediates(outputs,
                                           limit=step,
                                           width=args.number_width,
                                           precision=args.precision,
                                           channel=args.channel,
                                           order=['c', 'h', 'w'],
                                           checksum=args.checksum)

            if args.visualize_detection:
                img_in = Image.open(file_per_input[0]).convert('RGBA')

                height = img_in.size[1] if input_args[
                    'height'] == -1 else input_args['height']
                width = img_in.size[0] if input_args[
                    'width'] == -1 else input_args['width']
                img_in = img_in.resize((width, height))

                if self.G.has_ssd_postprocess:
                    bboxes, classes, scores, _ = [
                        outputs[graph_out.step_idx][0]
                        for graph_out in self.G.outputs()
                    ]
                    draw = ImageDraw.Draw(img_in, 'RGBA')

                    for box, score, class_id in zip(bboxes, scores, classes):
                        if args.quantize and not args.dequantize:
                            ssd_node = [
                                node for node in self.G.nodes()
                                if isinstance(node, SSDDetectorParameters)
                            ][0]
                            ssd_qrec = self.G.quantization[NodeId(ssd_node)]
                            x0, x1 = int(box[1] * width *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[3] * width *
                                             ssd_qrec.out_qs[0].scale)
                            y0, y1 = int(box[0] * height *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[2] * height *
                                             ssd_qrec.out_qs[0].scale)
                            score = score * ssd_qrec.out_qs[2].scale
                        else:
                            x0, x1 = int(box[1] * width), int(box[3] * width)
                            y0, y1 = int(box[0] * height), int(box[2] * height)
                        rect_points = (x0, y0), (x1, y0), (x1, y1), (x0,
                                                                     y1), (x0,
                                                                           y0)
                        draw.line(rect_points, fill='red', width=2)
                        txt = '{}@{}%'.format(class_id, int(score * 100))
                        draw.text([x0, y0 - 10], txt, fill=(0, 255, 0))
                img_in.show()

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == self.G.num_inputs:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                if len(args.input_files) != self.G.num_inputs:
                    self.perror(
                        "can only save dumps on one input to tensor store")
                    return
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
def gen_project(G,
                settings,
                project_folder,
                script_commands,
                overwrite=False,
                performance=False,
                quantized=False,
                test_results=False,
                save_inputs=False,
                input_file=None,
                input_args=None,
                gen_atproject=False,
                dump_tensors=False,
                input_tensors=None,
                tolerance=0.0):
    settings = deepcopy(settings)
    settings['graph_monitor_cycles'] = True
    settings['graph_produce_node_names'] = True
    settings['graph_produce_operinfos'] = True

    code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)

    if not os.path.exists(project_folder):
        os.mkdir(project_folder)

    qoutputs = None
    if test_results:
        np.random.seed(12345)
        finput_tensors = []
        input_tensors = []
        for i, node in enumerate(G.input_nodes()):
            out_q = G.quantization[NodeId(node)].out_qs[0]
            if input_file:
                file_per_input = glob_input_files(input_file, G.num_inputs)[0]
                finput = import_data(file_per_input[i], **input_args)
            else:
                min_val = out_q.min if not out_q.is_floating else -1.0
                max_val = out_q.max if not out_q.is_floating else 1.0
                finput = get_rand(node.out_dims[0].shape,
                                  low_high=(min_val, max_val))
            finput_tensors.append(finput)
        executer = GraphExecuter(G, qrecs=G.quantization)
        qoutput_tensors = executer.execute(finput_tensors.copy(),
                                           qmode=QuantizationMode.all())
        qoutputs = []
        for params in G.outputs():
            outp = qoutput_tensors[params.step_idx][0]
            qoutputs.append(outp)
        for i, params in enumerate(G.input_nodes()):
            inp = qoutput_tensors[params.step_idx][0]
            input_tensors.append(inp)
            if save_inputs:
                nodeq = G.quantization[NodeId(params, None)].out_qs[0]
                np.save(os.path.join(project_folder, f"fake_input_{i}.npy"),
                        nodeq.dequantize(inp))

    main = os.path.join(project_folder, f"{code_gen.project_name}")
    main_c = main + '.c'
    main_h = main + '.h'
    common_mk = os.path.join(project_folder, "common.mk")
    nntool_script = os.path.join(project_folder, "nntool_script")
    if overwrite or not os.path.exists(main_c):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.c"),
                  "w") as output_fp:
            output_fp.write(
                generate_main_appl_template(G, code_gen, input_tensors,
                                            qoutputs, tolerance))
    if overwrite or not os.path.exists(main_h):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.h"),
                  "w") as output_fp:
            output_fp.write(generate_main_appl_header(G, code_gen))
    if overwrite or not os.path.exists(common_mk):
        open_args = parse_last_open(script_commands)
        open_args = build_last_open_args(open_args) if open_args else ""
        with open(os.path.join(project_folder, "common.mk"), "w") as output_fp:
            if gen_atproject:
                output_fp.write(
                    generate_main_appl_make_atproject(G, code_gen, quantized,
                                                      'Model.c'))
            else:
                output_fp.write(
                    generate_main_appl_make(G,
                                            code_gen,
                                            quantized,
                                            open_args=open_args))
    if overwrite or not os.path.exists(nntool_script):
        with open(nntool_script, 'w') as fp:
            # NOTE - gen_template_project is excluded so that tests work. Normally it will not be in the
            # history.
            fp.writelines(process_script(script_commands))
            # always add performance since the main template uses it
            for setting in [
                    'set graph_produce_node_names true',
                    'set graph_produce_operinfos true',
                    'set graph_monitor_cycles true'
            ]:
                fp.write(f'{setting}\n')
            if dump_tensors:
                fp.write('set graph_dump_tensor 7\n')

            if script_commands[-1] != "save_state":
                fp.write('save_state\n')
    if gen_atproject:
        code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)
        with open(os.path.join(project_folder, 'Model.c'), "w") as output_fp:
            output_fp.write(default_template(G, code_generator=code_gen))
        if G.has_expressions:
            with open(os.path.join(project_folder, "Expression_Kernels.c"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_source_template(G, code_generator=code_gen))
            with open(os.path.join(project_folder, "Expression_Kernels.h"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_header_template(G, code_generator=code_gen))
        code_gen.write_constants(tensor_directory=project_folder)
    ignore_function = None if overwrite else skip_existing_files(
        project_folder)
    shutil.copytree(os.path.join(os.environ.get("NNTOOL_PATH"),
                                 'generation/project_template'),
                    project_folder,
                    dirs_exist_ok=True,
                    ignore=ignore_function)

    if not gen_atproject:
        try:
            shutil.copy(
                G.graph_identity.filename,
                os.path.join(project_folder,
                             os.path.split(G.graph_identity.filename)[1]))
        except shutil.SameFileError:
            pass
예제 #7
0
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(-num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_per_input)            
            data = [import_data(input_file, **input_args) for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step,
                                       qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(format_dump_file(self.G, outputs, not qmode.is_none,
                                                args.dequantize, args.quantize_step))
            else:
                self.G.print_intermediates(outputs, limit=step, width=args.number_width,
                                           precision=args.precision, channel=args.channel,
                                           order=['c', 'h', 'w'])

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == 1:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles