コード例 #1
0
    def _collect(self, G, input_tensors, step_idx) -> Mapping[NodeId, Mapping]:
        """Compare a reference execution of ``G`` against a dequantized one.

        The graph is executed twice through ``self._collect_execution``:
        first with ``G.quantization`` only when ``G.has_quantized_parameters``
        is set (otherwise with no quantization records), then with
        ``G.quantization`` and ``QuantizationMode.all_dequantize()``.
        Each step of the first run is paired by position with the same step
        of the second run and handed to ``self._collect_one``.

        ``step_idx`` is accepted but not read in this method.

        Returns an ``OrderedDict`` keyed by ``NodeId``; entries for the steps
        listed under a step's ``'fusion_outputs'`` are inserted before the
        entry of the step itself.
        """
        LOG.debug("gather quantization statistics")
        reference_qrecs = G.quantization if G.has_quantized_parameters else None

        reference_executer = GraphExecuter(G, qrecs=reference_qrecs)
        reference_steps = self._collect_execution(reference_executer,
                                                  input_tensors,
                                                  reference_qrecs)

        quantized_executer = GraphExecuter(G, qrecs=G.quantization)
        quantized_steps = self._collect_execution(
            quantized_executer,
            input_tensors,
            G.quantization,
            qmode=QuantizationMode.all_dequantize())

        stats = OrderedDict()
        for position, ref_step in enumerate(reference_steps):
            quant_step = quantized_steps[position]
            owner = ref_step['node']
            inner_steps = ref_step['fusion_outputs']
            if inner_steps:
                for inner_position, inner_ref in enumerate(inner_steps):
                    inner_id = NodeId(owner, inner_ref['node'])
                    stats[inner_id] = self._collect_one(
                        inner_ref,
                        quant_step['fusion_outputs'][inner_position],
                        G.quantization[inner_id],
                        quant_compare=self._quant_compare)
            owner_id = NodeId(owner, None)
            stats[owner_id] = self._collect_one(
                ref_step,
                quant_step,
                G.quantization[owner_id],
                quant_compare=self._quant_compare)

        return stats
コード例 #2
0
def test_graph_execute_complex(ir_graph, ir_images):
    """Smoke test: the graph executes on one 80x80x1 image without raising."""
    G = create_graph(ir_graph, opts={"load_tensors": True})
    G.add_dimensions()
    image = import_data(ir_images[0], offset=0, divisor=255).reshape((80, 80, 1))
    GraphExecuter(G).execute([image])
コード例 #3
0
    def _common(cls, node, **kwargs):
        """Evaluate a loop node whose inputs are all constant and replace its
        outputs with constants.

        The loop body (``node.attrs['body']``) is imported into a private
        ``NNGraph`` and executed repeatedly with ``GraphExecuter`` while the
        continuation flag is truthy and the remaining count is positive.

        Keyword arguments read from ``kwargs``:
            all_nodes: mapping from tensor name to an importer record; the
                records for ``node.output`` are written here.
            G: graph used to generate unique names for the new constants.
            valid_name: name of the loop used for logging and naming.
            importer: object providing ``import_subgraph``.

        Returns:
            None. Results are published through ``all_nodes``.

        Raises:
            NotImplementedError: if any input of the node is not constant or
                the imported body has inputs that are not
                ``InputParameters``/``ConstantInputParameters``.
        """
        all_nodes = kwargs['all_nodes']
        G = kwargs['G']
        valid_name = kwargs['valid_name']

        inputs = [all_nodes[inp] for inp in node.input]

        if not all(cls.is_constant(inp) for inp in inputs):
            raise NotImplementedError(
                "nntool does not support import of graphs with evaluated loops"
            )

        importer = kwargs['importer']
        sub_G = NNGraph()
        # The body is imported against a copy so that names it defines do not
        # leak into the caller's mapping.
        all_nodes_clone = all_nodes.copy()
        importer.import_subgraph(sub_G,
                                 node.attrs['body'], {},
                                 all_nodes=all_nodes_clone)
        if not all(
                isinstance(inp, (InputParameters, ConstantInputParameters))
                for inp in sub_G.inputs()):
            raise NotImplementedError(
                "nntool does not support import of graphs with evaluated loops"
            )
        sub_G.add_dimensions()
        for idx, inp in enumerate(sub_G.inputs()):
            inp.index = idx

        logger.info(f"reducing loop {valid_name} to a constant")
        count = inputs[0][0].value
        keep_going = inputs[1][0].value
        loop_carried = [inp[0].value for inp in inputs[2:]]
        # None marks "no value produced yet". Seeding with empty arrays made
        # the `is None` branch below unreachable, so the first accumulated
        # value was concatenated onto a 1-D empty float array (dtype upcast,
        # and an error for values that are not 1-D).
        outputs = [None] * len(node.output)
        # NOTE(review): if `count` is a numpy array, `count -= 1` below
        # modifies the constant's stored value in place - confirm intended.
        while keep_going and count > 0:
            executer = GraphExecuter(sub_G)
            output_tensors = executer.execute([count, keep_going] +
                                              loop_carried,
                                              silent=True)
            outp_vals = [
                output_tensors[out_node.step_idx][0]
                for out_node in sub_G.outputs()
                if not isinstance(out_node, InputParameters)
            ]
            # First body output is the continuation flag; the rest are the
            # loop carried values followed by the accumulated outputs.
            keep_going = outp_vals[0]
            for idx, val in enumerate(outp_vals[1:]):
                if idx < len(loop_carried):
                    loop_carried[idx] = outputs[idx] = val
                elif outputs[idx] is None:
                    outputs[idx] = val
                else:
                    outputs[idx] = np.concatenate((outputs[idx], val))
            count -= 1
        # Outputs never written (for example when the loop body did not run)
        # keep the empty-array result the previous implementation produced.
        outputs = [np.array([]) if outp is None else outp for outp in outputs]
        for idx, outp in enumerate(node.output):
            params = ConstantInputParameters(
                G.unique_name(f'{valid_name}_out{idx}'),
                value=outputs[idx],
                dims=Dim.unnamed(outputs[idx].shape))
            all_nodes[outp] = (params, 0, ProvisionalDim(outputs[idx].shape),
                               None)

        return None
コード例 #4
0
def test_graph_imu_auto_quant_and_execute_quant():
    """Collect statistics on the IMU graph, quantize it to 16 bits and check
    that a fully quantized execution runs without raising."""
    sample_files = ['tests/images/imu0.pgm']

    def load_sample(path):
        # Same import settings are used for calibration and for execution.
        return import_data(path, offset=0, divisor=256, nptype='int16')

    G = create_graph("tests/graph/imu.tflite", opts={"load_tensors": True})
    G.add_dimensions()
    G.adjust_order()
    get_pow2_match_group().match(G)
    G.add_dimensions()

    activation_collector = ActivationStatsCollector()
    for sample in sample_files:
        activation_collector.collect_stats(G, [load_sample(sample)])
    astats = activation_collector.reduce_stats()
    fstats = FilterStatsCollector().collect_stats(G)

    qrecs = SymmetricQuantizer(astats, fstats, force_width=16).quantize(G)
    G.quantization = qrecs

    executer = GraphExecuter(G, qrecs=qrecs)
    for sample in sample_files:
        executer.execute([load_sample(sample)], qmode=QuantizationMode.all())
コード例 #5
0
def test_cross_mini(two_conv_graph):
    """Output of step 3 must be unchanged after the ``cl`` group processing
    and parameter update have been applied to the graph."""
    def ones_input():
        return [np.full([10, 10, 2], 1)]

    G = two_conv_graph
    executer = GraphExecuter(G)
    before = executer.execute(ones_input())

    groups, neurons = cl.discover_groups(G)
    assert groups and neurons, "Nothing discovered"
    cl.process_groups(groups)
    cl.update_parameters(neurons)

    after = executer.execute(ones_input())
    max_change = np.max(np.abs(before[3][0] - after[3][0]))
    assert max_change < 0.00001
コード例 #6
0
ファイル: gen.py プロジェクト: brupa9/gap_sdk
    def do_gen(self, args):
        """
Generate AutoTiler model C code and optionally dump tensors. If no destination file is
given the generated code will be outputed to the screen. Check the 'set' command for
settings related to code generation."""
        # Precondition checks; the helpers are defined outside this block.
        self._check_graph()
        self._check_quantized()
        self._check_adjusted()
        if args.checksums:
            # Run the checksum input through the graph with every step
            # quantized before recording the checksum settings. The execution
            # result itself is discarded here.
            input_args = self._get_input_args(None)
            LOG.info("input file %s", args.checksums)
            data = import_data(args.checksums, **input_args)
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            executer.execute([data], qmode=QuantizationMode.all())
            self.settings['checksum_file'] = args.checksums
            self.settings['generate_checksums'] = True

        # Command line values override the stored settings; the two directory
        # settings are only overridden when a value was supplied.
        if args.tensor_directory:
            self.settings['tensor_directory'] = args.tensor_directory
        if args.model_directory:
            self.settings['model_directory'] = args.model_directory
        self.settings['basic_kernel_source_file'] = args.basic_kernel_source_file
        self.settings['basic_kernel_header_file'] = args.basic_kernel_header_file
        code_gen = CodeGenerator(self.G, DefaultNamingConvension(self.G), self.settings)

        # A template file named in the settings replaces the default template.
        if self.settings['template_file']:
            code_template = dynamic_template(self.settings['template_file'])
        else:
            code_template = default_template

        if args.model_file:
            # Write the model (and, when the graph has expressions, the basic
            # kernel source and header) into the model directory.
            with open(os.path.join(self.settings['model_directory'],
                                   args.model_file), "w") as output_fp:
                output_fp.write(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_source_file), "w") as output_fp:
                    output_fp.write(basic_kernel_source_template(self.G, code_generator=code_gen))
                with open(os.path.join(self.settings['model_directory'],
                                       args.basic_kernel_header_file), "w") as output_fp:
                    output_fp.write(basic_kernel_header_template(self.G, code_generator=code_gen))
        else:
            # No destination file: page the same generated text instead.
            self.ppaged(code_template(self.G, code_generator=code_gen))
            if self.G.has_expressions:
                self.ppaged(basic_kernel_source_template(self.G, code_generator=code_gen))
                self.ppaged(basic_kernel_header_template(self.G, code_generator=code_gen))
        if args.output_tensors:
            code_gen.write_constants()

        if args.header_file:
            with open(os.path.join(self.settings['model_directory'], args.header_file), "w") as output_fp:
                output_fp.write(header_template(self.G, code_generator=code_gen))
コード例 #7
0
def test_validate_mn1_float(mn1f_graph):
    """Final output of the float graph must match the stored reference
    output to within 1e-4."""
    G = TfliteImporter().create_graph(mn1f_graph, {'load_tensors': True})
    G.add_dimensions()
    get_pow2_match_group().match(G)
    G.add_dimensions()

    sample = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    sample = sample.reshape((224, 224, 3))
    executer = GraphExecuter(G, qrecs=G.quantization)
    step_outputs = executer.execute([sample])

    expected = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_float.npy')
    worst_error = np.max(np.abs(step_outputs[-1][0] - expected[0]))
    assert worst_error < 0.0001
コード例 #8
0
def test_graph_calc_quantized8_qnoq(mnist_unfused_8bit_state, mnist_images):
    """The two first outputs yielded per step by ``execute_qnoq_iterator``
    must differ by less than 9 at the eighth yielded step."""
    G = load_state(mnist_unfused_8bit_state)
    image = import_data(mnist_images[0],
                        height=28,
                        width=28,
                        offset=0,
                        divisor=255).reshape((28, 28, 1))
    executer = GraphExecuter(G, qrecs=G.quantization)
    # Only the third and fifth yielded fields (output, qoutput) are needed.
    diffs = [
        output[0] - qoutput[0]
        for _, _, output, _, qoutput, _, _ in
        executer.execute_qnoq_iterator([image])
    ]
    assert np.max(np.abs(diffs[7])) < 9
コード例 #9
0
def test_validate_mn1_quantized1(mn1q_graph, mn1f_graph):
    """Cross-check the float and quantized graphs: dequantized biases of the
    filter in step 2, then final outputs in float, unquantized and
    dequantized execution against the stored references."""
    def load_matched(graph_file, opts):
        # Each graph gets its own importer and the same preparation sequence.
        graph = TfliteImporter().create_graph(graph_file, opts)
        graph.add_dimensions()
        graph.adjust_order()
        get_pow2_match_group().match(graph)
        graph.add_dimensions()
        return graph

    Gf = load_matched(mn1f_graph, {'load_tensors': True})
    G = load_matched(mn1q_graph, {
        'load_tensors': True,
        'load_quantization': True
    })

    float_filter = Gf.graph_state.steps[2]['node'].contained_filters()[0]
    quant_step_node = G.graph_state.steps[2]['node']
    quant_filter = quant_step_node.contained_filters()[0]
    qrec = G.quantization[NodeId(quant_step_node, quant_filter)]
    dequantized_biases = qrec.biases_q.get_dequantized(quant_filter.biases)
    assert np.max(np.abs(float_filter.biases - dequantized_biases)) < 0.1

    sample = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    sample = sample.reshape((224, 224, 3)).transpose((2, 0, 1))

    float_outputs = GraphExecuter(Gf).execute([sample])
    float_reference = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_float.npy')
    assert np.max(np.abs(float_outputs[-1][0] - float_reference[0])) < 0.0001

    unquantized_outputs = GraphExecuter(G, qrecs=G.quantization).execute(
        [sample], qmode=QuantizationMode.none())
    assert np.max(
        np.abs(unquantized_outputs[-1][0] - float_reference[0])) < 0.2

    dequantized_outputs = GraphExecuter(G, qrecs=G.quantization).execute(
        [sample], qmode=QuantizationMode.all_dequantize())

    quant_reference = np.load(
        'tests/mobv1_valid/output_COCO_val2014_000000362331_0_quant.npy')
    # A tighter bound of 0.16 is disabled here in favour of 0.28.
    assert np.max(
        np.abs(dequantized_outputs[-1][0] - quant_reference[0])) < 0.28
コード例 #10
0
def test_graph_kws(kws_graph, kws_sounds):
    """Iterating the execution of the KWS graph yields 9 steps, none of
    which carries a fusion node."""
    G = create_graph(kws_graph, opts={"load_tensors": True})
    G.add_dimensions()
    sound = import_data(kws_sounds[0],
                        offset=0,
                        divisor=128,
                        nptype='int16')
    executer = GraphExecuter(G)
    # One flag per yielded step: True when the third field (fnode) is set.
    is_fusion_step = [
        fnode is not None
        for _, _, fnode, _, _ in executer.execute_iterator([sound])
    ]
    fusion_steps = sum(is_fusion_step)
    normal_steps = len(is_fusion_step) - fusion_steps
    assert normal_steps == 9 and fusion_steps == 0
コード例 #11
0
    def collect_stats(self, G, input_tensors, step_idx=None):
        """Run ``input_tensors`` through the graph and fold the observed
        output ranges into ``self.stats``.

        Args:
            G: graph to execute; its edges are used to link each node's input
                ranges to the output ranges of the producing nodes.
            input_tensors: inputs handed to the graph execution.
            step_idx: optional step limit. When a tuple is given its first
                element is used as the limit.

        Returns:
            ``self.stats``, a mapping from ``NodeId`` to a dict holding
            ``'range_in'`` and ``'range_out'`` lists plus any extra ranges
            recorded by the ``collect_stat``/``update_expression_ranges``
            helpers.
        """
        def new_range():
            # One fresh record per output. Building the list with
            # `[record] * n` would put the same dict in every slot, so an
            # update for one output would overwrite the others.
            return {'min': float('inf'), 'max': float('-inf'), 'std': 0.0}

        if self._graph_execution is None:
            if G.has_quantized_parameters:
                quantization = G.quantization
            else:
                quantization = None
            graph_executor = GraphExecuter(G, qrecs=quantization)
            graph_execution = graph_executor.execute_iterator
        else:
            graph_execution = self._graph_execution

        limit = step_idx[0] if isinstance(step_idx, tuple) else step_idx
        for _, node, fnode, output_tensors, details in\
                graph_execution(input_tensors, step_idx_limit=limit, yield_fusions=True, yield_details=True):
            key = NodeId(node, fnode)
            # Inside a fusion the statistics describe the contained node.
            node = (node if fnode is None else fnode)
            stat = self.stats.get(key)
            if stat is None:
                range_in = []
                range_out = [new_range() for _ in output_tensors]
                stat = {
                    'range_in': range_in,
                    'range_out': range_out,
                }
                self.stats[key] = stat
                if fnode is None:
                    # Input ranges share the producer's output range record,
                    # so they follow later updates of that record.
                    for edge in G.in_edges(node.name):
                        if len(range_in) <= edge.to_idx:
                            range_in.extend([None] * (edge.to_idx + 1 - len(range_in)))
                        other_stat = self.stats[NodeId(edge.from_node)]
                        range_in[edge.to_idx] = other_stat['range_out'][edge.from_idx]
                    # Make sure every connected output index has a record even
                    # if fewer tensors were yielded for this step.
                    for edge in G.out_edges(node.name):
                        if len(range_out) <= edge.from_idx:
                            range_out.extend([new_range()
                                              for _ in range(edge.from_idx + 1 - len(range_out))])

            for idx, tensor in enumerate(output_tensors):
                range_out = stat['range_out'][idx]
                self.update_ranges(range_out, tensor.min(), tensor.max())
                range_out['std'] = np.std(tensor)
                update_peraxis(range_out, tensor)

            if isinstance(node, FilterParameters):
                if details:
                    self.collect_stat(stat, 'range_acc', details, details_name='acc')
                    if isinstance(node, MultiplicativeBiasParameters) and node.has_mul_bias:
                        self.collect_stat(stat, 'range_pre_mul_bias', details, details_name='pre_mul_bias')
            elif isinstance(node, RNNBaseParameters):
                if details:
                    self.collect_stat(stat, 'range_state', details)
                    if isinstance(node, LSTMParameters):
                        self.collect_stat(stat, 'range_cell', details)
            elif isinstance(node, ExpressionFusionParameters):
                if details:
                    self.update_expression_ranges(stat, details)

        return self.stats
コード例 #12
0
def test_external_biases_sq8(qvww_graph):
    """The graph (which ends with an external biases layer expressed as a
    constant add) must execute both dequantized and without a quantization
    mode, with per-step differences below 2.2."""
    G = TfliteImporter().create_graph(
        qvww_graph, {"load_quantization": True, "load_tensors": True})
    G.add_dimensions()
    get_scale8_match_group().match(G)
    G.add_dimensions()

    picture = Image.open('tests/vwwimages/COCO_val2014_000000174838_1.png')
    picture = picture.resize((238, 208))
    pixels = np.array(picture, dtype=np.uint8)
    input_tensor = (pixels.astype(np.float32) - 128) / 128

    executer = GraphExecuter(G, qrecs=G.quantization)
    # check if nntool can execute
    qoutput_tensors = executer.execute([input_tensor],
                                       qmode=QuantizationMode.all_dequantize())
    foutput_tensors = executer.execute([input_tensor], qmode=None)

    # NOTE(review): the difference is signed, so only positive deviations
    # are bounded here - confirm whether an absolute value was intended.
    largest_per_step = [
        np.max(quantized[0] - reference[0])
        for quantized, reference in zip(qoutput_tensors, foutput_tensors)
    ]
    assert max(largest_per_step) < 2.2
コード例 #13
0
    def _collect_execution(self, G, tensors, qrecs):
        """Execute ``G`` with ``execute_qnoq_iterator`` and return one record
        per step that is not a fusion-internal step.

        Each record is a dict holding the node, copies of its ``output`` and
        ``qoutput`` tensors, the ``details``/``qdetails`` yielded with them
        and, under ``'fusion_outputs'``, the records of the fusion-internal
        steps yielded since the previous record.
        """
        collected = []
        pending_fusions = []
        runner = GraphExecuter(G, qrecs)
        for step_idx, node, output, details, qoutput, qdetails, fusion_node in \
                runner.execute_qnoq_iterator(tensors):
            # Copy the tensors so the records do not alias executer buffers.
            output_copy = [np.copy(tensor) for tensor in output]
            qoutput_copy = [np.copy(tensor) for tensor in qoutput]

            if not fusion_node:
                collected.append({
                    "name": node.name,
                    "step_idx": str(step_idx),
                    "node": node,
                    "output": output_copy,
                    "details": details,
                    "qoutput": qoutput_copy,
                    "qdetails": qdetails,
                    # Hand over whatever was gathered and start a new list.
                    "fusion_outputs": pending_fusions,
                })
                pending_fusions = []
                continue

            pending_fusions.append({
                "name": "",
                "step_idx": "{}_{}".format(step_idx, len(pending_fusions)),
                "node": fusion_node,
                "output": output_copy,
                "details": details,
                "qoutput": qoutput_copy,
                "qdetails": qdetails,
            })
        return collected
コード例 #14
0
def test_graph_calc(mnist_graph, mnist_images):
    """Iterating the execution of the MNIST graph yields 10 steps, none of
    which carries a fusion node."""
    G = create_graph(mnist_graph, opts={"load_tensors": True})
    G.add_dimensions()
    image = import_data(mnist_images[0],
                        height=28,
                        width=28,
                        offset=0,
                        divisor=255).reshape((28, 28, 1))
    executer = GraphExecuter(G)
    # One flag per yielded step: True when the third field (fnode) is set.
    is_fusion_step = [
        fnode is not None
        for _, _, fnode, _, _ in executer.execute_iterator([image])
    ]
    fusion_steps = sum(is_fusion_step)
    normal_steps = len(is_fusion_step) - fusion_steps
    assert normal_steps == 10 and fusion_steps == 0
コード例 #15
0
def test_equivalence(mnist_graph, mnist_images):
    """Weights, biases and step outputs of the imported MNIST graph must
    match the pickled reference data under ``tests/h5_pickles``."""
    G = create_graph(mnist_graph, opts={"load_tensors": True})
    G.add_dimensions()
    G.adjust_order()
    G.add_dimensions()
    image = import_data(mnist_images[0],
                        height=28,
                        width=28,
                        divisor=255,
                        offset=0,
                        transpose=False)
    output_ = GraphExecuter(G).execute([image])

    with open("tests/h5_pickles/weights.pickle", 'rb') as fp:
        verif_weights = pickle.load(fp)
    # (index in the reference pickle, index of the graph step)
    for reference_idx, step in ((0, 1), (3, 4), (7, 7)):
        layer = G.graph_state.steps[step]['node']
        assert np.array_equal(verif_weights[reference_idx]['weights'],
                              layer.weights)
        assert np.array_equal(verif_weights[reference_idx]['biases'],
                              layer.biases)

    reference_path = os.path.join(
        "tests/h5_pickles", os.path.basename(mnist_images[0]) + '.pickle')
    with open(reference_path, 'rb') as fp:
        verif = pickle.load(fp)
    within_tolerance = [
        np.max(np.abs(verif[idx][0] - output_[idx][0])) < 0.00001
        for idx in range(7)
    ]
    assert all(within_tolerance)
    # checking the Flatten layer doesn't work because the layout was not changed in the run tool
    # the layout for the output of the linear layer is a little different
    assert np.max(np.abs(verif[8][0] - output_[7][0].flatten())) < 0.00001
    assert np.array_equal(np.round(output_[-1][0].flatten()),
                          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
コード例 #16
0
def test_graph_calc_quantize_one_2(mnist_unfused_16bit_state, mnist_images):
    """Executing with ``QuantizationMode.step(4)`` must stay within +/-2 of
    the default execution at step 7."""
    def load_digit():
        # The image is imported again for each execution.
        return import_data(mnist_images[0],
                           height=28,
                           width=28,
                           offset=0,
                           divisor=255).reshape((28, 28, 1))

    G = load_state(mnist_unfused_16bit_state)
    first_input = load_digit()
    executer = GraphExecuter(G, qrecs=G.quantization)
    output1 = executer.execute([first_input])
    output2 = executer.execute([load_digit()],
                               qmode=QuantizationMode.step(4))
    diffs = [out1[0] - output2[i][0] for i, out1 in enumerate(output1)]
    step7 = diffs[7]
    assert np.min(step7) > -2 and np.max(step7) < 2
コード例 #17
0
    def validate(self,
                 qmode,
                 break_on_error=False,
                 inputs=None,
                 progress=None):
        """Execute the graph on each group of input files and sort the groups
        by whether ``self._validation`` accepts the prediction.

        Args:
            qmode: quantization mode passed to the graph execution.
            break_on_error: stop after the first rejected prediction.
            inputs: iterable of file groups (one file per graph input);
                defaults to ``self._input_files``.
            progress: optional callable invoked with the verdict after each
                group. It is not invoked for the group that triggers
                ``break_on_error``.

        Returns:
            ``(good_inputs, good_margin, bad_inputs, bad_margin)`` where each
            margin is the mean over its list, or ``0.0`` for an empty list.
        """
        if inputs is None:
            inputs = self._input_files
        accepted = []
        rejected = []
        accepted_margin = 0.0
        rejected_margin = 0.0
        for file_group in inputs:
            tensors = [
                import_data(path, **self._input_args) for path in file_group
            ]

            runner = GraphExecuter(self._graph,
                                   qrecs=self._graph.quantization)
            step_outputs = runner.execute(tensors, qmode=qmode, silent=True)

            prediction = np.asarray(step_outputs[self._prediction_step_idx])
            verdict, _, _, margin = self._validation.validate(
                file_group[0], prediction)

            if not verdict:
                rejected_margin += margin
                rejected.append(file_group)
                if break_on_error:
                    break
            else:
                accepted_margin += margin
                accepted.append(file_group)
            if progress:
                progress(verdict)
        if accepted:
            accepted_margin /= len(accepted)
        if rejected:
            rejected_margin /= len(rejected)
        return accepted, accepted_margin, rejected, rejected_margin
コード例 #18
0
def test_graph_calc_quantized8(mnist_unfused_8bit_state, mnist_images):
    """Default and dequantized executions, both limited to step 7, must
    differ by less than 9 at step 7."""
    def load_digit():
        # The image is imported again for each execution.
        return import_data(mnist_images[0],
                           height=28,
                           width=28,
                           offset=0,
                           divisor=255).reshape((28, 28, 1))

    G = load_state(mnist_unfused_8bit_state)
    first_input = load_digit()
    executer = GraphExecuter(G, qrecs=G.quantization)
    output1 = executer.execute([first_input], step_idx_limit=7)
    output2 = executer.execute([load_digit()],
                               qmode=QuantizationMode.all_dequantize(),
                               step_idx_limit=7)
    diffs = [output1[i][0] - output2[i][0] for i in range(8)]
    assert np.max(np.abs(diffs[7])) < 9
コード例 #19
0
    def _collect(self, G, input_tensors, step_idx):
        """Execute the graph and gather statistics on the first output of
        every step accepted by ``self.matches_step``.

        ``self._graph_execution`` is used when set; otherwise a
        ``GraphExecuter`` is created, with ``G.quantization`` only when
        ``G.has_quantized_parameters`` is set. When ``step_idx`` is a tuple
        its first element is used as the execution limit.

        Returns an ``OrderedDict`` mapping ``NodeId`` to the dict produced by
        ``gather_stats``, extended with accumulator ranges copied from the
        execution details for filter nodes.
        """
        graph_execution = self._graph_execution
        if graph_execution is None:
            qrecs = G.quantization if G.has_quantized_parameters else None
            graph_execution = GraphExecuter(G, qrecs=qrecs).execute_iterator

        limit = step_idx[0] if isinstance(step_idx, tuple) else step_idx
        stats = OrderedDict()
        for _, node, fnode, output_tensors, details in graph_execution(
                input_tensors,
                step_idx_limit=limit,
                yield_fusions=True,
                yield_details=True):
            if not self.matches_step(step_idx, node, fnode):
                continue
            key = NodeId(node, fnode)
            # Inside a fusion the statistics describe the contained node.
            target = fnode if fnode is not None else node

            # Use the 'c' axis as channel axis when the dims name one.
            if target.out_dims[0].is_named and target.out_dims[0].has_key('c'):
                channel_dim = target.out_dims[0].get_order_idx('c')
            else:
                channel_dim = 0

            stat = gather_stats(
                output_tensors[0],
                force_ideal=not isinstance(target, InputParameters),
                channel_dim=channel_dim,
                channel_details=step_idx is not None)

            if isinstance(target, FilterParameters) and details:
                copied_keys = ['min_acc', 'max_acc']
                if isinstance(target, MultiplicativeBiasParameters) \
                        and target.has_mul_bias:
                    copied_keys += ['min_pre_mul_bias', 'max_pre_mul_bias']
                for name in copied_keys:
                    stat[name] = details[name]

            stats[key] = stat

        return stats
コード例 #20
0
def test_validate_mn1_dequant_quantfloat(mn1q_graph):
    """Compare a dequantized load against the quantized graph run in float.

    The model is imported twice, once with ``load_quantization`` and once
    with ``load_dequantized``. The first graph is executed with
    ``QuantizationMode.none()`` and the second without quantization records;
    every step's first output must agree to within 0.003.
    """
    importer = TfliteImporter()

    def load_graph(extra_option):
        # Import the model and apply the same preparation passes to it.
        graph = importer.create_graph(mn1q_graph, {
            'load_tensors': True,
            extra_option: True
        })
        graph.add_dimensions()
        graph.adjust_order()
        get_pow2_match_group().match(graph)
        graph.add_dimensions()
        return graph

    G = load_graph('load_quantization')
    Gdq = load_graph('load_dequantized')

    # The stored image is reshaped to (224, 224, 3) and moved to channel first.
    image = np.load('tests/mobv1_valid/COCO_val2014_000000362331_0.npy')
    image = image.reshape((224, 224, 3)).transpose((2, 0, 1))

    qfoutput_tensors = GraphExecuter(G, qrecs=G.quantization).execute(
        [image], qmode=QuantizationMode.none())
    dfoutput_tensors = GraphExecuter(Gdq).execute([image])

    worst_diff = max(
        np.max(np.abs(df[0] - qf[0]))
        for df, qf in zip(dfoutput_tensors, qfoutput_tensors))
    assert worst_diff < 0.003
コード例 #21
0
ファイル: dump.py プロジェクト: VishalSharma0309/gap_sdk
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(-num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files, self.G.num_inputs):
            LOG.info("input file %s", file_per_input)            
            data = [import_data(input_file, **input_args) for input_file in file_per_input]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step,
                                       qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(format_dump_file(self.G, outputs, not qmode.is_none,
                                                args.dequantize, args.quantize_step))
            else:
                self.G.print_intermediates(outputs, limit=step, width=args.number_width,
                                           precision=args.precision, channel=args.channel,
                                           order=['c', 'h', 'w'])

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == 1:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
コード例 #22
0
ファイル: validation.py プロジェクト: mfkiwl/gap_sdk
    def do_validate(self, args: argparse.Namespace):
        """
Validate the model (quantized [-q] or not) in terms of prediction accuracy rate on a given dataset (images
folder). Ground truth labels can be embedded in files names ("filename_03.[png, ppm, pgm]", the number of
digits must be coherent with the number of networks outputs: e.g. in a 1000 classes problem the last digits
must be 3, "file_45.png" will raise an error) or can be written in a .json object (example: {'file0':label0,
'file1':label1, ...}) and given to the function with --label_json
"""
        self._check_graph()
        if args.quantize:
            self._check_quantized()
            qmode = QuantizationMode.all_dequantize()
        else:
            qmode = QuantizationMode.none()

        LOG.info("quantization mode - %s", qmode)
        input_args = self._get_input_args(args)

        good_predictions = []
        good_margin = 0
        bad_margin = 0

        number_samples = sum(1 for _ in glob_input_files(args.input_files))

        if args.vww_instances_file:
            validation = ValidateFromVWWInstances(
                args.vww_instances_file,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.label_json:
            validation = ValidateFromJSON(
                args.label_json,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        elif args.class_number is not None:
            validation = ValidateFromClass(
                args.class_number,
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)
        else:
            validation = ValidateFromName(
                class_thr=args.class_thr,
                binary_classification=args.binary_classification)

        try:
            ExecutionProgress.start()
            for i, file_per_input in enumerate(
                    glob_input_files(args.input_files, self.G.num_inputs)):
                if not args.silent:
                    LOG.info("input file %s", file_per_input)
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]

                executer = GraphExecuter(self.G, qrecs=self.G.quantization)
                outputs = executer.execute(data,
                                           qmode=qmode,
                                           silent=args.silent)

                predicted_values = np.asarray(
                    outputs[args.prediction_step_idx])
                good_prediction, class_predicted, real_class, margin = validation.validate(
                    file_per_input[0], predicted_values)
                good_predictions.append(good_prediction)
                if good_prediction:
                    good_margin += margin
                else:
                    bad_margin += margin

                if not args.silent:
                    LOG.info(
                        'Prediction is %s predicted %s correct %s margin %s',
                        good_prediction, class_predicted, real_class, margin)
                if not i % args.progress_every and i > 0:
                    LOG.info(
                        'ACCURACY: %.3f %%',
                        100 * sum(good_predictions) / len(good_predictions))

                ExecutionProgress.progress(i, number_samples)
            ExecutionProgress.end()

        except (KeyboardInterrupt, SystemExit):
            pass

        self.py_locals['labels'] = validation.labels
        self.py_locals['predictions'] = validation.predictions
        cnt = len(good_predictions)
        if cnt:
            ngood = sum(good_predictions)
            nbad = cnt - ngood
            if nbad:
                LOG.info(
                    "%s out of %s predicted falsly with %s average margin",
                    nbad, cnt, bad_margin / nbad)
            if ngood:
                LOG.info(
                    "%s out of %s predicted correctly with %s average margin",
                    ngood, cnt, good_margin / ngood)
            accuracy_rate = 100 * sum(good_predictions) / len(good_predictions)
            LOG.info('Total accuracy: %.3f %%', accuracy_rate)
コード例 #23
0
ファイル: dump.py プロジェクト: brupa9/gap_sdk
    def do_dump(self, args: argparse.Namespace):
        """
Dump the activations resulting from running an input file through the graph.
You can use the current quantization settings and can also just quantify one
specific step of the graph."""
        self._check_graph()
        dequantize = args.dequantize if args.dequantize is not None\
            else not (args.pickle or args.save)
        if args.quantize or args.quantize_step or args.quantize_all_steps:
            self._check_quantized()
            if args.quantize:
                if dequantize:
                    qmode = QuantizationMode.all_dequantize()
                else:
                    qmode = QuantizationMode.all()
            elif args.quantize_all_steps:
                qmode = QuantizationMode.step_all()
                dequantize = True
            else:
                qmode = QuantizationMode.step(args.quantize_step)
        elif args.quantize_and_dequantize:
            qmode = QuantizationMode.all_float_quantize_dequantize()
        else:
            qmode = QuantizationMode.none()
        if args.step is not None:
            step = args.step
            num_steps = len(self.G.graph_state.steps)
            if step < 0:
                step = num_steps + step
            if step < 0 or step > num_steps:
                self.perror("step must be from {} to {}".format(
                    -num_steps, num_steps))
                return
        else:
            step = None

        input_args = self._get_input_args(args)

        pickles = []

        for file_per_input in glob_input_files(args.input_files,
                                               self.G.num_inputs):
            LOG.info("input file %s", file_per_input)
            data = [
                import_data(input_file, **input_args)
                for input_file in file_per_input
            ]
            executer = GraphExecuter(self.G, qrecs=self.G.quantization)
            outputs = executer.execute(data, step_idx_limit=step, qmode=qmode)

            if args.pickle or self._in_py or args.save:
                pickles.append(outputs)
            else:
                self.G.print_intermediates(outputs,
                                           limit=step,
                                           width=args.number_width,
                                           precision=args.precision,
                                           channel=args.channel,
                                           order=['c', 'h', 'w'],
                                           checksum=args.checksum)

            if args.visualize_detection:
                img_in = Image.open(file_per_input[0]).convert('RGBA')

                height = img_in.size[1] if input_args[
                    'height'] == -1 else input_args['height']
                width = img_in.size[0] if input_args[
                    'width'] == -1 else input_args['width']
                img_in = img_in.resize((width, height))

                if self.G.has_ssd_postprocess:
                    bboxes, classes, scores, _ = [
                        outputs[graph_out.step_idx][0]
                        for graph_out in self.G.outputs()
                    ]
                    draw = ImageDraw.Draw(img_in, 'RGBA')

                    for box, score, class_id in zip(bboxes, scores, classes):
                        if args.quantize and not args.dequantize:
                            ssd_node = [
                                node for node in self.G.nodes()
                                if isinstance(node, SSDDetectorParameters)
                            ][0]
                            ssd_qrec = self.G.quantization[NodeId(ssd_node)]
                            x0, x1 = int(box[1] * width *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[3] * width *
                                             ssd_qrec.out_qs[0].scale)
                            y0, y1 = int(box[0] * height *
                                         ssd_qrec.out_qs[0].scale), int(
                                             box[2] * height *
                                             ssd_qrec.out_qs[0].scale)
                            score = score * ssd_qrec.out_qs[2].scale
                        else:
                            x0, x1 = int(box[1] * width), int(box[3] * width)
                            y0, y1 = int(box[0] * height), int(box[2] * height)
                        rect_points = (x0, y0), (x1, y0), (x1, y1), (x0,
                                                                     y1), (x0,
                                                                           y0)
                        draw.line(rect_points, fill='red', width=2)
                        txt = '{}@{}%'.format(class_id, int(score * 100))
                        draw.text([x0, y0 - 10], txt, fill=(0, 255, 0))
                img_in.show()

        if args.pickle or args.save or self._in_py:
            if not pickles:
                self.perror("no input files found")
                return
            if len(args.input_files) == self.G.num_inputs:
                pickles = pickles[0]
            if args.pickle:
                with open(args.pickle, 'wb') as pickle_fp:
                    pickle.dump(pickles, pickle_fp)
            if args.save:
                if len(args.input_files) != self.G.num_inputs:
                    self.perror(
                        "can only save dumps on one input to tensor store")
                    return
                self.tensor_store[args.save] = pickles

        if self._in_py:
            self.last_result = pickles
コード例 #24
0
def gen_project(G,
                settings,
                project_folder,
                script_commands,
                overwrite=False,
                performance=False,
                quantized=False,
                test_results=False,
                save_inputs=False,
                input_file=None,
                input_args=None,
                gen_atproject=False,
                dump_tensors=False,
                input_tensors=None,
                tolerance=0.0):
    """Generate an application project for graph ``G`` in ``project_folder``.

    Writes the main application source and header, ``common.mk`` and an
    ``nntool_script`` (each only when it is missing, unless ``overwrite`` is
    set), optionally the model and expression kernel sources, then copies the
    project template found under ``$NNTOOL_PATH/generation/project_template``
    and, when ``gen_atproject`` is false, the graph file itself.

    Args:
        G: graph the project is generated for.
        settings: code generation settings. A deep copy is made and the
            ``graph_monitor_cycles``, ``graph_produce_node_names`` and
            ``graph_produce_operinfos`` entries are forced to ``True``.
        project_folder: destination directory, created with ``os.mkdir`` when
            it does not exist.
        script_commands: command history written to ``nntool_script`` through
            ``process_script`` and inspected with ``parse_last_open``.
        overwrite: regenerate files that already exist.
        performance: NOTE(review): appears unused in this function - confirm.
        quantized: forwarded to the makefile generators.
        test_results: execute the graph with ``QuantizationMode.all()`` and
            pass the resulting input and output tensors to the main template.
        save_inputs: with ``test_results``, save each dequantized input as
            ``fake_input_<i>.npy`` in the project folder.
        input_file: with ``test_results``, input files to import instead of
            generating random inputs.
        input_args: keyword arguments for ``import_data``.
        gen_atproject: use the "atproject" makefile generator and also write
            ``Model.c``, the expression kernels (when the graph has
            expressions) and the constants.
        dump_tensors: append ``set graph_dump_tensor 7`` to the script.
        input_tensors: forwarded to the main template; replaced by the
            computed input tensors when ``test_results`` is set.
        tolerance: forwarded to ``generate_main_appl_template``.
    """
    # Work on a private copy so the caller's settings are not modified.
    settings = deepcopy(settings)
    settings['graph_monitor_cycles'] = True
    settings['graph_produce_node_names'] = True
    settings['graph_produce_operinfos'] = True

    code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)

    # os.mkdir creates a single level only: the parent must already exist.
    if not os.path.exists(project_folder):
        os.mkdir(project_folder)

    qoutputs = None
    if test_results:
        # Fixed seed so the generated random inputs are reproducible.
        np.random.seed(12345)
        finput_tensors = []
        # The input_tensors argument is discarded and rebuilt from execution.
        input_tensors = []
        for i, node in enumerate(G.input_nodes()):
            out_q = G.quantization[NodeId(node)].out_qs[0]
            if input_file:
                # NOTE(review): indexing requires glob_input_files to return
                # a sequence here - confirm it is not a generator.
                file_per_input = glob_input_files(input_file, G.num_inputs)[0]
                finput = import_data(file_per_input[i], **input_args)
            else:
                # Random input spanning the quantizer range, or [-1, 1] when
                # the input quantization is floating point.
                min_val = out_q.min if not out_q.is_floating else -1.0
                max_val = out_q.max if not out_q.is_floating else 1.0
                finput = get_rand(node.out_dims[0].shape,
                                  low_high=(min_val, max_val))
            finput_tensors.append(finput)
        executer = GraphExecuter(G, qrecs=G.quantization)
        # A shallow copy of the input list is handed to the executer.
        qoutput_tensors = executer.execute(finput_tensors.copy(),
                                           qmode=QuantizationMode.all())
        # Expected outputs: first tensor of every graph output step.
        qoutputs = []
        for params in G.outputs():
            outp = qoutput_tensors[params.step_idx][0]
            qoutputs.append(outp)
        # Inputs as produced by the executed input steps.
        for i, params in enumerate(G.input_nodes()):
            inp = qoutput_tensors[params.step_idx][0]
            input_tensors.append(inp)
            if save_inputs:
                nodeq = G.quantization[NodeId(params, None)].out_qs[0]
                np.save(os.path.join(project_folder, f"fake_input_{i}.npy"),
                        nodeq.dequantize(inp))

    # Paths used to decide which generated files already exist.
    main = os.path.join(project_folder, f"{code_gen.project_name}")
    main_c = main + '.c'
    main_h = main + '.h'
    common_mk = os.path.join(project_folder, "common.mk")
    nntool_script = os.path.join(project_folder, "nntool_script")
    if overwrite or not os.path.exists(main_c):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.c"),
                  "w") as output_fp:
            output_fp.write(
                generate_main_appl_template(G, code_gen, input_tensors,
                                            qoutputs, tolerance))
    if overwrite or not os.path.exists(main_h):
        with open(os.path.join(project_folder, f"{code_gen.project_name}.h"),
                  "w") as output_fp:
            output_fp.write(generate_main_appl_header(G, code_gen))
    if overwrite or not os.path.exists(common_mk):
        # Arguments of the last open command, when one is found in the history.
        open_args = parse_last_open(script_commands)
        open_args = build_last_open_args(open_args) if open_args else ""
        with open(os.path.join(project_folder, "common.mk"), "w") as output_fp:
            if gen_atproject:
                output_fp.write(
                    generate_main_appl_make_atproject(G, code_gen, quantized,
                                                      'Model.c'))
            else:
                output_fp.write(
                    generate_main_appl_make(G,
                                            code_gen,
                                            quantized,
                                            open_args=open_args))
    if overwrite or not os.path.exists(nntool_script):
        with open(nntool_script, 'w') as fp:
            # NOTE - gen_template_project is excluded so that tests work. Normally it will not be in the
            # history.
            fp.writelines(process_script(script_commands))
            # always add performance since the main template uses it
            for setting in [
                    'set graph_produce_node_names true',
                    'set graph_produce_operinfos true',
                    'set graph_monitor_cycles true'
            ]:
                fp.write(f'{setting}\n')
            if dump_tensors:
                fp.write('set graph_dump_tensor 7\n')

            # NOTE(review): script_commands[-1] raises IndexError when the
            # command history is empty.
            if script_commands[-1] != "save_state":
                fp.write('save_state\n')
    if gen_atproject:
        # A fresh generator is created for the model sources and constants.
        code_gen = CodeGenerator(G, DefaultNamingConvension(G), settings)
        with open(os.path.join(project_folder, 'Model.c'), "w") as output_fp:
            output_fp.write(default_template(G, code_generator=code_gen))
        if G.has_expressions:
            with open(os.path.join(project_folder, "Expression_Kernels.c"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_source_template(G, code_generator=code_gen))
            with open(os.path.join(project_folder, "Expression_Kernels.h"),
                      "w") as output_fp:
                output_fp.write(
                    basic_kernel_header_template(G, code_generator=code_gen))
        code_gen.write_constants(tensor_directory=project_folder)
    # Without overwrite, template files already present in the project are
    # passed over by the ignore callback.
    ignore_function = None if overwrite else skip_existing_files(
        project_folder)
    # NOTE(review): os.environ.get returns None when NNTOOL_PATH is unset,
    # which makes os.path.join raise TypeError.
    # dirs_exist_ok requires Python 3.8 or later.
    shutil.copytree(os.path.join(os.environ.get("NNTOOL_PATH"),
                                 'generation/project_template'),
                    project_folder,
                    dirs_exist_ok=True,
                    ignore=ignore_function)

    if not gen_atproject:
        # Ship the graph file with the project; copying it onto itself is
        # not an error.
        try:
            shutil.copy(
                G.graph_identity.filename,
                os.path.join(project_folder,
                             os.path.split(G.graph_identity.filename)[1]))
        except shutil.SameFileError:
            pass