Example #1
0
    def _quantize(self, xgraph, stop=None, subgraphs_only=True):
        """Run MSE quantization over `xgraph` and collect quant parameters.

        Inserts MSE quantization layers via a graph pass, executes the
        resulting graph on CPU to populate threshold values, then retrieves
        the quantization parameters.
        """
        # NOTE(review): `stop` is accepted but never used in this body.

        # Only dump a debug PNG when logging at DEBUG level or below
        debug_png = 'tvm_mse_quant.png' \
            if logger.getEffectiveLevel() <= 10 else None

        # Graph pass that inserts the MSE quantization layers
        quant_pass = XGraphPassAddMSEQuantLayers(
            bitwidth=self.bitwidth,
            mse_opt_num=self.mse_opt_num,
            subgraphs_only=subgraphs_only,
            output_png=debug_png,
            name=xgraph.get_name())
        xgraph = quant_pass.execute(xgraph=xgraph)

        self.quant_xgraph = xgraph
        self.runtime = pyxir.build(self.quant_xgraph, target='cpu')

        # Execute the graph so the Variable layer thresholds get set
        fancy_logger.banner("EXECUTE QUANTIZATION GRAPH")

        batch_inputs = self.inputs_func(0)
        out, params = self.runtime.optimize(batch_inputs)

        logger.info("Done executing graph")

        logger.info("Retrieving quantization parameters...")
        self._retrieve_quant_params(params, xgraph, subgraphs_only)
Example #2
0
def _run_network_cpu(xgraph, inputs, batch_size=1):
    """Apply basic optimizations to `xgraph` and execute it on the CPU.

    Returns whatever the pyxir CPU runtime produces for `inputs`.
    """
    # Basic optimization pass
    opt_xgraph = XGraphBasicOptimizer(xgraph).optimize()

    # Build a CPU runtime module and run it (no extra output layers)
    runtime_module = pyxir.build(opt_xgraph, target='cpu')
    return pyxir.run(runtime_module, inputs, [], batch_size=batch_size)
Example #3
0
    def __init__(self,
                 xgraph,
                 inputs_func,
                 work_dir=os.getcwd(),
                 quant_iter=1):
        """Set up subgraph quantization state for `xgraph`.

        xgraph      -- the XGraph to be quantized
        inputs_func -- callable taking an iteration index and returning a
                       dict of graph input name -> input data
        work_dir    -- directory for intermediate files (created if missing);
                       defaults to the current working directory at import
                       time (fix: was a redundant single-arg os.path.join)
        quant_iter  -- number of quantization iterations
        """
        super(XGraphBaseSubgraphQuantizer, self).__init__(xgraph)

        self.subgraph_Xps = XGraphBaseSubgraphQuantizer.xgraph_partitioner\
            .get_subgraphs(self.xgraph)

        # Maps external (graph) to internal (subgraph) inputs for each subgraph
        self.subgraph_input_map = {}
        self.subgraph_inputs = {}
        self.subgraph_input_names = []
        for Xp in self.subgraph_Xps:
            sub_xgraph = XGraphBaseSubgraphQuantizer.xgraph_factory.\
                build_from_xlayer(Xp.subgraph_data, name=Xp.name)

            self.subgraph_input_map[Xp.name] = {}

            input_names = sub_xgraph.get_input_names()

            # Pair each external bottom with its internal subgraph input;
            # pairing is positional (zip), so both lists must align
            for b, in_name in zip(Xp.bottoms, input_names):
                self.subgraph_input_names.append(b)
                self.subgraph_inputs[b] = None
                self.subgraph_input_map[Xp.name][b] = in_name

        # Executable graph that stops at the subgraph inputs, so we can
        # capture the data flowing into each subgraph
        self.runtime = pyxir.build(self.xgraph,
                                   target='cpu',
                                   last_layers=self.subgraph_input_names)

        self.inputs_func = inputs_func
        self.work_dir = work_dir
        os.makedirs(self.work_dir, exist_ok=True)
        self.quant_iter = quant_iter
Example #4
0
    def __init__(self,
                 xgraph,
                 inputs_func,
                 bitwidth=8,
                 work_dir=os.path.join(os.getcwd(), 'work'),
                 quant_iter=1):
        """Set up default quantization for `xgraph`.

        xgraph      -- the XGraph to be quantized
        inputs_func -- callable taking an iteration index and returning a
                       dict of graph input name -> input data
        bitwidth    -- quantization bit width (default 8)
        work_dir    -- directory for intermediate files
        quant_iter  -- number of quantization iterations
        """
        super(XGraphDefaultQuantizer, self).__init__(xgraph)

        # Setup executable graph
        self.runtime = pyxir.build(self.xgraph, target='cpu')

        self.inputs_func = inputs_func
        self.work_dir = work_dir
        self._bitwidth = bitwidth

        self._quant_param = QuantParamFactory()
        self._quant_layers = {}

        def _make(quant_cls):
            # One fresh quant-function instance per operation type, all
            # sharing the same param factory, layer map and bitwidth
            # (fix: removes ~19 repetitions of the same 3-arg constructor)
            return quant_cls(self._quant_param, self._quant_layers,
                             self._bitwidth)

        self.XFDNN_OP_2_QUANT_FUNC = {
            'Input': _make(InputQuant),
            'Output': _make(DefaultQuant),
            'Constant': _make(SkipQuant),

            # BASIC NN OPS
            'Dense': _make(DefaultQuant),
            'Softmax': _make(DefaultQuant),
            'ReLU': _make(DefaultQuant),
            'Tanh': _make(DefaultQuant),

            # MATH
            'Scale': _make(ScaleQuant),
            'Eltwise': _make(EltwiseQuant),
            'Concat': _make(ConcatQuant),
            'Mean': _make(DefaultQuant),
            'BatchNorm': _make(BatchNormQuant),

            # CONVOLUTION
            'Convolution': _make(ConvQuant),
            'Conv2DTranspose': _make(ConvQuant),
            'Pooling': _make(PoolQuant),

            # OTHER
            'Reshape': _make(DefaultQuant),
            'Squeeze': _make(DefaultQuant),
            'Flatten': _make(DefaultQuant),
            'Transpose': _make(DefaultQuant),
        }
Example #5
0
                 bn_handling=BatchNormHandling.MERGE_AND_QUANTIZE)

# Finetune the model
# . . .

# Export the finetuned model to ONNX
onnx_filename = 'dpuv2_resnet18.onnx'
export_dpuv2_onnx(model,
                  input_shape=IN_SIZE,
                  input_t=inp,
                  export_path=onnx_filename)

# Load ONNX into PyXIR, then partition for the target and optimize
onnx_model = onnx.load(onnx_filename)
xgraph = from_onnx(onnx_model)
xgraph = pyxir.partition(xgraph, [target])
xgraph = pyxir.optimize(xgraph, target)
work_dir = os.path.join(file_dir, f'{target}_quant_trained_resnet18_workdir')
# Random data matching the model input shape (presumably used as
# calibration input below — confirm against the quantizer's expectations)
inputs = np.random.randn(*IN_SIZE)


def inputs_func(iteration):
    """Return the quantization calibration inputs for one iteration.

    The same random batch is returned for every iteration; the argument is
    intentionally unused. (Fix: parameter renamed from `iter`, which
    shadowed the builtin; callers invoke this positionally.)
    """
    return {'inp.1': inputs}


# Quantize with the calibration inputs, then build a numpy CPU runtime
xgraph = pyxir.quantize(xgraph, target, inputs_func, work_dir=work_dir)
pyxir.build(xgraph,
            target,
            work_dir=work_dir,
            build_dir=work_dir,
            runtime='cpu-np')