Example 1
    def do_quantize(self, blob, name, node=None, tensor_type='input'):
        # quantization is off: forward through the quant graph without quantizing parameters or activations
        if NndctOption.nndct_quant_off.value:
            return blob

        blob_save = blob
        if isinstance(blob.values, torch.Tensor):
            blob = blob.values

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if blob.device.type != quant_device.type:
            raise TypeError(
                "Device of the quantizer is {}; the model and data must be on "
                "the same device as the quantizer.".format(quant_device.type))

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            quant_data = nndct_quant.QuantizeData(name,
                                                  blob.cpu().detach().numpy())
        # quantize the tensor
        bnfp = self.get_bnfp(name, True, tensor_type)
        #print('---- quant %s with 1/step = %g' % (name, bnfp[1]))
        # hardware cut (rounding) method
        mth = 4 if self.lstm else 2
        if tensor_type == 'param':
            mth = 3

        res = py_nndct.nn.NndctFixNeuron(blob,
                                         blob,
                                         maxamp=[bnfp[0], bnfp[1]],
                                         method=mth)

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            global global_snr_inv
            quant_efficiency, sqnr = quant_data.quant_efficiency(
                blob.cpu().detach().numpy(), 8)
            global_snr_inv += 1 / sqnr
            print(
                f"quant_efficiency={quant_efficiency}, global_snr_inv={global_snr_inv} {quant_data._name}\n"
            )

        # update param to nndct graph
        if tensor_type == 'param':
            self.update_param_to_nndct(node, name, res.cpu().detach().numpy())

        # NndctFixNeuron quantizes blob in place, so the saved wrapper
        # already holds the quantized values; return it
        res = blob_save

        return res
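As a point of reference, here is a minimal sketch of the arithmetic NndctFixNeuron is presumed to perform in the call above (maxamp carries the saturation bound and the "1/step" value): round to a step of 1/2^fix_pos, then clamp to the signed bit-width range. The helper name fix_neuron_sketch and the direct bit_width/fix_pos parameters are illustrative assumptions; the rounding variants selected by method are not modeled.

import torch

def fix_neuron_sketch(x: torch.Tensor, bit_width: int, fix_pos: int) -> torch.Tensor:
    # Illustrative only: symmetric power-of-two fake quantization.
    # `scale` is the printed "1/step"; the hardware rounding mode chosen
    # via `method` in the real kernel is not modeled here.
    scale = 2.0 ** fix_pos
    lower = -(2 ** (bit_width - 1))   # e.g. -128 for 8 bits
    upper = 2 ** (bit_width - 1) - 1  # e.g.  127 for 8 bits
    return torch.clamp(torch.round(x * scale), lower, upper) / scale

print(fix_neuron_sketch(torch.randn(4), bit_width=8, fix_pos=6))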
Example 2
    def do_scan(self, res, name, node=None, tensor_type='input'):
        # keep quantization steps after fast finetune
        if self.keep_fp:
            return self.do_quantize(res, name, node, tensor_type)

        # quantization is off: forward through the quant graph without quantizing parameters or activations
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return res
            else:
                return res.clone().detach()

        res_save = None
        if isinstance(res.values, torch.Tensor):
            res_save = res
            res = res.values.data

        if res.dtype != torch.float32 and res.dtype != torch.double:
            NndctScreenLogger().warning_once(
                f'The tensor type of {node.name} is {str(res.dtype)}. Only float32/double quantization is supported.'
            )
            return res_save if res_save is not None else res

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if res.device.type != quant_device.type:
            raise TypeError(
                "Device of the quantizer is {}; the model and data must be on "
                "the same device as the quantizer.".format(quant_device.type))

        # get the recorded fix position
        bnfp = self.get_quant_config(name, False, tensor_type)

        # hardware cut (rounding) method
        mth = 4 if self.lstm else 2

        if NndctOption.nndct_use_torch_quantizer.value is True:
            mth = -1
        elif tensor_type == 'param':
            mth = 3

        scope = 5 if NndctOption.nndct_diffs_mode.value == "mse" else 1
        # set the fix pos scanning scope to 1 for some tensor types
        if (node.op.type in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]):
            scope = 1
        if (self.lstm and tensor_type == 'input'):
            scope = 1
            res = res.detach().clone()

        Tbuffer = torch.empty_like(res).to(quant_device)
        Tfixpos = torch.tensor(
            [1], dtype=torch.get_default_dtype()).to(quant_device)

        # activations always get a fix pos calculated;
        # parameters get one only if it is still None;
        # in finetune mode (quant_mode == 3) the fix pos is always recalculated

        if tensor_type != 'param' or bnfp[1] is None or self.quant_mode == 3:
            py_nndct.nn.NndctDiffsFixPos(Tinput=res,
                                         Tbuffer=Tbuffer,
                                         Tfixpos=Tfixpos,
                                         bit_width=bnfp[0],
                                         range=scope,
                                         method=mth)
            bnfp[1] = int(Tfixpos.item())
            # limit the max fix pos to nndct_max_fix_position when bit width <= 8 (or for LSTM); otherwise limit it to 15
            if bnfp[0] <= 8 or self.lstm:
                max_fp = NndctOption.nndct_max_fix_position.value
                bnfp[1] = min(max_fp, bnfp[1])
            else:
                bnfp[1] = min(15, bnfp[1])
            # record fix pos of activation
            if tensor_type != 'param':
                self.config_history[tensor_type][name].append(bnfp[1])
                if (NndctOption.nndct_stat.value > 1):
                    print(
                        f'---- fp history: {stats.mode(np.array(self.config_history[tensor_type][name]))}'
                    )
                data = np.array(self.config_history[tensor_type][name])
                bnfp[1] = stats.mode(data)[0][0]
                bnfp[1] = bnfp[1].astype(np.int32).tolist()
            self.set_quant_config(name, bnfp, tensor_type)
            if (NndctOption.nndct_stat.value > 1):
                print('---- quant %s tensor: %s with bw = %d and fp = %g' %
                      (tensor_type, name, bnfp[0], bnfp[1]))

            # get 2^bit_width and 2^fracpos
            bnfp = self.get_quant_config(name, True, tensor_type)

            if (NndctOption.nndct_stat.value > 2):
                quant_data = nndct_quant.QuantizeData(
                    name,
                    res.cpu().detach().numpy())

            # do quantization for parameter or activation
            res = fake_quantize_per_tensor(res, bnfp[1], 0, -bnfp[0],
                                           bnfp[0] - 1, mth, self.inplace)

            if (NndctOption.nndct_stat.value > 2):
                #quant_data.all_close(res.cpu().detach().numpy())
                global global_snr_inv
                quant_efficiency, sqnr = quant_data.quant_efficiency(
                    res.cpu().detach().numpy(), math.log2(bnfp[0]))
                global_snr_inv += 1 / sqnr
                if quant_efficiency < 3.0:
                    print(
                        f"quant_efficiency={quant_efficiency}, {quant_data._name}\n"
                    )
                    print('Statistic [Min, Max, Mean, Std]:')
                    print('[{}, {}, {}, {}]'.format(res.min(), res.max(),
                                                    res.mean(), res.std()))
                    print('histogram: {}'.format(
                        res.histc(bins=10).cpu().detach().numpy()))
                    t = res
                    if tensor_type != 'param':
                        t = res.transpose(0, 1)
                    print('Channel number:{}'.format(t.shape[0]))
                    print('Channel-wise statistic [Min, Max, Mean, Std]:')
                    for c in range(t.shape[0]):
                        print('[{}, {}, {}, {}]'.format(
                            t[c].min(), t[c].max(), t[c].mean(), t[c].std()))
                        print('histogram: {}'.format(
                            t[c].histc(bins=10).cpu().detach().numpy()))

        if res_save is not None:
            res_save.values.data = res
            res = res_save

        return res
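NndctDiffsFixPos itself is an opaque kernel here; a rough mental model is that it scans candidate fix positions and keeps the one with the smallest quantization error, with scope bounding the scan and nndct_diffs_mode selecting the metric. A self-contained sketch under those assumptions (search_fix_pos is a hypothetical helper, and MSE over all candidates is assumed as the metric):

import torch

def search_fix_pos(x: torch.Tensor, bit_width: int = 8, max_fp: int = 15) -> int:
    # Illustrative stand-in for NndctDiffsFixPos: try each fix position and
    # keep the one minimizing MSE. The real kernel limits the scan via its
    # `range` argument and supports other error metrics.
    lower = -(2 ** (bit_width - 1))
    upper = 2 ** (bit_width - 1) - 1
    best_fp, best_err = 0, float("inf")
    for fp in range(max_fp + 1):
        scale = 2.0 ** fp
        q = torch.clamp(torch.round(x * scale), lower, upper) / scale
        err = torch.mean((x - q) ** 2).item()
        if err < best_err:
            best_fp, best_err = fp, err
    return best_fp

print(search_fix_pos(torch.randn(1024)))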
Example 3
    def do_quantize(self, blob, name, node=None, tensor_type='input'):
        # quantization is off: forward through the quant graph without quantizing parameters or activations
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return blob
            else:
                return blob.clone().detach()

        blob_save = None
        if isinstance(blob.values, torch.Tensor):
            blob_save = blob
            blob = blob.values.data

        if blob.dtype != torch.float32 and blob.dtype != torch.double:
            NndctScreenLogger().warning_once(
                f'The tensor type of {node.name} is {str(blob.dtype)}. Only float32/double quantization is supported.'
            )
            return blob_save if blob_save is not None else blob

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if blob.device.type != quant_device.type:
            raise TypeError(
                "Device of the quantizer is {}; the model and data must be on "
                "the same device as the quantizer.".format(quant_device.type))

        if (NndctOption.nndct_stat.value > 2):
            quant_data = nndct_quant.QuantizeData(name,
                                                  blob.cpu().detach().numpy())
        # quantize the tensor
        bnfp = self.get_quant_config(name, True, tensor_type)
        if (NndctOption.nndct_stat.value > 1):
            print('---- quant %s tensor: %s with 1/step = %g' %
                  (tensor_type, name, bnfp[1]))
        # hardware cut (rounding) method
        mth = 4 if self.lstm else 2

        if NndctOption.nndct_use_torch_quantizer.value is True:
            mth = -1
        elif tensor_type == 'param':
            mth = 3

        res = fake_quantize_per_tensor(blob, bnfp[1], 0, -bnfp[0], bnfp[0] - 1,
                                       mth, self.inplace)

        if (NndctOption.nndct_stat.value > 2):
            global global_snr_inv
            quant_efficiency, sqnr = quant_data.quant_efficiency(
                res.cpu().detach().numpy(), 8)
            global_snr_inv += 1 / sqnr
            if quant_efficiency < 3.0:
                print(
                    f"quant_efficiency={quant_efficiency}, global_snr_inv={global_snr_inv} {quant_data._name}\n"
                )
                print(
                    'Network input channel-wise statistic [Min, Max, Mean, Std]:'
                )
                print('[{}, {}, {}, {}]'.format(res.min(), res.max(),
                                                res.mean(), res.std()))
                print('histogram: {}'.format(
                    res.histc(bins=10).cpu().detach().numpy()))
                t = res
                if tensor_type != 'param':
                    t = res.transpose(0, 1)
                print('Channel number:{}'.format(t.shape[0]))
                print('Channel-wise statistic [Min, Max, Mean, Std]:')
                for c in range(t.shape[0]):
                    print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(),
                                                    t[c].mean(), t[c].std()))
                    print('histogram: {}'.format(
                        t[c].histc(bins=10).cpu().detach().numpy()))

        # update param to nndct graph
        if tensor_type == 'param' and not self.exporting:
            self.update_param_to_nndct(node, name, res.cpu().detach().numpy())

        if blob_save is not None:
            blob_save.values.data = res
            res = blob_save

        return res
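fake_quantize_per_tensor above receives bnfp[1] (the printed "1/step") rather than a scale. Assuming standard affine fake-quant semantics, the call can be approximated with PyTorch's built-in op; fake_quantize_sketch is a hypothetical helper, and the method (rounding mode) and inplace arguments are deliberately ignored:

import torch

def fake_quantize_sketch(x: torch.Tensor, amp: float,
                         quant_min: int, quant_max: int) -> torch.Tensor:
    # Hedged approximation: `amp` is the printed "1/step", so torch's
    # `scale` is its reciprocal; zero_point 0 means symmetric quantization.
    return torch.fake_quantize_per_tensor_affine(
        x, 1.0 / amp, 0, quant_min, quant_max)

# e.g. 8-bit signed data with fix position 6
print(fake_quantize_sketch(torch.randn(8), amp=2.0 ** 6,
                           quant_min=-128, quant_max=127))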
Example 4
    def do_scan(self, res, name, node=None, tensor_type='input'):
        # quantization is off: forward through the quant graph without quantizing parameters or activations
        if NndctOption.nndct_quant_off.value:
            return res

        res_save = res
        if isinstance(res.values, torch.Tensor):
            res = res.values

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if res.device.type != quant_device.type:
            raise TypeError(
                "Device of the quantizer is {}; the model and data must be on "
                "the same device as the quantizer.".format(quant_device.type))

        # hardware cut (rounding) method
        mth = 4 if self.lstm else 2
        if tensor_type == 'param':
            mth = 3

        scan_range = 5
        # set the fix pos scanning range to 1 for some tensor types
        if ((node.op.type in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB])
                or (self.lstm and tensor_type == 'input')):
            scan_range = 1

        # get the recorded fix position
        bnfp = self.get_bnfp(name, False, tensor_type)
        if (quant_device.type == "cpu"):
            Tbuffer = torch.empty_like(res).to(torch.device("cpu"))
            Tfixpos = torch.tensor([1], dtype=torch.get_default_dtype()).to(
                torch.device("cpu"))
        else:
            Tbuffer = torch.empty_like(res).cuda()
            Tfixpos = torch.tensor([1], dtype=torch.get_default_dtype()).cuda()

        # activations always get a fix pos calculated;
        # parameters get one only if it is still None;
        # in finetune mode (quant_mode == 3) the fix pos is always recalculated
        if tensor_type != 'param' or bnfp[1] is None or self.quant_mode == 3:
            py_nndct.nn.NndctDiffsFixPos(Tinput=res,
                                         Tbuffer=Tbuffer,
                                         Tfixpos=Tfixpos,
                                         bit_width=bnfp[0],
                                         range=scan_range,
                                         method=mth)
            bnfp[1] = int(Tfixpos.item())
            # record fix pos of activation
            if tensor_type != 'param':
                self.fp_history[tensor_type][name].append(bnfp[1])
                data = np.array(self.fp_history[tensor_type][name])
                bnfp[1] = stats.mode(data)[0][0]
                bnfp[1] = bnfp[1].astype(np.int32).tolist()
            self.set_bnfp(name, bnfp, tensor_type)
            #print('---- quant %s with bw = %d and fp = %g' % (name, bnfp[0], bnfp[1]))

            # get 2^bit_width and 2^fracpos
            bnfp = self.get_bnfp(name, True, tensor_type)

            if (NndctOption.nndct_quant_opt.value
                    and NndctOption.nndct_logging_level.value > 0):
                #if tensor_type == "param":
                quant_data = nndct_quant.QuantizeData(
                    name,
                    res.cpu().detach().numpy())

            #print('---- quant %s with bw = %d and 1/step = %g' % (name, bnfp[0], bnfp[1]))
            # do quantization for parameter or activation
            res = py_nndct.nn.NndctFixNeuron(res,
                                             res,
                                             maxamp=[bnfp[0], bnfp[1]],
                                             method=mth)

            if (NndctOption.nndct_quant_opt.value
                    and NndctOption.nndct_logging_level.value > 0):
                #if tensor_type == "param":
                global global_snr_inv
                quant_efficiency, sqnr = quant_data.quant_efficiency(
                    res.cpu().detach().numpy(), 8)
                global_snr_inv += 1 / sqnr
                print(
                    f"quant_efficiency={quant_efficiency}, {quant_data._name}\n"
                )
                #print(f"quant_efficiency={quant_efficiency}, global_snr_inv={globacl_snr_inv} {quant_data._name}\n")
        # NndctFixNeuron quantizes res in place, so the saved wrapper already
        # holds the quantized values
        res = res_save
        return res
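For activations, the fix position that is finally kept is not the one from the last batch: each scanned batch appends its result to fp_history and the most frequent value (the statistical mode) wins. A minimal sketch of that smoothing step with a made-up history follows; note that the stats.mode(data)[0][0] indexing in the code above assumes an older SciPy, so keepdims=True (available since SciPy 1.9) is used here to reproduce that return shape:

import numpy as np
from scipy import stats

# Sketch of the fix-pos smoothing above: each calibration batch appends its
# chosen fix position to a history, and the statistical mode is kept.
history = [6, 7, 6, 6, 5, 6]          # made-up example history
data = np.array(history)
final_fp = int(stats.mode(data, keepdims=True)[0][0])
print(final_fp)  # -> 6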