Example 1
 def forward(self, input, other):
     [qinput, qother] = quantize_tensors([input, other],
                                         self.node,
                                         tensor_type='input')
     output = torch.matmul(input=qinput, other=qother)
     output = quantize_tensors([output], self.node)[0]
     return output
Example 2
    def forward(self, input):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        # check input shape
        if self.node.out_tensors[0].is_complete_tensor(
        ) and self.node.out_tensors[0].ndim == 4:
            # py_utils.blob_to_torch_format(self.node.out_tensors[0])
            if not (self.node.out_tensors[0].shape[1:] == list(
                    input.size())[1:]):
                NndctScreenLogger().warning(
                    f"The shape of the input ({input.shape[1:]}) should be the same as that of the dummy input ({self.node.out_tensors[0].shape[1:]})"
                )
            # py_utils.blob_to_nndct_format(self.node.out_tensors[0])
        output = qinput

        if (self.node.in_quant_part and NndctOption.nndct_stat.value > 2):
            print('Channel number of input data: {}'.format(output.shape[1]))
            print('Input data histogram: {}'.format(
                output.histc(bins=10).cpu().detach().numpy()))
            print(
                'Network input channel-wise statistic [Min, Max, Mean, Std]:')
            t = output.transpose(0, 1)
            for c in range(t.shape[0]):
                print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(),
                                                t[c].mean(), t[c].std()))
                print('histogram: {}'.format(
                    t[c].histc(bins=10).cpu().detach().numpy()))

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
Example 3
                def forward(self, *args, **kwargs):

                    inputs = []

                    def collect_inputs(inputs, value):
                        if isinstance(value, torch.Tensor):
                            inputs.append(value)
                        elif isinstance(value, (tuple, list)):
                            for i in value:
                                collect_inputs(inputs, i)

                    for _, v in kwargs.items():
                        collect_inputs(inputs, v)

                    inputs = quantize_tensors(inputs,
                                              self.node,
                                              tensor_type='input')
                    try:
                        output = caller(*args, **kwargs)
                        if isinstance(output, torch.Tensor):
                            output = output.clone()
                    except TypeError as e:
                    NndctScreenLogger().warning_once(
                        f"{str(e)}. The arguments of the function will be converted to positional arguments."
                    )
                        inputs = list(args) + list(kwargs.values())
                        output = caller(*inputs)

                    output = quantize_tensors([output], self.node)[0]

                    return output
Example 4
    def forward(self, input_1: torch.Tensor, input_2: torch.Tensor,
                pad_size: Union[torch.Tensor, Sequence[Any], int]):
        qinput_1 = quantize_tensors([input_1], self.node,
                                    tensor_type='input')[0]
        qinput_2 = quantize_tensors([input_2], self.node,
                                    tensor_type='input')[0]

        if isinstance(pad_size, (list, tuple)):
            pad_size = torch.Tensor(pad_size).to(qinput_1.device)
        elif isinstance(pad_size, float):
            pad_size = torch.Tensor([pad_size]).to(qinput_1.device)

        output_dim = 2 * pad_size + 1
        B, C, H, W = qinput_1.size()
        qinput_2 = F.pad(qinput_2, [pad_size] * 4)
        cv = []
        for i in range(output_dim):
            for j in range(output_dim):
                cost = qinput_1 * qinput_2[:, :, i:(i + H), j:(j + W)]
                cost = cost.unsqueeze(2)
                cv.append(cost)
        output = torch.cat(cv, 2)

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
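The nested loop above assembles a local correlation cost volume: for each displacement (i, j) within ±pad_size, feature map 1 is multiplied element-wise with a shifted window of the padded feature map 2, and the 2D grid of products is concatenated along a new dimension. A minimal standalone sketch of the same computation (the helper name correlation_volume is ours; torch.stack replaces the unsqueeze-and-cat pattern):

import torch
import torch.nn.functional as F

def correlation_volume(f1, f2, pad_size=1):
    # Stack f1 * shifted(f2) for every displacement within +/- pad_size.
    B, C, H, W = f1.shape
    f2 = F.pad(f2, [pad_size] * 4)
    d = 2 * pad_size + 1
    costs = [f1 * f2[:, :, i:i + H, j:j + W]
             for i in range(d) for j in range(d)]
    return torch.stack(costs, dim=2)  # shape (B, C, d*d, H, W)

print(correlation_volume(torch.rand(1, 3, 8, 8), torch.rand(1, 3, 8, 8)).shape)
# torch.Size([1, 3, 9, 8, 8])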
Example 5
    def forward(self, input_1: torch.Tensor, input_2: torch.Tensor,
                maxdisp: Union[torch.Tensor, Sequence[Any], int]):
        qinput_1 = quantize_tensors([input_1], self.node,
                                    tensor_type='input')[0]
        qinput_2 = quantize_tensors([input_2], self.node,
                                    tensor_type='input')[0]
        if os.environ.get("DUMP_XMODEL", '0') == '1':
            cost = Variable(
                torch.zeros(qinput_1.size()[0],
                            qinput_1.size()[1] * 2, maxdisp // 4,
                            qinput_1.size()[2],
                            qinput_1.size()[3])).cpu()
        else:
            cost = Variable(
                torch.zeros(qinput_1.size()[0],
                            qinput_1.size()[1] * 2, maxdisp // 4,
                            qinput_1.size()[2],
                            qinput_1.size()[3])).cuda()

        for i in range(maxdisp // 4):
            if i > 0:
                cost[:, :qinput_1.size()[1], i, :, i:] = qinput_1[:, :, :, i:]
                cost[:, qinput_1.size()[1]:, i, :, i:] = qinput_2[:, :, :, :-i]
            else:
                cost[:, :qinput_1.size()[1], i, :, :] = qinput_1
                cost[:, qinput_1.size()[1]:, i, :, :] = qinput_2
        output = cost.contiguous()

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
Example 6
    def forward(self, input):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        output = super().forward(qinput)

        # check output size for DPU compatibility
        if (isinstance(self.output_size, (tuple, list)) and
                tuple(self.output_size) != (1, 1)) or (
                    not isinstance(self.output_size, (tuple, list)) and
                    self.output_size != 1):
            print(
                "NNDCT-Warning: For adaptive average pooling, DPU only supports output size 1"
            )

        # kernel = [input.shape[3], input.shape[2]]
        # During slow trace, shape dims may become tensor values, which nndct does not support.
        kernel = [
            input.shape[3] if isinstance(input.shape[3], int) else
            input.shape[3].item(), input.shape[2] if isinstance(
                input.shape[2], int) else input.shape[2].item()
        ]
        self.node.set_node_attr(self.node.op.AttrName.KERNEL, kernel)
        self.node.set_node_attr(self.node.op.AttrName.STRIDE, kernel)

        # scale to DPU accuracy
        if NndctOption.nndct_avg_pool_approximate.value:
            scale = 1.0
            if self.node.node_attr(self.node.op.AttrName.KERNEL) == [3, 3]:
                scale = 9.0 * 7.0 / 64.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [5, 5]:
                scale = 25.0 * 10.0 / 256.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) in [[6, 6],
                                                                       [3, 6],
                                                                       [6, 3]]:
                scale = 36.0 * 7.0 / 256.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [7, 7]:
                scale = 49.0 * 21.0 / 1024.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [14, 14]:
                scale = 196.0 * 21.0 / 4096.0
            else:
                rec = self.node.node_attr(
                    self.node.op.AttrName.KERNEL)[0] * self.node.node_attr(
                        self.node.op.AttrName.KERNEL)[1]
                max_factor = math.ceil(math.log(rec * 128, 2))
                diff = 1.0
                multi_factor = 0.0
                shift_factor = 0.0
                for shift_factor_ in range(max_factor):
                    factor = round((2**shift_factor_) / rec)
                    diff_ = abs(factor / (2**shift_factor_) - 1 / rec)
                    if diff_ < diff:
                        multi_factor = factor
                        diff = diff_
                        shift_factor = shift_factor_
                scale = rec * multi_factor / (2**shift_factor)

            output = output * scale

        output = quantize_tensors([output], self.node)[0]

        return output
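The fallback branch above searches for an integer multiplier and a power-of-two shift such that multiplier / 2**shift approximates the pooling reciprocal 1 / (kh * kw), which the DPU realizes with a multiply and a right shift; the listed kernels (3x3, 5x5, ...) use pre-chosen pairs instead. A standalone sketch of the search loop (helper name approx_avgpool_scale is ours):

import math

def approx_avgpool_scale(rec):
    # Search for (multiplier, shift) with multiplier / 2**shift ~= 1 / rec.
    max_factor = math.ceil(math.log(rec * 128, 2))
    multi_factor, shift_factor, diff = 0.0, 0.0, 1.0
    for shift in range(max_factor):
        factor = round((2 ** shift) / rec)
        diff_ = abs(factor / (2 ** shift) - 1 / rec)
        if diff_ < diff:
            multi_factor, shift_factor, diff = factor, shift, diff_
    # Scale applied to the float output to mimic the fixed-point result.
    return rec * multi_factor / (2 ** shift_factor)

print(approx_avgpool_scale(16))  # 1.0, since 1/16 is exactly a power of two

The returned value is the factor applied to the float output so that it matches the fixed-point result; it is exactly 1.0 whenever kh * kw is a power of two.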
Example 7
 def forward(self, input, other, alpha=1):
   [qinput, qother] = quantize_tensors(
       [input, other],
       self.node,
       tensor_type='input')
   output = torch.sub(input=qinput, other=qother, alpha=alpha)
   output = quantize_tensors([output], self.node)[0]
   return output
Example 8
                def forward(self, input, *args, **kwargs):
                    input = quantize_tensors([input],
                                             self.node,
                                             tensor_type='input')[0]

                    output = getattr(input, self.op_type, None)(*args,
                                                                **kwargs)

                    output = quantize_tensors([output], self.node)[0]

                    return output
Example 9
    def forward(self, input, dim, keepdim):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        output = torch.mean(qinput, dim, keepdim)
        if len(self.node.node_attr(self.node.op.AttrName.DIMS)) == 1:
            scale = calculate_op_scale(
                self.node.in_tensors[0].shape[self.node.node_attr(
                    self.node.op.AttrName.DIMS)[0]], self.node)
            output = output * scale

        output = quantize_tensors([output], self.node)[0]

        return output
Example 10
  def forward(self,
              input,
              size=None,
              scale_factor=None,
              mode='nearest',
              align_corners=None):
    qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

    output = torch.nn.functional.interpolate(qinput, size, scale_factor, mode,
                                             align_corners)

    output = quantize_tensors([output], self.node)[0]

    return output
Example 11
    def forward(self, input):
        if self.quant_mode is None or NndctOption.nndct_quant_off.value:
            return torch.div(F.relu6(torch.add(input, 3.)), 6.)
        else:
            qinput = quantize_tensors([input], self.node,
                                      tensor_type='input')[0]
            output = F.relu6(torch.add(qinput, 3.))

            # scale to DPU accuracy
            scale = 2731.0 / 16384.0
            output = output * scale

            output = quantize_tensors([output], self.node)[0]

            return output
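The constant 2731/16384 ≈ 0.166687 is a fixed-point approximation of 1/6 ≈ 0.166667, so relu6(x + 3) * scale tracks the float hard-sigmoid relu6(x + 3) / 6 to within about 1.2e-4. A quick numeric check (ours):

import torch
import torch.nn.functional as F

x = torch.linspace(-4., 4., steps=9)
approx = F.relu6(x + 3.) * (2731.0 / 16384.0)  # DPU-friendly scale
exact = F.relu6(x + 3.) / 6.                   # float hard-sigmoid
print((approx - exact).abs().max())  # ~1.2e-04, worst at the saturated end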
Example 12
    def forward(self, input, dim, start, end, step):
        size = input.size()
        break_symbol = ':'
        symbols = ""
        start_symbol = []
        end_symbol = []
        step_symbol = []
        for i in range(dim[0]):
            start_symbol.append(str(0))
            end_symbol.append(str(int(size[i])))
            step_symbol.append(str(1))

        for i in range(len(start)):
            start_symbol.append(str(start[i]))
            end_symbol.append(str(end[i]))
            step_symbol.append(str(step[i]))

        for i in range(len(start_symbol)):
            slice_symbol = break_symbol.join(
                [start_symbol[i], end_symbol[i], step_symbol[i]])
            if i > 0:
                symbols += "," + slice_symbol
            else:
                symbols = slice_symbol

        eval_str = f"input[{symbols}]"
        output = eval(eval_str)
        output = quantize_tensors([output], self.node)[0]
        return output
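For illustration, calling the forward above with dim=[1], start=[0], end=[4], step=[2] on an input of shape (2, 8) assembles the string "input[0:2:1,0:4:2]": every dimension below dim[0] receives a full slice built from the input size, and the requested start:end:step triples follow. The eval is then equivalent to ordinary indexing (example values are ours):

import torch

input = torch.arange(16).reshape(2, 8)
output = eval("input[0:2:1,0:4:2]")  # the string forward() would build and eval
print(output)                                 # tensor([[ 0,  2], [ 8, 10]])
print(torch.equal(output, input[:, 0:4:2]))   # True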
Example 13
    def forward(self, input: torch.Tensor,
                channel_max: Union[torch.Tensor, Sequence[Any], float]):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if isinstance(channel_max, (list, tuple)):
            channel_max = torch.Tensor(channel_max).to(input.device)
        elif isinstance(channel_max, float):
            channel_max = torch.Tensor([channel_max]).to(input.device)
        if self.node.in_quant_part:
            channel_max = quant_reluk_params(self.node, channel_max)

        output = F.relu(qinput) - F.relu(qinput - channel_max)

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
Example 14
    def forward(self, input: torch.Tensor,
                channel_scale: Union[torch.Tensor, Sequence[Any], float]):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if isinstance(channel_scale, (list, tuple)):
            channel_scale = torch.Tensor(channel_scale).to(input.device)
        elif isinstance(channel_scale, float):
            channel_scale = torch.Tensor([channel_scale]).to(input.device)
        # if self.node.in_quant_part:
        #     channel_scale = quant_channel_scale_params(self.node, channel_scale)
        output = qinput * channel_scale

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
Example 15
    def forward(self, input):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        output = qinput
        if NndctOption.nndct_stat.value > 2:
            print('Channel number of input data: {}'.format(output.shape[1]))
            print('Input data histogram: {}'.format(
                output.histc(bins=10).cpu().detach().numpy()))
            print(
                'Network input channel-wise statistic [Min, Max, Mean, Std]:')
            t = output.transpose(0, 1)
            for c in range(t.shape[0]):
                print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(),
                                                t[c].mean(), t[c].std()))
                print('histogram: {}'.format(
                    t[c].histc(bins=10).cpu().detach().numpy()))

        output = quantize_tensors([output], self.node)[0]
        return output
Example 16
    def forward(self, input):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        output = super().forward(qinput)

        # scale to DPU accuracy
        if NndctOption.nndct_avg_pool_approximate.value:
            scale = 1.0
            if self.node.node_attr(self.node.op.AttrName.KERNEL) == [3, 3]:
                scale = 9.0 * 7.0 / 64.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [5, 5]:
                scale = 25.0 * 10.0 / 256.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) in [[6, 6],
                                                                       [3, 6],
                                                                       [6, 3]]:
                scale = 36.0 * 7.0 / 256.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [7, 7]:
                scale = 49.0 * 21.0 / 1024.0
            elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [14, 14]:
                scale = 196.0 * 21.0 / 4096.0
            else:
                rec = self.node.node_attr(
                    self.node.op.AttrName.KERNEL)[0] * self.node.node_attr(
                        self.node.op.AttrName.KERNEL)[1]
                max_factor = math.ceil(math.log(rec * 128, 2))
                diff = 1.0
                multi_factor = 0.0
                shift_factor = 0.0
                for shift_factor_ in range(max_factor):
                    factor = round((2**shift_factor_) / rec)
                    diff_ = abs(factor / (2**shift_factor_) - 1 / rec)
                    if diff_ < diff:
                        multi_factor = factor
                        diff = diff_
                        shift_factor = shift_factor_
                scale = rec * multi_factor / (2**shift_factor)

            output = output * scale

        output = quantize_tensors([output], self.node)[0]

        return output
Example 17
                def forward(self, *args, **kwargs):
                    # quantize input tensor
                    configer = NndctGraphHolder()
                    qinputs = quantize_tensors(list(args),
                                               self.node,
                                               tensor_type='input')
                    if (configer.node_quantizable_with_params(self.node)):
                        qparams = []
                        inplace = (NndctOption.nndct_quant_off.value
                                   or self.quantizer is not None
                                   and self.quantizer.inplace)
                        # quantize weights/scale and bias for batch norm
                        if not configer.is_conv_like(
                                self.node) or self.node.node_attr(
                                    self.node.op.AttrName.BIAS_TERM):
                            param_names = self.params_name[:2]
                            params = [self.weight, self.bias]
                        else:
                            param_names = [self.params_name[0]]
                            params = [self.weight]
                        if not self.param_quantized:
                            if inplace:
                                _ = quantize_tensors(params,
                                                     self.node,
                                                     tensor_names=param_names,
                                                     tensor_type='param')
                                qparams = [p for p in params]
                            else:
                                qparams = quantize_tensors(
                                    params,
                                    self.node,
                                    tensor_names=param_names,
                                    tensor_type='param')
                            self.param_quantized = True
                        else:
                            qparams = [p for p in params]

                    output = super().forward(*args, **kwargs)
                    output = quantize_tensors([output], self.node)[0]
                    return output
Example 18
    def forward(self, input):

        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
            output = super().forward(qinput)
            output = quantize_tensors([output], self.node)[0]
        elif self.quant_mode > 0:
            output = torch.empty_like(qinput)
            if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
                NndctTanhSimulation(input, output)
                output = quantize_tensors([output], self.node)[0]
            else:
                input_name = self.node.in_nodes[0]
                fragpos = self.quantizer.get_quant_config(input_name, False)[1]
                quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
                Ttable = TANH_TABLE.table.to(quant_device)
                output = output.to(quant_device)
                NndctTanhTableLookup(input, Ttable, output, fragpos)
        else:
            output = super().forward(qinput)

        return output
Example 19
            def forward(self, *args):

                inputs = []

                def collect_inputs(inputs, value):
                    if isinstance(value, torch.Tensor):
                        inputs.append(value)
                    elif isinstance(value, (tuple, list)):
                        for i in value:
                            collect_inputs(inputs, i)

                for v in args:
                    collect_inputs(inputs, v)

                inputs = quantize_tensors(inputs,
                                          self.node,
                                          tensor_type='input')

                caller_map = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
                output = caller_map[self.node.name](*args)

                output = quantize_tensors([output], self.node)[0]

                return output
Example 20
    def forward(self, input):
        if self.quant_mode is None or NndctOption.nndct_quant_off.value:
            return torch.mul(input, torch.div(F.relu6(torch.add(input, 3.)),
                                              6.))
        else:
            qinput = quantize_tensors([input], self.node,
                                      tensor_type='input')[0]
            output = F.relu6(torch.add(qinput, 3.))

            # scale to DPU accuracy
            scale = 2731.0 / 16384.0
            output = output * scale

            output = fake_quantize_per_tensor(output,
                                              scale_inv=128,
                                              zero_point=0,
                                              quant_min=-128,
                                              quant_max=127)

            output = torch.mul(qinput, output)

            output = quantize_tensors([output], self.node)[0]

            return output
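Unlike the hard-sigmoid module, this hard-swish version also fake-quantizes the scaled relu6 branch onto a signed 8-bit grid before the final multiply, so the product uses the same rounded values the DPU would see. A hedged sketch of what fake_quantize_per_tensor plausibly computes, assuming scale_inv is the reciprocal of the step size (step = 1/128) and round-then-clamp semantics; this is our reading, not the library's definition:

import torch

def fake_quantize_sketch(x, scale_inv=128, zero_point=0,
                         quant_min=-128, quant_max=127):
    # Round onto the integer grid, clamp to the int8 range, map back to float.
    q = torch.clamp(torch.round(x * scale_inv) + zero_point,
                    quant_min, quant_max)
    return q / scale_inv

print(fake_quantize_sketch(torch.tensor([0.0, 0.5, 1.2])))
# tensor([0.0000, 0.5000, 0.9922])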
Example 21
 def forward(self, input, index):
     if isinstance(index, (list, tuple)):
         break_symbol = ':'
         symbols = ""
         for i in range(len(index)):
             if index[i] is None:
                 slice_symbol = break_symbol
             else:
                 slice_symbol = "index[" + str(i) + "]"
             if i > 0:
                 symbols += "," + slice_symbol
             else:
                 symbols = slice_symbol
         eval_str = f"input[{symbols}]"
         output = eval(eval_str)
         output = quantize_tensors([output], self.node)[0]
     else:
         output = input[index]
     return output
Example 22
  def forward(self, input):
    # back up the bias for the bias-correction feature
    if (not self.param_saved):
      if NndctOption.nndct_param_corr.value > 0:
        # back up the original float parameters
        if self.quant_mode == 1:
          self.weight_bak = self.weight.detach().clone()
          if self.bias is not None:
            self.bias_bak = self.bias.detach().clone()
        # adjust bias
        if self.quant_mode == 2 and self.bias is not None:
          if self.node.name not in self.quantizer.bias_corr.keys():
            NndctScreenLogger().error("The bias correction file in the quantization result directory does not match the current model.")
            exit(2)
          self.bias.data = torch.sub(self.bias.data, torch.tensor(
              self.quantizer.bias_corr[self.node.name],
              device=self.bias.data.device))
      self.param_saved = True

    # quantize parameters
    qweight = None
    qbias = None
    inplace = (NndctOption.nndct_quant_off.value or 
        self.quantizer is not None and self.quantizer.inplace)
    if (not self.param_quantized):
      if inplace:
        _ = quantize_tensors(
            [self.weight],
            self.node,
            tensor_names = [self.params_name[0]],
            tensor_type = 'param')[0]
        qweight = self.weight
        if self.bias is not None:
          _ = quantize_tensors(
              [self.bias],
              self.node,
              tensor_names = [self.params_name[1]],
              tensor_type = 'param')[0]
          qbias = self.bias
      else:
        qweight = quantize_tensors(
            [self.weight],
            self.node,
            tensor_names = [self.params_name[0]],
            tensor_type = 'param')[0]
        if self.bias is not None:
          qbias = quantize_tensors(
              [self.bias],
              self.node,
              tensor_names = [self.params_name[1]],
              tensor_type = 'param')[0]
      self.param_quantized = True
    else:
      qweight = self.weight
      qbias = self.bias


    # quantize input tensor
    qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
    # split linear to mul and add operations
    if (self.quant_mode == 2 and self.quantizer.is_lstm):
      # i * w
      output = torch.matmul(qinput, torch.transpose(qweight, 0, 1))
      output = self.quantizer.do_quantize(output, self.node.name, self.node, tensor_type='output')
      # i*w + bias
      if self.bias is not None:
        output = torch.add(output, qbias)
    else:
      output = torch.nn.functional.linear(qinput, qweight, qbias)
    output = quantize_tensors([output], self.node)[0]

    if NndctOption.nndct_param_corr.value > 0:
      #rate = NndctOption.nndct_param_corr_rate.value
      # statistics of the quantization error
      if (self.quant_mode == 1 and not self.stop):
        res_f = torch.matmul(input, torch.transpose(self.weight_bak, 0, 1))
        if self.bias is not None:
          res_f = torch.add(res_f, self.bias_bak)
        error, rate, self.stop, self.efficency, self.deviation = eval_qnoise(
                            output, 
                            res_f, 
                            self.efficency, 
                            self.deviation, 
                            self.rate, 
                            self.stop)
        if (not self.stop) and (self.bias is not None):
          if error.dim() == 3:
            error = error.mean(dim = [0, 1])
          else:
            error = error.mean(dim = 0)
          self.bias.data = torch.sub(self.bias.data, error, alpha=rate)
        self.param_quantized = False

    return output
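The calibration branch (quant_mode == 1) implements bias correction: it recomputes the layer with the backed-up float parameters, lets eval_qnoise estimate the error between the quantized and float outputs, and subtracts error * rate from the bias until the stopping criterion fires. Schematically, one correction step for the linear case looks like the following (a sketch under our assumptions; the library's eval_qnoise additionally tracks efficiency and deviation to decide when to stop):

import torch

def bias_correction_step(bias, quant_out, float_out, rate=0.1):
    # Nudge the bias so the quantized output drifts toward the float reference.
    error = (quant_out - float_out).mean(dim=0)  # per-output-feature mean error
    return bias - rate * error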
Example 23
 def forward(self, *args):
     caller_map = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
     output = caller_map[self.node.name](*args)
     output = quantize_tensors([output], self.node)[0]
     return output
Example 24
    def forward(self, input):
        params = [self.weight, self.bias]
        param_names = self.params_name[:2]

        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if (not self.param_quantized):
            inplace = (NndctOption.nndct_quant_off.value or
                       self.quantizer is not None and self.quantizer.inplace)
            # quantize weights and bias
            if inplace:
                _ = quantize_tensors(params,
                                     self.node,
                                     tensor_names=param_names,
                                     tensor_type='param')
                qparams = [p for p in params]
            else:
                qparams = quantize_tensors(params,
                                           self.node,
                                           tensor_names=param_names,
                                           tensor_type='param')
            self.param_quantized = True
        else:
            qparams = [p for p in params]

        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            # TODO: if statement only here to tell the jit to skip emitting this when it is None
            if self.num_batches_tracked is not None:  # type: ignore[has-type]
                self.num_batches_tracked = self.num_batches_tracked + 1  # type: ignore[has-type]
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(
                        self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        if self.training:
            bn_training = True
        else:
            bn_training = (self.running_mean is None) and (self.running_var is
                                                           None)

        output = torch.nn.functional.batch_norm(
            qinput,
            # If buffers are not to be tracked, ensure that they won't be updated
            self.running_mean
            if not self.training or self.track_running_stats else None,
            self.running_var
            if not self.training or self.track_running_stats else None,
            qparams[0],
            qparams[1],
            bn_training,
            exponential_average_factor,
            self.eps,
        )

        # quantize output
        output = quantize_tensors([output], self.node)[0]
        return output
Example 25
    def forward(self, input, size):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        output = qinput.expand(size).clone()
        output = quantize_tensors([output], self.node)[0]

        return output
Example 26
    def forward(self, input):
        if self.quant_mode > 0:
            if self.node.in_quant_part:
                if NndctOption.nndct_softmax_sim.value == 1:
                    # Method 1: Hardware PL Softmax
                    qinput = quantize_tensors([input],
                                              self.node,
                                              tensor_type='input')[0]

                    x_max = torch.max(qinput, dim=self.dim,
                                      keepdim=True).values
                    Exp_sum_appr = 0.0
                    softmax_sum = 0.0
                    softmax_appr_sum = 0.0

                    uvi = 47274 / math.pow(2, 15) * (qinput - x_max)
                    exp_appr = torch.empty_like(uvi)
                    NndctSoftmaxExpApproximate(uvi, exp_appr)

                    exp_appr = torch.round(exp_appr * 10**5)
                    exp_appr = exp_appr / (10**5)
                    Exp_sum_appr = torch.sum(exp_appr,
                                             dim=self.dim,
                                             keepdim=True)

                    F = Exp_sum_appr
                    w = torch.empty_like(F)
                    NndctSoftmaxLOD(F, w)
                    m = F / (2**w)

                    lnF = torch.round(
                        (22713 / (2**15)) * (m - 1 + w) * 10**5) / 10**5
                    uvi = 47274 / (2**15) * (qinput - x_max - lnF)
                    exp_appr = torch.empty_like(uvi)
                    NndctSoftmaxExpApproximate(uvi, exp_appr)
                    exp_appr = torch.round(exp_appr * 10**5) / 10**5
                    output = exp_appr

                    output = quantize_tensors([output], self.node)[0]

                elif NndctOption.nndct_softmax_sim.value == 2:
                    # Method 2: Hardware PL Softmax
                    qinput = quantize_tensors([input],
                                              self.node,
                                              tensor_type='input')[0]
                    x_max = torch.max(qinput, dim=self.dim,
                                      keepdim=True).values
                    qinput = qinput - x_max

                    exp_appr = torch.empty_like(qinput)
                    NndctSoftmaxSimulationPart1(qinput, exp_appr)
                    exp_sum = torch.sum(exp_appr, dim=self.dim, keepdim=True)

                    sum1 = torch.empty_like(exp_sum)
                    NndctSoftmaxSimulationPart2(exp_sum, sum1)
                    output = (exp_appr * sum1).bfloat16().float()
                    output = quantize_tensors([output], self.node)[0]
                else:
                    output = super().forward(input)
            else:
                output = super().forward(input)
        else:
            output = super().forward(input)
        return output
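The magic numbers in both softmax paths are fixed-point constants: 47274 / 2**15 ≈ 1.44269 ≈ log2(e), which converts e**x into a base-2 exponential the hardware can evaluate, and 22713 / 2**15 ≈ 0.69315 ≈ ln(2), which together with the leading-one detector (NndctSoftmaxLOD) recovers ln(F) from the decomposition F = m * 2**w, using the approximation log2(m) ≈ m − 1 for m ∈ [1, 2). A quick check of the constants:

import math

print(47274 / 2**15, math.log2(math.e))  # 1.442687... vs 1.442695...
print(22713 / 2**15, math.log(2))        # 0.693145... vs 0.693147...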
Example 27
    def forward(self, input):
        # back up the bias for the bias-correction feature
        if (not self.param_saved):
            if NndctOption.nndct_param_corr.value > 0:
                # back up the original float parameters
                if self.quant_mode == 1:
                    self.weight_bak = self.weight.detach().clone()
                    if self.bias is not None:
                        self.bias_bak = self.bias.detach().clone()
                # adjust bias
                if self.quant_mode == 2 and self.bias is not None:
                    if self.node.name not in self.quantizer.bias_corr.keys():
                        NndctScreenLogger().error(
                            "The bias correction file in the quantization result directory does not match the current model."
                        )
                        exit(2)
                    self.bias.data = torch.sub(
                        self.bias.data,
                        torch.tensor(self.quantizer.bias_corr[self.node.name],
                                     device=self.bias.data.device))
            self.param_saved = True

        # quantize parameters
        qweight = None
        qbias = None
        inplace = (NndctOption.nndct_quant_off.value
                   or self.quantizer is not None and self.quantizer.inplace)
        if (not self.param_quantized):
            if inplace:
                _ = quantize_tensors([self.weight],
                                     self.node,
                                     tensor_names=[self.params_name[0]],
                                     tensor_type='param')[0]
                qweight = self.weight
                if self.bias is not None:
                    _ = quantize_tensors([self.bias],
                                         self.node,
                                         tensor_names=[self.params_name[1]],
                                         tensor_type='param')[0]
                    qbias = self.bias
            else:
                qweight = quantize_tensors([self.weight],
                                           self.node,
                                           tensor_names=[self.params_name[0]],
                                           tensor_type='param')[0]
                if self.bias is not None:
                    qbias = quantize_tensors(
                        [self.bias],
                        self.node,
                        tensor_names=[self.params_name[1]],
                        tensor_type='param')[0]
            self.param_quantized = True
        else:
            qweight = self.weight
            qbias = self.bias

        # quantize input tensor
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        output = torch.nn.functional.conv1d(qinput,
                                            weight=qweight,
                                            bias=qbias,
                                            stride=self.stride,
                                            padding=self.padding,
                                            dilation=self.dilation,
                                            groups=self.groups)
        output = quantize_tensors([output], self.node)[0]

        # correct weights and bias during calibration
        if NndctOption.nndct_param_corr.value > 0:
            #rate = NndctOption.nndct_param_corr_rate.value
            # statistics of the quantization error
            if (self.quant_mode == 1 and not self.stop):
                res_f = torch.nn.functional.conv1d(input,
                                                   self.weight_bak,
                                                   bias=self.bias_bak,
                                                   stride=self.stride,
                                                   padding=self.padding,
                                                   dilation=self.dilation,
                                                   groups=self.groups)
                error, rate, self.stop, self.efficency, self.deviation = eval_qnoise(
                    output, res_f, self.efficency, self.deviation, self.rate,
                    self.stop)
                if (not self.stop) and (self.bias is not None):
                    error = error.mean(dim=[0, 1, 2])
                    self.bias.data = torch.sub(self.bias.data,
                                               error,
                                               alpha=rate)
                self.param_quantized = False

        return output
Example 28
 def forward(self, input, source, dim, index):
     index = torch.tensor([index]).to(input.device)
     output = input.index_copy_(dim, index, source.unsqueeze(dim))
     output = quantize_tensors([output], self.node)[0]
     return output
Example 29
 def forward(self, input):
   qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
   output = super().forward(qinput)
   output = quantize_tensors([output], self.node)[0]
   return output
Example 30
  def forward(self, tensors, dim):
    qinputs = quantize_tensors(tensors, self.node, tensor_type='input')
    output = torch.cat(qinputs, dim)
    output = quantize_tensors([output], self.node)[0]

    return output