Example #1
    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 num_bits=8,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 out_channels=1,
                 quant_delay=0,
                 symmetric=False,
                 narrow_range=False):
        """init FakeQuantWithMinMax layer"""
        super(FakeQuantWithMinMax, self).__init__()

        self.min_init = min_init
        self.num_bits = num_bits
        self.max_init = max_init
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.out_channels = out_channels
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range

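        # Per-channel quantization keeps one (min, max) pair per output channel;
        # otherwise a single scalar pair is used for the whole tensor.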
        if per_channel:
            min_array = np.array([
                self.min_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            max_array = np.array([
                self.max_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            self.minq = Parameter(Tensor(min_array),
                                  name='quant_min',
                                  requires_grad=False)
            self.maxq = Parameter(Tensor(max_array),
                                  name='quant_max',
                                  requires_grad=False)
            self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=False)
        else:
            min_array = np.array([min_init]).reshape(1).astype(np.float32)
            max_array = np.array([max_init]).reshape(1).astype(np.float32)
            self.minq = Parameter(Tensor(min_array),
                                  name='quant_min',
                                  requires_grad=False)
            self.maxq = Parameter(Tensor(max_array),
                                  name='quant_max',
                                  requires_grad=False)
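            # Ascend routes through the FakeQuantWithMinMaxD wrapper cell (see
            # Example #3), which also owns the min/max update operator; GPU
            # calls the fake-quant primitive directly.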
            if context.get_context('device_target') == "Ascend":
                self.fake_quant_train = FakeQuantWithMinMaxD(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=True,
                    min_init=self.minq,
                    max_init=self.maxq)
                self.fake_quant_infer = FakeQuantWithMinMaxD(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=False,
                    min_init=self.minq,
                    max_init=self.maxq)
            elif context.get_context('device_target') == "GPU":
                self.fake_quant_train = P.FakeQuantWithMinMax(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=True)
                self.fake_quant_infer = P.FakeQuantWithMinMax(
                    num_bits=self.num_bits,
                    ema=self.ema,
                    ema_decay=self.ema_decay,
                    quant_delay=self.quant_delay,
                    symmetric=self.symmetric,
                    narrow_range=self.narrow_range,
                    training=False)
            else:
                raise ValueError("Unsupported platform, only Ascend and GPU are supported.")
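The snippets on this page are constructor excerpts only; imports and the enclosing class are omitted. A minimal preamble they appear to assume (the exact module paths and the nn.Cell base class are assumptions based on the early MindSpore releases that still ship these fake-quant primitives) would look roughly like this:

import numpy as np

from mindspore import context
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn.cell import Cell
from mindspore.ops import operations as P


class FakeQuantWithMinMax(Cell):
    """Fake-quantization cell; __init__ as shown in Example #1 or #2."""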
Example #2
    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 num_bits=8,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 out_channels=1,
                 quant_delay=0,
                 symmetric=False,
                 narrow_range=False):
        super(FakeQuantWithMinMax, self).__init__()

        self.min_init = min_init
        self.num_bits = num_bits
        self.max_init = max_init
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.out_channels = out_channels
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range

        if per_channel:
            min_array = np.array([
                self.min_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            max_array = np.array([
                self.max_init for i in range(0, self.out_channels)
            ]).astype(np.float32)
            self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=False)
        else:
            min_array = np.array([min_init]).reshape(1).astype(np.float32)
            max_array = np.array([max_init]).reshape(1).astype(np.float32)
            self.fake_quant_train = P.FakeQuantWithMinMax(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMax(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=False)

        self.minq = Parameter(Tensor(min_array),
                              name='quant_min',
                              requires_grad=False)
        self.maxq = Parameter(Tensor(max_array),
                              name='quant_max',
                              requires_grad=False)
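A hedged usage sketch of the cell in Example #2 (assuming it is importable and the backend still provides these primitives): per-tensor construction registers scalar quant_min/quant_max Parameters, while per_channel=True registers one value per output channel.

# Sketch only: FakeQuantWithMinMax is the cell from Example #2; the values
# below follow its constructor defaults and are not taken from a real model.
fq_tensor = FakeQuantWithMinMax(min_init=-6, max_init=6, num_bits=8)
# fq_tensor.minq / fq_tensor.maxq each hold a single float32 value.

fq_channel = FakeQuantWithMinMax(min_init=-6, max_init=6, num_bits=8,
                                 per_channel=True, out_channels=16)
# fq_channel.minq / fq_channel.maxq each hold 16 float32 values, one per channel.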
Example #3
    def __init__(self,
                 min_init=-6,
                 max_init=6,
                 num_bits=8,
                 ema=False,
                 ema_decay=0.999,
                 per_channel=False,
                 channel_size=1,
                 quant_delay=0,
                 symmetric=False,
                 narrow_range=False,
                 training=True):
        """init FakeQuantWithMinMax ascend layer"""
        super(FakeQuantWithMinMaxD, self).__init__()

        self.min_init = min_init
        self.num_bits = num_bits
        self.max_init = max_init
        self.ema = ema
        self.ema_decay = ema_decay
        self.per_channel = per_channel
        self.channel_size = channel_size
        self.quant_delay = quant_delay
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.training = training

        if not per_channel:
            self.fake_quant = P.FakeQuantWithMinMax(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=training)
            self.ema_update = P.FakeQuantWithMinMaxUpdate(
                num_bits=self.num_bits,
                ema=self.ema,
                ema_decay=self.ema_decay,
                quant_delay=self.quant_delay,
                symmetric=self.symmetric,
                narrow_range=self.narrow_range,
                training=training)
        else:
            raise RuntimeError("FakeQuantWithMinMaxD does not support per-channel quantization.")

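        # min_init/max_init may arrive as shared Parameters from the calling cell
        # (see the Ascend branch in Example #1); otherwise fresh ones are created here.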
        if isinstance(min_init, Parameter):
            self.minq = min_init
            self.maxq = max_init
        else:
            self.minq = Parameter(Tensor(
                np.array([min_init]).astype(np.float32)),
                                  name='quant_min',
                                  requires_grad=False)
            self.maxq = Parameter(Tensor(
                np.array([max_init]).astype(np.float32)),
                                  name='quant_max',
                                  requires_grad=False)
        self.reduce_min = P.ReduceMin()
        self.reduce_max = P.ReduceMax()
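The isinstance(min_init, Parameter) branch above is what lets the Ascend path in Example #1 share one pair of quant_min/quant_max Parameters between its training and inference operators. A reduced sketch of that hand-off follows; the names are taken from the code above, and the claim that training-time statistics carry over to inference is an assumption about how the (not shown) construct method applies ema_update to these Parameters.

# Reduced sketch of the Ascend hand-off in Example #1: both cells hold
# references to the same Parameters, so min/max statistics collected while
# training are assumed to be the ones read back at inference time.
minq = Parameter(Tensor(np.array([-6.0], dtype=np.float32)),
                 name='quant_min', requires_grad=False)
maxq = Parameter(Tensor(np.array([6.0], dtype=np.float32)),
                 name='quant_max', requires_grad=False)

fake_quant_train = FakeQuantWithMinMaxD(min_init=minq, max_init=maxq,
                                        ema=True, training=True)
fake_quant_infer = FakeQuantWithMinMaxD(min_init=minq, max_init=maxq,
                                        ema=True, training=False)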