Example #1
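A float32 reference kernel for a fully connected (linear) layer: the flattened input is dotted with each output channel's weight slice, any bias is pre-loaded into the accumulator, and the accumulator extrema are recorded in the optional details dictionary.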
    def execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                **kwargs):
        details = kwargs.get('details')
        # fall back to a scalable-filter float32 record when no qrec is supplied
        # (or only a plain Float32QuantizationRecord)
        if qrec is None or isinstance(qrec, Float32QuantizationRecord):
            qrec = Float32ScalableFilterQuantizationRecord()

        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        weights = qrec.prepare_weights(params,
                                       params.get_uncompressed_weights(),
                                       ktype="float32")
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")

        if params.has_bias:
            biases = qrec.prepare_biases(params,
                                         params.get_uncompressed_biases(),
                                         params.get_uncompressed_weights(),
                                         ktype="float32")
            acc_tensor = np.ones(out_dims.shape, dtype=np.float32) * biases
        else:
            acc_tensor = np.zeros(out_dims.shape, dtype=np.float32)

        in_tensor = in_tensor.reshape((in_dims.size()))
        filt = params.filter.get_filter_dims()
        for out_c in range(out_dims.c):
            # dot product of the flattened input with this channel's weight slice
            w_slice = weights[filt.srange(out_c=out_c)].reshape(
                (in_dims.size()))

            res = np.dot(in_tensor, w_slice)

            if details is not None:
                details['min_acc'] = min(np.sum(res[res < 0]),
                                         details['min_acc'])
                details['max_acc'] = max(np.sum(res[res > 0]),
                                         details['max_acc'])

            acc_tensor[out_c] += res

            if details is not None:
                details['min_acc'] = min(np.min(acc_tensor[out_c]),
                                         details['min_acc'])
                details['max_acc'] = max(np.max(acc_tensor[out_c]),
                                         details['max_acc'])

        acc_tensor = qrec.apply_multiplicative_bias(params,
                                                    acc_tensor,
                                                    0,
                                                    ktype="float32")

        return qrec.get_outputs(params, [acc_tensor], ktype="float32")
Example #2
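The symmetric (quantized) variant of the same fully connected kernel: the input is cast to the calculation dtype, the accumulator is expanded to calc_q before each dot product and reduced back to acc_q afterwards, and the final result is reduced to the output quantization.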
    def execute(cls, params,
                in_tensors,
                qrec: QuantizationRecordBase,
                **kwargs):
        details = kwargs.get('details')

        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        weights = qrec.prepare_weights(params, params.weights, ktype="symmetric")
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]

        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")

        if params.has_bias:
            biases = qrec.prepare_biases(params,
                                         params.biases,
                                         params.weights,
                                         ktype="symmetric")
            acc_tensor = np.ones(biases.shape, dtype=qrec.acc_q.dtype) * biases
            if qrec.acc_q != qrec.biases_q:
                acc_tensor = qrec.acc_q.expand_from(acc_tensor, qrec.biases_q)
        else:
            acc_tensor = np.zeros(out_dims.shape,
                                  dtype=qrec.acc_q.dtype)

        # cast the input tensor to the calculation dtype so that
        # the dot product is carried out at that precision
        in_tensor = in_tensor.astype(qrec.calc_q.dtype)

        in_tensor = in_tensor.reshape((in_dims.size()))
        filt = params.filter.get_filter_dims()
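        # one dot product per output channel, computed at calc_q precision and
        # reduced back to acc_q after accumulation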
        for out_c in range(out_dims.c):
            # Expand and normalize the accumulator
            if qrec.calc_q != qrec.acc_q:
                acc_tensor = qrec.calc_q.expand_from(acc_tensor, qrec.acc_q)

            w_slice = weights[filt.srange(out_c=out_c)].reshape((in_dims.size()))

            res = np.dot(in_tensor, w_slice)

            if details is not None:
                details['min_acc'] = min(np.sum(res[res < 0]), details['min_acc'])
                details['max_acc'] = max(np.sum(res[res > 0]), details['max_acc'])

            acc_tensor[out_c] += res

            if qrec.calc_q != qrec.acc_q:
                acc_tensor = qrec.acc_q.reduce_from(acc_tensor, qrec.calc_q)

            if details is not None:
                details['min_acc'] = min(np.min(acc_tensor[out_c]), details['min_acc'])
                details['max_acc'] = max(np.max(acc_tensor[out_c]), details['max_acc'])

        # details['acc_before'] = acc_tensor.copy()
        acc_tensor = qrec.apply_multiplicative_bias(
            params, acc_tensor, 0, ktype="symmetric")
        # details['acc_after'] = acc_tensor.copy()

        out_q = qrec.out_qs[0]

        if out_q != qrec.acc_q:
            acc_tensor = out_q.reduce_from(acc_tensor, qrec.acc_q)

        return qrec.get_outputs(params, [acc_tensor], ktype="symmetric")
Example #3
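A float32 reference kernel for grouped, strided, dilated convolution computed by sub-matrix summing: each filter tap multiplies a strided slice of the padded input and the partial sums are accumulated per output channel, with accumulator and pre-multiplicative-bias extrema recorded in details.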
    def execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                **kwargs):
        '''3D convolution by sub-matrix summing.
        '''
        details = kwargs.get('details')
        # fall back to a scalable-filter float32 record when no qrec is supplied
        # (or only a plain Float32QuantizationRecord)
        if qrec is None or isinstance(qrec, Float32QuantizationRecord):
            qrec = Float32ScalableFilterQuantizationRecord()
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        weights = qrec.prepare_weights(params,
                                       params.get_uncompressed_weights(),
                                       ktype="float32")
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")
            details['pre_mul_bias_min'] = float("Infinity")
            details['pre_mul_bias_max'] = float("-Infinity")

        in_tensor = in_tensor.transpose(
            in_dims.transpose_to_order(['h', 'w', 'c'])).astype(np.float32)
        if params.padding.h + params.padding.w > 0:
            in_tensor = np.pad(in_tensor,
                               ([params.padding.t, params.padding.b
                                 ], [params.padding.l, params.padding.r]) +
                               ([0, 0], ) * (np.ndim(in_tensor) - 2),
                               mode='constant',
                               constant_values=0.0)
            pad_w = params.padding.w
            pad_h = params.padding.h
        else:
            pad_w = pad_h = 0

        weights = weights.transpose(
            params.filter.transpose_to_order(['out_c', 'h', 'w',
                                              'in_c'])).astype(np.float32)

        filt_w = params.filter.w
        filt_h = params.filter.h

        in_w = in_dims.w
        in_h = in_dims.h
        out_c = params.filter.out_c

        in_c_per_group = in_dims.c // params.groups
        out_c_per_group = out_c // params.groups
        in_c_off = 0
        out_c_cnt = 0

        dillated_filter_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w
        dillated_filter_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h

        out_w = ((in_w - dillated_filter_w + pad_w)) // params.stride.w + 1
        out_h = ((in_h - dillated_filter_h + pad_h)) // params.stride.h + 1

        if params.has_bias:
            biases = qrec.prepare_biases(params,
                                         params.get_uncompressed_biases(),
                                         params.get_uncompressed_weights(),
                                         ktype="float32")
            result = np.ones(
                (out_c, out_h, out_w), dtype=np.float32) * biases.reshape(
                    out_c, 1, 1)
        else:
            result = np.zeros((out_c, out_h, out_w), dtype=np.float32)

        const_h = pad_h + in_h - dillated_filter_h + 1
        const_w = pad_w + in_w - dillated_filter_w + 1
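        # const_h/const_w bound the strided slices below so that each filter tap
        # (cur_h, cur_w) covers exactly out_h x out_w input windows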
        for out_c_i in range(out_dims.c):
            for cur_h in range(filt_h):
                for cur_w in range(filt_w):

                    # selects all elements that the filter element needs to multiply
                    slabhw = np.multiply(
                        in_tensor[cur_h * params.dilation.h:const_h +
                                  cur_h * params.dilation.h:params.stride.h,
                                  cur_w * params.dilation.w:const_w +
                                  cur_w * params.dilation.w:params.stride.w,
                                  in_c_off:in_c_off + in_c_per_group:1],
                        weights[out_c_i, cur_h, cur_w],
                        dtype=np.float32)

                    # sum over the input channels of this group
                    slabhw = slabhw.sum(axis=-1)
                    # add to the previous filter elements
                    result[out_c_i] += slabhw

                    if details is not None:
                        details['min_acc'] = min(np.min(result[out_c_i]),
                                                 details['min_acc'])
                        details['max_acc'] = max(np.max(result[out_c_i]),
                                                 details['max_acc'])

            out_c_cnt += 1
            if out_c_cnt >= out_c_per_group:
                out_c_cnt = 0
                in_c_off += in_c_per_group

        if details is not None:
            details['pre_mul_bias_min'] = min(np.min(result),
                                              details['pre_mul_bias_min'])
            details['pre_mul_bias_max'] = max(np.max(result),
                                              details['pre_mul_bias_max'])

        result = qrec.apply_multiplicative_bias(params,
                                                result,
                                                axis=0,
                                                ktype="float32")

        result = result.transpose(
            out_dims.transpose_from_order(['c', 'h', 'w']))

        return qrec.get_outputs(params, [result], ktype="float32")
Example #4
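The symmetric (quantized) counterpart of Example #3: padding uses the input zero point when one is defined, accumulation happens in the acc_q dtype (optionally widened to int64 via FORCE_INT64), and the result is reduced to the output quantization at the end.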
    def execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                **kwargs):
        '''3D convolution by sub-matrix summing.
        '''
        details = kwargs.get('details')
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        weights = qrec.prepare_weights(params,
                                       params.weights,
                                       ktype="symmetric")
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]

        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")

        in_tensor = in_tensor.transpose(
            in_dims.transpose_to_order(['h', 'w', 'c']))
        if params.padding.h + params.padding.w > 0:
            if hasattr(qrec.in_qs[0], 'zero_point'):
                const_pad = qrec.in_qs[0].zero_point[0]
            else:
                const_pad = 0
            in_tensor = np.pad(in_tensor,
                               ([params.padding.t, params.padding.b
                                 ], [params.padding.l, params.padding.r]) +
                               ([0, 0], ) * (np.ndim(in_tensor) - 2),
                               mode='constant',
                               constant_values=const_pad)
            pad_w = params.padding.w
            pad_h = params.padding.h
        else:
            pad_w = pad_h = 0

        weights = weights.transpose(
            params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

        filt_w = params.filter.w
        filt_h = params.filter.h

        in_w = in_dims.w
        in_h = in_dims.h
        out_c = params.filter.out_c

        in_c_per_group = in_dims.c // params.groups
        out_c_per_group = out_c // params.groups
        in_c_off = 0
        out_c_cnt = 0

        dillated_filter_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w
        dillated_filter_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h

        out_w = ((in_w - dillated_filter_w + pad_w)) // params.stride.w + 1
        out_h = ((in_h - dillated_filter_h + pad_h)) // params.stride.h + 1

        if params.has_bias:
            biases = qrec.prepare_biases(params,
                                         params.biases,
                                         params.weights,
                                         ktype="symmetric")
            if qrec.acc_q != qrec.biases_q:
                biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
            result = np.ones(
                (out_c, out_h, out_w),
                dtype=qrec.acc_q.dtype) * biases.reshape(out_c, 1, 1)
        else:
            result = np.zeros((out_c, out_h, out_w), dtype=qrec.acc_q.dtype)

        const_h = pad_h + in_h - dillated_filter_h + 1
        const_w = pad_w + in_w - dillated_filter_w + 1
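        # FORCE_INT64 (defined outside this excerpt) widens the accumulator to
        # int64 so the accumulation cannot overflow the calculation dtype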
        if FORCE_INT64:
            result = result.astype(np.int64)
        for out_c_i in range(out_dims.c):
            for cur_h in range(filt_h):
                for cur_w in range(filt_w):

                    # selects all elements that the filter element needs to multiply
                    slabhw = np.multiply(
                        in_tensor[cur_h * params.dilation.h:const_h +
                                  cur_h * params.dilation.h:params.stride.h,
                                  cur_w * params.dilation.w:const_w +
                                  cur_w * params.dilation.w:params.stride.w,
                                  in_c_off:in_c_off + in_c_per_group:1],
                        weights[out_c_i, cur_h, cur_w],
                        dtype=np.int64 if FORCE_INT64 else qrec.calc_q.dtype)

                    if qrec.calc_q != qrec.acc_q:
                        slabhw = qrec.acc_q.reduce_from(slabhw, qrec.calc_q)

                    # sum over the input channels of this group
                    slabhw = slabhw.sum(
                        axis=-1,
                        dtype=np.int64 if FORCE_INT64 else qrec.calc_q.dtype)
                    # add to the previous filter elements
                    result[out_c_i] += slabhw

                    if details is not None:
                        details['min_acc'] = min(np.min(result[out_c_i]),
                                                 details['min_acc'])
                        details['max_acc'] = max(np.max(result[out_c_i]),
                                                 details['max_acc'])

            out_c_cnt += 1
            if out_c_cnt >= out_c_per_group:
                out_c_cnt = 0
                in_c_off += in_c_per_group

        result = qrec.apply_multiplicative_bias(params,
                                                result,
                                                0,
                                                ktype="symmetric")

        result = result.transpose(
            out_dims.transpose_from_order(['c', 'h', 'w']))

        if qrec.out_qs[0] != qrec.acc_q:
            result = qrec.out_qs[0].reduce_from(result, qrec.acc_q)

        return qrec.get_outputs(params, [result], ktype="symmetric")