Esempio n. 1
0
def rgb565_rgb888(input_tensor: np.ndarray, in_dim: Dim, out_dim: Dim):
    """Unpack a single-channel RGB565 image into three 8-bit channels.

    The input is first reordered to (h, w, c). Each packed value is split
    into its upper 5-bit field, middle 6-bit field and lower 5-bit field,
    and each field is shifted so it occupies the top bits of a byte (the
    remaining low bits stay zero). The result is cast to ``np.uint8`` and
    reordered with ``out_dim.transpose_from_order``.

    Args:
        input_tensor: packed image with one channel.
        in_dim: named dimension descriptor of the input; ``c`` must be 1.
        out_dim: named dimension descriptor of the output; ``c`` must be 3.

    Returns:
        A ``np.uint8`` array holding the three unpacked channels in the
        axis order requested by ``out_dim``.
    """
    assert in_dim.is_named and in_dim.c == 1 and out_dim.is_named and out_dim.c == 3
    hwc_axes = in_dim.transpose_to_order(("h", "w", "c"))
    rgb = np.repeat(input_tensor.transpose(hwc_axes), 3, axis=2)
    # Snapshot the packed pixels first: channel 0 is overwritten below while
    # the other channels still need the original packed value.
    packed = rgb[:, :, 0].copy()
    rgb[:, :, 0] = (packed & 0xF800) >> 8
    rgb[:, :, 1] = (packed & 0x07E0) >> 3
    rgb[:, :, 2] = (packed & 0x001F) << 3
    as_bytes = rgb.astype(np.uint8)
    return as_bytes.transpose(out_dim.transpose_from_order(("h", "w", "c")))
Esempio n. 2
0
def prepare_acc(biases: np.array, out_dims: Dim, qrec: FilterQuantizationRecord):
    """Create the initial accumulator tensor, preloaded with biases if given.

    Without biases the accumulator is whatever ``zeros`` returns for
    ``out_dims.shape``. With biases, a (c, h, w) accumulator is created, each
    channel is filled with its bias value, and the tensor is reordered with
    ``out_dims.transpose_from_order``.

    Args:
        biases: per-channel bias values indexed by channel, or ``None``.
        out_dims: dimension descriptor of the output.
        qrec: quantization record; when it is truthy and its ``acc_q``
            differs from its ``biases_q`` the biases are first converted
            with ``qrec.acc_q.expand_from``.

    Returns:
        The accumulator tensor.
    """
    if biases is None:
        return zeros(out_dims.shape, qrec, 'acc_q')

    chw_acc = zeros((out_dims.c, out_dims.h, out_dims.w), qrec, 'acc_q')
    needs_expand = qrec and qrec.acc_q != qrec.biases_q
    if needs_expand:
        biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
    # Broadcast each channel's bias over that channel's whole plane.
    for channel in range(out_dims.c):
        chw_acc[channel, :] = biases[channel]
    final_axes = out_dims.transpose_from_order(('c', 'h', 'w'))
    return chw_acc.transpose(final_axes)
Esempio n. 3
0
def from_hwc(input_tensor: np.ndarray, in_dim: Dim, out_dim: Dim):
    """Cast an (h, w, c) tensor to ``np.uint8`` and reorder it for ``out_dim``.

    Args:
        input_tensor: tensor already laid out as (h, w, c).
        in_dim: unused; accepted only so the signature matches the other
            converters.
        out_dim: dimension descriptor whose ``transpose_from_order`` supplies
            the axis permutation applied to the result.

    Returns:
        The ``np.uint8`` tensor in the axis order requested by ``out_dim``.
    """
    del in_dim  # deliberately unused
    as_bytes = input_tensor.astype(np.uint8)
    target_axes = out_dim.transpose_from_order(("h", "w", "c"))
    return as_bytes.transpose(target_axes)
Esempio n. 4
0
def faster_conv_quantized(params,
                          qrec: FilterQuantizationRecord,
                          in_dims: Dim,
                          out_dims: Dim,
                          in_tensor: np.ndarray,
                          weights: np.ndarray,
                          biases: np.ndarray,
                          details,
                          detect_overflow=True):
    '''Quantized 3D convolution by sub-matrix summing.

    For every output channel and every filter tap (h, w), the slab of input
    elements that the tap multiplies is selected in one slice, multiplied by
    the tap's weights, summed over the input channels and added to the
    output channel's accumulator. Stride is applied at the end by
    sub-sampling the full stride-1 result.

    Args:
        params: convolution parameters; ``padding``, ``filter`` and
            ``stride`` are read from it.
        qrec: quantization record; ``calc_q``, ``acc_q``, ``biases_q`` and
            ``out_qs[0]`` are read from it.
        in_dims: dimension descriptor of the input tensor.
        out_dims: dimension descriptor of the output tensor.
        in_tensor: input activations.
        weights: filter weights.
        biases: per output channel biases, or ``None``.
        details: optional dict that receives ``min_acc``, ``max_acc``,
            ``overflow_dot`` and ``overflow_acc``; may be ``None``.
        detect_overflow: when true (and ``details`` is given) every step is
            recomputed in 64 bit and compared with the narrow result to
            count overflows.

    Returns:
        The output tensor in the axis order requested by ``out_dims``.
    '''
    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")
        details['overflow_dot'] = 0
        details['overflow_acc'] = 0
    else:
        # The overflow counters are the only product of overflow detection.
        # Without a details dict there is nowhere to record them, so skip the
        # 64 bit shadow computation instead of failing on details[...].
        detect_overflow = False

    in_tensor = in_tensor.transpose(in_dims.transpose_to_order(['h', 'w',
                                                                'c']))
    if params.padding.h + params.padding.w > 0:
        # Pad only the two spatial axes; any remaining axes get no padding.
        in_tensor = np.pad(in_tensor,
                           ([params.padding.t,
                             params.padding.b],
                            [params.padding.l,
                             params.padding.r])
                           + ([0, 0], ) * (np.ndim(in_tensor) - 2),
                           mode='constant',
                           constant_values=0)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    in_tensor = in_tensor.astype(qrec.calc_q.dtype)

    weights = weights.transpose(
        params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

    filt_w = params.filter.w
    filt_h = params.filter.h

    in_w = in_dims.w
    in_h = in_dims.h
    out_c = params.filter.out_c

    # Size of the stride-1 output; also the extent of every input slab.
    out_w = in_w - filt_w + pad_w + 1
    out_h = in_h - filt_h + pad_h + 1
    if biases is None:
        result = np.zeros((out_c, out_h, out_w), dtype=qrec.acc_q.dtype)
    else:
        if qrec.acc_q != qrec.biases_q:
            biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
        result = np.ones(
            (out_c, out_h, out_w), dtype=qrec.acc_q.dtype) * biases.reshape(
                out_c, 1, 1)

    if detect_overflow:
        # 64 bit shadow accumulator used as the overflow-free reference.
        result64 = result.astype(np.int64)

    # Loop invariant: does the product need reducing to the accumulator type?
    needs_reduce = qrec.calc_q != qrec.acc_q

    for out_c_i in range(out_dims.c):
        for cur_h in range(filt_h):
            for cur_w in range(filt_w):
                if detect_overflow:
                    # selects all elements that the filter element needs to multiply
                    slabhw64 = in_tensor[cur_h:out_h + cur_h:1,
                                         cur_w:out_w + cur_w:1, ...].astype(
                                             np.int64) * weights[out_c_i,
                                                                 cur_h, cur_w]
                    if needs_reduce:
                        # reduce the accumulator
                        slabhwpost = qrec.acc_q.round_normalize_clip(
                            slabhw64, qrec.calc_q, change_type=False)
                    else:
                        slabhwpost = slabhw64
                    # add depthwise
                    slabhw64sum = slabhwpost.sum(axis=-1)
                    # add to the previous filter elements
                    result64[out_c_i] += slabhw64sum

                # selects all elements that the filter element needs to multiply
                slabhw = in_tensor[cur_h:out_h + cur_h:1,
                                   cur_w:out_w + cur_w:1,
                                   ...] * weights[out_c_i, cur_h, cur_w]

                if detect_overflow:
                    # Any difference from the 64 bit product is an overflow.
                    # A wrapped value can land above the true value as well
                    # as below it, so test for inequality, not "less than".
                    if np.any(slabhw != slabhw64):
                        details['overflow_dot'] += 1

                if needs_reduce:
                    # reduce the accumulator
                    slabhw = qrec.acc_q.reduce_from(slabhw, qrec.calc_q)

                # add depthwise
                slabhw = slabhw.sum(axis=-1)
                # add to the previous filter elements
                if detect_overflow:
                    acc_overflow_detected = False
                    if np.any(slabhw != slabhw64sum):
                        details['overflow_acc'] += 1
                        acc_overflow_detected = True

                result[out_c_i] += slabhw
                # Count at most one accumulator overflow per filter tap.
                if detect_overflow and not acc_overflow_detected:
                    if np.any(result[out_c_i] != result64[out_c_i]):
                        details['overflow_acc'] += 1

                if details is not None:
                    # Track the range over every intermediate accumulation.
                    details['min_acc'] = min(np.min(result[out_c_i]),
                                             details['min_acc'])
                    details['max_acc'] = max(np.max(result[out_c_i]),
                                             details['max_acc'])

    if params.stride.size() > 1:
        result = result[:, ::params.stride.h, ::params.stride.w, ...]

    if qrec.out_qs[0] != qrec.acc_q:
        result = qrec.out_qs[0].reduce_from(result, qrec.acc_q)

    return result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))
Esempio n. 5
0
def faster_conv(params, in_dims: Dim, out_dims: Dim, in_tensor: np.ndarray,
                weights: np.ndarray, biases: np.ndarray, details):
    '''3D convolution by sub-matrix summing.

    For each output channel and each filter tap (h, w), the window of input
    elements that the tap multiplies is taken in one slice, scaled by the
    tap's weights, summed over the last axis and added to the channel's
    accumulator. Stride is applied at the end by sub-sampling the full
    stride-1 result.

    Args:
        params: convolution parameters; ``padding``, ``filter`` and
            ``stride`` are read from it.
        in_dims: dimension descriptor of the input tensor.
        out_dims: dimension descriptor of the output tensor.
        in_tensor: input activations.
        weights: filter weights.
        biases: per output channel biases, or ``None`` for a zero start.
        details: optional dict that receives ``min_acc`` and ``max_acc``,
            tracked over every intermediate accumulation; may be ``None``.

    Returns:
        The output tensor in the axis order requested by ``out_dims``.
    '''
    track_range = details is not None
    if track_range:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")

    padding = params.padding
    hwc_input = in_tensor.transpose(
        in_dims.transpose_to_order(['h', 'w', 'c']))
    pad_h = pad_w = 0
    if padding.h + padding.w > 0:
        # Pad only the two spatial axes; any remaining axes get no padding.
        spatial_pad = ([padding.t, padding.b], [padding.l, padding.r])
        other_pad = ([0, 0], ) * (np.ndim(hwc_input) - 2)
        hwc_input = np.pad(hwc_input,
                           spatial_pad + other_pad,
                           mode='constant',
                           constant_values=0)
        pad_h, pad_w = padding.h, padding.w

    kernels = weights.transpose(
        params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))
    filt_h, filt_w = params.filter.h, params.filter.w
    out_c = params.filter.out_c

    # Size of the stride-1 output; also the extent of every input window.
    out_h = in_dims.h - filt_h + pad_h + 1
    out_w = in_dims.w - filt_w + pad_w + 1

    if biases is None:
        result = np.zeros((out_c, out_h, out_w))
    else:
        result = np.ones((out_c, out_h, out_w)) * biases.reshape(out_c, 1, 1)

    for channel in range(out_dims.c):
        # View into the result, so in-place adds update the output directly.
        channel_acc = result[channel]
        for tap_h in range(filt_h):
            rows = slice(tap_h, tap_h + out_h)
            for tap_w in range(filt_w):
                cols = slice(tap_w, tap_w + out_w)
                # every input element this filter tap multiplies
                window = hwc_input[rows, cols, ...]
                weighted = window * kernels[channel, tap_h, tap_w]
                # collapse the last axis and accumulate
                channel_acc += weighted.sum(axis=-1)

                if track_range:
                    details['min_acc'] = min(np.min(channel_acc),
                                             details['min_acc'])
                    details['max_acc'] = max(np.max(channel_acc),
                                             details['max_acc'])

    stride = params.stride
    if stride.size() > 1:
        result = result[:, ::stride.h, ::stride.w, ...]

    return result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))