Beispiel #1
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Resize the first input by nearest-neighbour sampling.

        The prepared input is transposed to (h, w, c) order, every output
        pixel copies the input pixel at the rounded, scaled output
        coordinate, and the result is transposed back with the output
        dimension's ``transpose_from_order``.

        Args:
            params: Node parameters; ``in_dims[0]`` and ``out_dims[0]``
                provide the ``h``, ``w`` and ``c`` sizes used here.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the resized tensor.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        in_dim, out_dim = params.in_dims[0], params.out_dims[0]
        in_tensor = in_tensor.transpose(
            in_dim.transpose_to_order(("h", "w", "c")))
        w_out, h_out, c_out = out_dim.w, out_dim.h, out_dim.c
        # With a single output row/column there is no far edge to align
        # with, so sample index 0 instead of dividing by (1 - 1).
        wstep = (in_dim.w - 1) / (w_out - 1) if w_out > 1 else 0.0
        hstep = (in_dim.h - 1) / (h_out - 1) if h_out > 1 else 0.0
        # The source row/column of each output row/column is independent of
        # the other axis, so compute each index list once.
        rows = [int(round(hstep * i)) for i in range(h_out)]
        cols = [int(round(wstep * j)) for j in range(w_out)]
        out_tensor = np.empty((h_out, w_out, c_out))
        # np.ix_ builds the open mesh of (row, col) pairs; the channel axis
        # is carried along untouched.
        out_tensor[...] = in_tensor[np.ix_(rows, cols)]

        out_tensor = out_tensor.transpose(
            out_dim.transpose_from_order(("h", "w", "c")))
        return qrec.get_outputs(params, [out_tensor], ktype="float")
Beispiel #2
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Resize the first input with bilinear interpolation.

        Dimensions come from ``cls.calc_transposed_dims(params)``. The
        prepared input is transposed to (h, w, c) order and every output
        pixel blends the four input pixels around its scaled coordinate.

        Args:
            params: Node parameters passed to ``calc_transposed_dims``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the resized tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        source = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        all_in_dims, all_out_dims = cls.calc_transposed_dims(params)
        src_dim, dst_dim = all_in_dims[0], all_out_dims[0]
        source = source.transpose(
            src_dim.transpose_to_order(("h", "w", "c")))
        cols, rows, chans = dst_dim.w, dst_dim.h, dst_dim.c
        col_step = (src_dim.w - 1) / cols
        row_step = (src_dim.h - 1) / rows
        result = np.empty((rows, cols, chans))
        for row in range(rows):
            src_y = row_step * row
            top, bottom = math.floor(src_y), math.ceil(src_y)
            # Fractional distance from the upper neighbour row.
            frac_y = src_y - top
            for col in range(cols):
                src_x = col_step * col
                left, right = math.floor(src_x), math.ceil(src_x)
                # Fractional distance from the left neighbour column.
                frac_x = src_x - left
                top_left = source[top, left, :]
                top_right = source[top, right, :]
                bottom_left = source[bottom, left, :]
                bottom_right = source[bottom, right, :]
                result[row, col, :] = (
                    top_left * (1 - frac_x) * (1 - frac_y)
                    + top_right * frac_x * (1 - frac_y)
                    + bottom_left * (1 - frac_x) * frac_y
                    + bottom_right * frac_x * frac_y)

        result = result.transpose(
            dst_dim.transpose_from_order(("h", "w", "c")))
        return qrec.get_outputs(params, [result], ktype="float")
Beispiel #3
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Multiply the first two inputs as matrices and add an optional bias.

        When ``params`` is a ``MatMulTransposedParameters`` the second
        operand is transposed with axes (1, 0) first. A third input, if
        present, is used as bias; a 1-D bias whose length equals the first
        operand's first dimension gets a trailing axis added.

        Args:
            params: Node parameters.
            in_tensors: Two operands and optionally a bias tensor.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the product plus bias.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")

        lhs = in_tensors[0]
        rhs = in_tensors[1]
        if isinstance(params, MatMulTransposedParameters):
            rhs = np.transpose(rhs, (1, 0))

        biases = 0
        if len(in_tensors) > 2:
            biases = in_tensors[2]
            per_row = (len(biases.shape) == 1
                       and biases.shape[0] == lhs.shape[0])
            if per_row:
                biases = np.expand_dims(biases, -1)

        if qrec.ktype.startswith('float'):
            out_dtype = qrec.out_qs[0].dtype
        else:
            out_dtype = np.float32
        product = np.matmul(lhs, rhs).astype(out_dtype)
        offset = np.atleast_1d(biases).astype(out_dtype)
        return qrec.get_outputs(params, [product + offset], ktype="float")
Beispiel #4
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Apply ``1 / (1 + exp(-x))`` element-wise to the first input.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the activated tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        denominator = 1 + np.exp(-in_tensor)
        return qrec.get_outputs(params, [1 / denominator], ktype="float")
Beispiel #5
0
 def execute(cls, params,
             in_tensors,
             qrec: QRec,
             **kwargs):
     """Evaluate the node's function collection on the prepared inputs.

     Inputs are bound to ``params.input_symbols`` by position. The function
     collection is taken from ``qrec.cache['qfunc_col']`` when present,
     otherwise from ``params.func_col``. When a ``details`` keyword is
     supplied, a ``SymbolStats`` control is installed as the default symbol
     control and its stats plus the tracked results are stored in
     ``details``.

     Args:
         params: Node parameters providing ``input_symbols``,
             ``output_symbols`` and ``func_col``.
         in_tensors: Input tensors, one per input symbol.
         qrec: Quantization record. An ``AllFloatQRec`` is created when
             this is None.
         **kwargs: ``details`` (a mutable mapping) enables stats collection.

     Returns:
         The value of ``qrec.get_outputs`` for the tensors named by
         ``params.output_symbols``.
     """
     qrec = AllFloatQRec() if qrec is None else qrec
     details = kwargs.get('details')
     current_control = None
     results = None
     if details is not None:
         current_control = SymbolStats()
         Symbol.set_default_control(current_control)
         results = {}
     in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
     in_vars = {}
     for position, tensor in enumerate(in_tensors):
         in_vars[params.input_symbols[position]] = tensor
     func_col = qrec.cache.get('qfunc_col')
     if func_col is None:
         func_col = params.func_col
     collect_ranges = current_control is not None
     out_vars = func_col(**in_vars,
                         calculate_ranges=collect_ranges,
                         track_results=results)
     out_tensors = []
     for symbol_name in params.output_symbols:
         out_tensors.append(out_vars[symbol_name])
     if current_control:
         details.update(current_control.stats)
         details['results'] = results
     return qrec.get_outputs(params, out_tensors, ktype="float")
    def execute(cls,
                params: Parameters,
                input_tensors: Sequence[np.ndarray],
                qrec: QRec,
                details: str = None) -> Sequence[np.ndarray]:
        """Dispatch execution to the handler registered for this node type.

        The handler table for ``params.__class__`` is looked up in
        ``HANDLERS``; within it the handler keyed by ``qrec.ktype`` is
        preferred and the ``'any'`` handler is the fallback.

        Args:
            params: Node parameters; their class selects the handler table.
            input_tensors: Tensors handed to the handler.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            details: Forwarded to the handler as the ``details`` keyword.

        Returns:
            Whatever the selected handler's ``execute`` returns.

        Raises:
            ValueError: If no handler table exists for the node class, or
                the table has neither a ``qrec.ktype`` nor an ``'any'``
                entry.
        """
        params_class = params.__class__
        if params_class not in HANDLERS:
            raise ValueError(
                f"no handlers found for {params.__class__.__name__}")
        handlers = HANDLERS[params_class]
        qrec = AllFloatQRec() if qrec is None else qrec
        handler = handlers.get(qrec.ktype)
        if handler is None:
            handler = handlers.get('any')
            if handler is None:
                raise ValueError(
                    f"no handlers found for {params.__class__.__name__} quantization {qrec.ktype}"
                )

        return handler.execute(params,
                               input_tensors,
                               qrec,
                               details=details,
                               qname=qrec.ktype)
Beispiel #7
0
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     """Apply ``softmax_func`` to the first input along ``params.axis``.

     Floating-point overflow is turned into an exception while the inputs
     are prepared and the softmax is computed.

     Args:
         params: Node parameters; ``axis`` selects the softmax axis.
         in_tensors: Input tensors; only the first prepared one is used.
         qrec: Quantization record. An ``AllFloatQRec`` is created when
             this is None.
         **kwargs: Not used by this kernel.

     Returns:
         The value of ``qrec.get_outputs`` for the softmax result.

     Raises:
         FloatingPointError: If NumPy detects an overflow in the guarded
             section.
     """
     if qrec is None:
         qrec = AllFloatQRec()
     # errstate restores the previous error settings on every exit path,
     # so a raised overflow cannot leave over='raise' active globally.
     with np.errstate(over='raise'):
         in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
         in_tensor = softmax_func(in_tensor, axis=params.axis)
     return qrec.get_outputs(params, [in_tensor], ktype="float")
Beispiel #8
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Clamp the first input element-wise to the range [-1.0, 1.0].

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the clamped tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        floored = np.maximum(in_tensor, -1.0)
        clamped = np.minimum(floored, 1.0)
        return qrec.get_outputs(params, [clamped], ktype="float")
Beispiel #9
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Apply a leaky ReLU with slope ``params.leak_factor`` for x < 0.

        Positive elements pass through unchanged, negative elements are
        multiplied by the leak factor.

        Args:
            params: Node parameters providing ``leak_factor``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the activated tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        positive_part = in_tensor * (in_tensor > 0)
        negative_part = in_tensor * params.leak_factor * (in_tensor < 0)
        return qrec.get_outputs(params, [positive_part + negative_part],
                                ktype="float")
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Run per-class non-maximum suppression and return selected indexes.

        ``in_tensors[0][0]`` is used as the box list (converted with
        ``convert_cnts2cors`` when ``params.center_point_box`` is set) and
        ``in_tensors[1][0]`` as scores indexed ``[class, box]``. For every
        class, boxes scoring above ``params.nms_score_threshold`` are ordered
        by descending score and lower-scored boxes whose intersection is at
        least ``params.nms_iou_threshold`` times the union with an earlier
        box are dropped. At most ``params.max_output_boxes_per_class``
        survivors are written per class.

        The inputs are used as given; they are not passed through
        ``qrec.prepare_inputs``.

        Args:
            params: Node parameters with the thresholds and limits above.
            in_tensors: Boxes tensor followed by scores tensor.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for an array with
            ``max_output_boxes_per_class * n_classes`` rows of
            ``[0, class_id, box_index]``; unused rows stay zero.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        boxes = in_tensors[0][0] if not params.center_point_box else convert_cnts2cors(in_tensors[0][0])
        scores = in_tensors[1][0]
        n_boxes = len(scores[0])
        n_classes = len(scores)
        max_per_class = params.max_output_boxes_per_class

        indexes = np.zeros((max_per_class * n_classes, 3))
        idxs_count = 0
        for class_id in range(n_classes):
            candidates = [
                {
                    "index": box_id,
                    "score": scores[class_id, box_id],
                    "box": boxes[box_id],
                    "alive": True
                }
                for box_id in range(n_boxes)
                if scores[class_id, box_id] > params.nms_score_threshold
            ]

            # Descending by score; list.sort is stable, so boxes with equal
            # scores keep their original relative order.
            candidates.sort(key=lambda cand: cand["score"], reverse=True)

            # NMS
            # NOTE(review): a box that has itself been suppressed still
            # suppresses later boxes here; textbook greedy NMS would skip
            # it. Left unchanged - confirm this is intended.
            for pos, keeper in enumerate(candidates):
                for other in candidates[pos + 1:]:
                    if not other["alive"]:
                        continue
                    intersection = rect_intersect_area(keeper["box"], other["box"])
                    union = rect_union_area(keeper["box"], other["box"])
                    if intersection >= (params.nms_iou_threshold * union):
                        other["alive"] = False

            n_selected = 0
            for cand in candidates:
                # ">=" so that exactly max_per_class boxes are kept; a ">"
                # comparison admits one extra box per class and can run
                # past the end of ``indexes``.
                if n_selected >= max_per_class:
                    break
                if cand["alive"]:
                    indexes[idxs_count] = np.array([0, class_id, cand['index']])
                    idxs_count += 1
                    n_selected += 1

        return qrec.get_outputs(params, [indexes], ktype="float")
Beispiel #11
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Apply the class-level ``FUNC`` to all prepared inputs.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors, passed positionally to ``cls.FUNC``.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the result of ``FUNC``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        prepared = qrec.prepare_inputs(params, in_tensors, ktype="float")
        result = cls.FUNC(*prepared)
        return qrec.get_outputs(params, [result], ktype="float")
Beispiel #12
0
    def sum_execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Sum the first input over ``params.axis``.

        Args:
            params: Node parameters providing ``axis`` and ``keep_dims``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the reduced tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]

        reduction_axes = tuple(params.axis)
        summed = np.sum(in_tensor,
                        axis=reduction_axes,
                        keepdims=params.keep_dims)
        return qrec.get_outputs(params, [summed], ktype="float")
Beispiel #13
0
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     """Pad the first input with constant values.

     The result is cast to the dtype of ``qrec.out_qs[0]`` when the record's
     ``ktype`` starts with ``'float'`` and to ``np.float32`` otherwise.

     Args:
         params: Node parameters providing ``padding`` and ``pad_vals``.
         in_tensors: Input tensors; only the first prepared one is used.
         qrec: Quantization record. An ``AllFloatQRec`` is created when
             this is None.
         **kwargs: Not used by this kernel.

     Returns:
         The value of ``qrec.get_outputs`` for the padded tensor.
     """
     qrec = AllFloatQRec() if qrec is None else qrec
     in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
     if qrec.ktype.startswith('float'):
         out_dtype = qrec.out_qs[0].dtype
     else:
         out_dtype = np.float32
     padded = np.pad(in_tensor,
                     params.padding,
                     'constant',
                     constant_values=params.pad_vals)
     return qrec.get_outputs(params, [padded.astype(out_dtype)],
                             ktype="float")
Beispiel #14
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Multiply the inputs element-wise.

        Two inputs are multiplied, or three when ``params.in_dims`` has
        exactly three entries.

        Args:
            params: Node parameters; ``in_dims`` decides the operand count.
            in_tensors: The tensors to multiply.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the product.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
        has_third_operand = len(params.in_dims) == 3
        product = in_tensors[0] * in_tensors[1]
        if has_third_operand:
            product = product * in_tensors[2]
        return qrec.get_outputs(params, [product], ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Apply a leaky ReLU using a leak factor cast to the input dtype.

        The leak factor is converted with the dtype of ``qrec.in_qs[0]``
        when the record's ``ktype`` starts with ``'float'`` and with
        ``np.float32`` otherwise.

        Args:
            params: Node parameters providing ``leak_factor``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the activated tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32
        leak_factor = in_dtype(params.leak_factor)
        positive_part = in_tensor * (in_tensor > 0)
        negative_part = in_tensor * leak_factor * (in_tensor < 0)
        return qrec.get_outputs(params, [positive_part + negative_part],
                                ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Clamp the first input to [-1, 1] with bounds in the input dtype.

        The bounds are built with the dtype of ``qrec.in_qs[0]`` when the
        record's ``ktype`` starts with ``'float'`` and with ``np.float32``
        otherwise.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the clamped tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32
        lower = in_dtype(-1.0)
        floored = np.maximum(in_tensor, lower)
        upper = in_dtype(1.0)
        clamped = np.minimum(floored, upper)
        return qrec.get_outputs(params, [clamped], ktype="float")
Beispiel #17
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Apply the class-level ``FUNC`` to the first input and cast it.

        The result is cast to the dtype of ``qrec.out_qs[0]`` when the
        record's ``ktype`` starts with ``'float'`` and to ``np.float32``
        otherwise.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the cast result.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            out_dtype = qrec.out_qs[0].dtype
        else:
            out_dtype = np.float32
        raw_result = cls.FUNC(in_tensor)
        return qrec.get_outputs(params, [raw_result.astype(out_dtype)],
                                ktype="float")
Beispiel #18
0
 def execute(cls, params,
             in_tensors,
             qrec: QRec,
             **kwargs):
     """Apply ``softmax_func`` along ``params.axis`` and cast the result.

     Floating-point overflow is turned into an exception while the inputs
     are prepared and the softmax is computed. The result is cast to the
     dtype of ``qrec.out_qs[0]`` when the record's ``ktype`` starts with
     ``'float'`` and to ``np.float32`` otherwise.

     Args:
         params: Node parameters; ``axis`` selects the softmax axis.
         in_tensors: Input tensors; only the first prepared one is used.
         qrec: Quantization record. An ``AllFloatQRec`` is created when
             this is None.
         **kwargs: Not used by this kernel.

     Returns:
         The value of ``qrec.get_outputs`` for the softmax result.

     Raises:
         FloatingPointError: If NumPy detects an overflow in the guarded
             section.
     """
     if qrec is None:
         qrec = AllFloatQRec()
     # errstate restores the previous error settings on every exit path,
     # so a raised overflow cannot leave over='raise' active globally.
     with np.errstate(over='raise'):
         in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
         out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
             'float') else np.float32
         in_tensor = softmax_func(in_tensor, axis=params.axis).astype(out_dtype)
     return qrec.get_outputs(params, [in_tensor], ktype="float")
Beispiel #19
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Average-pool the first input over a sliding h/w window.

        Dimensions come from ``cls.calc_transposed_dims(params)``. The input
        is zero-padded when ``params.padding`` is non-zero; each window is
        summed and multiplied by ``1 / (filter.h * filter.w)``. Calculation
        uses the dtype of ``qrec.out_qs[0]`` when the record's ``ktype``
        starts with ``'float'`` and ``np.float32`` otherwise.

        Args:
            params: Node parameters providing ``filter``, ``stride`` and
                ``padding``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the pooled tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec

        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        in_dims, out_dims = tuple(dims[0]
                                  for dims in cls.calc_transposed_dims(params))
        window_size = params.filter.h * params.filter.w

        if qrec.ktype.startswith('float'):
            calc_dtype = qrec.out_qs[0].dtype
        else:
            calc_dtype = np.float32

        pool_factor = np.array(1.0 / window_size, dtype=calc_dtype)

        out_tensor = np.zeros(out_dims.shape, dtype=calc_dtype)

        pad_h = pad_w = 0
        if params.padding.h + params.padding.w > 0:
            in_tensor = np.pad(in_tensor,
                               params.padding.numpy_pad_shape(in_dims),
                               mode='constant',
                               constant_values=0.0)
            pad_w = params.padding.w
            pad_h = params.padding.h

        last_h_start = in_dims.h - params.filter.h + pad_h
        for out_h, h_idx in enumerate(
                range(0, last_h_start + 1, params.stride.h)):
            last_w_start = in_dims.w - params.filter.w + pad_w
            for out_w, w_idx in enumerate(
                    range(0, last_w_start + 1, params.stride.w)):
                # One output position across all channels.
                out_slice_args = out_dims.srange(h=out_h, w=out_w)
                # The matching input window across all output channels.
                in_slice_args = in_dims.srange(
                    c=[0, out_dims.c, 1],
                    h=[h_idx, h_idx + params.filter.h, 1],
                    w=[w_idx, w_idx + params.filter.w, 1])

                target_shape = out_tensor[out_slice_args].shape
                spatial_axes = (out_dims.keys.index('h'),
                                out_dims.keys.index('w'))
                window_sum = np.sum(in_tensor[in_slice_args],
                                    dtype=calc_dtype,
                                    axis=spatial_axes).reshape(target_shape)
                out_tensor[out_slice_args] = np.multiply(
                    window_sum, pool_factor)

        return qrec.get_outputs(params, [out_tensor], ktype="float")
Beispiel #20
0
    def execute_piecewise(cls, params,
                          in_tensors,
                          qrec: QRec,
                          op,
                          **kwargs):
        """Apply a binary operation to the first two prepared inputs.

        When ``params`` is a ``Broadcastable`` with ``is_broadcasted`` set,
        the inputs are first passed through ``params.broadcast_inputs``.

        Args:
            params: Node parameters.
            in_tensors: Input tensors; the first two are combined.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            op: Callable taking the two operands and returning the result.
            **kwargs: Discarded.

        Returns:
            The value of ``qrec.get_outputs`` for the result of ``op``.
        """
        del kwargs
        qrec = AllFloatQRec() if qrec is None else qrec
        operands = qrec.prepare_inputs(params, in_tensors, ktype="float")
        needs_broadcast = (isinstance(params, Broadcastable)
                           and params.is_broadcasted)
        if needs_broadcast:
            operands = params.broadcast_inputs(operands)

        result = op(operands[0], operands[1])
        return qrec.get_outputs(params, [result], ktype="float")
Beispiel #21
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Clamp the first input to ``params.lower_bound`` and upper bound.

        The upper clamp is skipped when ``params.upper_bound`` is None.

        Args:
            params: Node parameters providing ``lower_bound`` and
                ``upper_bound``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the clamped tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        unbounded_above = params.upper_bound is None
        clamped = np.maximum(in_tensor, params.lower_bound)
        if not unbounded_above:
            clamped = np.minimum(clamped, params.upper_bound)
        return qrec.get_outputs(params, [clamped], ktype="float")
Beispiel #22
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Produce the constant value held by ``params``.

        When ``params.qtype`` is None the stored ``value`` is used, copied
        first if ``params.is_mutated`` is set; otherwise ``params.dqvalue``
        is used. The value is then passed through
        ``qrec.out_qs[0].quantize``.

        Args:
            params: Constant node parameters.
            in_tensors: Ignored.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the constant.
        """
        del in_tensors
        qrec = AllFloatQRec() if qrec is None else qrec

        if params.qtype is not None:
            # A stored quantization type means the dequantized value is
            # the one to emit.
            value = params.dqvalue
        elif params.is_mutated:
            # Hand out a copy so numpy cannot modify the stored value.
            value = params.value.copy()
        else:
            value = params.value
        quantized = qrec.out_qs[0].quantize(value)
        return qrec.get_outputs(params, [quantized], ktype="float")
Beispiel #23
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Multiply the first two inputs as matrices and add an optional bias.

        A third input, if present, is used as bias; a 1-D bias gets a
        trailing axis added before the addition.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Two operands and optionally a bias tensor.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the product plus bias.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
        biases = 0
        if len(in_tensors) > 2:
            biases = in_tensors[2]
            if len(biases.shape) == 1:
                biases = np.expand_dims(biases, -1)
        product = np.matmul(in_tensors[0], in_tensors[1])
        return qrec.get_outputs(params, [product + biases], ktype="float")
Beispiel #24
0
    def average_execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Average the first input over ``params.axis``.

        The sum is accumulated in the dtype of ``qrec.out_qs[0]`` when the
        record's ``ktype`` starts with ``'float'`` and in ``np.float32``
        otherwise, divided by the number of reduced elements and reshaped
        to ``params.out_dims[0].shape``.

        Args:
            params: Node parameters providing ``axis``, ``keep_dims`` and
                ``out_dims``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the averaged tensor.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]

        calc_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32
        axes = tuple(params.axis)
        sum_by_chan = np.sum(in_tensor,
                             dtype=calc_dtype,
                             axis=axes,
                             keepdims=params.keep_dims)
        # np.sum accepts negative axes; map them to their non-negative
        # position so the divisor counts the same dimensions it reduced.
        reduced_axes = {axis % in_tensor.ndim for axis in axes}
        # The initial value 1 keeps reduce() from raising on an empty axis
        # list, where nothing is reduced and the divisor must be 1.
        sz = reduce(
            lambda x, y: x * y,
            (dim for idx, dim in enumerate(in_tensor.shape)
             if idx in reduced_axes),
            1)

        return qrec.get_outputs(
            params, [(sum_by_chan / sz).reshape(params.out_dims[0].shape)],
            ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Apply tanh to the first input, choosing the kernel by dtype.

        The input dtype is that of ``qrec.in_qs[0]`` when the record's
        ``ktype`` starts with ``'float'`` and ``np.float32`` otherwise.
        ``np.tanh`` is used for ``np.float32``; for other dtypes
        ``tanh_lut_float`` is used when ``qrec.cache['kernel_type']`` is
        ``"lut"`` and ``np_fasttanh`` otherwise.

        Args:
            params: Node parameters, forwarded to ``qrec``.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the activated tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32

        if in_dtype == np.float32:
            result = np.tanh(in_tensor).astype(in_dtype)
        elif qrec.cache.get('kernel_type') == "lut":
            result = tanh_lut_float(in_tensor, in_dtype)
        else:
            result = np_fasttanh(in_tensor, dtype=in_dtype, doalt=True)
        return qrec.get_outputs(params, [result], ktype="float")
Beispiel #26
0
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     """Decode box offsets against anchors and run NMS on the result.

     The three prepared inputs are used, in order, as offsets, scores and
     anchors. ``cls.decoder`` is called with ``anchors_type='centers'`` and
     its output is passed to ``cls.nms``. The count output is the number of
     entries of the returned classes that are not 0.

     Args:
         params: Node parameters, forwarded to the decoder and NMS.
         in_tensors: Offsets, scores and anchors tensors.
         qrec: Quantization record. An ``AllFloatQRec`` is created when
             this is None.
         **kwargs: Not used by this kernel.

     Returns:
         The value of ``qrec.get_outputs`` for
         ``[boxes, classes, scores, count]``.
     """
     qrec = AllFloatQRec() if qrec is None else qrec
     in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
     offsets, scores, anchors = in_tensors[0], in_tensors[1], in_tensors[2]
     decoded_bboxes, valid_scores = cls.decoder(params,
                                                qrec,
                                                offsets,
                                                anchors,
                                                scores,
                                                anchors_type='centers')
     nms_result = cls.nms(params, qrec, decoded_bboxes, valid_scores)
     out_boxes, out_scores, out_classes = nms_result
     detected = sum(out_classes != 0)
     out_count = np.array([detected])
     outputs = [out_boxes, out_classes, out_scores, out_count]
     return qrec.get_outputs(params, outputs, ktype="float")
Beispiel #27
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Fetch the graph input selected by ``params.index`` and shape it.

        When the element count matches ``params.dims`` the tensor is
        reshaped to ``params.dims.shape``; if the ranks also match, the
        shapes must agree once dimensions of size 1 are ignored. Otherwise
        the tensor is passed to ``resize``. ``params.transpose_out``, when
        set, is applied last.

        Args:
            params: Input node parameters.
            in_tensors: All graph inputs; ``params.index`` picks one.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the shaped tensor.

        Raises:
            ValueError: If sizes and ranks match but the non-unit
                dimensions differ.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = in_tensors[params.index]
        if in_tensor.size != params.dims.size():
            in_tensor = resize(in_tensor, params.dims.shape)
        else:
            same_rank = len(in_tensor.shape) == len(params.dims.shape)
            if same_rank:
                # Compare shapes with unit dimensions stripped.
                in_shape = tuple(
                    extent for extent in in_tensor.shape if extent > 1)
                expected_shape = tuple(
                    extent for extent in params.dims.shape if extent > 1)
                if in_shape != expected_shape:
                    raise ValueError(
                        f'{params.name} received input of shape {in_tensor.shape} but expecting {params.dims.shape}'
                    )
            in_tensor = in_tensor.reshape(params.dims.shape)
        if params.transpose_out:
            in_tensor = np.transpose(in_tensor, params.transpose_out)
        return qrec.get_outputs(params, [in_tensor], ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Fetch the graph input selected by ``params.index``, shape and cast.

        When the element count matches ``params.dims`` the tensor is
        reshaped to ``params.dims.shape``; if the ranks also match, the
        shapes must agree once dimensions of size 1 are ignored. Otherwise
        the tensor is passed to ``resize``. The result is cast to the dtype
        of ``qrec.out_qs[0]`` when the record's ``ktype`` starts with
        ``'float'``; otherwise to ``params.imported_dtype`` if that is set,
        else ``np.float32``.

        Args:
            params: Input node parameters.
            in_tensors: All graph inputs; ``params.index`` picks one.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the cast tensor.

        Raises:
            ValueError: If sizes and ranks match but the non-unit
                dimensions differ.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        in_tensor = in_tensors[params.index]
        if in_tensor.size != params.dims.size():
            in_tensor = resize(in_tensor, params.dims.shape)
        else:
            same_rank = len(in_tensor.shape) == len(params.dims.shape)
            if same_rank:
                # Compare shapes with unit dimensions stripped.
                in_shape = tuple(
                    extent for extent in in_tensor.shape if extent > 1)
                expected_shape = tuple(
                    extent for extent in params.dims.shape if extent > 1)
                if in_shape != expected_shape:
                    raise ValueError(
                        f'{params.name} received input of shape {in_tensor.shape} but expecting {params.dims.shape}'
                    )
            in_tensor = in_tensor.reshape(params.dims.shape)
        if qrec.ktype.startswith('float'):
            out_dtype = qrec.out_qs[0].dtype
        elif params.imported_dtype:
            out_dtype = params.imported_dtype
        else:
            out_dtype = np.float32
        return qrec.get_outputs(params, [in_tensor.astype(out_dtype)],
                                ktype="float")
Beispiel #29
0
    def execute(cls,
                params: Parameters,
                input_tensors: Sequence[np.ndarray],
                qrec: QRec,
                details: str = None) -> Sequence[np.ndarray]:
        """Dispatch execution to the handler registered for this node type.

        The handler table for ``params.__class__`` is looked up in
        ``HANDLERS``; within it the handler keyed by ``qrec.ktype`` is
        preferred and the ``'any'`` handler is the fallback. For
        ``Transposable`` parameters the per-input ``transpose_in`` and
        per-output ``transpose_out`` permutations are applied around the
        handler call; falsy entries leave the tensor untouched.

        Args:
            params: Node parameters; their class selects the handler table.
            input_tensors: Tensors handed to the handler.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            details: Forwarded to the handler as the ``details`` keyword.

        Returns:
            The handler's outputs, transposed where requested.

        Raises:
            ValueError: If no handler table exists for the node class, or
                the table has neither a ``qrec.ktype`` nor an ``'any'``
                entry.
        """
        params_class = params.__class__
        if params_class not in HANDLERS:
            raise ValueError(
                f"no handlers found for {params.__class__.__name__}")
        handlers = HANDLERS[params_class]
        qrec = AllFloatQRec() if qrec is None else qrec
        handler = handlers.get(qrec.ktype)
        if handler is None:
            handler = handlers.get('any')
            if handler is None:
                raise ValueError(
                    f"no handlers found for {params.__class__.__name__} quantization {qrec.ktype}"
                )

        if isinstance(params, Transposable) and params.transpose_in:
            permuted_inputs = []
            for idx, in_tensor in enumerate(input_tensors):
                perm = params.transpose_in[idx]
                permuted_inputs.append(
                    np.transpose(in_tensor, perm) if perm else in_tensor)
            input_tensors = permuted_inputs

        output_tensors = handler.execute(params,
                                         input_tensors,
                                         qrec,
                                         details=details,
                                         qname=qrec.ktype)

        if isinstance(params, Transposable) and params.transpose_out:
            permuted_outputs = []
            for idx, out_tensor in enumerate(output_tensors):
                perm = params.transpose_out[idx]
                permuted_outputs.append(
                    np.transpose(out_tensor, perm) if perm else out_tensor)
            output_tensors = permuted_outputs
        return output_tensors
Beispiel #30
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Resize the first input bilinearly with typed blend coefficients.

        The prepared input is transposed to (h, w, c) order and every output
        pixel blends the four input pixels around its scaled coordinate. The
        blend weights are built with the dtype of ``qrec.out_qs[0]`` when
        the record's ``ktype`` starts with ``'float'`` and with
        ``np.float32`` otherwise.

        Args:
            params: Node parameters; ``in_dims[0]`` and ``out_dims[0]``
                provide the ``h``, ``w`` and ``c`` sizes used here.
            in_tensors: Input tensors; only the first prepared one is used.
            qrec: Quantization record. An ``AllFloatQRec`` is created when
                this is None.
            **kwargs: Not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the resized tensor.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        source = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        src_dim, dst_dim = params.in_dims[0], params.out_dims[0]
        source = source.transpose(
            src_dim.transpose_to_order(("h", "w", "c")))
        cols, rows, chans = dst_dim.w, dst_dim.h, dst_dim.c
        col_step = (src_dim.w - 1) / cols
        row_step = (src_dim.h - 1) / rows
        result = np.empty((rows, cols, chans))
        if qrec.ktype.startswith('float'):
            out_dtype = qrec.out_qs[0].dtype
        else:
            out_dtype = np.float32
        for row in range(rows):
            src_y = row_step * row
            top, bottom = math.floor(src_y), math.ceil(src_y)
            # Weight of the lower row and its complement.
            hc = out_dtype(src_y - top)
            inv_hc = out_dtype(1) - hc
            for col in range(cols):
                src_x = col_step * col
                left, right = math.floor(src_x), math.ceil(src_x)
                # Weight of the right column and its complement.
                wc = out_dtype(src_x - left)
                inv_wc = out_dtype(1) - wc
                top_left = source[top, left, :]
                top_right = source[top, right, :]
                bottom_left = source[bottom, left, :]
                bottom_right = source[bottom, right, :]
                result[row, col, :] = (top_left * inv_wc * inv_hc
                                       + top_right * wc * inv_hc
                                       + bottom_left * inv_wc * hc
                                       + bottom_right * wc * hc)

        result = result.transpose(
            dst_dim.transpose_from_order(("h", "w", "c")))
        return qrec.get_outputs(params, [result], ktype="float")