def gen_at_globalpool(code_block, name, in_q, out_q, c, h, w,
                      at_globalpool, gen_ctrl=None, at_ver=3):
    if gen_ctrl is None:
        gen_ctrl = "0"
    else:
        gen_ctrl = gen_ctrl.ctrl_name
        #raise NotImplementedError("genctrl is not yet implemented")

    if at_ver < 3:
        # older generator API does not take the in/out fractional (q) arguments
        code_block.write('{}("{}", {}, {}, {}, 1, 1, {}, {}, {}, {}, {});',
                         GEN_GLOBALPOOL, name, gen_ctrl,
                         at_bits(in_q), at_bits(out_q),
                         c, c, h, w, at_globalpool.GlobalPoolOper)
    else:
        code_block.write(
            '{}("{}", {}, {}, {}, {}, {}, 1, 1, {}, {}, {}, {}, {});',
            GEN_GLOBALPOOL, name, gen_ctrl,
            at_bits(in_q), at_bits(out_q), in_q.q, out_q.q,
            c, c, h, w, at_globalpool.GlobalPoolOper)
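
# --- Illustrative sketch (not part of the generator) -----------------------
# Assuming code_block.write applies str.format-style substitution, this shows
# the shape of the C generator call the at_ver >= 3 branch emits. The generator
# name, node name, Q values and dimensions below are hypothetical placeholders.
def _example_globalpool_call():
    fmt = '{}("{}", {}, {}, {}, {}, {}, 1, 1, {}, {}, {}, {}, {});'
    return fmt.format('CNN_GlobalPool', 'S4_AveragePool', 0,  # generator, node name, ctrl
                      8, 8,                                    # in/out container bits
                      6, 6,                                    # in/out fractional bits (q)
                      32, 32, 7, 7,                            # InFeat, OutFeat, H, W
                      'KOP_GLOBAL_AVGPOOL')
    # -> 'CNN_GlobalPool("S4_AveragePool", 0, 8, 8, 6, 6, 1, 1, 32, 32, 7, 7, KOP_GLOBAL_AVGPOOL);'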
def cache_values(cls, node, qrec):
    # Work out which AT convert kernel the node needs and cache it on qrec
    if all(k in qrec.cache for k in ['kernel_type', 'in_at_size', 'out_at_size']):
        return True  # already computed
    in_q = qrec.in_qs[0]
    out_q = qrec.out_qs[0]
    qrec.cache['float_conversion'] = float_conversion = in_q.is_floating or out_q.is_floating
    qrec.cache['bit_conversion'] = bit_conversion = in_q.bits != out_q.bits
    if float_conversion:
        if in_q.is_floating:
            qrec.cache['kernel_type'] = 'KOP_CONVERT_FL_FP'
        else:
            qrec.cache['kernel_type'] = 'KOP_CONVERT_FP_FL'
        qrec.cache['in_at_size'] = at_bits(in_q)
        qrec.cache['out_at_size'] = at_bits(out_q)
        return True
    else:
        qrec.cache['in_at_size'] = at_bits(in_q)
        qrec.cache['out_at_size'] = at_bits(out_q)
        qrec.cache['same_sign'] = same_sign = in_q.signed == out_q.signed
        input_precision = math.pow(
            2, -(in_q.bits - (1 if in_q.signed else 0)))
        if in_q.bits > out_q.bits:
            # narrowing: scales match if the input scale scaled up by the
            # dropped bits equals the output scale
            bit_diff = in_q.bits - out_q.bits
            same_scale = np.allclose(
                (in_q.scale * np.power(2, bit_diff)).astype(np.float32),
                out_q.scale.astype(np.float32),
                atol=input_precision)
            same_zeropoint = np.all(
                in_q.zero_point >> bit_diff == out_q.zero_point)
        elif out_q.bits > in_q.bits:
            # widening: same test with the roles reversed
            bit_diff = out_q.bits - in_q.bits
            same_scale = np.allclose(
                (out_q.scale * np.power(2, bit_diff)).astype(np.float32),
                in_q.scale.astype(np.float32),
                atol=input_precision)
            same_zeropoint = np.all(
                in_q.zero_point == out_q.zero_point >> bit_diff)
        else:
            same_scale = np.allclose(out_q.scale.astype(np.float32),
                                     in_q.scale.astype(np.float32))
            same_zeropoint = np.all(in_q.zero_point == out_q.zero_point)
        qrec.cache['same_scale'] = same_scale
        qrec.cache['same_zeropoint'] = same_zeropoint
        if same_scale and same_sign and bit_conversion and same_zeropoint:
            qrec.cache['kernel_type'] = 'KOP_CONVERT_FP_FP'
        elif same_scale and not bit_conversion:
            qrec.cache['kernel_type'] = 'KOP_CONVERT_FP_FP_ZEROPOINT'
        else:
            qrec.cache['kernel_type'] = 'KOP_CONVERT_FP_FP_SCALE'
        return True
    return False
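
# --- Illustrative sketch (not part of the generator) -----------------------
# Plain-numpy illustration of the same_scale test above for the widening case
# (out_q.bits > in_q.bits); the scale value and bit widths are hypothetical.
def _example_same_scale_check():
    import math
    import numpy as np
    in_bits, out_bits, signed = 8, 16, True
    in_scale = np.array([0.02], dtype=np.float32)
    # Widening int8 -> int16 by a pure left shift multiplies the raw values by
    # 2**8, so the same real values are represented only if the output scale is
    # the input scale divided by 2**8.
    out_scale = in_scale / np.power(2, out_bits - in_bits)

    input_precision = math.pow(2, -(in_bits - (1 if signed else 0)))
    bit_diff = out_bits - in_bits
    same_scale = np.allclose((out_scale * np.power(2, bit_diff)).astype(np.float32),
                             in_scale.astype(np.float32), atol=input_precision)
    return same_scale  # True -> compatible with a pure bit-width conversion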
def gen_at_2d_transpose(code_block, name, in_q, out_q, in_shape,
                        gen_ctrl=None, at_ver=3):
    if gen_ctrl is None:
        gen_ctrl = "0"
    else:
        raise NotImplementedError("genctrl is not yet implemented")
    code_block.write('CNN_MatTranspose("{}", {}, {}, {}, {}, {}, 1, 1, 1, {}, {});',
                     name, gen_ctrl, at_bits(in_q), at_bits(out_q),
                     in_q.q, out_q.q, in_shape[1], in_shape[0])
def __init__(self, node_name, cname, matmul_params, matmul_qrec, act_params,
             gen_ctrl=None, force_relu=True):
    if gen_ctrl is None:
        self.gen_ctrl = gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

    if act_params is not None:
        act_op = gen_activation_op(
            act_params.activation, force_relu=force_relu)
    else:
        act_op = 'KOP_NONE'

    height_1 = matmul_params.in_dims[0][0]
    width_1 = matmul_params.in_dims[0][1]
    height_2 = matmul_params.in_dims[1][1]
    width_2 = matmul_params.in_dims[1][0]
    bias_datasize = at_bits(matmul_qrec.in_qs[2])
    in1_datasize = at_bits(matmul_qrec.in_qs[0])
    in2_datasize_bits = matmul_qrec.in_qs[1].bits
    out_datasize = at_bits(matmul_qrec.out_qs[0])
    matmul_op = 'KOP_MATMUL_TRANSPOSED'

    # attributes affecting generation
    attrs = {
        'height_1': height_1,
        'width_1': width_1,
        'height_2': height_2,
        'width_2': width_2,
        'bias_datasize': bias_datasize,
        'in1_datasize': in1_datasize,
        'in2_datasize_bits': in2_datasize_bits,
        'out_datasize': out_datasize,
        'matmul_op': matmul_op,
        'act_op': act_op
    }

    # other attributes
    extra_attrs = {
        'cname': cname,
        'node_name': node_name
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, matmul_params, matmul_qrec, act_params,
             gen_ctrl=None, force_relu=True):
    if gen_ctrl is None:
        self.gen_ctrl = gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

    if act_params is not None:
        act_op = gen_activation_op(
            act_params.activation, force_relu=force_relu)
    else:
        act_op = 'KOP_NONE'

    height_1 = matmul_params.in_dims[0][0]
    width_1 = matmul_params.in_dims[0][1]
    height_2 = matmul_params.in_dims[1][0]
    width_2 = matmul_params.in_dims[1][1]
    if len(matmul_params.in_dims) == 3:
        # a third input is the bias
        bias_datasize = at_bits(matmul_qrec.in_qs[2])
        matmul_op = 'KOP_MATMUL'
    else:
        bias_datasize = 0
        matmul_op = 'KOP_MATMUL_NOBIAS'
    if len(matmul_qrec.in_qs[1].scale) == 1:
        matmul_op += '_SCALE_SCALAR'
    if isinstance(matmul_params, MatMulTransposedParameters):
        # second input is stored transposed so its height/width indices swap
        matmul_op += '_TRANSPOSED'
        height_2 = matmul_params.in_dims[1][1]
        width_2 = matmul_params.in_dims[1][0]

    # attributes affecting generation
    attrs = {
        'height_1': height_1,
        'width_1': width_1,
        'height_2': height_2,
        'width_2': width_2,
        'bias_datasize': bias_datasize,
        'matmul_op': matmul_op,
        'act_op': act_op
    }

    # other attributes
    extra_attrs = {
        'cname': cname,
        'node_name': node_name
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
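
# --- Illustrative sketch (not part of the generator) -----------------------
# Plain-numpy illustration (hypothetical shapes, independent of nntool's Dim
# objects) of why the second input's height/width indices swap for the
# *_TRANSPOSED variants: the tensor is stored as B^T, so shape[1] is its
# logical height and shape[0] its logical width.
def _example_transposed_matmul_dims():
    import numpy as np
    a = np.arange(12, dtype=np.float32).reshape(3, 4)  # height_1=3, width_1=4
    b_plain = np.ones((4, 5), dtype=np.float32)         # height_2=shape[0]=4, width_2=shape[1]=5
    b_stored_transposed = b_plain.T.copy()               # stored shape (5, 4)
    # transposed case: height_2 = shape[1] = 4, width_2 = shape[0] = 5
    out_plain = a @ b_plain
    out_trans = a @ b_stored_transposed.T
    assert out_plain.shape == out_trans.shape == (3, 5)
    return out_trans.shape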
def __init__(self, cname, params, matmul_params, matmul_qrec, act_params, act_qrec,
             gen_ctrl=None, out_qtype=None):
    if gen_ctrl is None:
        gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname

    if len(params.in_dims[0]) != 2 or len(params.in_dims[1]) != 2:
        raise ValueError(
            f'Matmul {params.name} has inputs of rank '
            f'{len(params.in_dims[0])} and {len(params.in_dims[1])} '
            f'which are not supported by the matmul kernel')

    in1_shape = params.in_dims[0].shape
    in2_shape = params.in_dims[1].shape
    height_2 = in2_shape[0]
    width_2 = in2_shape[1]
    out_shape = params.out_dims[0].shape
    in1_qtype = matmul_qrec.in_qs[0]
    in2_qtype = matmul_qrec.in_qs[1]
    if len(matmul_params.in_dims) == 3:
        bias_bits = at_bits(matmul_qrec.in_qs[2])
        bias_q = matmul_qrec.in_qs[2].q
        matmul_op = 'KOP_MATMUL'
    else:
        bias_q = 0
        bias_bits = 0
        matmul_op = 'KOP_MATMUL_NOBIAS'
    if isinstance(matmul_params, MatMulTransposedParameters):
        matmul_op += '_TRANSPOSED'
        height_2 = in2_shape[1]
        width_2 = in2_shape[0]

    if act_params is not None:
        act_op = gen_activation_op(act_params.activation)
        out_qtype = act_qrec.out_qs[0]
        relu_lower = 0
        if act_params.activation == "relu6" and out_qtype.q != 0:
            # 6.0 expressed in the output's fixed-point Q format
            relu_upper = 6 << out_qtype.q
        else:
            relu_upper = 0
    else:
        out_qtype = matmul_qrec.out_qs[0]
        relu_upper = relu_lower = 0
        act_op = "KOP_NONE"

    # attributes used to test equality - i.e. this kernel can be reused
    attrs = {
        'in1_qtype': in1_qtype,
        'in2_qtype': in2_qtype,
        'bias_q': bias_q,
        'bias_bits': bias_bits,
        'out_qtype': out_qtype,
        'in1_shape': in1_shape,
        'height_2': height_2,
        'width_2': width_2,
        'out_shape': out_shape,
        'relu_lower': relu_lower,
        'relu_upper': relu_upper,
        'mult_op': matmul_op,
        'act_op': act_op
    }

    # other attributes
    extra_attrs = {
        'cname': cname,
        'node_name': params.name
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)