def gen_scales(gen, pnode, fnode, fqrec):
    cname_mul_scale, file_name_mul_scale = gen_constant(
        gen, pnode, fnode, MULSCALE)
    cname_mul_shift, file_name_mul_shift = gen_constant(
        gen, pnode, fnode, MULSHIFT)
    mul_biases_q = fqrec.cache['mul_biases_q']
    const_info_mul_scale = ConstantInfo(
        file_name_mul_scale, mul_biases_q, contents=gen_mul_biases(fqrec, fnode))
    const_info_mul_shift = ConstantInfo(
        file_name_mul_shift, mul_biases_q.shift_qtype, contents=mul_biases_q.qnorms)
    gen.globals.append(
        GlobalArgInfo(mul_biases_q.ctype, cname_mul_scale,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_scale))
    gen.globals.append(
        GlobalArgInfo(mul_biases_q.shift_ctype, cname_mul_shift,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_shift))

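# Editor's sketch (illustrative, not the quantizer's implementation):
# MULSCALE/MULSHIFT carry the integer decomposition of a floating-point
# channel scale, scale ~= qbias * 2**-qnorm, which is what
# mul_biases_q.qbiases and mul_biases_q.qnorms hold. The helper below
# shows one plausible way to derive and use such a pair for scales < 1.
def decompose_scale(scale, bits=8):
    """Return (qbias, qnorm) with scale ~= qbias * 2**-qnorm, qbias < 2**(bits-1)."""
    qnorm = 0
    while round(scale * 2 ** (qnorm + 1)) < 2 ** (bits - 1) and qnorm < 31:
        qnorm += 1
    return int(round(scale * 2 ** qnorm)), qnorm

# e.g. decompose_scale(0.003) == (98, 15); a kernel would then rescale an
# accumulator acc as (acc * 98 + (1 << 14)) >> 15 ~= acc * 0.003.
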
def gen_ssd_globals(gen, node, qrec):
    qrec.set_scales(node)
    scores_q = qrec.in_qs[1]
    scores_scale, scores_norm = compute_mul_bias(scores_q.scale)

    cname_scales, file_name_scales = gen_constant(gen, node, node, SSD_SCALES)
    contents = np.array([qrec.scale_x_q.qbiases,
                         qrec.scale_x_anc_q.qbiases,
                         qrec.scale_y_q.qbiases,
                         qrec.scale_y_anc_q.qbiases,
                         qrec.scale_h_q.qbiases,
                         qrec.scale_w_q.qbiases,
                         qrec.scale_ao_q.qbiases,
                         scores_scale], dtype=np.int8)
    scale_info = ConstantInfo(file_name_scales, QType(
        bits=8, q=0, signed=True), contents=contents)

    cname_norms, file_name_norms = gen_constant(gen, node, node, SSD_NORMS)
    contents = np.array([qrec.scale_x_q.qnorms,
                         qrec.scale_x_anc_q.qnorms,
                         qrec.scale_y_q.qnorms,
                         qrec.scale_y_anc_q.qnorms,
                         qrec.scale_h_q.qnorms,
                         qrec.scale_w_q.qnorms,
                         qrec.scale_ao_q.qnorms,
                         scores_norm], dtype=np.int8)
    norms_info = ConstantInfo(file_name_norms, QType(
        bits=8, q=0, signed=True), contents=contents)

    score_threshold = scores_q.quantize(node.nms_score_threshold)
    cname_infos, file_name_infos = gen_constant(gen, node, node, INFOS)
    contents = np.array([round(node.nms_iou_threshold * 2**7),  # Q7
                         score_threshold,                       # Q0 [0:255]
                         node.max_detections,                   # Q0 [0:255]
                         node.max_classes_per_detection,        # Q0 [0:255]
                         node.max_bb_before_nms >> 8,
                         node.max_bb_before_nms],
                        dtype=np.int8)  # max_bb = Infos[4]<<8 + Infos[5]
    ssd_infos = ConstantInfo(file_name_infos, QType(
        bits=8, q=0, signed=True), contents=contents)

    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.ctype, cname_scales,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=scale_info))
    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.shift_ctype, cname_norms,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=norms_info))
    gen.globals.append(GlobalArgInfo('uint8', cname_infos,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=ssd_infos))

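# Editor's note: max_bb_before_nms is a 16-bit count spread over two int8
# info slots; the kernel rebuilds it as max_bb = Infos[4] << 8 + Infos[5]
# (see the comment above). An illustrative round trip:
def pack_u16_to_two_i8(val):
    """Split a 16-bit value into [high, low] bytes stored as int8."""
    return np.array([(val >> 8) & 0xFF, val & 0xFF], dtype=np.uint8).astype(np.int8)

def unpack_two_i8_to_u16(two_i8):
    hi, lo = two_i8.astype(np.uint8)
    return (int(hi) << 8) | int(lo)
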
def rnn_infos(gen, node, qrec):
    i_state_q = qrec.in_qs[node.INPUT_NAMES.index('i_state')]

    contents = []
    comments = []

    # info for activation (scale the act input to the proper scale)
    info, comment = INFOS_FUNCS[node.activation]("f", qrec.s_2_s_q, i_state_q)
    contents.append(info)
    comments.append(comment)

    # info for input scaling (only used with non SameInputStateScale kernels)
    info, comment = scale_infos("f", getattr(qrec, "i_2_a_q"))
    contents.append(info)
    comments.append(comment)

    # info for scaling the activation out to out scale (only used for non Hard activations kernels)
    info, comment = scale_infos("f", getattr(qrec, "s_2_o_q"))
    contents.append(info)
    comments.append(comment)

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=np.hstack(tuple(contents)))

    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=" ".join(comments)))

def gru_infos(gen, node, qrec):
    i_qtype = internal_qtype(qrec)
    contents = []
    comments = []
    r_to_int_scale = qrec.cache['r_WR_2_int_q'].qbiases[0]
    r_to_int_scalen = qrec.cache['r_WR_2_int_q'].qnorms[0]
    r_to_in_scale = qrec.cache['i_2_r_WR_q'].qbiases[0]
    r_to_in_scalen = qrec.cache['i_2_r_WR_q'].qnorms[0]
    z_to_int_scale = qrec.cache['z_WR_2_int_q'].qbiases[0]
    z_to_int_scalen = qrec.cache['z_WR_2_int_q'].qnorms[0]
    z_to_in_scale = qrec.cache['i_2_z_WR_q'].qbiases[0]
    z_to_in_scalen = qrec.cache['i_2_z_WR_q'].qnorms[0]
    ht_to_in_scale = qrec.cache['i_2_h_WR_q'].qbiases[0]
    ht_to_in_scalen = qrec.cache['i_2_h_WR_q'].qnorms[0]
    h_to_int_scale = qrec.cache['h_WR_2_int_q'].qbiases[0]
    h_to_int_scalen = qrec.cache['h_WR_2_int_q'].qnorms[0]

    # GRU_R_INFOS
    comments.append(str.format("r_to_int_scale: {} r_to_int_scalen: {} r_to_in_scale: {} r_to_in_scalen: {}",
                               r_to_int_scale, r_to_int_scalen, r_to_in_scale, r_to_in_scalen,))
    contents.append(np.array([r_to_int_scale, r_to_int_scalen,
                              r_to_in_scale, r_to_in_scalen], dtype=np.int8))
    # GRU_Z_INFOS
    comments.append(str.format("z_to_int_scale: {} z_to_int_scalen: {} z_to_in_scale: {} z_to_in_scalen: {}",
                               z_to_int_scale, z_to_int_scalen, z_to_in_scale, z_to_in_scalen,))
    contents.append(np.array([z_to_int_scale, z_to_int_scalen,
                              z_to_in_scale, z_to_in_scalen], dtype=np.int8))
    # GRU_HT_INFOS
    comments.append(str.format("ht_to_in_scale: {} ht_to_in_scalen: {}",
                               ht_to_in_scale, ht_to_in_scalen,))
    contents.append(np.array([ht_to_in_scale, ht_to_in_scalen], dtype=np.int8))
    # GRU_H_INFOS
    comments.append(str.format("h_to_int_scale: {} h_to_int_scalen: {}",
                               h_to_int_scale, h_to_int_scalen,))
    contents.append(np.array([h_to_int_scale, h_to_int_scalen], dtype=np.int8))

    three = i_qtype.quantize(np.array([3]))[0]
    six = i_qtype.quantize(np.array([6]))[0]
    sixth = i_qtype.quantize(np.array([1/6]))[0]
    comments.append(str.format("int_q: {} A0: {} B0: {} C0: {}",
                               i_qtype.q, six, three, sixth))
    contents.append(np.array([lowb(six), highb(six),
                              lowb(three), highb(three),
                              lowb(sixth), highb(sixth),
                              i_qtype.q], dtype=np.int8))

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=np.hstack(tuple(contents)))

    gen.globals.append(GlobalArgInfo("int8", cname,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=const_info,
                                     comment=" ".join(comments)))

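# Editor's note: lowb/highb are assumed to split a 16-bit quantized
# constant into the low and high bytes placed in consecutive int8 info
# slots (value == highb << 8 | lowb on unsigned views). A plausible
# sketch, not the verified helpers (hence the distinct names):
def lowb_sketch(x):
    return np.int16(x).astype(np.int8)  # low byte, two's-complement wrap

def highb_sketch(x):
    return (np.int16(x) >> 8).astype(np.int8)  # high byte (arithmetic shift)
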
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    if isinstance(pnode, FcParameters):
        gen_scales(gen, pnode, pnode, qrec)
        infos, infos_comment = np.array([0, 0, 0, 0, 0]), "no activation"
        fnode = pnode
        filt_q = qrec
    elif isinstance(pnode, LinearFusionParameters) and isinstance(
            fnode, FcParameters) and pnode.fusion_type == "linear_active":
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, fnode)]
                  for fnode in cnodes]
        filt_q = quants[0]
        gen_scales(gen, pnode, cnodes[0], quants[0])
        infos, infos_comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False

    infos = np.append(infos, [0, 0, 0, 0])
    comment = str.format("BiasQ: {} ", 0) + infos_comment
    infos[5] = 0  # BiasQ

    if filt_q.cache.get('ne16'):
        conv_mul_bias = filt_q.cache.get('mul_biases_q')
        prenorm = conv_mul_bias.pre_normalization if isinstance(
            conv_mul_bias, MultMulBiasScaleQType) else 0
        pad_value = np.array(filt_q.in_qs[0].zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24
        infos = np.append(
            infos,
            verify_scalar([prenorm if prenorm else 0, pad_value1, pad_value2,
                           w_offset1, w_offset2, w_offset3, w_offset4]))

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=infos)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True

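# Editor's note: the NE16 branch above spreads a 16-bit pad value and a
# 32-bit weight offset across consecutive little-endian int8 info slots.
# An illustrative reassembly of the weight offset (hypothetical helper,
# mirroring the masks and shifts above):
def reassemble_w_offset(infos, base):
    """Rebuild the int32 weight offset from 4 little-endian info bytes."""
    raw = np.array(infos[base:base + 4], dtype=np.int8).astype(np.uint8)
    return int(np.frombuffer(raw.tobytes(), dtype='<i4')[0])
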
def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    if not pnode.generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
                (tc for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2', 'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qrec.out_qs[0].ctype, cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
        value = pnode.value_as(qrec.out_qs[0])
        if pnode.concated_nodes:
            values = [value]
            concated_qrecs = [gen.G.quantization.get(NodeId(pn, None))
                              for pn in pnode.concated_nodes]
            for other_node, concated_qrec in zip(pnode.concated_nodes, concated_qrecs):
                values += [other_node.value_as(concated_qrec.out_qs[0])]
            value = np.hstack(tuple(values))
        const_info = ConstantInfo(file_name, qrec.out_qs[0], contents=value)
        gen.globals.append(
            GlobalArgInfo(qrec.out_qs[0].ctype, cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True

def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    if isinstance(pnode, MatMulOpParameters):
        mul_node = pnode
        mul_qrec = qrec
        fnode = pnode
        infos, comment = np.array([0, 0, 0, 0, 0]), "no activation"
    elif isinstance(pnode, MatMulOpFusionParameters) and isinstance(fnode, MatMulOpParameters):
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, fnode)] for fnode in cnodes]
        mul_node = cnodes[0]
        mul_qrec = quants[0]
        infos, comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False

    if len(mul_qrec.in_qs[1].scale) > 1:
        gen_scales(gen, pnode, mul_node, mul_qrec)
        pl_scale = 0
        pl_scalen = 0
    else:
        pl_scale = mul_qrec.cache['mul_biases_q'].qbiases[0]
        pl_scalen = mul_qrec.cache['mul_biases_q'].qnorms[0]

    infos = np.append(infos, [0, 0, pl_scale, pl_scalen])

    if mul_qrec.cache.get('ne16'):
        conv_mul_bias = mul_qrec.cache.get('mul_biases_q')
        prenorm = conv_mul_bias.pre_normalization if isinstance(
            conv_mul_bias, MultMulBiasScaleQType) else 0
        pad_value = np.array(mul_qrec.in_qs[0].zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(mul_qrec.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24
        infos = np.append(
            infos,
            verify_scalar([prenorm if prenorm else 0, pad_value1, pad_value2,
                           w_offset1, w_offset2, w_offset3, w_offset4]))

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=infos)
    gen.globals.append(GlobalArgInfo("int8", cname,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=const_info,
                                     comment=comment))
    return True

def rnn_infos(gen, node, qrec):
    i_state_q = qrec.in_qs[node.INPUT_NAMES.index('i_state')]
    contents, comment = htanh_infos("f", qrec.s_2_s_q, i_state_q)
    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))

def lstm_infos(gen, node, qrec):
    i_qtype = internal_qtype(qrec)
    contents = []
    comments = []
    for k in LSTM_INFOS_ORDER:
        info, comment = scale_infos(k, qrec.cache["r_2_%s_q" % k])
        contents.append(info)
        comments.append(comment)

    cin_scale = qrec.cache['cell_in_q'].qbiases[0]
    cin_scalen = qrec.cache['cell_in_q'].qnorms[0]
    cout_scale = qrec.cache['cell_out_q'].qbiases[0]
    cout_scalen = qrec.cache['cell_out_q'].qnorms[0]
    out_scale = qrec.cache['state_out_q'].qbiases[0]
    out_scalen = qrec.cache['state_out_q'].qnorms[0]
    comments.append(str.format("cin_scale: {} cin_scale_n: {} cout_scale: {} cout_scale_n: {}",
                               cin_scale, cin_scalen, cout_scale, cout_scalen,))
    comments.append(str.format("out_scale: {} out_scale_n: {}",
                               out_scale, out_scalen))
    contents.append(np.array([cin_scale, cin_scalen, cout_scale,
                              cout_scalen, out_scale, out_scalen], dtype=np.int8))

    three = i_qtype.quantize(np.array([3]))[0]
    six = i_qtype.quantize(np.array([6]))[0]
    sixth = i_qtype.quantize(np.array([1/6]))[0]
    comments.append(str.format("int_q: {} A0: {} B0: {} C0: {}",
                               i_qtype.q, six, three, sixth))
    contents.append(np.array([lowb(six), highb(six),
                              lowb(three), highb(three),
                              lowb(sixth), highb(sixth),
                              i_qtype.q], dtype=np.int8))

    for k in LSTM_INFOS_ORDER:
        info, comment = scale_infos(k, qrec.cache["i_2_%s_q" % k])
        contents.append(info)
        comments.append(comment)

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=np.hstack(tuple(contents)))
    gen.globals.append(GlobalArgInfo("int8", cname,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=const_info,
                                     comment=" ".join(comments)))

def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node.name,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    # pnode (not the deleted node argument) carries the constant's value
    const_info = ConstantInfo(file_name, qrec.out_qs[0],
                              contents=qrec.out_qs[0].quantize(pnode.value))
    gen.globals.append(
        GlobalArgInfo(qrec.out_qs[0].ctype, cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))
    return True

def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters, GlobalSumPoolParameters)):
        compute_in_out_scale(qrec)
        infos, comment = np.array([qrec.cache['scale_mul_biases_q'].qbiases[0],
                                   qrec.cache['scale_mul_biases_q'].qnorms[0],
                                   0, 0, 0]), "no activation"
        fnode = pnode
        pool_q = qrec
    elif isinstance(pnode, ActivationFusion) and isinstance(
            fnode, (GlobalPoolingParameters, PoolingParameters)):
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, fnode)] for fnode in cnodes]
        pool_q = quants[0]
        infos, comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False

    infos = np.append(infos, [0, 0, 0, 0])
    if isinstance(fnode, GlobalSumPoolParameters):
        compute_in_out_scale(pool_q, in_idx=0, out_idx=0)
        infos[0] = 0
        infos[1] = 0
        infos[5] = pool_q.cache['scale_mul_biases_q'].qbiases[0]
        infos[6] = pool_q.cache['scale_mul_biases_q'].qnorms[0]

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=infos)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True

def filter_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del fnode
    if isinstance(node, MultiplicativeBiasParameters) and node.has_mul_bias:
        mul_biases_q = qrec.mul_biases_q
        cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                      pnode, MULSCALE)
        file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
        gen.name_cache.set(node, MULSCALE, cname)
        contents = mul_biases_q.quantize(node.mul_biases).astype(
            mul_biases_q.dtype, order='C', casting='no', copy=True)
        const_info = ConstantInfo(file_name, mul_biases_q, contents=contents)
        gen.globals.append(GlobalArgInfo(mul_biases_q.ctype, cname,
                                         gen.opts['default_global_home_location'],
                                         gen.opts['default_global_exec_location'],
                                         const_info=const_info))
    return True

def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    names = {val: idx for idx, val in enumerate(LSTMParameters.INPUT_NAMES)}
    scales = []
    weight_zero = None
    for gate in ['i', 'c', 'f', 'o']:
        for input_tensor in ['i', 'r']:
            scale_name = f'{input_tensor}_2_{gate}_q'
            weight_name = f'{input_tensor}_2_{gate}_w'
            if weight_zero is None:
                weight_zero = qrec.in_qs[names[weight_name]].zero_point[0]
            else:
                assert weight_zero == qrec.in_qs[names[weight_name]].zero_point[0]
            w_q = qrec.in_qs[names['r_2_i_w']]
            qscale = qrec.cache[scale_name]
            scales.append(qscale.qbiases)
            scales.append(qscale.qnorms)

    contents = interleave(*scales)
    cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=False), contents=contents)
    gen.globals.append(
        GlobalArgInfo("uint8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=f"{node.name} scales and norms"))
    if node.rnn_states_as_inputs:
        gen.globals.append(
            GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2', 'AT_MEM_UNDEF'))

    out_q = qrec.out_qs[0]
    out_scale = qrec.cache["state_out_q"].qbiases[0]
    out_scalen = qrec.cache["state_out_q"].qnorms[0]
    cin_scale = qrec.cache["cell_in_q"].qbiases[0]
    cin_scalen = qrec.cache["cell_in_q"].qnorms[0]
    cout_scale = qrec.cache["cell_out_q"].qbiases[0]
    cout_scalen = qrec.cache["cell_out_q"].qnorms[0]
    out_zeropoint = out_q.zero_point[0]

    # define LSTM_NE16_W_ZEROPOINT   0
    # define LSTM_NE16_GATE_PRENORM  1
    # define LSTM_NE16_CIN_SCALE     (0 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_CIN_SCALEN    (1 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_COUT_SCALE    (2 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_COUT_SCALEN   (3 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_OUT_SCALE     (4 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_OUT_SCALEN    (5 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_OUT_ZEROPOINT (6 + LSTM_NE16_OUT_OFF)
    # define LSTM_NE16_INT_A0        (0 + LSTM_NE16_INT_OFF)
    # define LSTM_NE16_INT_B0        (1 + LSTM_NE16_INT_OFF)
    # define LSTM_NE16_INT_C0        (2 + LSTM_NE16_INT_OFF)

    sigmoid_table = interleave(SIGMOID_TABLE & 0xff,
                               SIGMOID_TABLE >> 8).astype(np.int8)
    if out_q.dtype == np.uint8:
        # Maybe get rid of this
        if qrec.cache.get('act_qtype'):
            min_val = qrec.cache['act_qtype'].quantize(-1)
            max_val = qrec.cache['act_qtype'].quantize(1)
        else:
            min_val = max_val = 0
        contents = np.concatenate(
            (sigmoid_table,
             np.array([-weight_zero.astype(np.int8),
                       qrec.cache['gate_prenorm'],
                       cin_scale.astype(np.int8),
                       cin_scalen.astype(np.int8),
                       cout_scale.astype(np.int8),
                       cout_scalen.astype(np.int8),
                       out_scale.astype(np.int8),
                       out_scalen.astype(np.int8),
                       out_zeropoint.astype(np.int8),
                       0, 0, 0, 0], dtype=np.int8)))
    else:
        contents = np.concatenate(
            (sigmoid_table,
             np.array([-weight_zero.astype(np.int8),
                       qrec.cache['gate_prenorm'],
                       cin_scale.astype(np.int8),
                       cin_scalen.astype(np.int8),
                       cout_scale.astype(np.int8),
                       cout_scalen.astype(np.int8),
                       out_scale.astype(np.int8),
                       out_scalen.astype(np.int8),
                       out_zeropoint.astype(np.uint16) & 0xff,
                       out_zeropoint.astype(np.uint16) >> 8,
                       ], dtype=np.int8)))
    comment = (
        f"WZP: {weight_zero}, Out: {out_scale}/{out_scalen}, Cin: {cin_scale}/{cin_scalen}, "
        f"Cout: {cout_scale}/{cout_scalen}, OZP: {out_zeropoint}")
    cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True

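# Editor's note: interleave() is assumed to merge equal-length arrays
# element-wise (a0[0], a1[0], ..., a0[1], a1[1], ...); above it pairs
# per-gate qbiases/qnorms and emits the 16-bit SIGMOID_TABLE as
# alternating low/high bytes. A minimal equivalent consistent with that
# usage (sketch only, hence the distinct name):
def interleave_sketch(*arrays):
    return np.dstack([np.asarray(a) for a in arrays]).flatten()

# e.g. interleave_sketch(SIGMOID_TABLE & 0xff, SIGMOID_TABLE >> 8)
# yields lo0, hi0, lo1, hi1, ... ready for an int8 infos buffer.
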
def gen_filter_globals(gen, pnode, fnode, fqrec):
    cname, file_name = gen_constant(gen, pnode, fnode, WEIGHTS)
    weights_q = fqrec.weights_q
    const_info = ConstantInfo(file_name, weights_q,
                              contents=fqrec.gen_weights(fnode, fnode.weights))
    gen.globals.append(
        GlobalArgInfo(weights_q.ctype, cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    # biases are always generated even if they are 0
    if fnode.has_bias:
        biases_q = fqrec.biases_q
        biases = fnode.biases
    else:
        biases_q = fqrec.out_qs[0]
        biases = np.zeros((fnode.out_dims[0].c))
    contents = fqrec.gen_biases(fnode, biases, fnode.weights)
    cname, file_name = gen_constant(gen, pnode, fnode, BIASES)
    const_info = ConstantInfo(file_name, biases_q, contents=contents)
    gen.globals.append(
        GlobalArgInfo(biases_q.ctype, cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    cname_mul_scale, file_name_mul_scale = gen_constant(
        gen, pnode, fnode, MULSCALE)
    cname_mul_shift, file_name_mul_shift = gen_constant(
        gen, pnode, fnode, MULSHIFT)
    mul_biases_q = fqrec.mul_biases_q
    const_info_mul_scale = ConstantInfo(
        file_name_mul_scale, mul_biases_q, contents=fqrec.gen_mul_biases(fnode))
    const_info_mul_shift = ConstantInfo(
        file_name_mul_shift, mul_biases_q.shift_qtype, contents=fqrec.mul_biases_q.qnorms)
    gen.globals.append(
        GlobalArgInfo(mul_biases_q.ctype, cname_mul_scale,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_scale))
    gen.globals.append(
        GlobalArgInfo(mul_biases_q.shift_ctype, cname_mul_shift,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_shift))

def mult8_infos_generator(gen, node, qrec, pnode, fnode) -> bool:
    if fnode is not None:
        return False
    # if isinstance(pnode, Conv2DParameters):
    #     for_ne16 = qrec.cache.get('ne16')
    #     in_zero_point = qrec.in_qs[0].zero_point
    #     conv_mul_bias = qrec.cache.get('mul_biases_q')
    #     prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #     act_infos(gen, pnode, pnode, None, None, prenorm=prenorm, extra1=0,
    #               for_ne16=for_ne16, in_zero_point=in_zero_point)
    # elif isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
    #     compute_in_out_scale(qrec)
    #     act_infos(gen, pnode, pnode, None, qrec)
    if isinstance(pnode, ActivationParameters):
        act_infos(gen, pnode, pnode, pnode, gen.G.quantization[NodeId(pnode)])
    # elif isinstance(pnode, ConvFusionParameters):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for_ne16 = any([qrec.cache.get('ne16') for qrec in quants])
    #     in_zero_point = quants[0].in_qs[0].zero_point
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if node.fusion_type.startswith('linear') or node.fusion_type.startswith('conv') or node.fusion_type.startswith('pool'):
    #         if node.fusion_type in ("pool_active"):
    #             act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1],
    #                       extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #         else:
    #             conv_mul_bias = quants[0].cache.get('mul_biases_q')
    #             prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #             if node.fusion_type in ("conv_active_pool", "conv_active", "linear_active"):
    #                 act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool_active":
    #                 act_infos(gen, pnode, cnodes[0], cnodes[2], quants[2], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool":
    #                 act_infos(gen, pnode, cnodes[0], None, None, prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16)
    elif isinstance(pnode, MatrixMulParameters):
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        act_infos(gen, pnode, pnode, None, None,
                  extra1=qrec.cache['scale_mul_biases_q'].qbiases[0],
                  extra2=qrec.cache['scale_mul_biases_q'].qnorms[0])
    elif isinstance(pnode, SoftMaxParameters):
        act_infos(gen, pnode, pnode, pnode, qrec)
    # elif isinstance(pnode, ActivationFusionBase):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if isinstance(cnodes[0], (GlobalPoolingParameters, PoolingParameters)):
    #         act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1])
    #     else:
    #         return False
    #     return True
    elif isinstance(pnode, (MatMulOpParameters, MatMulOpFusionParameters)):
        if isinstance(pnode, MatMulOpFusionParameters):
            cnodes = node.contained_nodes()
            quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
            mul_node = cnodes[0]
            mul_qrec = quants[0]
            act_node = cnodes[1]
            act_qrec = quants[1]
        else:
            mul_node = pnode
            mul_qrec = qrec
            act_node = None
            act_qrec = None

        if len(pnode.in_dims) == 3 and len(mul_qrec.in_qs[0].scale) > 1:
            gen_scales(gen, pnode, mul_node, mul_qrec)
            extra3 = 0
            extra4 = 0
        else:
            extra3 = mul_qrec.cache['mul_biases_q'].qbiases[0]
            extra4 = mul_qrec.cache['mul_biases_q'].qnorms[0]

        act_infos(gen, pnode, mul_node, act_node, act_qrec,
                  extra3=extra3, extra4=extra4)
    elif isinstance(pnode, QuantizeParameters):
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            bits = 8 if in_q.dtype == np.int8 else 16
            if in_q.signed:
                contents = (
                    (int(math.pow(2, bits)) + in_q.zero_point[0] -
                     out_q.zero_point[0]) % int(math.pow(2, bits))).astype(np.uint8)
            else:
                contents = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                            out_q.zero_point[0]).astype(np.uint8)
        # if in_q.dtype == np.int8 and out_q.dtype == np.uint8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         (256 + in_q.zero_point[0] - out_q.zero_point[0]) % 256).astype(np.uint8)
        # elif in_q.dtype == np.uint8 and out_q.dtype == np.int8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         256 - in_q.zero_point[0] + out_q.zero_point[0]).astype(np.uint8)
        elif in_q.dtype == np.int8 and out_q.dtype == np.int16:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        elif in_q.dtype == np.int16 and out_q.dtype == np.int8:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name, QType.Pow2(
            bits=8, q=0, signed=True), contents=contents)
        gen.globals.append(
            GlobalArgInfo("int8", cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
    else:
        return False
    return True

def act_infos(gen, pnode, fnode, act_params, act_q,
              extra1=0, extra2=0, extra3=0, extra4=0, extra5=None, extra6=None,
              prenorm=0, extra_name='', for_ne16=False, in_zero_point=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}",
            extra1, extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0], dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0], dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0], dtype=np.int8)
                comment += str.format("in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                                      act_q.in_qs[0].scale[0],
                                      act_q.out_qs[0].scale[0],
                                      fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0], dtype=np.int8)
                comment += str.format("in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                                      act_q.in_qs[0].scale[0],
                                      act_q.out_qs[0].scale[0],
                                      fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0], dtype=np.int8)
                comment += str.format("in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                                      act_q.in_qs[0].scale[0],
                                      act_q.out_qs[0].scale[0],
                                      fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([act_q.cache['scale_mul_biases_q'].qbiases[0],
                             act_q.cache['scale_mul_biases_q'].qnorms[0],
                             upper_bound, fac_1, 1], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.cache['scale_mul_biases_q'].qbiases[0],
                              act_q.cache['scale_mul_biases_q'].qnorms[0],
                              upper_bound[0], fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([act_q.cache['scale_mul_biases_q'].qbiases[0],
                             act_q.cache['scale_mul_biases_q'].qnorms[0],
                             upper_bound, fac_1, 1], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.cache['scale_mul_biases_q'].qbiases[0],
                              act_q.cache['scale_mul_biases_q'].qnorms[0],
                              upper_bound[0], fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([act_q.cache['scale_mul_biases_q'].qbiases[0],
                             act_q.cache['scale_mul_biases_q'].qnorms[0],
                             leak_factor_quant, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.cache['scale_mul_biases_q'].qbiases[0],
                              act_q.cache['scale_mul_biases_q'].qnorms[0],
                              leak_factor_quant)
    elif isinstance(act_params, (SigmoidActivationParameters, TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / act_q.in_qs[0].scale)
        contents = np.array([act_q.cache['scale_mul_biases_q'].qbiases[0],
                             act_q.cache['scale_mul_biases_q'].qnorms[0],
                             0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.cache['scale_mul_biases_q'].qbiases[0],
                              act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation type not implemented")

    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
        contents = np.array([act_q.cache['scale_mul_biases_q'].qbiases[0],
                             act_q.cache['scale_mul_biases_q'].qnorms[0],
                             0, 0, 0], dtype=np.int8)

    contents = np.append(contents, [extra1, extra2, extra3, extra4])
    if extra5 is not None:
        contents = np.append(contents, [extra5])
    if extra6 is not None:
        contents = np.append(contents, [extra6])

    if for_ne16:
        # append weights_offset and pad_val for ne16
        # TODO - default config maybe in future
        if isinstance(pnode, (ConvFusionParameters, LinearFusionParameters)):
            filt_q = gen.G.quantization[NodeId(pnode, fnode)]
        else:
            filt_q = gen.G.quantization[NodeId(pnode)]
        pad_value = np.array(in_zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24
        contents = np.append(
            contents,
            [[prenorm] if prenorm else [0], pad_value1, pad_value2,
             w_offset1, w_offset2, w_offset3, w_offset4])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS, extra_name)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))

def act_infos(gen, pnode, fnode, act_params, act_q,
              extra1=0, extra2=0, extra3=0, extra4=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}",
            extra1, extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0, extra1, extra2, extra3, extra4],
                            dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        actscale = act_q.scale_mul_biases_q.qbiases[0]
        actscalen = act_q.scale_mul_biases_q.qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            contents = np.array(
                [actscale, actscalen, 0, 0, 0, extra1, extra2, extra3, extra4],
                dtype=np.int8)
            if len(comment) == 0:
                comment = "all 0"
        else:
            fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
            contents = np.array(
                [actscale, actscalen, fac_1, 0, 0, extra1, extra2, extra3, extra4],
                dtype=np.int8)
            comment += str.format("in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                                  act_q.in_qs[0].scale[0],
                                  act_q.out_qs[0].scale[0],
                                  fac_1[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([act_q.scale_mul_biases_q.qbiases[0],
                             act_q.scale_mul_biases_q.qnorms[0],
                             upper_bound, fac_1, 1,
                             extra1, extra2, extra3, extra4], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.scale_mul_biases_q.qbiases[0],
                              act_q.scale_mul_biases_q.qnorms[0],
                              upper_bound[0], fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([act_q.scale_mul_biases_q.qbiases[0],
                             act_q.scale_mul_biases_q.qnorms[0],
                             upper_bound, fac_1, 1,
                             extra1, extra2, extra3, extra4], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.scale_mul_biases_q.qbiases[0],
                              act_q.scale_mul_biases_q.qnorms[0],
                              upper_bound[0], fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0, extra1, extra2, extra3, extra4],
                            dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        act_q.set_scale()
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([act_q.scale_mul_biases_q.qbiases[0],
                             act_q.scale_mul_biases_q.qnorms[0],
                             leak_factor_quant, 0, 0,
                             extra1, extra2, extra3, extra4], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              act_q.scale_mul_biases_q.qbiases[0],
                              act_q.scale_mul_biases_q.qnorms[0],
                              leak_factor_quant)
    else:
        raise NotImplementedError("activation type not implemented")

    if isinstance(pnode, (GlobalPoolParameters, PoolingParameters)):
        contents = np.array([act_q.scale_mul_biases_q.qbiases[0],
                             act_q.scale_mul_biases_q.qnorms[0],
                             0, 0, 0, extra1, extra2, extra3, extra4], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name, QType(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))

def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    if not cls.cache_values(node, qrec):
        return False
    in_q = qrec.in_qs[0]
    out_q = qrec.out_qs[0]
    comment = f'in q: {in_q} out_q: {out_q}'
    if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
        bits = 8 if in_q.dtype in [np.int8, np.uint8] else 16
        if in_q.signed:
            offset = ((int(math.pow(2, bits)) + in_q.zero_point[0] -
                       out_q.zero_point[0]) % int(math.pow(2, bits))).astype(out_q.dtype)
        else:
            offset = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                      out_q.zero_point[0]).astype(out_q.dtype)
        contents = np.array(list(offset.tobytes()) + ([0] * 7), dtype=np.uint8)
    elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
        # no infos needed
        return True
    elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_SCALE':
        scale = in_q.scale / out_q.scale
        in_abs_zp = in_q.zero_point.astype(np.int32)
        out_abs_zp = out_q.zero_point.astype(np.int32)
        if out_q.bits > in_q.bits:
            zero_adjust = (np.round(-in_abs_zp * scale) + out_abs_zp).astype(np.int32)
        else:
            zero_adjust = (-in_abs_zp + np.round(out_abs_zp * 1 / scale)).astype(np.int32)
        zero_adjust = list(zero_adjust.tobytes())
        if len(scale) > 1:
            raise NotImplementedError('multiscale conversion not supported')
        scale = scale[0]
        if in_q.dtype_bits == 8 and out_q.dtype_bits == 16:
            # scale Q16 * Q8 OK
            scale_adjust = MultMulBiasScaleQType(scale=scale, dtype=np.int16,
                                                 available_bits=16)
        else:
            scale_adjust = MultMulBiasScaleQType(scale=scale, dtype=np.int8,
                                                 available_bits=8)
        qbias = list(scale_adjust.qbiases.tobytes())
        qbias = qbias + [0] * (2 - len(qbias))
        qnorm = list(scale_adjust.qnorms.tobytes())
        contents = np.array(zero_adjust + qbias + qnorm + [0], dtype=np.int8)
    elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FL_FP':
        qbias = list((1 / out_q.scale).astype(np.float32).tobytes())
        zero_adjust = list((out_q.zero_point.astype(np.int32) *
                            out_q.scale).astype(np.float32).tobytes())
        contents = np.array(zero_adjust + qbias, dtype=np.int8)
    elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FL':
        qbias = list((in_q.scale).astype(np.float32).tobytes())
        zero_adjust = list((-in_q.zero_point.astype(np.int32)).astype(np.float32).tobytes())
        contents = np.array(zero_adjust + qbias, dtype=np.int8)
    else:
        raise ValueError(f"strange dtype change in {pnode.name}")

    cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True

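# Editor's worked example for the KOP_CONVERT_FP_FP_ZEROPOINT offset above
# (values assumed, not taken from the source): converting int8 with zero
# point -128 to uint8 with zero point 0 at equal scale gives
# offset = (256 + (-128) - 0) % 256 = 128, and the kernel then computes
# out = in + offset modulo 256.
def _demo_zeropoint_convert():
    offset = (256 + (-128) - 0) % 256  # signed-input branch of the formula above
    in_vals = np.array([-128, 0, 127], dtype=np.int8)
    return (in_vals.astype(np.int16) + offset).astype(np.uint8)  # [0, 128, 255]
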
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    names = {val: idx for idx, val in enumerate(GRUParameters.INPUT_NAMES)}
    scales = []
    weight_zero = None
    for gate in ['r', 'h', 'z']:
        input_order = ['r', 'w'] if gate == 'h' else ['w', 'r']
        for input_tensor in input_order:
            scale_name = f'{input_tensor}_2_{gate}_q'
            weight_name = f'{input_tensor}_2_{gate}_w'
            if weight_zero is None:
                weight_zero = qrec.in_qs[names[weight_name]].zero_point[0]
            else:
                assert weight_zero == qrec.in_qs[names[weight_name]].zero_point[0]
            qscale = qrec.cache[scale_name]
            scales.append(qscale.qbiases)
            scales.append(qscale.qnorms)

    contents = interleave(*scales)
    cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=False), contents=contents)
    gen.globals.append(
        GlobalArgInfo("uint8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=f"{node.name} scales and norms"))
    if node.rnn_states_as_inputs:
        gen.globals.append(
            GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2', 'AT_MEM_UNDEF'))

    out_q = qrec.out_qs[0]
    sigmoid_table = interleave(SIGMOID_TABLE & 0xff,
                               SIGMOID_TABLE >> 8).astype(np.int8)
    if out_q.dtype == np.uint8:
        contents = np.concatenate(
            (sigmoid_table,
             np.array([-weight_zero.astype(np.int8), 0], dtype=np.int8)))
    else:
        contents = np.concatenate(
            (sigmoid_table,
             np.array([-weight_zero.astype(np.int8),
                       qrec.cache['gate_prenorm']], dtype=np.int8)))
    comment = f"WZP: {weight_zero}"
    cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True

def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    qtype = qrec.out_qs[0]
    if qtype.attr.dont_generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
                (tc for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2', 'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qtype.ctype, cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
        value = pnode.value_as(qtype, generation=True)
        if qtype.attr.concatenated_nodes:
            values = [value]
            concatenated_nodes = [gen.G[node_name]
                                  for node_name in qtype.attr.concatenated_nodes]
            concated_qrecs = [gen.G.quantization.get(NodeId(pn, None))
                              for pn in concatenated_nodes]
            for other_node, concated_qrec in zip(concatenated_nodes, concated_qrecs):
                values += [other_node.value_as(concated_qrec.out_qs[0], generation=True)]
            value = np.hstack(tuple(values))
        elif qtype.attr.interleaved_values:
            value = interleave(value, *qtype.attr.interleaved_values)
        if qtype.attr.resize:
            padding = tuple((0, new - orig)
                            for orig, new in zip(*qtype.attr.resize))
            value = np.pad(value, padding)
        if qtype.attr.bit_pack and qtype.attr.bit_pack != 8:
            # pack value into qtype.attr.bit_pack bit items. Requires uint8 input
            assert value.dtype == np.uint8, "bit pack only works on uint8 datatypes"
            value = packbits(value, qtype.attr.bit_pack)
        if qtype.attr.ne16_biases:
            to_node = gen.G.out_edges(pnode.name)[0].to_node
            if isinstance(to_node, (ConvFusionParameters, LinearFusionParameters)):
                cnodes = to_node.contained_nodes()
                quants = [gen.G.quantization[NodeId(to_node, fnode)]
                          for fnode in cnodes]
                filter_qrec = quants[0]
            else:
                filter_qrec = gen.G.quantization[NodeId(to_node)]
            mul_qbiases = filter_qrec.cache['mul_biases_q'].qbiases
            mul_qnorms = filter_qrec.cache['mul_biases_q'].qnorms
            value = np.where(mul_qnorms > 0,
                             value * mul_qbiases + (1 << (mul_qnorms - 1).astype(np.int32)),
                             value * mul_qbiases)
        const_info = ConstantInfo(file_name, qtype, contents=value,
                                  numeric_format="fixed")
        gen.globals.append(
            GlobalArgInfo(qtype.ctype, cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True

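# Editor's note: packbits() is assumed to pack uint8 items of
# qtype.attr.bit_pack bits each into a dense byte stream for NE16. The
# sketch below handles the even cases (bit widths dividing 8, length a
# multiple of items-per-byte, little-endian within each byte); the real
# helper's bit order may differ, hence the distinct name.
def packbits_sketch(values, bits):
    assert values.dtype == np.uint8 and 8 % bits == 0
    per_byte = 8 // bits
    grouped = values.reshape(-1, per_byte)  # length assumed divisible
    shifts = (np.arange(per_byte) * bits).astype(np.uint8)
    return (grouped << shifts).sum(axis=1).astype(np.uint8)
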
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
    w_q = qrec.in_qs[names['r_2_i_w']]
    out_q = qrec.out_qs[0]
    out_scale = qrec.cache["s_2_o_q"]
    assert len(w_q.zero_point) == 1
    assert len(out_scale.qbiases) == 1
    assert len(out_scale.qnorms) == 1
    if out_q.dtype == np.uint8:
        if qrec.cache['act_qtype']:
            min_val = qrec.cache['act_qtype'].quantize(-1)
            max_val = qrec.cache['act_qtype'].quantize(1)
        else:
            min_val = max_val = 0
        contents = np.array([min_val, max_val,
                             (-w_q.zero_point[0]).astype(np.int8),
                             out_q.zero_point[0],
                             0,
                             out_scale.qbiases[0].astype(np.int8),
                             out_scale.qnorms[0].astype(np.int8),
                             0, 0], dtype=np.int8)
    else:
        out_zp = out_q.zero_point[0].astype(np.uint16)
        contents = np.array([0, 0,
                             (-w_q.zero_point[0]).astype(np.int8),
                             out_zp & 0xff,
                             out_zp >> 8,
                             out_scale.qbiases[0].astype(np.int8),
                             out_scale.qnorms[0].astype(np.int8),
                             qrec.cache["i_2_s_q"].pre_normalization,
                             qrec.cache["s_2_s_q"].pre_normalization], dtype=np.int8)
    comment = (f"A0: {1} B0: {-1}, ZP: {w_q.zero_point}, "
               f"OutS: {out_scale.qbiases[0]}, OutN: {out_scale.qnorms[0]}")
    cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=True), contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))

    state_scale = qrec.cache["s_2_s_q"]
    if node.rnn_same_inout_scale:
        contents = interleave(state_scale.qbiases, state_scale.qnorms)
    else:
        input_scale = qrec.cache["i_2_s_q"]
        contents = interleave(state_scale.qbiases, input_scale.qbiases,
                              state_scale.qnorms, input_scale.qnorms)
    cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
    const_info = ConstantInfo(file_name, QType.Pow2(
        bits=8, q=0, signed=False), contents=contents)
    gen.globals.append(
        GlobalArgInfo("uint8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=f"{node.name} scales and norms"))
    if node.rnn_states_as_inputs:
        gen.globals.append(
            GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2', 'AT_MEM_UNDEF'))
    return True

def filter_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del fnode
    cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                  pnode, WEIGHTS)
    gen.name_cache.set(node, WEIGHTS, cname)
    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    weights_q = qrec.weights_q
    contents = weights_q.quantize(node.weights).astype(
        weights_q.dtype, order='C', casting='no', copy=True)
    const_info = ConstantInfo(file_name, qrec.weights_q, contents=contents)
    gen.globals.append(
        GlobalArgInfo(qrec.weights_q.ctype, cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    # biases are always generated even if they are 0
    if node.has_bias:
        biases_q = qrec.biases_q
        contents = biases_q.quantize(node.biases).astype(
            biases_q.dtype, order='C', casting='no', copy=True)
    else:
        biases_q = qrec.out_q
        contents = biases_q.quantize(np.zeros((node.out_dims[0].c))).astype(
            biases_q.dtype, order='C', casting='no', copy=True)
    cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                  pnode, BIASES)
    gen.name_cache.set(node, BIASES, cname)
    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    const_info = ConstantInfo(file_name, biases_q, contents=contents)
    gen.globals.append(
        GlobalArgInfo(biases_q.ctype, cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    if isinstance(node, MultiplicativeBiasParameters) and node.has_mul_bias:
        mul_biases_q = qrec.mul_biases_q
        cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                      pnode, MULSCALE)
        gen.name_cache.set(node, MULSCALE, cname)
        file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
        contents = mul_biases_q.quantize(node.mul_biases).astype(
            mul_biases_q.dtype, order='C', casting='no', copy=True)
        const_info = ConstantInfo(file_name, mul_biases_q, contents=contents)
        gen.globals.append(
            GlobalArgInfo(mul_biases_q.ctype, cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True

def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    if not pnode.generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
                (tc for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2', 'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qrec.out_qs[0].ctype, cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
        value = pnode.value_as(qrec.out_qs[0], generation=True)
        if pnode.concated_nodes:
            values = [value]
            concated_qrecs = [gen.G.quantization.get(NodeId(pn, None))
                              for pn in pnode.concated_nodes]
            for other_node, concated_qrec in zip(pnode.concated_nodes, concated_qrecs):
                values += [other_node.value_as(concated_qrec.out_qs[0], generation=True)]
            value = np.hstack(tuple(values))
        numeric_format = None  # no special format unless an NE16 layout applies below
        if qrec.out_qs[0].attr.ne16_order:
            to_node = gen.G.out_edges(pnode.name)[0].to_node
            if isinstance(to_node, FcParameters) or (
                    isinstance(to_node, ConvFusionParameters) and
                    to_node.fusion_type == "linear_active"):
                value = value
                # value = ne16_linear_weight_layout(
                #     value, w_bits=qrec.out_qs[0].bits)
            else:
                value = value.transpose((0, 3, 1, 2))
                # if isinstance(to_node, ConvFusionParameters):
                #     is_dw = to_node.contained_nodes()[0].is_depthwise_conv()
                # else:
                #     is_dw = to_node.is_depthwise_conv()
                # value = ne16_conv_weight_layout(
                #     value, w_bits=qrec.out_qs[0].bits)
            numeric_format = "fixed"
        else:
            if qrec.out_qs[0].attr.ne16_biases:
                to_node = gen.G.out_edges(pnode.name)[0].to_node
                if isinstance(to_node, ConvFusionParameters):
                    cnodes = to_node.contained_nodes()
                    quants = [gen.G.quantization[NodeId(to_node, fnode)]
                              for fnode in cnodes]
                    filter_qrec = quants[0]
                else:
                    filter_qrec = gen.G.quantization[NodeId(to_node)]
                mul_qbiases = filter_qrec.cache['mul_biases_q'].qbiases
                mul_qnorms = filter_qrec.cache['mul_biases_q'].qnorms
                value = value * mul_qbiases + \
                    (1 << (mul_qnorms - 1).astype(np.int32))
                numeric_format = "fixed"
        const_info = ConstantInfo(file_name, qrec.out_qs[0], contents=value,
                                  numeric_format=numeric_format)
        gen.globals.append(
            GlobalArgInfo(qrec.out_qs[0].ctype, cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True