Example #1
def gen_scales(gen, pnode, fnode, fqrec):
    cname_mul_scale, file_name_mul_scale = gen_constant(
        gen, pnode, fnode, MULSCALE)
    cname_mul_shift, file_name_mul_shift = gen_constant(
        gen, pnode, fnode, MULSHIFT)

    mul_biases_q = fqrec.cache['mul_biases_q']

    const_info_mul_scale = ConstantInfo(file_name_mul_scale,
                                        mul_biases_q,
                                        contents=gen_mul_biases(fqrec, fnode))
    const_info_mul_shift = ConstantInfo(file_name_mul_shift,
                                        mul_biases_q.shift_qtype,
                                        contents=mul_biases_q.qnorms)

    gen.globals.append(
        GlobalArgInfo(mul_biases_q.ctype,
                      cname_mul_scale,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_scale))

    gen.globals.append(
        GlobalArgInfo(mul_biases_q.shift_ctype,
                      cname_mul_shift,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_shift))
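# Illustrative sketch (not part of the generator above): the MULSCALE/MULSHIFT
# pair encodes each channel's float scale as qbias * 2**-qnorm, so a kernel can
# rescale an int32 accumulator with an integer multiply followed by an
# arithmetic shift right. The names below are hypothetical.
import numpy as np

def apply_scale_shift(acc, qbias, qnorm):
    # acc: int32 accumulator, qbias: int8 multiplier, qnorm: right-shift amount
    return (acc.astype(np.int64) * qbias) >> qnorm

acc = np.array([1000, -2000], dtype=np.int32)
print(apply_scale_shift(acc, qbias=77, qnorm=9))  # ~acc * 0.15 -> [150 -301]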
Example #2
def gen_ssd_globals(gen, node, qrec):
    qrec.set_scales(node)
    scores_q = qrec.in_qs[1]
    scores_scale, scores_norm = compute_mul_bias(scores_q.scale)

    cname_scales, file_name_scales = gen_constant(gen, node, node, SSD_SCALES)
    contents = np.array([qrec.scale_x_q.qbiases,
                         qrec.scale_x_anc_q.qbiases,
                         qrec.scale_y_q.qbiases,
                         qrec.scale_y_anc_q.qbiases,
                         qrec.scale_h_q.qbiases,
                         qrec.scale_w_q.qbiases,
                         qrec.scale_ao_q.qbiases,
                         scores_scale], dtype=np.int8)
    scale_info = ConstantInfo(file_name_scales, QType(bits=8, q=0, signed=True), contents=contents)

    cname_norms, file_name_norms = gen_constant(gen, node, node, SSD_NORMS)
    contents = np.array([qrec.scale_x_q.qnorms,
                         qrec.scale_x_anc_q.qnorms,
                         qrec.scale_y_q.qnorms,
                         qrec.scale_y_anc_q.qnorms,
                         qrec.scale_h_q.qnorms,
                         qrec.scale_w_q.qnorms,
                         qrec.scale_ao_q.qnorms,
                         scores_norm], dtype=np.int8)
    norms_info = ConstantInfo(file_name_norms, QType(bits=8, q=0, signed=True), contents=contents)

    score_threshold = scores_q.quantize(node.nms_score_threshold)
    cname_infos, file_name_infos = gen_constant(gen, node, node, INFOS)
    contents = np.array([round(node.nms_iou_threshold * 2**7),     # Q7
                         score_threshold,                          # Q0 [0:255]
                         node.max_detections,                      # Q0 [0:255]
                         node.max_classes_per_detection,           # Q0 [0:255]
                         node.max_bb_before_nms >> 8,
                         node.max_bb_before_nms], dtype=np.int8)   # max_bb = Infos[4]<<8 + Infos[5]
    ssd_infos = ConstantInfo(file_name_infos, QType(bits=8, q=0, signed=True), contents=contents)

    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.ctype, cname_scales,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=scale_info))

    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.shift_ctype, cname_norms,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=norms_info))

    gen.globals.append(GlobalArgInfo('uint8', cname_infos,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=ssd_infos))
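# Quick standalone check (illustrative only) of the packing conventions in the
# INFOS array above: nms_iou_threshold is stored in Q7 via round(x * 2**7), and
# max_bb_before_nms is split across two int8 slots so the kernel can rebuild it
# as Infos[4] << 8 | Infos[5].
max_bb = 300
hi, lo = (max_bb >> 8) & 0xFF, max_bb & 0xFF
assert (hi << 8) | lo == max_bb
assert round(0.5 * 2**7) == 64  # an IOU threshold of 0.5 encodes to 64 in Q7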
Example #3
def rnn_infos(gen, node, qrec):
    i_state_q = qrec.in_qs[node.INPUT_NAMES.index('i_state')]

    contents = []
    comments = []

    # info for activation (scale the act input to the proper scale)
    info, comment = INFOS_FUNCS[node.activation]("f", qrec.s_2_s_q, i_state_q)
    contents.append(info)
    comments.append(comment)

    # info for input scaling (only used with non SameInputStateScale kernels)
    info, comment = scale_infos("f", getattr(qrec, "i_2_a_q"))
    contents.append(info)
    comments.append(comment)

    # info for scaling the activation out to out scale (only used for non Hard activations kernels)
    info, comment = scale_infos("f", getattr(qrec, "s_2_o_q"))
    contents.append(info)
    comments.append(comment)

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name,
                              QType.Pow2(bits=8, q=0, signed=True),
                              contents=np.hstack(tuple(contents)))

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=" ".join(comments)))
def gru_infos(gen, node, qrec):
    i_qtype = internal_qtype(qrec)
    contents = []
    comments = []
    r_to_int_scale = qrec.cache['r_WR_2_int_q'].qbiases[0]
    r_to_int_scalen = qrec.cache['r_WR_2_int_q'].qnorms[0]
    r_to_in_scale = qrec.cache['i_2_r_WR_q'].qbiases[0]
    r_to_in_scalen = qrec.cache['i_2_r_WR_q'].qnorms[0]
    z_to_int_scale = qrec.cache['z_WR_2_int_q'].qbiases[0]
    z_to_int_scalen = qrec.cache['z_WR_2_int_q'].qnorms[0]
    z_to_in_scale = qrec.cache['i_2_z_WR_q'].qbiases[0]
    z_to_in_scalen = qrec.cache['i_2_z_WR_q'].qnorms[0]
    ht_to_in_scale = qrec.cache['i_2_h_WR_q'].qbiases[0]
    ht_to_in_scalen = qrec.cache['i_2_h_WR_q'].qnorms[0]
    h_to_int_scale = qrec.cache['h_WR_2_int_q'].qbiases[0]
    h_to_int_scalen = qrec.cache['h_WR_2_int_q'].qnorms[0]

    # GRU_R_INFOS
    comments.append(str.format("r_to_int_scale: {} r_to_int_scalen: {} r_to_in_scale: {} r_to_in_scalen: {}",
                               r_to_int_scale, r_to_int_scalen, r_to_in_scale, r_to_in_scalen,))
    contents.append(np.array(
        [r_to_int_scale, r_to_int_scalen, r_to_in_scale, r_to_in_scalen], dtype=np.int8))

    # GRU_Z_INFOS
    comments.append(str.format("z_to_int_scale: {} z_to_int_scalen: {} z_to_in_scale: {} z_to_in_scalen: {}",
                               z_to_int_scale, z_to_int_scalen, z_to_in_scale, z_to_in_scalen,))
    contents.append(np.array(
        [z_to_int_scale, z_to_int_scalen, z_to_in_scale, z_to_in_scalen], dtype=np.int8))

    # GRU_HT_INFOS
    comments.append(str.format("ht_to_in_scale: {} ht_to_in_scalen: {}",
                               ht_to_in_scale, ht_to_in_scalen,))
    contents.append(np.array([ht_to_in_scale, ht_to_in_scalen], dtype=np.int8))

    # GRU_H_INFOS
    comments.append(str.format("h_to_int_scale: {} h_to_int_scalen: {}",
                               h_to_int_scale, h_to_int_scalen,))
    contents.append(np.array([h_to_int_scale, h_to_int_scalen], dtype=np.int8))

    three = i_qtype.quantize(np.array([3]))[0]
    six = i_qtype.quantize(np.array([6]))[0]
    sixth = i_qtype.quantize(np.array([1/6]))[0]

    comments.append(str.format("int_q: {} A0: {} B0: {} C0: {}",
                               i_qtype.q, six, three, sixth))
    contents.append(np.array([lowb(six), highb(six),
                              lowb(three), highb(three),
                              lowb(sixth), highb(sixth), i_qtype.q],
                             dtype=np.int8))

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(bits=8, q=0, signed=True),
                              contents=np.hstack(tuple(contents)))

    gen.globals.append(GlobalArgInfo("int8", cname,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=const_info,
                                     comment=" ".join(comments)))
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        if isinstance(pnode, FcParameters):
            gen_scales(gen, pnode, pnode, qrec)
            infos, infos_comment = np.array([0, 0, 0, 0, 0]), "no activation"
            fnode = pnode
            filt_q = qrec
        elif isinstance(pnode, LinearFusionParameters) and isinstance(
                fnode, FcParameters) and pnode.fusion_type == "linear_active":
            cnodes = pnode.contained_nodes()
            quants = [
                gen.G.quantization[NodeId(pnode, fnode)] for fnode in cnodes
            ]
            filt_q = quants[0]
            gen_scales(gen, pnode, cnodes[0], quants[0])
            infos, infos_comment = gen_act_infos(cnodes[1], quants[1])
        else:
            return False
        infos = np.append(infos, [0, 0, 0, 0])
        comment = str.format("BiasQ: {} ", 0) + infos_comment
        infos[5] = 0  # BiasQ

        if filt_q.cache.get('ne16'):
            conv_mul_bias = filt_q.cache.get('mul_biases_q')
            prenorm = conv_mul_bias.pre_normalization if isinstance(
                conv_mul_bias, MultMulBiasScaleQType) else 0
            pad_value = np.array(filt_q.in_qs[0].zero_point).astype(np.int16)
            pad_value1 = np.bitwise_and(pad_value, 0xFF)
            pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
            w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
            w_offset1 = np.bitwise_and(w_offset, 0xFF)
            w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
            w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
            w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24

            infos = np.append(
                infos,
                verify_scalar([
                    prenorm if prenorm else 0, pad_value1, pad_value2,
                    w_offset1, w_offset2, w_offset3, w_offset4
                ]))

        cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=infos)
        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
        return True
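# Standalone check (illustrative, not generator code) of the NE16 byte
# splitting above: the 32-bit weight offset is spread little-endian over four
# int8 info slots, so the kernel rebuilds it as
# w_offset1 | w_offset2 << 8 | w_offset3 << 16 | w_offset4 << 24.
w_offset = -128 & 0xFFFFFFFF  # two's-complement bit pattern of the int32 offset
parts = [(w_offset >> s) & 0xFF for s in (0, 8, 16, 24)]
rebuilt = parts[0] | parts[1] << 8 | parts[2] << 16 | parts[3] << 24
assert rebuilt - (1 << 32) == -128  # reinterpret the 32-bit pattern as signed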
Example #6
def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    if not pnode.generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
            (tc
             for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qrec.out_qs[0].ctype,
                             cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'],
                                 cname + ".tensor")
        value = pnode.value_as(qrec.out_qs[0])
        if pnode.concated_nodes:
            values = [value]
            concated_qrecs = [
                gen.G.quantization.get(NodeId(pn, None))
                for pn in pnode.concated_nodes
            ]
            for other_node, concated_qrec in zip(pnode.concated_nodes,
                                                 concated_qrecs):
                values += [other_node.value_as(concated_qrec.out_qs[0])]
            value = np.hstack(tuple(values))
        const_info = ConstantInfo(file_name, qrec.out_qs[0], contents=value)
        gen.globals.append(
            GlobalArgInfo(qrec.out_qs[0].ctype,
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True
Example #7
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        if isinstance(pnode, MatMulOpParameters):
            mul_node = pnode
            mul_qrec = qrec
            fnode = pnode
            infos, comment = np.array([0, 0, 0, 0, 0]), "no activation"
        elif isinstance(pnode, MatMulOpFusionParameters) and isinstance(fnode, MatMulOpParameters):
            cnodes = pnode.contained_nodes()
            quants = [gen.G.quantization[NodeId(
                pnode, fnode)] for fnode in cnodes]
            mul_node = cnodes[0]
            mul_qrec = quants[0]
            infos, comment = gen_act_infos(cnodes[1], quants[1])
        else:
            return False

        if len(mul_qrec.in_qs[1].scale) > 1:
            gen_scales(gen, pnode, mul_node, mul_qrec)
            pl_scale = 0
            pl_scalen = 0
        else:
            pl_scale = mul_qrec.cache['mul_biases_q'].qbiases[0]
            pl_scalen = mul_qrec.cache['mul_biases_q'].qnorms[0]

        infos = np.append(infos, [0, 0, pl_scale, pl_scalen])

        if mul_qrec.cache.get('ne16'):
            conv_mul_bias = mul_qrec.cache.get('mul_biases_q')
            prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
            pad_value = np.array(mul_qrec.in_qs[0].zero_point).astype(np.int16)
            pad_value1 = np.bitwise_and(pad_value, 0xFF)
            pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
            w_offset = -np.array(mul_qrec.in_qs[1].zero_point).astype(np.int32)
            w_offset1 = np.bitwise_and(w_offset, 0xFF)
            w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
            w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
            w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24

            infos = np.append(
                infos, verify_scalar([prenorm if prenorm else 0, pad_value1, pad_value2, w_offset1, w_offset2, w_offset3, w_offset4]))

        cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
        const_info = ConstantInfo(file_name, QType.Pow2(bits=8, q=0, signed=True), contents=infos)
        gen.globals.append(GlobalArgInfo("int8", cname,
                           gen.opts['default_global_home_location'],
                           gen.opts['default_global_exec_location'],
                           const_info=const_info,
                           comment=comment))
        return True
Example #8
def rnn_infos(gen, node, qrec):
    i_state_q = qrec.in_qs[node.INPUT_NAMES.index('i_state')]

    contents, comment = htanh_infos("f", qrec.s_2_s_q, i_state_q)
    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name,
                              QType(bits=8, q=0, signed=True),
                              contents=contents)

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
def lstm_infos(gen, node, qrec):
    i_qtype = internal_qtype(qrec)
    contents = []
    comments = []
    for k, v in LSTM_INFOS_ORDER.items():
        info, comment = scale_infos(k, qrec.cache["r_2_%s_q" % k])
        contents.append(info)
        comments.append(comment)
    cin_scale = qrec.cache['cell_in_q'].qbiases[0]
    cin_scalen = qrec.cache['cell_in_q'].qnorms[0]
    cout_scale = qrec.cache['cell_out_q'].qbiases[0]
    cout_scalen = qrec.cache['cell_out_q'].qnorms[0]
    out_scale = qrec.cache['state_out_q'].qbiases[0]
    out_scalen = qrec.cache['state_out_q'].qnorms[0]
    comments.append(str.format("cin_scale: {} cin_scale_n: {} cout_scale: {} cout_scale_n: {}",
                               cin_scale, cin_scalen, cout_scale, cout_scalen,))

    comments.append(str.format("out_scale: {} out_scale_n: {}",
                               out_scale, out_scalen))
    contents.append(np.array([cin_scale, cin_scalen, cout_scale, cout_scalen,
                              out_scale, out_scalen], dtype=np.int8))

    three = i_qtype.quantize(np.array([3]))[0]
    six = i_qtype.quantize(np.array([6]))[0]
    sixth = i_qtype.quantize(np.array([1/6]))[0]

    comments.append(str.format("int_q: {} A0: {} B0: {} C0: {}",
                               i_qtype.q, six, three, sixth))
    contents.append(np.array([lowb(six), highb(six),
                              lowb(three), highb(three),
                              lowb(sixth), highb(sixth), i_qtype.q],
                             dtype=np.int8))

    for k in LSTM_INFOS_ORDER.keys():
        info, comment = scale_infos(k, qrec.cache["i_2_%s_q" % k])
        contents.append(info)
        comments.append(comment)

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name, QType.Pow2(bits=8, q=0, signed=True),
                              contents=np.hstack(tuple(contents)))

    gen.globals.append(GlobalArgInfo("int8", cname,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=const_info,
                                     comment=" ".join(comments)))
def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del fnode  # node is still needed below to fetch the constant value
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node.name,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    const_info = ConstantInfo(file_name,
                              qrec.out_qs[0],
                              contents=qrec.out_qs[0].quantize(node.value))
    gen.globals.append(
        GlobalArgInfo(qrec.out_qs[0].ctype,
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))
    return True
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters,
                              GlobalSumPoolParameters)):
            compute_in_out_scale(qrec)
            infos, comment = np.array([
                qrec.cache['scale_mul_biases_q'].qbiases[0],
                qrec.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
            ]), "no activation"
            fnode = pnode
            pool_q = qrec
        elif isinstance(pnode, ActivationFusion) and isinstance(
                fnode, (GlobalPoolingParameters, PoolingParameters)):
            cnodes = pnode.contained_nodes()
            quants = [
                gen.G.quantization[NodeId(pnode, fnode)] for fnode in cnodes
            ]
            pool_q = quants[0]
            infos, comment = gen_act_infos(cnodes[1], quants[1])
        else:
            return False
        infos = np.append(infos, [0, 0, 0, 0])
        if isinstance(fnode, GlobalSumPoolParameters):
            compute_in_out_scale(pool_q, in_idx=0, out_idx=0)
            infos[0] = 0
            infos[1] = 0
            infos[5] = pool_q.cache['scale_mul_biases_q'].qbiases[0]
            infos[6] = pool_q.cache['scale_mul_biases_q'].qnorms[0]

        cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=infos)
        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
        return True
Example #12
def filter_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del fnode
    if isinstance(node, MultiplicativeBiasParameters) and node.has_mul_bias:
        mul_biases_q = qrec.mul_biases_q

        cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                      pnode, MULSCALE)
        file_name = os.path.join(gen.opts['tensor_directory'],
                                 cname+".tensor")
        gen.name_cache.set(node, MULSCALE, cname)

        contents = mul_biases_q.quantize(node.mul_biases).astype(mul_biases_q.dtype,
                                                                 order='C',
                                                                 casting='no',
                                                                 copy=True)
        const_info = ConstantInfo(file_name, mul_biases_q, contents=contents)

        gen.globals.append(GlobalArgInfo(mul_biases_q.ctype, cname,
                                         gen.opts['default_global_home_location'],
                                         gen.opts['default_global_exec_location'],
                                         const_info=const_info))
    return True
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }
        scales = []
        weight_zero = None
        for gate in ['i', 'c', 'f', 'o']:
            for input_tensor in ['i', 'r']:
                scale_name = f'{input_tensor}_2_{gate}_q'
                weight_name = f'{input_tensor}_2_{gate}_w'
                if weight_zero is None:
                    weight_zero = qrec.in_qs[names[weight_name]].zero_point[0]
                else:
                    assert weight_zero == qrec.in_qs[
                        names[weight_name]].zero_point[0]
                w_q = qrec.in_qs[names['r_2_i_w']]
                qscale = qrec.cache[scale_name]
                scales.append(qscale.qbiases)
                scales.append(qscale.qnorms)

        contents = interleave(*scales)

        cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=False),
                                  contents=contents)
        gen.globals.append(
            GlobalArgInfo("uint8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=f"{node.name} scales and norms"))
        if node.rnn_states_as_inputs:
            gen.globals.append(
                GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))

        out_q = qrec.out_qs[0]
        out_scale = qrec.cache["state_out_q"].qbiases[0]
        out_scalen = qrec.cache["state_out_q"].qnorms[0]
        cin_scale = qrec.cache["cell_in_q"].qbiases[0]
        cin_scalen = qrec.cache["cell_in_q"].qnorms[0]
        cout_scale = qrec.cache["cell_out_q"].qbiases[0]
        cout_scalen = qrec.cache["cell_out_q"].qnorms[0]
        out_zeropoint = out_q.zero_point[0]

        # define LSTM_NE16_W_ZEROPOINT   0
        # define LSTM_NE16_GATE_PRENORM  1
        # define LSTM_NE16_CIN_SCALE     (0 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_CIN_SCALEN    (1 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_COUT_SCALE    (2 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_COUT_SCALEN   (3 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_OUT_SCALE     (4 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_OUT_SCALEN    (5 + LSTM_NE16_OUT_OFF)
        # define LSTM_NE16_OUT_ZEROPOINT (6 + LSTM_NE16_OUT_OFF)

        # define LSTM_NE16_INT_A0        (0 + LSTM_NE16_INT_OFF)
        # define LSTM_NE16_INT_B0        (1 + LSTM_NE16_INT_OFF)
        # define LSTM_NE16_INT_C0        (2 + LSTM_NE16_INT_OFF)

        sigmoid_table = interleave(SIGMOID_TABLE & 0xff,
                                   SIGMOID_TABLE >> 8).astype(np.int8)
        if out_q.dtype == np.uint8:
            # Maybe get rid of this
            if qrec.cache.get('act_qtype'):
                min_val = qrec.cache['act_qtype'].quantize(-1)
                max_val = qrec.cache['act_qtype'].quantize(1)
            else:
                min_val = max_val = 0
            contents = np.concatenate(
                (sigmoid_table,
                 np.array([
                     -weight_zero.astype(np.int8), qrec.cache['gate_prenorm'],
                     cin_scale.astype(np.int8),
                     cin_scalen.astype(np.int8),
                     cout_scale.astype(np.int8),
                     cout_scalen.astype(np.int8),
                     out_scale.astype(np.int8),
                     out_scalen.astype(np.int8),
                     out_zeropoint.astype(np.int8), 0, 0, 0, 0
                 ],
                          dtype=np.int8)))
        else:
            contents = np.concatenate(
                (sigmoid_table,
                 np.array([
                     -weight_zero.astype(np.int8),
                     qrec.cache['gate_prenorm'],
                     cin_scale.astype(np.int8),
                     cin_scalen.astype(np.int8),
                     cout_scale.astype(np.int8),
                     cout_scalen.astype(np.int8),
                     out_scale.astype(np.int8),
                     out_scalen.astype(np.int8),
                     out_zeropoint.astype(np.uint16) & 0xff,
                     out_zeropoint.astype(np.uint16) >> 8,
                 ],
                          dtype=np.int8)))

        comment = (
            f"WZP: {weight_zero}, Out: {out_scale}/{out_scalen}, Cin: {cin_scale}/{cin_scalen}, "
            f"Cout: {cout_scale}/{cout_scalen}, OZP: {out_zeropoint}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))

        if node.rnn_states_as_inputs:
            gen.globals.append(
                GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        return True
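# Hedged sketch of the LUT packing used above, assuming interleave() alternates
# its argument arrays element-wise: each 16-bit sigmoid table entry is split
# into low/high bytes so the table can travel inside the int8 INFOS constant
# and be reassembled on the kernel side.
import numpy as np

def interleave(*arrays):
    return np.stack(arrays, axis=-1).ravel()

table = np.array([0, 1000, 65535], dtype=np.uint16)  # stand-in SIGMOID_TABLE
packed = interleave(table & 0xff, table >> 8).astype(np.int8)
unpacked = packed.astype(np.uint8).astype(np.uint16)
assert np.array_equal(unpacked[0::2] | unpacked[1::2] << 8, table)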
Example #14
def gen_filter_globals(gen, pnode, fnode, fqrec):
    cname, file_name = gen_constant(gen, pnode, fnode, WEIGHTS)
    weights_q = fqrec.weights_q
    const_info = ConstantInfo(file_name,
                              weights_q,
                              contents=fqrec.gen_weights(fnode, fnode.weights))

    gen.globals.append(
        GlobalArgInfo(weights_q.ctype,
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    # biases are always generated even if they are 0
    if fnode.has_bias:
        biases_q = fqrec.biases_q
        biases = fnode.biases
    else:
        biases_q = fqrec.out_qs[0]
        biases = np.zeros((fnode.out_dims[0].c))

    contents = fqrec.gen_biases(fnode, biases, fnode.weights)

    cname, file_name = gen_constant(gen, pnode, fnode, BIASES)
    const_info = ConstantInfo(file_name, biases_q, contents=contents)

    gen.globals.append(
        GlobalArgInfo(biases_q.ctype,
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    cname_mul_scale, file_name_mul_scale = gen_constant(
        gen, pnode, fnode, MULSCALE)
    cname_mul_shift, file_name_mul_shift = gen_constant(
        gen, pnode, fnode, MULSHIFT)

    mul_biases_q = fqrec.mul_biases_q

    const_info_mul_scale = ConstantInfo(file_name_mul_scale,
                                        mul_biases_q,
                                        contents=fqrec.gen_mul_biases(fnode))
    const_info_mul_shift = ConstantInfo(file_name_mul_shift,
                                        mul_biases_q.shift_qtype,
                                        contents=fqrec.mul_biases_q.qnorms)

    gen.globals.append(
        GlobalArgInfo(mul_biases_q.ctype,
                      cname_mul_scale,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_scale))

    gen.globals.append(
        GlobalArgInfo(mul_biases_q.shift_ctype,
                      cname_mul_shift,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info_mul_shift))
def mult8_infos_generator(gen, node, qrec, pnode, fnode) -> bool:
    if fnode is not None:
        return False
    # if isinstance(pnode, Conv2DParameters):
    #     for_ne16 = qrec.cache.get('ne16')
    #     in_zero_point = qrec.in_qs[0].zero_point
    #     conv_mul_bias = qrec.cache.get('mul_biases_q')
    #     prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #     act_infos(gen, pnode, pnode, None, None, prenorm=prenorm, extra1=0,
    #               for_ne16=for_ne16, in_zero_point=in_zero_point)
    # elif isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
    #     compute_in_out_scale(qrec)
    #     act_infos(gen, pnode, pnode, None, qrec)
    if isinstance(pnode, ActivationParameters):
        act_infos(gen, pnode, pnode, pnode, gen.G.quantization[NodeId(pnode)])
    # elif isinstance(pnode, ConvFusionParameters):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for_ne16 = any([qrec.cache.get('ne16') for qrec in quants])
    #     in_zero_point = quants[0].in_qs[0].zero_point
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if node.fusion_type.startswith('linear') or node.fusion_type.startswith('conv') or node.fusion_type.startswith('pool'):
    #         if node.fusion_type in ("pool_active"):
    #             act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1],
    #                       extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #         else:
    #             conv_mul_bias = quants[0].cache.get('mul_biases_q')
    #             prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #             if node.fusion_type in ("conv_active_pool", "conv_active", "linear_active"):
    #                 act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool_active":
    #                 act_infos(gen, pnode, cnodes[0], cnodes[2], quants[2], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool":
    #                 act_infos(gen, pnode, cnodes[0], None, None, prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16)
    elif isinstance(pnode, MatrixMulParameters):
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        act_infos(gen,
                  pnode,
                  pnode,
                  None,
                  None,
                  extra1=qrec.cache['scale_mul_biases_q'].qbiases[0],
                  extra2=qrec.cache['scale_mul_biases_q'].qnorms[0])
    elif isinstance(pnode, SoftMaxParameters):
        act_infos(gen, pnode, pnode, pnode, qrec)
    # elif isinstance(pnode, ActivationFusionBase):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if isinstance(cnodes[0], (GlobalPoolingParameters, PoolingParameters)):
    #         act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1])
    #     else:
    #         return False
    #     return True
    elif isinstance(pnode, (MatMulOpParameters, MatMulOpFusionParameters)):
        if isinstance(pnode, MatMulOpFusionParameters):
            cnodes = node.contained_nodes()
            quants = [
                gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes
            ]
            mul_node = cnodes[0]
            mul_qrec = quants[0]
            act_node = cnodes[1]
            act_qrec = quants[1]
        else:
            mul_node = pnode
            mul_qrec = qrec
            act_node = None
            act_qrec = None

        if len(pnode.in_dims) == 3 and len(mul_qrec.in_qs[0].scale) > 1:
            gen_scales(gen, pnode, mul_node, mul_qrec)
            extra3 = 0
            extra4 = 0
        else:
            extra3 = mul_qrec.cache['mul_biases_q'].qbiases[0]
            extra4 = mul_qrec.cache['mul_biases_q'].qnorms[0]

        act_infos(gen,
                  pnode,
                  mul_node,
                  act_node,
                  act_qrec,
                  extra3=extra3,
                  extra4=extra4)
    elif isinstance(pnode, QuantizeParameters):
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            bits = 8 if in_q.dtype == np.int8 else 16
            if in_q.signed:
                contents = ((int(math.pow(2, bits)) + in_q.zero_point[0] -
                             out_q.zero_point[0]) %
                            int(math.pow(2, bits))).astype(np.uint8)
            else:
                contents = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                            out_q.zero_point[0]).astype(np.uint8)
        # if in_q.dtype == np.int8 and out_q.dtype == np.uint8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         (256 + in_q.zero_point[0] - out_q.zero_point[0]) % 256).astype(np.uint8)
        # elif in_q.dtype == np.uint8 and out_q.dtype == np.int8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         256 - in_q.zero_point[0] + out_q.zero_point[0]).astype(np.uint8)
        elif in_q.dtype == np.int8 and out_q.dtype == np.int16:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        elif in_q.dtype == np.int16 and out_q.dtype == np.int8:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
    else:
        return False
    return True
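# Worked example (illustrative only) of the zero-point-only conversion in the
# KOP_CONVERT_FP_FP_ZEROPOINT branch above: when in/out scales match, the
# conversion reduces to adding the zero-point difference modulo 2**bits,
# e.g. int8 with zero point -128 to uint8 with zero point 0.
import numpy as np

in_zp, out_zp, bits = -128, 0, 8
offset = np.uint8((2**bits + in_zp - out_zp) % 2**bits)  # 128
x_int8 = np.array([-128, 0, 127], dtype=np.int8)
x_uint8 = x_int8.view(np.uint8) + offset  # uint8 addition wraps modulo 256
assert np.array_equal(x_uint8, np.array([0, 128, 255], dtype=np.uint8))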
def act_infos(gen,
              pnode,
              fnode,
              act_params,
              act_q,
              extra1=0,
              extra2=0,
              extra3=0,
              extra4=0,
              extra5=None,
              extra6=None,
              prenorm=0,
              extra_name='',
              for_ne16=False,
              in_zero_point=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}", extra1,
            extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0], dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0],
                                    dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0], int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant, 0,
            0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant)
    elif isinstance(act_params,
                    (SigmoidActivationParameters, TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale /
            act_q.in_qs[0].scale)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation tye not implemented")

    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
        ],
                            dtype=np.int8)

    contents = np.append(contents, [extra1, extra2, extra3, extra4])
    if extra5 is not None:
        contents = np.append(contents, [extra5])
    if extra6 is not None:
        contents = np.append(contents, [extra6])

    if for_ne16:
        # append weights_offset and pad_val for ne16
        # TODO - default config maybe in future
        if isinstance(pnode, (ConvFusionParameters, LinearFusionParameters)):
            filt_q = gen.G.quantization[NodeId(pnode, fnode)]
        else:
            filt_q = gen.G.quantization[NodeId(pnode)]
        pad_value = np.array(in_zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24

        contents = np.append(
            contents, [[prenorm] if prenorm else [0], pad_value1, pad_value2,
                       w_offset1, w_offset2, w_offset3, w_offset4])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS, extra_name)
    const_info = ConstantInfo(file_name,
                              QType.Pow2(bits=8, q=0, signed=True),
                              contents=contents)

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
Example #17
def act_infos(gen,
              pnode,
              fnode,
              act_params,
              act_q,
              extra1=0,
              extra2=0,
              extra3=0,
              extra4=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}", extra1,
            extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0, extra1, extra2, extra3, extra4],
                            dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        actscale = act_q.scale_mul_biases_q.qbiases[0]
        actscalen = act_q.scale_mul_biases_q.qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            contents = np.array(
                [actscale, actscalen, 0, 0, 0, extra1, extra2, extra3, extra4],
                dtype=np.int8)
            if len(comment) == 0:
                comment = "all 0"
        else:
            fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
            contents = np.array([
                actscale, actscalen, fac_1, 0, 0, extra1, extra2, extra3,
                extra4
            ],
                                dtype=np.int8)
            comment += str.format("in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                                  act_q.in_qs[0].scale[0],
                                  act_q.out_qs[0].scale[0], fac_1[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], upper_bound, fac_1, 1, extra1,
            extra2, extra3, extra4
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], upper_bound[0], fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], upper_bound, fac_1, 1, extra1,
            extra2, extra3, extra4
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], upper_bound[0], fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0, extra1, extra2, extra3, extra4],
                            dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0], int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        act_q.set_scale()
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], leak_factor_quant, 0, 0,
            extra1, extra2, extra3, extra4
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], leak_factor_quant)
    else:
        raise NotImplementedError("activation tye not implemented")

    if isinstance(pnode, (GlobalPoolParameters, PoolingParameters)):
        contents = np.array([
            act_q.scale_mul_biases_q.qbiases[0],
            act_q.scale_mul_biases_q.qnorms[0], 0, 0, 0, extra1, extra2,
            extra3, extra4
        ],
                            dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name,
                              QType(bits=8, q=0, signed=True),
                              contents=contents)

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
Example #18
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        if not cls.cache_values(node, qrec):
            return False
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            bits = 8 if in_q.dtype in [np.int8, np.uint8] else 16
            if in_q.signed:
                offset = ((int(math.pow(2, bits)) + in_q.zero_point[0] -
                           out_q.zero_point[0]) %
                          int(math.pow(2, bits))).astype(out_q.dtype)
            else:
                offset = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                          out_q.zero_point[0]).astype(out_q.dtype)
            contents = np.array(list(offset.tobytes()) + ([0] * 7),
                                dtype=np.uint8)
        elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
            # no infos needed
            return True
        elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_SCALE':
            scale = in_q.scale / out_q.scale
            in_abs_zp = in_q.zero_point.astype(np.int32)
            out_abs_zp = out_q.zero_point.astype(np.int32)
            if out_q.bits > in_q.bits:
                zero_adjust = (np.round(-in_abs_zp * scale) +
                               out_abs_zp).astype(np.int32)
            else:
                zero_adjust = (-in_abs_zp +
                               np.round(out_abs_zp * 1 / scale)).astype(
                                   np.int32)

            zero_adjust = list(zero_adjust.tobytes())

            if len(scale) > 1:
                raise NotImplementedError(
                    'multiscale conversion not supported')
            scale = scale[0]
            if in_q.dtype_bits == 8 and out_q.dtype_bits == 16:
                # scale Q16 * Q8 OK
                scale_adjust = MultMulBiasScaleQType(scale=scale,
                                                     dtype=np.int16,
                                                     available_bits=16)
            else:
                scale_adjust = MultMulBiasScaleQType(scale=scale,
                                                     dtype=np.int8,
                                                     available_bits=8)
            qbias = list(scale_adjust.qbiases.tobytes())
            qbias = qbias + [0] * (2 - len(qbias))
            qnorm = list(scale_adjust.qnorms.tobytes())
            contents = np.array(zero_adjust + qbias + qnorm + [0],
                                dtype=np.int8)
        elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FL_FP':
            qbias = list((1 / out_q.scale).astype(np.float32).tobytes())
            zero_adjust = list((out_q.zero_point.astype(np.int32) *
                                out_q.scale).astype(np.float32).tobytes())
            contents = np.array(zero_adjust + qbias, dtype=np.int8)
        elif qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FL':
            qbias = list((in_q.scale).astype(np.float32).tobytes())
            zero_adjust = list((-in_q.zero_point.astype(np.int32)).astype(
                np.float32).tobytes())
            contents = np.array(zero_adjust + qbias, dtype=np.int8)
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
        return True
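# Hedged sketch of the zero adjustment in the KOP_CONVERT_FP_FP_SCALE branch
# above. Requantization is out = round((in - in_zp) * s_in / s_out) + out_zp;
# the generator folds both zero points into one additive term, applied after
# the scale when widening (the out_q.bits > in_q.bits case shown above).
import numpy as np

s_in, s_out, in_zp, out_zp = 0.05, 0.002, 3, -10
scale = s_in / s_out  # 25.0
zero_adjust = np.round(-in_zp * scale) + out_zp  # single folded offset

x = np.array([3, 10, 50], dtype=np.int32)
out = np.round(x * scale) + zero_adjust
ref = np.round((x - in_zp) * scale) + out_zp
assert np.array_equal(out, ref)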
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        names = {val: idx for idx, val in enumerate(GRUParameters.INPUT_NAMES)}
        scales = []
        weight_zero = None
        for gate in ['r', 'h', 'z']:
            input_order = ['r', 'w'] if gate == 'h' else ['w', 'r']
            for input_tensor in input_order:
                scale_name = f'{input_tensor}_2_{gate}_q'
                weight_name = f'{input_tensor}_2_{gate}_w'
                if weight_zero is None:
                    weight_zero = qrec.in_qs[names[weight_name]].zero_point[0]
                else:
                    assert weight_zero == qrec.in_qs[
                        names[weight_name]].zero_point[0]
                qscale = qrec.cache[scale_name]
                scales.append(qscale.qbiases)
                scales.append(qscale.qnorms)

        contents = interleave(*scales)

        cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=False),
                                  contents=contents)
        gen.globals.append(
            GlobalArgInfo("uint8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=f"{node.name} scales and norms"))

        out_q = qrec.out_qs[0]

        sigmoid_table = interleave(SIGMOID_TABLE & 0xff,
                                   SIGMOID_TABLE >> 8).astype(np.int8)
        if out_q.dtype == np.uint8:
            contents = np.concatenate(
                (sigmoid_table,
                 np.array([-weight_zero.astype(np.int8), 0], dtype=np.int8)))
        else:
            contents = np.concatenate(
                (sigmoid_table,
                 np.array([-weight_zero.astype(np.int8),
                           qrec.cache['gate_prenorm']], dtype=np.int8)))

        comment = (f"WZP: {weight_zero}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))

        if node.rnn_states_as_inputs:
            gen.globals.append(
                GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        return True
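The scalenorm table packs each gate input's qbias next to its qnorm so the kernel can read both with a single pointer walk. A sketch of the layout, assuming the project's interleave helper round-robins its arguments elementwise (this stand-in is illustrative, not the real implementation):

import numpy as np

def interleave(*arrays):
    # round-robin the inputs elementwise: a[0], b[0], a[1], b[1], ...
    return np.column_stack([np.asarray(a).ravel() for a in arrays]).ravel()

qbiases = np.array([100, 101, 102], dtype=np.uint8)
qnorms = np.array([7, 8, 9], dtype=np.uint8)
print(interleave(qbiases, qnorms))  # [100   7 101   8 102   9]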
Example #20
0
def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    qtype = qrec.out_qs[0]
    if qtype.attr.dont_generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
            (tc
             for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qtype.ctype,
                             cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'],
                                 cname + ".tensor")
        value = pnode.value_as(qtype, generation=True)

        if qtype.attr.concatenated_nodes:
            values = [value]
            concatenated_nodes = [
                gen.G[node_name] for node_name in qtype.attr.concatenated_nodes
            ]
            concated_qrecs = [
                gen.G.quantization.get(NodeId(pn, None))
                for pn in concatenated_nodes
            ]
            for other_node, concated_qrec in zip(concatenated_nodes,
                                                 concated_qrecs):
                values += [
                    other_node.value_as(concated_qrec.out_qs[0],
                                        generation=True)
                ]
            value = np.hstack(tuple(values))
        elif qtype.attr.interleaved_values:
            value = interleave(value, *qtype.attr.interleaved_values)

        if qtype.attr.resize:
            padding = tuple(
                (0, new - orig) for orig, new in zip(*qtype.attr.resize))
            value = np.pad(value, padding)

        if qtype.attr.bit_pack and qtype.attr.bit_pack != 8:
            # pack value into qtype.attr.bit_pack bit items. Requires uint8 input
            assert value.dtype == np.uint8, "bit pack only works on uint8 datatypes"
            value = packbits(value, qtype.attr.bit_pack)

        if qtype.attr.ne16_biases:
            to_node = gen.G.out_edges(pnode.name)[0].to_node
            if isinstance(to_node,
                          (ConvFusionParameters, LinearFusionParameters)):
                cnodes = to_node.contained_nodes()
                quants = [
                    gen.G.quantization[NodeId(to_node, fnode)]
                    for fnode in cnodes
                ]
                filter_qrec = quants[0]
            else:
                filter_qrec = gen.G.quantization[NodeId(to_node)]
            mul_qbiases = filter_qrec.cache['mul_biases_q'].qbiases
            mul_qnorms = filter_qrec.cache['mul_biases_q'].qnorms
            value = np.where(
                mul_qnorms > 0,
                value * mul_qbiases + (1 << (mul_qnorms - 1).astype(np.int32)),
                value * mul_qbiases)

        const_info = ConstantInfo(file_name,
                                  qtype,
                                  contents=value,
                                  numeric_format="fixed")
        gen.globals.append(
            GlobalArgInfo(qtype.ctype,
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True
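In the ne16_biases branch, the per-channel multiplicative scale is folded into the stored bias, and 1 << (qnorm - 1) is the usual half-up rounding offset for the right shift the kernel applies later. A toy check with made-up per-channel values:

import numpy as np

bias = np.array([10, -4], dtype=np.int32)
qbias = np.array([3, 5], dtype=np.int32)   # per-channel multipliers
qnorm = np.array([2, 0], dtype=np.int32)   # per-channel right shifts

# add the rounding offset only where a shift actually happens (qnorm > 0)
offset = np.where(qnorm > 0, 1 << np.maximum(qnorm - 1, 0), 0)
scaled = bias * qbias + offset
print(scaled >> qnorm)  # ~round(bias * qbias / 2**qnorm): [  8 -20]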
Example #21
0
    @classmethod
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
        w_q = qrec.in_qs[names['r_2_i_w']]
        out_q = qrec.out_qs[0]
        out_scale = qrec.cache["s_2_o_q"]
        assert len(w_q.zero_point) == 1
        assert len(out_scale.qbiases) == 1
        assert len(out_scale.qnorms) == 1
        if out_q.dtype == np.uint8:
            if qrec.cache['act_qtype']:
                min_val = qrec.cache['act_qtype'].quantize(-1)
                max_val = qrec.cache['act_qtype'].quantize(1)
            else:
                min_val = max_val = 0
            contents = np.array(
                [min_val, max_val,
                 (-w_q.zero_point[0]).astype(np.int8),
                 out_q.zero_point[0], 0,
                 out_scale.qbiases[0].astype(np.int8),
                 out_scale.qnorms[0].astype(np.int8), 0, 0],
                dtype=np.int8)
        else:
            out_zp = out_q.zero_point[0].astype(np.uint16)
            contents = np.array(
                [0, 0,
                 (-w_q.zero_point[0]).astype(np.int8),
                 out_zp & 0xff, out_zp >> 8,
                 out_scale.qbiases[0].astype(np.int8),
                 out_scale.qnorms[0].astype(np.int8),
                 qrec.cache["i_2_s_q"].pre_normalization,
                 qrec.cache["s_2_s_q"].pre_normalization],
                dtype=np.int8)
        comment = f"A0: {1} B0: {-1}, ZP: {w_q.zero_point}, OutS: {out_scale.qbiases[0]}, OutN: {out_scale.qnorms[0]}"

        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))

        state_scale = qrec.cache["s_2_s_q"]
        if node.rnn_same_inout_scale:
            contents = interleave(state_scale.qbiases, state_scale.qnorms)
        else:
            input_scale = qrec.cache["i_2_s_q"]
            contents = interleave(state_scale.qbiases, input_scale.qbiases,
                                  state_scale.qnorms, input_scale.qnorms)

        cname, file_name = gen_constant(gen, pnode, pnode, "scalenorm")
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=False),
                                  contents=contents)
        gen.globals.append(
            GlobalArgInfo("uint8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=f"{node.name} scales and norms"))
        if node.rnn_states_as_inputs:
            gen.globals.append(
                GlobalResetArgInfo(f"{node.name}_Reset", 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        return True
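In the 16-bit output branch above, the output zero point is split into low and high bytes across two int8 infos slots (Infos[3] and Infos[4]). A round-trip sketch with a made-up zero point:

import numpy as np

out_zp = np.uint16(0x81ff)                # hypothetical zero point
lo = (out_zp & 0xff).astype(np.int8)      # stored in Infos[3]
hi = (out_zp >> 8).astype(np.int8)        # stored in Infos[4]
# the kernel reads the bytes back as unsigned and reassembles the value
restored = np.uint16(lo.astype(np.uint8)) | (np.uint16(hi.astype(np.uint8)) << 8)
assert restored == out_zp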
Example #22
0
def filter_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del fnode
    cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                  pnode, WEIGHTS)
    gen.name_cache.set(node, WEIGHTS, cname)

    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    weights_q = qrec.weights_q
    contents = weights_q.quantize(node.weights).astype(weights_q.dtype,
                                                       order='C',
                                                       casting='no',
                                                       copy=True)

    const_info = ConstantInfo(file_name, qrec.weights_q, contents=contents)

    gen.globals.append(
        GlobalArgInfo(qrec.weights_q.ctype,
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    # a bias tensor is always generated, even when the node has none (zeros)
    if node.has_bias:
        biases_q = qrec.biases_q
        contents = biases_q.quantize(node.biases).astype(biases_q.dtype,
                                                         order='C',
                                                         casting='no',
                                                         copy=True)
    else:
        biases_q = qrec.out_q
        contents = biases_q.quantize(np.zeros(
            (node.out_dims[0].c))).astype(biases_q.dtype,
                                          order='C',
                                          casting='no',
                                          copy=True)

    cname = gen.naming_convension.get_global_name(pnode.name, pnode.step_idx,
                                                  pnode, BIASES)

    gen.name_cache.set(node, BIASES, cname)
    file_name = os.path.join(gen.opts['tensor_directory'], cname + ".tensor")
    const_info = ConstantInfo(file_name, biases_q, contents=contents)

    gen.globals.append(
        GlobalArgInfo(biases_q.ctype,
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info))

    if isinstance(node, MultiplicativeBiasParameters) and node.has_mul_bias:
        mul_biases_q = qrec.mul_biases_q

        cname = gen.naming_convension.get_global_name(pnode.name,
                                                      pnode.step_idx, pnode,
                                                      MULSCALE)
        gen.name_cache.set(node, MULSCALE, cname)

        contents = mul_biases_q.quantize(node.mul_biases).astype(
            mul_biases_q.dtype, order='C', casting='no', copy=True)
        # each constant gets its own tensor file
        file_name = os.path.join(gen.opts['tensor_directory'],
                                 cname + ".tensor")
        const_info = ConstantInfo(file_name, mul_biases_q, contents=contents)

        gen.globals.append(
            GlobalArgInfo(mul_biases_q.ctype,
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True
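A side note on the astype(..., casting='no') calls above: with casting='no', astype acts as a dtype assertion, so generation fails loudly if quantize ever returns a dtype other than the one the QType records. A toy illustration (the values and scale are made up):

import numpy as np

weights = np.array([0.05, -0.12, 0.30], dtype=np.float32)
scale = np.float32(0.01)
q = np.clip(np.round(weights / scale), -128, 127).astype(np.int8)

q.astype(np.int8, casting='no')        # same dtype: passes
try:
    q.astype(np.int16, casting='no')   # any conversion is rejected
except TypeError as err:
    print(err)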
Example #23
0
def constant_input_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    del node, fnode
    if not pnode.generate_value:
        return True
    # the name cache will be updated when all the edges are analysed by local_generator
    # the name of the constant is attached to the output edge so find it
    out_edge = gen.G.out_edges(pnode.name)[0]
    eparams = out_edge.params
    cname = gen.naming_convension.get_edge_name(eparams.creating_node,
                                                eparams.creating_step,
                                                eparams.edge_type,
                                                eparams.edge_order)
    if not pnode.is_constant:
        # This is an initializer which may have a reset
        if pnode.reset_name and not next(
            (tc
             for tc in gen.globals if tc.arg_name == pnode.reset_name), None):
            gen.globals.append(
                GlobalResetArgInfo(pnode.reset_name, 'AT_MEM_L2',
                                   'AT_MEM_UNDEF'))
        if pnode.is_global:
            home_location = gen.opts['default_input_home_location']
            exec_location = gen.opts['default_input_exec_location']
            gen.globals.append(
                InputArgInfo(qrec.out_qs[0].ctype,
                             cname,
                             home_location=home_location,
                             exec_location=exec_location,
                             allocate=pnode.at_options.allocate,
                             is_inout=pnode.is_mutated))
    elif pnode.is_global:
        file_name = os.path.join(gen.opts['tensor_directory'],
                                 cname + ".tensor")
        value = pnode.value_as(qrec.out_qs[0], generation=True)
        if pnode.concated_nodes:
            values = [value]
            concated_qrecs = [
                gen.G.quantization.get(NodeId(pn, None))
                for pn in pnode.concated_nodes
            ]
            for other_node, concated_qrec in zip(pnode.concated_nodes,
                                                 concated_qrecs):
                values += [
                    other_node.value_as(concated_qrec.out_qs[0],
                                        generation=True)
                ]
            value = np.hstack(tuple(values))

        if qrec.out_qs[0].attr.ne16_order:
            to_node = gen.G.out_edges(pnode.name)[0].to_node
            if isinstance(to_node, FcParameters) or (
                    isinstance(to_node, ConvFusionParameters)
                    and to_node.fusion_type == "linear_active"):
                # weights are kept as-is; the NE16 linear relayout is disabled
                # value = ne16_linear_weight_layout(
                #     value, w_bits=qrec.out_qs[0].bits)
                pass
            else:
                value = value.transpose((0, 3, 1, 2))
                # the NE16 conv relayout is likewise disabled:
                # if isinstance(to_node, ConvFusionParameters):
                #     is_dw = to_node.contained_nodes()[0].is_depthwise_conv()
                # else:
                #     is_dw = to_node.is_depthwise_conv()
                # value = ne16_conv_weight_layout(
                #     value, w_bits=qrec.out_qs[0].bits)
            numeric_format = "fixed"
        else:
            if qrec.out_qs[0].attr.ne16_biases:
                to_node = gen.G.out_edges(pnode.name)[0].to_node
                if isinstance(to_node, ConvFusionParameters):
                    cnodes = to_node.contained_nodes()
                    quants = [
                        gen.G.quantization[NodeId(to_node, fnode)]
                        for fnode in cnodes
                    ]
                    filter_qrec = quants[0]
                else:
                    filter_qrec = gen.G.quantization[NodeId(to_node)]
                mul_qbiases = filter_qrec.cache['mul_biases_q'].qbiases
                mul_qnorms = filter_qrec.cache['mul_biases_q'].qnorms
                value = (value * mul_qbiases +
                         (1 << (mul_qnorms - 1).astype(np.int32)))

            numeric_format = "fixed"

        const_info = ConstantInfo(file_name,
                                  qrec.out_qs[0],
                                  contents=value,
                                  numeric_format=numeric_format)
        gen.globals.append(
            GlobalArgInfo(qrec.out_qs[0].ctype,
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info))
    return True
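The (0, 3, 1, 2) transpose in the NE16 path reorders conv weights from channel-last to channel-first, i.e. (out_c, h, w, in_c) to (out_c, in_c, h, w), assuming the usual channel-last weight layout; a shape-only sketch with made-up dimensions:

import numpy as np

value = np.zeros((16, 3, 3, 8), dtype=np.uint8)  # (out_c, h, w, in_c)
print(value.transpose((0, 3, 1, 2)).shape)       # (16, 8, 3, 3)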