def get_bn(scale, bias):
    """Pack one batch-norm multiplier/offset pair into its field dictionary.

    Args:
        scale: multiplier, emitted under 'norm_mul' after being formatted by
            tools.signed_to_hex with a width argument of 24.
        bias: offset, emitted under 'norm_add' after being formatted by
            tools.signed_to_hex with a width argument of 32.

    Returns:
        A dict with the keys 'norm_mul', 'norm_add' and 'norm_shift'.
        'norm_shift' is always the constant 15.
    """
    return {
        'norm_mul': tools.signed_to_hex(scale, 24),
        'norm_add': tools.signed_to_hex(bias, 32),
        'norm_shift': 15,
    }
def gen_weights_code(dlayer, idx, eight_bit_mode, prefix):
    """Render a layer's weight table as a C array definition.

    Args:
        dlayer: sequence whose first element is a layer dict; the weights are
            read from dlayer[0]['kernel_load_cfg']['para_start_addr'].
        idx: layer index, embedded in the generated array name.
        eight_bit_mode: truthy selects a 'static uint8_t' array with each
            value formatted at width 8; falsy selects 'static uint16_t' and
            width 16.
        prefix: text placed in front of 'para_start_addr_' in the array name.

    Returns:
        One C statement (str) declaring the 128-byte-aligned array.
    """
    weights = dlayer[0]['kernel_load_cfg']['para_start_addr']

    if eight_bit_mode:
        width, c_type = 8, 'static uint8_t'
    else:
        width, c_type = 16, 'static uint16_t'

    cells = []
    for pos, value in enumerate(weights):
        # Start a fresh output line in front of every 64th value.
        lead = '\n ' if pos % 64 == 0 else ''
        cells.append(lead + tools.signed_to_hex(value, width))

    declaration = ' {prefix}para_start_addr_{idx}[] __attribute__((aligned(128))) = {{{data}}};'
    return c_type + declaration.format(
        prefix=prefix, idx=idx, data=', '.join(cells)
    )
def gen_act_code(dlayer, idx, prefix):
    """Render a layer's activation table as a C struct initialiser.

    Args:
        dlayer: sequence whose first element is a layer dict; the activation
            entries are read from
            dlayer[0]['kernel_calc_type_cfg']['active_addr']. Each entry is a
            mapping with the keys 'dxs', 'dy', 'x' and 'y'.
        idx: layer index, embedded in the generated variable name.
        prefix: text placed in front of 'active_addr_' in the variable name.

    Returns:
        C source (str) defining a 128-byte-aligned kpu_activate_table_t.

    Raises:
        IndexError: if fewer than 16 entries are supplied, because each of
            the two bias rows formats exactly eight values.
    """
    segments = dlayer[0]['kernel_calc_type_cfg']['active_addr']

    entry_template = ' {{.data = {{.shift_number={dxs}, .y_mul={dy}, .x_start={x} }}}}'
    entries = []
    for seg in segments:
        entries.append(entry_template.format(
            dxs=seg['dxs'],
            dy=int(seg['dy']),
            x=tools.signed_to_hex(seg['x'], 36),
        ))
    para_section = ' .activate_para = {\n' + ',\n'.join(entries) + '\n }'

    # The 'y' values are split into two rows of eight: entries 0-7, then 8-15.
    biases = [int(seg['y']) for seg in segments]
    low_template = ' .activate_para_bias0.data = {{\n .result_bias = {{{},{},{},{},{},{},{},{}}}\n }}'
    high_template = ' .activate_para_bias1.data = {{\n .result_bias = {{{},{},{},{},{},{},{},{}}}\n }}'
    bias0_section = low_template.format(*biases[:8])
    bias1_section = high_template.format(*biases[8:])

    pieces = [
        'static kpu_activate_table_t ',
        prefix,
        'active_addr_',
        str(idx),
        ' __attribute__((aligned(128))) = {\n',
        ',\n'.join([para_section, bias0_section, bias1_section]),
        '\n};',
    ]
    return ''.join(pieces)
def gen_layer_struct(klayer: k210_layer.K210Layer, idx: int):
    """Translate one layer into the nested field dictionaries used for codegen.

    Args:
        klayer: layer description. Its conv, bn, act and pool members are each
            converted through their own to_k210(); when a member is falsy (or
            its to_k210() result is falsy) the matching module-level
            default_*_arg is used instead.
        idx: position of the layer. Its parity decides which end of the image
            RAM holds the input and which holds the output: for even idx the
            source starts at 0 and the destination is placed at the top, for
            odd idx it is the other way round.

    Returns:
        A 2-tuple (fields, (output_scale, output_bias)). fields maps each
        register-group name to a dict of its field values; output_scale and
        output_bias come from tools.min_max_to_scale_bias applied to the
        activation's min_y / max_y.

    Raises:
        AssertionError: if the input and output sizes together exceed
            img_ram_size. Raised explicitly rather than through an assert
            statement so the check is still enforced under python -O.
    """
    reserved = 0
    set_to_zero = 0
    img_ram_size = 2 * 1024 * 1024

    # Each stage falls back to its default when the stage is absent or when
    # its conversion result is falsy (same outcome as the former and/or chain).
    conv_arg = (klayer.conv.to_k210() if klayer.conv else None) or default_conv_arg
    bn_arg = (klayer.bn.to_k210(conv_arg['swsx']) if klayer.bn else None) or default_bn_arg
    act_arg = (klayer.act.to_k210(bn_arg['post_scale']) if klayer.act else None) or default_act_arg
    pool_arg = (klayer.pool.to_k210() if klayer.pool else None) or default_pool_arg
    io_arg = klayer.to_k210()

    # NOTE(review): klayer.act is dereferenced unconditionally here although
    # the surrounding code tolerates a falsy klayer.act - confirm that callers
    # always supply an activation.
    mino, maxo = klayer.act.min_y, klayer.act.max_y
    output_scale, output_bias = tools.min_max_to_scale_bias(mino, maxo)

    img_input_size = int(
        math.ceil(io_arg['i_ch_num'] / conv_arg['coef_group'])
        * 64 * conv_arg['channel_switch_addr']
    )
    img_output_size = int(
        math.ceil(io_arg['o_ch_num'] / io_arg['wb_group'])
        * 64 * io_arg['wb_channel_switch_addr']
    )
    if img_input_size + img_output_size > img_ram_size:
        # Kept as AssertionError so existing callers see the same exception
        # type; unlike an assert statement this is not removed by -O.
        raise AssertionError(
            'layer {}: img_input_size ({}) + img_output_size ({}) exceeds '
            'img_ram_size ({})'.format(
                idx, img_input_size, img_output_size, img_ram_size
            )
        )

    # The key spelling 'interrupt_enabe' is part of the returned structure
    # and is kept verbatim.
    interrupt_enabe = {
        'int_en': set_to_zero,
        'ram_flag': reserved,
        'full_add': set_to_zero,
        'depth_wise_layer': conv_arg['depth_wise_layer']
    }

    # Addresses are expressed in units of 64, hence the division.
    image_addr = {
        'image_src_addr': '(uint64_t)' + hex(
            int((0 if not idx & 1 else (img_ram_size - img_input_size)) / 64)
        ),
        'image_dst_addr': '(uint64_t)' + hex(
            int((0 if idx & 1 else (img_ram_size - img_output_size)) / 64)
        )
    }

    # Counts and sizes below are stored as "value minus one".
    image_channel_num = {
        'i_ch_num': hex(io_arg['i_ch_num'] - 1),
        'o_ch_num': hex(io_arg['o_ch_num'] - 1),
        'o_ch_num_coef': hex(conv_arg['o_ch_num_coef'] - 1),
    }
    image_size = {
        'i_row_wid': hex(conv_arg['i_row_wid'] - 1),
        'i_col_high': hex(conv_arg['i_col_high'] - 1),
        'o_row_wid': hex(io_arg['o_row_wid'] - 1),
        'o_col_high': hex(io_arg['o_col_high'] - 1),
    }
    kernel_pool_type_cfg = {
        'kernel_type': conv_arg['kernel_type'],
        'pad_type': conv_arg['pad_type'],
        'pool_type': pool_arg['pool_type'],
        'first_stride': conv_arg['first_stride'],
        'bypass_conv': 0 if klayer.conv else 1,
        'load_para': bn_arg['load_para'],
        'dma_burst_size': io_arg['dma_burst_size'],
        'pad_value': tools.signed_to_hex(conv_arg['pad_value'], 8),
        'bwsx_base_addr': bn_arg['bwsx_base_addr'],
    }
    kernel_load_cfg = {
        'load_coor': conv_arg['load_coor'],
        'load_time': conv_arg['load_time'] - 1,
        'para_size': conv_arg['para_size'],
        'para_start_addr': conv_arg['para_start_addr'],
    }
    kernel_offset = {
        'coef_column_offset': set_to_zero,
        'coef_row_offset': set_to_zero,
    }
    kernel_calc_type_cfg = {
        'channel_switch_addr': hex(conv_arg['channel_switch_addr']),
        'row_switch_addr': hex(conv_arg['row_switch_addr']),
        'coef_size': reserved,
        'coef_group': conv_arg['coef_group'],
        'load_act': 1 if klayer.act else 0,
        'active_addr': act_arg['active_addr']
    }
    write_back_cfg = {
        'wb_channel_switch_addr': hex(io_arg['wb_channel_switch_addr']),
        'wb_row_switch_addr': hex(io_arg['wb_row_switch_addr']),
        'wb_group': io_arg['wb_group']
    }
    conv_value = {
        'shr_w': conv_arg['shr_w'],
        'shr_x': conv_arg['shr_x'],
        'arg_w': tools.signed_to_hex(conv_arg['arg_w'], 24),
        'arg_x': tools.signed_to_hex(conv_arg['arg_x'], 24),
    }
    conv_value2 = {
        'arg_add': int(round(conv_arg['arg_add'])),
    }
    dma_parameter = {
        'send_data_out': io_arg['send_data_out'],
        'channel_byte_num': io_arg['channel_byte_num'] - 1,
        'dma_total_byte': io_arg['dma_total_byte'] - 1,
    }

    return {
        'interrupt_enabe': interrupt_enabe,
        'image_addr': image_addr,
        'image_channel_num': image_channel_num,
        'image_size': image_size,
        'kernel_pool_type_cfg': kernel_pool_type_cfg,
        'kernel_load_cfg': kernel_load_cfg,
        'kernel_offset': kernel_offset,
        'kernel_calc_type_cfg': kernel_calc_type_cfg,
        'write_back_cfg': write_back_cfg,
        'conv_value': conv_value,
        'conv_value2': conv_value2,
        'dma_parameter': dma_parameter
    }, (output_scale, output_bias)