def _create_npu_op_conv2d(
    serial_2d_convolution: spec.Serial2DConvolution,
) -> Tuple[vapi.NpuConv2DOperation, int]:
    """Populate a Vela NpuConv2DOperation from a serialized 2D convolution.

    Returns
    -------
    Tuple[vapi.NpuConv2DOperation, int]
        The populated operation and the weight zero point (built with
        ``np.int64``).
    """
    serial = serial_2d_convolution

    # A second weight / scale-bias region is only used when its address
    # is something other than -1.
    use_second_weight = serial.weight2.address != -1
    use_second_bias = serial.scale_bias2.address != -1

    conv_op = vapi.NpuConv2DOperation()
    conv_op.ifm = _create_npu_feature_map(serial.ifm)
    conv_op.ofm = _create_npu_feature_map(serial.ofm)
    conv_op.kernel = _create_npu_kernel(serial.kernel)

    weight_ranges = [_create_npu_address_range(serial.weight)]
    if use_second_weight:
        weight_ranges.append(_create_npu_address_range(serial.weight2))
    conv_op.weights = weight_ranges

    weights_zero_point = np.int64(serial.weight_zero_point.value)

    bias_ranges = [_create_npu_address_range(serial.scale_bias)]
    if use_second_bias:
        bias_ranges.append(_create_npu_address_range(serial.scale_bias2))
    conv_op.biases = bias_ranges

    conv_op.padding = _create_npu_padding(serial.padding)

    conv_op.activation = _create_npu_activation(serial.activation)
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if conv_op.activation:
        if conv_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU:
            _convert_clip_bounds(conv_op)

    conv_op.rounding_mode = _create_npu_rounding_mode(serial.rounding_mode)
    conv_op.ifm_upscale = _create_npu_resampling_mode(serial.upscale)

    # Weight shape in OHWI order: OFM depth, kernel height, kernel width,
    # IFM depth.
    ohwi_shape = [
        conv_op.ofm.shape.depth,
        conv_op.kernel.height,
        conv_op.kernel.width,
        conv_op.ifm.shape.depth,
    ]
    conv_op.block_traversal = vela_api.calculate_block_traversal_mode(
        is_depthwise=False,
        weights_shape_ohwi=ohwi_shape,
        ifm_bitdepth=conv_op.ifm.data_type.size_in_bits(),
    )

    conv_op.block_config = _create_npu_block_config(serial.block_config)
    if not conv_op.block_config:
        # No usable block config came from the serialized op, so ask Vela
        # for the optimal one for the configured accelerator.
        conv_op.block_config = vela_api.get_optimal_block_config(
            conv_op, vela_api.get_accelerator_config()
        )

    return conv_op, weights_zero_point
def _create_npu_op_pooling(serial_pooling: spec.SerialPooling):
    """Populate a Vela NpuPoolingOperation from a serialized pooling op.

    Parameters
    ----------
    serial_pooling : spec.SerialPooling
        The serialized pooling description; its ``pooling_type`` must be
        ``"AVG"`` or ``"MAX"``.

    Returns
    -------
    vapi.NpuPoolingOperation
        The populated pooling operation.

    Raises
    ------
    ValueError
        If ``pooling_type`` is neither ``"AVG"`` nor ``"MAX"``.
    """
    pooling_type = serial_pooling.pooling_type
    if pooling_type == "AVG":
        pooling_kind = vapi.NpuPoolingOp.AVERAGE
    elif pooling_type == "MAX":
        pooling_kind = vapi.NpuPoolingOp.MAX
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError("Unsupported pooling type: {!r}".format(pooling_type))

    npu_pooling_op = vapi.NpuPoolingOperation(pooling_kind)
    npu_pooling_op.ifm = _create_npu_feature_map(serial_pooling.ifm)
    npu_pooling_op.ofm = _create_npu_feature_map(serial_pooling.ofm)
    npu_pooling_op.kernel = _create_npu_kernel(serial_pooling.pool_shape)
    npu_pooling_op.padding = _create_npu_padding(serial_pooling.padding)

    npu_pooling_op.activation = _create_npu_activation(serial_pooling.activation)
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if (
        npu_pooling_op.activation
        and npu_pooling_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU
    ):
        _convert_clip_bounds(npu_pooling_op)

    npu_pooling_op.rounding_mode = _create_npu_rounding_mode(serial_pooling.rounding_mode)
    npu_pooling_op.ifm_upscale = _create_npu_resampling_mode(serial_pooling.upscale)

    npu_pooling_op.block_config = _create_npu_block_config(serial_pooling.block_config)
    if not npu_pooling_op.block_config:
        # No usable block config came from the serialized op, so ask Vela
        # for the optimal one for the configured accelerator.
        target_accel_config = vela_api.get_accelerator_config()
        npu_pooling_op.block_config = vela_api.get_optimal_block_config(
            npu_pooling_op, target_accel_config
        )

    return npu_pooling_op
def _create_npu_op_unary_elementwise(serial_unary_elementwise):
    """Populate a Vela NpuElementWiseOperation for a unary elementwise op.

    Parameters
    ----------
    serial_unary_elementwise
        The serialized unary elementwise description; its
        ``operator_type`` must be ``"ABS"`` or ``"CLZ"``.

    Returns
    -------
    vapi.NpuElementWiseOperation
        The populated elementwise operation.

    Raises
    ------
    ValueError
        If ``operator_type`` is neither ``"ABS"`` nor ``"CLZ"``.
    """
    operator_type = serial_unary_elementwise.operator_type
    if operator_type == "ABS":
        op = vapi.NpuElementWiseOp.ABS
    elif operator_type == "CLZ":
        op = vapi.NpuElementWiseOp.CLZ
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported unary elementwise operator type: {!r}".format(operator_type)
        )

    npu_unary_elementwise_op = vapi.NpuElementWiseOperation(op)
    npu_unary_elementwise_op.ifm = _create_npu_feature_map(serial_unary_elementwise.ifm)
    npu_unary_elementwise_op.ofm = _create_npu_feature_map(serial_unary_elementwise.ofm)

    npu_unary_elementwise_op.activation = _create_npu_activation(
        serial_unary_elementwise.activation
    )
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if (
        npu_unary_elementwise_op.activation
        and npu_unary_elementwise_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU
    ):
        _convert_clip_bounds(npu_unary_elementwise_op)

    npu_unary_elementwise_op.rounding_mode = _create_npu_rounding_mode(
        serial_unary_elementwise.rounding_mode
    )

    npu_unary_elementwise_op.block_config = _create_npu_block_config(
        serial_unary_elementwise.block_config
    )
    if not npu_unary_elementwise_op.block_config:
        # No usable block config came from the serialized op, so ask Vela
        # for the optimal one for the configured accelerator.
        target_accel_config = vela_api.get_accelerator_config()
        npu_unary_elementwise_op.block_config = vela_api.get_optimal_block_config(
            npu_unary_elementwise_op, target_accel_config
        )

    return npu_unary_elementwise_op
def _create_npu_op_depthwise_conv2d(serial_2d_depthwise):
    """Populate a Vela NpuConvDepthWiseOperation from a serialized op.

    Returns a tuple of the populated operation and the weight zero point
    (built with ``np.int64``).
    """
    serial = serial_2d_depthwise
    depthwise_op = vapi.NpuConvDepthWiseOperation()

    depthwise_op.ifm = _create_npu_feature_map(serial.ifm)
    depthwise_op.ofm = _create_npu_feature_map(serial.ofm)
    depthwise_op.kernel = _create_npu_kernel(serial.kernel)

    weight_range = _create_npu_address_range(serial.weight)
    depthwise_op.weights = [weight_range]
    weights_zero_point = np.int64(serial.weight_zero_point.value)
    bias_range = _create_npu_address_range(serial.scale_bias)
    depthwise_op.biases = [bias_range]

    depthwise_op.padding = _create_npu_padding(serial.padding)

    depthwise_op.activation = _create_npu_activation(serial.activation)
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if depthwise_op.activation:
        if depthwise_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU:
            _convert_clip_bounds(depthwise_op)

    depthwise_op.rounding_mode = _create_npu_rounding_mode(serial.rounding_mode)
    depthwise_op.ifm_upscale = _create_npu_resampling_mode(serial.upscale)

    depthwise_op.block_config = _create_npu_block_config(serial.block_config)
    if not depthwise_op.block_config:
        # No usable block config came from the serialized op, so ask Vela
        # for the optimal one for the configured accelerator.
        depthwise_op.block_config = vela_api.get_optimal_block_config(
            depthwise_op, vela_api.get_accelerator_config()
        )

    return depthwise_op, weights_zero_point
def test_force_block_config(block_config_str, expected_block_config):
    """Check the block config returned when ``dev_force_block_config`` is set.

    The option is supplied through the ``relay.ext.ethos-u.options`` pass
    config, and the result of ``vela_api.get_optimal_block_config`` must
    equal ``expected_block_config``.
    """
    ethosu_options = {"dev_force_block_config": block_config_str}
    pass_config = {"relay.ext.ethos-u.options": ethosu_options}
    with tvm.transform.PassContext(config=pass_config):
        # No NPU operation is supplied here; None is passed in its place.
        actual_block_config = vela_api.get_optimal_block_config(
            None, vapi.NpuAccelerator.Ethos_U55_128
        )
        assert actual_block_config == expected_block_config
def _create_npu_op_binary_elementwise(serial_binary_elementwise: spec.SerialBinaryElementwise):
    """Populate a Vela NpuElementWiseOperation for a binary elementwise op.

    Parameters
    ----------
    serial_binary_elementwise : spec.SerialBinaryElementwise
        The serialized binary elementwise description; its
        ``operator_type`` must be one of ``"ADD"``, ``"SUB"``, ``"MUL"``,
        ``"MIN"``, ``"MAX"``, ``"SHR"`` or ``"SHL"``.

    Returns
    -------
    vapi.NpuElementWiseOperation
        The populated elementwise operation.

    Raises
    ------
    ValueError
        If ``operator_type`` is not one of the supported values.
    """
    operator_type = serial_binary_elementwise.operator_type
    if operator_type == "ADD":
        op = vapi.NpuElementWiseOp.ADD
    elif operator_type == "SUB":
        op = vapi.NpuElementWiseOp.SUB
    elif operator_type == "MUL":
        op = vapi.NpuElementWiseOp.MUL
    elif operator_type == "MIN":
        op = vapi.NpuElementWiseOp.MIN
    elif operator_type == "MAX":
        op = vapi.NpuElementWiseOp.MAX
    elif operator_type == "SHR":
        op = vapi.NpuElementWiseOp.SHR
    elif operator_type == "SHL":
        op = vapi.NpuElementWiseOp.SHL
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported binary elementwise operator type: {!r}".format(operator_type)
        )

    npu_binary_elementwise_op = vapi.NpuElementWiseOperation(op)
    npu_binary_elementwise_op.ifm = _create_npu_feature_map(serial_binary_elementwise.ifm)
    npu_binary_elementwise_op.ifm2 = _create_npu_feature_map(serial_binary_elementwise.ifm2)
    npu_binary_elementwise_op.ofm = _create_npu_feature_map(serial_binary_elementwise.ofm)
    npu_binary_elementwise_op.reversed_operands = serial_binary_elementwise.reversed_operands

    npu_binary_elementwise_op.activation = _create_npu_activation(
        serial_binary_elementwise.activation
    )
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if (
        npu_binary_elementwise_op.activation
        and npu_binary_elementwise_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU
    ):
        _convert_clip_bounds(npu_binary_elementwise_op)

    npu_binary_elementwise_op.rounding_mode = _create_npu_rounding_mode(
        serial_binary_elementwise.rounding_mode
    )

    npu_binary_elementwise_op.block_config = _create_npu_block_config(
        serial_binary_elementwise.block_config
    )
    if not npu_binary_elementwise_op.block_config:
        # No usable block config came from the serialized op, so ask Vela
        # for the optimal one for the configured accelerator.
        target_accel_config = vela_api.get_accelerator_config()
        npu_binary_elementwise_op.block_config = vela_api.get_optimal_block_config(
            npu_binary_elementwise_op, target_accel_config
        )

    return npu_binary_elementwise_op
def _create_npu_op_conv2d(serial_2d_convolution):
    """Populate a Vela NpuConv2DOperation from a serialized 2D convolution.

    Parameters
    ----------
    serial_2d_convolution
        The serialized 2D convolution description.

    Returns
    -------
    tuple
        The populated ``vapi.NpuConv2DOperation`` and the weight zero point
        (built with ``np.int64``).
    """
    npu_conv2d_op = vapi.NpuConv2DOperation()
    npu_conv2d_op.ifm = _create_npu_feature_map(serial_2d_convolution.ifm)
    npu_conv2d_op.ofm = _create_npu_feature_map(serial_2d_convolution.ofm)
    npu_conv2d_op.kernel = _create_npu_kernel(serial_2d_convolution.kernel)
    npu_conv2d_op.weights = [_create_npu_address_range(serial_2d_convolution.weight)]
    weights_zero_point = np.int64(serial_2d_convolution.weight_zero_point.value)
    npu_conv2d_op.biases = [_create_npu_address_range(serial_2d_convolution.scale_bias)]
    npu_conv2d_op.padding = _create_npu_padding(serial_2d_convolution.padding)

    npu_conv2d_op.activation = _create_npu_activation(serial_2d_convolution.activation)
    # NONE_OR_RELU activations are additionally handed to _convert_clip_bounds.
    if (
        npu_conv2d_op.activation
        and npu_conv2d_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU
    ):
        _convert_clip_bounds(npu_conv2d_op)

    # `ifm_upscale` is the NpuBlockOperation attribute for the IFM resampling
    # mode; assigning to a plain `upscale` attribute would only create an
    # unused attribute and leave the resampling mode at its default.
    npu_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale)

    target_accel_type = vela_api.get_target_accel_type()  # type: ignore
    block_config = vela_api.get_optimal_block_config(npu_conv2d_op, target_accel_type)
    npu_conv2d_op.block_config = block_config

    # Weight shape in OHWI order: OFM depth, kernel height, kernel width,
    # IFM depth.
    weights_shape_ohwi = [
        npu_conv2d_op.ofm.shape.depth,
        npu_conv2d_op.kernel.height,
        npu_conv2d_op.kernel.width,
        npu_conv2d_op.ifm.shape.depth,
    ]
    npu_conv2d_op.block_traversal = vela_api.calculate_block_traversal_mode(
        is_depthwise=False,
        weights_shape_ohwi=weights_shape_ohwi,
        ifm_bitdepth=npu_conv2d_op.ifm.data_type.size_in_bits(),
    )
    return npu_conv2d_op, weights_zero_point