def run_specify_abi(self, flags, configs, target_abi):
    """Run every converted model in *configs* on this device for one ABI.

    For each model this: clears the device data dir, (optionally) tunes
    OpenCL kernels, generates random input tensors, executes the model
    once per applicable runtime (CPU and/or GPU), optionally validates
    the outputs against the source framework, and optionally reports run
    statistics.  After all models have run, the per-model OpenCL binaries
    and tuned parameters are merged into library-level files.

    :param flags: parsed command-line flags (round, validate, report,
        example, mace_lib_type, ...).
    :param configs: deployment configuration dict parsed from YAML.
    :param target_abi: ABI to run for; must be one of self.target_abis,
        otherwise the method prints a notice and returns.
    """
    if target_abi not in self.target_abis:
        six.print_('There is no device with soc: %s abi: %s'
                   % (self.target_socs, target_abi))
        return
    library_name = configs[YAMLKeyword.library_name]
    mace_lib_type = flags.mace_lib_type
    embed_model_data = \
        configs[YAMLKeyword.model_data_format] == ModelFormat.code
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

    # Pick the runner binary: example vs mace_run, static vs dynamic lib.
    if flags.example:
        if mace_lib_type == MACELibType.static:
            target_name = EXAMPLE_STATIC_NAME
        else:
            target_name = EXAMPLE_DYNAMIC_NAME
    else:
        if mace_lib_type == MACELibType.static:
            target_name = MACE_RUN_STATIC_NAME
        else:
            target_name = MACE_RUN_DYNAMIC_NAME
    link_dynamic = mace_lib_type == MACELibType.dynamic

    model_output_dirs = []
    for model_name in configs[YAMLKeyword.models]:
        check_model_converted(library_name, model_name,
                              configs[YAMLKeyword.model_graph_format],
                              configs[YAMLKeyword.model_data_format],
                              target_abi)
        if target_abi != ABIType.host:
            self.clear_data_dir()
        MaceLogger.header(
            StringFormatter.block('Run model {} on {}'.format(
                model_name, self.device_name)))

        model_config = configs[YAMLKeyword.models][model_name]
        model_runtime = model_config[YAMLKeyword.runtime]
        subgraphs = model_config[YAMLKeyword.subgraphs]

        # FIX: the original if/else on target_socs/host called
        # get_build_model_dirs with identical arguments in both
        # branches; the dead duplicated branch is collapsed here.
        model_output_base_dir, model_output_dir, mace_model_dir = \
            get_build_model_dirs(
                library_name, model_name, target_abi, self,
                model_config[YAMLKeyword.model_file_path])

        # Start from a clean per-model output directory.
        if os.path.exists(model_output_dir):
            sh.rm('-rf', model_output_dir)
        os.makedirs(model_output_dir)

        is_tuned = False
        model_opencl_output_bin_path = ''
        model_opencl_parameter_path = ''
        if not flags.address_sanitizer \
                and not flags.example \
                and target_abi != ABIType.host \
                and configs[YAMLKeyword.target_socs] \
                and self.target_socs \
                and model_runtime in [RuntimeType.gpu,
                                      RuntimeType.cpu_gpu] \
                and not flags.disable_tuning:
            # Tune OpenCL kernels on-device; tuned binaries/parameters
            # are pulled into this model's output directory.
            self.tuning(library_name, model_name, model_config,
                        configs[YAMLKeyword.model_graph_format],
                        configs[YAMLKeyword.model_data_format],
                        target_abi, mace_lib_type)
            model_output_dirs.append(model_output_dir)
            model_opencl_output_bin_path = \
                '{}/{}/{}'.format(model_output_dir,
                                  BUILD_TMP_OPENCL_BIN_DIR,
                                  CL_COMPILED_BINARY_FILE_NAME)
            model_opencl_parameter_path = \
                '{}/{}/{}'.format(model_output_dir,
                                  BUILD_TMP_OPENCL_BIN_DIR,
                                  CL_TUNED_PARAMETER_FILE_NAME)
            self.clear_data_dir()
            is_tuned = True
        elif target_abi != ABIType.host and self.target_socs:
            # Reuse previously merged library-level OpenCL artifacts.
            model_opencl_output_bin_path = get_opencl_binary_output_path(
                library_name, target_abi, self)
            model_opencl_parameter_path = get_opencl_parameter_output_path(
                library_name, target_abi, self)

        sh_commands.gen_random_input(
            model_output_dir,
            subgraphs[0][YAMLKeyword.input_tensors],
            subgraphs[0][YAMLKeyword.input_shapes],
            subgraphs[0][YAMLKeyword.validation_inputs_data],
            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
            input_data_types=subgraphs[0][YAMLKeyword.input_data_types])

        # Expand the configured runtime into concrete runtimes to run.
        runtime_list = []
        if target_abi == ABIType.host:
            runtime_list.append(RuntimeType.cpu)
        elif model_runtime == RuntimeType.cpu_gpu:
            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
        else:
            runtime_list.append(model_runtime)

        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
            # Run for the specified soc (return value was never used,
            # so the dead `run_output` local is dropped).
            self.tuning_run(
                abi=target_abi,
                target_dir=build_tmp_binary_dir,
                target_name=target_name,
                vlog_level=flags.vlog_level,
                embed_model_data=embed_model_data,
                model_output_dir=model_output_dir,
                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                mace_model_dir=mace_model_dir,
                model_tag=model_name,
                device_type=device_type,
                running_round=flags.round,
                restart_round=flags.restart_round,
                limit_opencl_kernel_time=model_config[
                    YAMLKeyword.limit_opencl_kernel_time],
                tuning=False,
                out_of_range_check=flags.gpu_out_of_range_check,
                model_graph_format=configs[YAMLKeyword.model_graph_format],
                omp_num_threads=flags.omp_num_threads,
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                runtime_failure_ratio=flags.runtime_failure_ratio,
                address_sanitizer=flags.address_sanitizer,
                opencl_binary_file=model_opencl_output_bin_path,
                opencl_parameter_file=model_opencl_parameter_path,
                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                link_dynamic=link_dynamic,
                quantize_stat=flags.quantize_stat,
            )
            if flags.validate:
                model_file_path, weight_file_path = get_model_files(
                    model_config[YAMLKeyword.model_file_path],
                    model_config[YAMLKeyword.model_sha256_checksum],
                    BUILD_DOWNLOADS_DIR,
                    model_config[YAMLKeyword.weight_file_path],
                    model_config[YAMLKeyword.weight_sha256_checksum])
                # Quantized models use a separate validation threshold
                # keyed by '<DEVICE>_QUANTIZE'.
                validate_type = device_type
                if model_config[YAMLKeyword.quantize] == 1:
                    validate_type = device_type + '_QUANTIZE'
                sh_commands.validate_model(
                    abi=target_abi,
                    device=self,
                    model_file_path=model_file_path,
                    weight_file_path=weight_file_path,
                    platform=model_config[YAMLKeyword.platform],
                    device_type=device_type,
                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                    output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                    output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                    model_output_dir=model_output_dir,
                    input_data_types=subgraphs[0][
                        YAMLKeyword.input_data_types],
                    caffe_env=flags.caffe_env,
                    validation_threshold=subgraphs[0][
                        YAMLKeyword.validation_threshold][validate_type],
                    backend=subgraphs[0][YAMLKeyword.backend])
            if flags.report and flags.round > 0:
                tuned = is_tuned and device_type == DeviceType.GPU
                self.report_run_statistics(target_abi=target_abi,
                                           model_name=model_name,
                                           device_type=device_type,
                                           output_dir=flags.report_dir,
                                           tuned=tuned)

    if model_output_dirs:
        opencl_output_bin_path = get_opencl_binary_output_path(
            library_name, target_abi, self)
        opencl_parameter_bin_path = get_opencl_parameter_output_path(
            library_name, target_abi, self)
        # Clear previous merged OpenCL artifacts before regenerating.
        if os.path.exists(opencl_output_bin_path):
            sh.rm('-rf', opencl_output_bin_path)
        if os.path.exists(opencl_parameter_bin_path):
            sh.rm('-rf', opencl_parameter_bin_path)
        # Merge all models' opencl binaries together.
        sh_commands.merge_opencl_binaries(model_output_dirs,
                                          CL_COMPILED_BINARY_FILE_NAME,
                                          opencl_output_bin_path)
        # Merge all models' opencl parameters together.
        sh_commands.merge_opencl_parameters(model_output_dirs,
                                            CL_TUNED_PARAMETER_FILE_NAME,
                                            opencl_parameter_bin_path)
def bm_specific_target(self, flags, configs, target_abi):
    """Benchmark every converted model in *configs* on this device.

    For each model: clears the device data dir (non-host), generates
    random input, then runs the benchmark binary once per concrete
    runtime (CPU and/or GPU).

    :param flags: parsed command-line flags (mace_lib_type, thread and
        GPU hint options).
    :param configs: deployment configuration dict parsed from YAML.
    :param target_abi: ABI string to benchmark for.
    """
    library_name = configs[YAMLKeyword.library_name]
    embed_model_data = \
        configs[YAMLKeyword.model_data_format] == ModelFormat.code
    opencl_output_bin_path = ''
    opencl_parameter_path = ''
    link_dynamic = flags.mace_lib_type == MACELibType.dynamic
    if link_dynamic:
        bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
    else:
        bm_model_binary_name = BM_MODEL_STATIC_NAME
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

    # On a real soc device, reuse the merged OpenCL binaries/parameters
    # produced by a prior tuned run.
    if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
        opencl_output_bin_path = get_opencl_binary_output_path(
            library_name, target_abi, self)
        opencl_parameter_path = get_opencl_parameter_output_path(
            library_name, target_abi, self)

    for model_name in configs[YAMLKeyword.models]:
        check_model_converted(library_name, model_name,
                              configs[YAMLKeyword.model_graph_format],
                              configs[YAMLKeyword.model_data_format],
                              target_abi)
        MaceLogger.header(
            StringFormatter.block('Benchmark model %s on %s'
                                  % (model_name, self.device_name)))
        model_config = configs[YAMLKeyword.models][model_name]
        model_runtime = model_config[YAMLKeyword.runtime]
        subgraphs = model_config[YAMLKeyword.subgraphs]

        model_output_base_dir, model_output_dir, mace_model_dir = \
            get_build_model_dirs(library_name, model_name,
                                 target_abi, self,
                                 model_config[YAMLKeyword.model_file_path])
        # Start from a clean per-model output directory.
        if os.path.exists(model_output_dir):
            sh.rm('-rf', model_output_dir)
        os.makedirs(model_output_dir)

        if target_abi != ABIType.host:
            self.clear_data_dir()
        sh_commands.gen_random_input(
            model_output_dir,
            subgraphs[0][YAMLKeyword.input_tensors],
            subgraphs[0][YAMLKeyword.input_shapes],
            subgraphs[0][YAMLKeyword.validation_inputs_data],
            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
            input_data_types=subgraphs[0][YAMLKeyword.input_data_types])

        # Expand the configured runtime into concrete runtimes.
        runtime_list = []
        if target_abi == ABIType.host:
            runtime_list.append(RuntimeType.cpu)
        elif model_runtime == RuntimeType.cpu_gpu:
            # FIX: the original extended with RuntimeType.cpu_gpu, which
            # is the combined selector rather than a concrete runtime;
            # run_specify_abi correctly expands cpu_gpu to [cpu, gpu],
            # so benchmark now does the same.
            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
        else:
            runtime_list.append(model_runtime)

        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
            self.benchmark_model(
                abi=target_abi,
                benchmark_binary_dir=build_tmp_binary_dir,
                benchmark_binary_name=bm_model_binary_name,
                vlog_level=0,
                embed_model_data=embed_model_data,
                model_output_dir=model_output_dir,
                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                mace_model_dir=mace_model_dir,
                model_tag=model_name,
                device_type=device_type,
                model_graph_format=configs[YAMLKeyword.model_graph_format],
                omp_num_threads=flags.omp_num_threads,
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                opencl_binary_file=opencl_output_bin_path,
                opencl_parameter_file=opencl_parameter_path,
                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                link_dynamic=link_dynamic)
def tuning(self, library_name, model_name, model_config,
           model_graph_format, model_data_format, target_abi,
           mace_lib_type):
    """Tune a model's OpenCL kernels on this device.

    Runs mace_run once in tuning mode (zero benchmark rounds, one
    restart) with freshly generated random inputs, then pulls the
    compiled OpenCL binary and the tuned parameter file from the device
    into the model's output directory.

    :param library_name: name of the deployment library.
    :param model_name: model being tuned.
    :param model_config: this model's config dict from the YAML.
    :param model_graph_format: graph format (file or code).
    :param model_data_format: data format (file or code).
    :param target_abi: ABI to tune for.
    :param mace_lib_type: static or dynamic MACE library.
    """
    six.print_('* Tuning, it may take some time')

    # Choose the runner binary and link mode from the library type.
    use_dynamic_lib = mace_lib_type == MACELibType.dynamic
    run_target = \
        MACE_RUN_DYNAMIC_NAME if use_dynamic_lib else MACE_RUN_STATIC_NAME
    embed_data = model_data_format == ModelFormat.code
    tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

    base_dir, output_dir, mace_model_dir = get_build_model_dirs(
        library_name, model_name, target_abi, self,
        model_config[YAMLKeyword.model_file_path])

    self.clear_data_dir()

    # Feed the tuning run with random input tensors.
    first_graph = model_config[YAMLKeyword.subgraphs][0]
    sh_commands.gen_random_input(
        output_dir,
        first_graph[YAMLKeyword.input_tensors],
        first_graph[YAMLKeyword.input_shapes],
        first_graph[YAMLKeyword.validation_inputs_data],
        input_ranges=first_graph[YAMLKeyword.input_ranges],
        input_data_types=first_graph[YAMLKeyword.input_data_types])

    # GPU tuning run: no benchmark rounds, a single restart, range
    # checking off, tuning on.
    self.tuning_run(
        abi=target_abi,
        target_dir=tmp_binary_dir,
        target_name=run_target,
        vlog_level=0,
        embed_model_data=embed_data,
        model_output_dir=output_dir,
        input_nodes=first_graph[YAMLKeyword.input_tensors],
        output_nodes=first_graph[YAMLKeyword.output_tensors],
        input_shapes=first_graph[YAMLKeyword.input_shapes],
        output_shapes=first_graph[YAMLKeyword.output_shapes],
        mace_model_dir=mace_model_dir,
        model_tag=model_name,
        device_type=DeviceType.GPU,
        running_round=0,
        restart_round=1,
        limit_opencl_kernel_time=model_config[
            YAMLKeyword.limit_opencl_kernel_time],
        tuning=True,
        out_of_range_check=False,
        model_graph_format=model_graph_format,
        opencl_binary_file='',
        opencl_parameter_file='',
        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
        link_dynamic=use_dynamic_lib,
    )

    # Pull the tuning artifacts off the device into one local dir.
    local_opencl_dir = '{}/{}'.format(output_dir, BUILD_TMP_OPENCL_BIN_DIR)
    self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME,
              local_opencl_dir)
    self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME, local_opencl_dir)
    six.print_('Tuning done! \n')