def __init__(self, device_dict): """ init device with device dict info :type device_dict: Device :param device_dict: a key-value dict that holds the device information, which attribute has: device_name, target_abis, target_socs, system, address, username """ self._dana_util = DanaUtil() diff = set(device_dict.keys()) - set(YAMLKeyword.__dict__.keys()) if len(diff) > 0: six.print_('Wrong key detected: ') six.print_(diff) raise KeyError(str(diff)) self.__dict__.update(device_dict) if self.system == SystemType.android: self.data_dir = PHONE_DATA_DIR self.interior_dir = self.data_dir + '/interior' elif self.system == SystemType.arm_linux: try: sh.ssh('-q', '{}@{}'.format(self.username, self.address), 'exit') except sh.ErrorReturnCode as e: six.print_('device connect failed, ' 'please check your authentication') raise e self.data_dir = DEVICE_DATA_DIR self.interior_dir = self.data_dir + '/interior' elif self.system == SystemType.host: self.data_dir = DEVICE_DATA_DIR self.interior_dir = self.data_dir + '/interior'
def main(unused_args): target = FLAGS.target host_bin_path, bin_name = sh_commands.bazel_target_to_bin(target) target_abis = FLAGS.target_abis.split(',') dana_util = DanaUtil() for target_abi in target_abis: toolchain = infer_toolchain(target_abi) sh_commands.bazel_build(target, abi=target_abi, toolchain=toolchain, enable_neon=FLAGS.enable_neon, enable_quantize=FLAGS.enable_quantize, enable_bfloat16=FLAGS.enable_bfloat16, enable_fp16=FLAGS.enable_fp16, enable_rpcmem=FLAGS.enable_rpcmem, enable_hta=FLAGS.enable_hta, address_sanitizer=FLAGS.address_sanitizer, debug_mode=FLAGS.debug_mode) if FLAGS.run_target: target_devices = DeviceManager.list_devices(FLAGS.device_yml) if FLAGS.target_socs != TargetSOCTag.all and \ FLAGS.target_socs != TargetSOCTag.random: target_socs = set(FLAGS.target_socs.split(',')) target_devices = \ [dev for dev in target_devices if dev[YAMLKeyword.target_socs] in target_socs] if FLAGS.target_socs == TargetSOCTag.random: target_devices = sh_commands.choose_a_random_device( target_devices, target_abi) for dev in target_devices: if target_abi not in dev[YAMLKeyword.target_abis]: print("Skip device %s which does not support ABI %s" % (dev, target_abi)) continue device_wrapper = DeviceWrapper(dev) stdouts = device_wrapper.run( target_abi, host_bin_path, bin_name, args=FLAGS.args, opencl_profiling=True, vlog_level=FLAGS.vlog_level, out_of_range_check=True, address_sanitizer=FLAGS.address_sanitizer, simpleperf=FLAGS.simpleperf) globals()[FLAGS.stdout_processor](stdouts, dev, target_abi) if dana_util.service_available(): report_run_statistics(stdouts=stdouts, device=dev['device_name'], soc=dev['target_socs'], abi=target_abi, dana_util=dana_util)
class DeviceWrapper: allow_scheme = ('ssh', 'adb') def __init__(self, device_dict): """ init device with device dict info :type device_dict: Device :param device_dict: a key-value dict that holds the device information, which attribute has: device_name, target_abis, target_socs, system, address, username """ self._dana_util = DanaUtil() diff = set(device_dict.keys()) - set(YAMLKeyword.__dict__.keys()) if len(diff) > 0: six.print_('Wrong key detected: ') six.print_(diff) raise KeyError(str(diff)) self.__dict__.update(device_dict) if self.system == SystemType.android: self.data_dir = PHONE_DATA_DIR self.interior_dir = self.data_dir + '/interior' elif self.system == SystemType.arm_linux: try: sh.ssh('-q', '{}@{}'.format(self.username, self.address), 'exit') except sh.ErrorReturnCode as e: six.print_('device connect failed, ' 'please check your authentication') raise e self.data_dir = DEVICE_DATA_DIR self.interior_dir = self.data_dir + '/interior' elif self.system == SystemType.host: self.data_dir = DEVICE_DATA_DIR self.interior_dir = self.data_dir + '/interior' ################## # internal use # ################## def exec_command(self, command, *args, **kwargs): if self.system == SystemType.android: sh.adb('-s', self.address, 'shell', command, *args, **kwargs) elif self.system == SystemType.arm_linux: sh.ssh('{}@{}'.format(self.username, self.address), command, *args, **kwargs) ##################### # public interface # ##################### def is_lock(self): return sh_commands.is_device_locked(self.address) def lock(self): return sh_commands.device_lock(self.address) def clear_data_dir(self): if self.system == SystemType.android: sh_commands.clear_phone_data_dir(self.address, PHONE_DATA_DIR) elif self.system == SystemType.arm_linux: self.exec_command('rm -rf {}'.format(self.data_dir)) def pull_from_data_dir(self, filename, dst_path): if self.system == SystemType.android: self.pull(PHONE_DATA_DIR, filename, dst_path) elif self.system == SystemType.arm_linux: self.pull(DEVICE_DATA_DIR, filename, dst_path) def create_internal_storage_dir(self): internal_storage_dir = '{}/interior/'.format(self.data_dir) if self.system == SystemType.android: sh_commands.create_internal_storage_dir(self.address, internal_storage_dir) elif self.system == SystemType.arm_linux: self.exec_command('mkdir -p {}'.format(internal_storage_dir)) return internal_storage_dir def rm(self, file): if self.system == SystemType.android: sh.adb('-s', self.address, 'shell', 'rm', '-rf', file, _fg=True) elif self.system == SystemType.arm_linux: self.exec_command('rm -rf {}'.format(file), _fg=True) def push(self, src_path, dst_path): mace_check(os.path.exists(src_path), "Device", '{} not found'.format(src_path)) six.print_("Push %s to %s" % (src_path, dst_path)) if self.system == SystemType.android: sh_commands.adb_push(src_path, dst_path, self.address) elif self.system == SystemType.arm_linux: try: sh.scp(src_path, '{}@{}:{}'.format(self.username, self.address, dst_path)) except sh.ErrorReturnCode_1 as e: six.print_('Push Failed !', e, file=sys.stderr) raise e def pull(self, src_path, file_name, dst_path='.'): if not os.path.exists(dst_path): sh.mkdir("-p", dst_path) src_file = "%s/%s" % (src_path, file_name) dst_file = "%s/%s" % (dst_path, file_name) if os.path.exists(dst_file): sh.rm('-f', dst_file) six.print_("Pull %s to %s" % (src_file, dst_path)) if self.system == SystemType.android: sh_commands.adb_pull( src_file, dst_file, self.address) elif self.system == SystemType.arm_linux: try: sh.scp('-r', '%s@%s:%s' % (self.username, self.address, src_file), dst_file) except sh.ErrorReturnCode_1 as e: six.print_("Pull Failed !", file=sys.stderr) raise e def tuning_run(self, abi, target_dir, target_name, vlog_level, embed_model_data, model_output_dir, input_nodes, output_nodes, input_shapes, input_data_formats, output_shapes, output_data_formats, mace_model_dir, model_tag, device_type, running_round, restart_round, limit_opencl_kernel_time, opencl_queue_window_size, tuning, out_of_range_check, model_graph_format, opencl_binary_file, opencl_parameter_file, libmace_dynamic_library_path, num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, input_file_name='model_input', output_file_name='model_out', input_dir="", output_dir="", runtime_failure_ratio=0.0, address_sanitizer=False, link_dynamic=False, quantize_stat=False, layers_validate_file="", benchmark=False, ): six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, " "out_of_range_check=%s, num_threads=%s, " "cpu_affinity_policy=%s, gpu_perf_hint=%s, " "gpu_priority_hint=%s" % (model_tag, running_round, restart_round, str(tuning), str(out_of_range_check), num_threads, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint)) mace_model_path = "" if model_graph_format == ModelFormat.file: mace_model_path = layers_validate_file if layers_validate_file \ else "%s/%s.pb" % (mace_model_dir, model_tag) model_data_file = "" if not embed_model_data: if self.system == SystemType.host: model_data_file = "%s/%s.data" % (mace_model_dir, model_tag) else: model_data_file = "%s/%s.data" % (self.data_dir, model_tag) if self.system == SystemType.host: libmace_dynamic_lib_path = \ os.path.dirname(libmace_dynamic_library_path) p = subprocess.Popen( [ "env", "ASAN_OPTIONS=detect_leaks=1", "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path, "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0), "%s/%s" % (target_dir, target_name), "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), "--output_shape=%s" % ":".join(output_shapes), "--input_data_format=%s" % ",".join(input_data_formats), "--output_data_format=%s" % ",".join(output_data_formats), "--input_file=%s/%s" % (model_output_dir, input_file_name), "--output_file=%s/%s" % (model_output_dir, output_file_name), "--input_dir=%s" % input_dir, "--output_dir=%s" % output_dir, "--model_data_file=%s" % model_data_file, "--device=%s" % device_type, "--round=%s" % running_round, "--restart_round=%s" % restart_round, "--num_threads=%s" % num_threads, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, "--model_file=%s" % mace_model_path, ], stderr=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True) out, err = p.communicate() self.stdout = err + out six.print_(self.stdout) six.print_("Running finished!\n") elif self.system in [SystemType.android, SystemType.arm_linux]: self.rm(self.data_dir) self.exec_command('mkdir -p {}'.format(self.data_dir)) internal_storage_dir = self.create_internal_storage_dir() for input_name in input_nodes: formatted_name = common.formatted_file_name(input_file_name, input_name) self.push("%s/%s" % (model_output_dir, formatted_name), self.data_dir) if self.system == SystemType.android and address_sanitizer: self.push(sh_commands.find_asan_rt_library(abi), self.data_dir) if not embed_model_data: model_data_path = "%s/%s.data" % (mace_model_dir, model_tag) mace_check(os.path.exists(model_data_path), "Device", 'model data file not found,' ' please convert model first') self.push(model_data_path, self.data_dir) if device_type == common.DeviceType.GPU: if os.path.exists(opencl_binary_file): self.push(opencl_binary_file, self.interior_dir) if os.path.exists(opencl_parameter_file): self.push(opencl_parameter_file, self.data_dir) if self.system == SystemType.android \ and device_type == common.DeviceType.HEXAGON: self.push( "third_party/nnlib/%s/libhexagon_controller.so" % abi, self.data_dir) if device_type == common.DeviceType.APU: self.push("third_party/apu/libapu-frontend.so", self.data_dir) mace_model_phone_path = "" if model_graph_format == ModelFormat.file: mace_model_phone_path = "%s/%s.pb" % (self.data_dir, model_tag) self.push(mace_model_path, mace_model_phone_path) if link_dynamic: self.push(libmace_dynamic_library_path, self.data_dir) if self.system == SystemType.android: sh_commands.push_depended_so_libs( libmace_dynamic_library_path, abi, self.data_dir, self.address) self.push("%s/%s" % (target_dir, target_name), self.data_dir) stdout_buff = [] process_output = sh_commands.make_output_processor(stdout_buff) cmd = [ "LD_LIBRARY_PATH=%s" % self.data_dir, "MACE_TUNING=%s" % int(tuning), "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check), "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir, "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir, "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, "MACE_OPENCL_QUEUE_WINDOW_SIZE=%s" % opencl_queue_window_size, "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0), ] if self.system == SystemType.android and address_sanitizer: cmd.extend([ "LD_PRELOAD=%s/%s" % (self.data_dir, sh_commands.asan_rt_library_names(abi)) ]) cmd.extend([ "%s/%s" % (self.data_dir, target_name), "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), "--output_shape=%s" % ":".join(output_shapes), "--input_data_format=%s" % ",".join(input_data_formats), "--output_data_format=%s" % ",".join(output_data_formats), "--input_file=%s/%s" % (self.data_dir, input_file_name), "--output_file=%s/%s" % (self.data_dir, output_file_name), "--input_dir=%s" % input_dir, "--output_dir=%s" % output_dir, "--model_data_file=%s" % model_data_file, "--device=%s" % device_type, "--round=%s" % running_round, "--restart_round=%s" % restart_round, "--num_threads=%s" % num_threads, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, "--model_file=%s" % mace_model_phone_path, "--opencl_binary_file=%s/%s" % (self.data_dir, os.path.basename(opencl_binary_file)), "--opencl_parameter_file=%s/%s" % (self.data_dir, os.path.basename(opencl_parameter_file)), ]) if benchmark: cmd.append("--benchmark=%s" % benchmark) cmd = ' '.join(cmd) cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time())) cmd_file = "%s/%s" % (self.data_dir, cmd_file_name) tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name) with open(tmp_cmd_file, 'w') as file: file.write(cmd) self.push(tmp_cmd_file, cmd_file) os.remove(tmp_cmd_file) self.exec_command('sh {}'.format(cmd_file), _tty_in=True, _out=process_output, _err_to_out=True) self.stdout = "".join(stdout_buff) if not sh_commands.stdout_success(self.stdout): common.MaceLogger.error("Mace Run", "Mace run failed.") six.print_("Running finished!\n") else: six.print_('Unsupported system %s' % self.system, file=sys.stderr) raise Exception('Wrong device') return self.stdout def tuning(self, library_name, model_name, model_config, model_graph_format, model_data_format, target_abi, mace_lib_type): six.print_('* Tuning, it may take some time') build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) mace_run_name = MACE_RUN_STATIC_NAME link_dynamic = False if mace_lib_type == MACELibType.dynamic: mace_run_name = MACE_RUN_DYNAMIC_NAME link_dynamic = True embed_model_data = model_data_format == ModelFormat.code # build for specified soc # device_wrapper = DeviceWrapper(device) model_output_base_dir, model_output_dir, mace_model_dir = \ get_build_model_dirs( library_name, model_name, target_abi, self, model_config[YAMLKeyword.model_file_path]) self.clear_data_dir() subgraphs = model_config[YAMLKeyword.subgraphs] # generate input data sh_commands.gen_input( model_output_dir, subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], subgraphs[0][YAMLKeyword.validation_inputs_data], input_ranges=subgraphs[0][YAMLKeyword.input_ranges], input_data_types=subgraphs[0][YAMLKeyword.input_data_types] ) self.tuning_run( abi=target_abi, target_dir=build_tmp_binary_dir, target_name=mace_run_name, vlog_level=0, embed_model_data=embed_model_data, model_output_dir=model_output_dir, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], output_nodes=subgraphs[0][YAMLKeyword.output_tensors], input_shapes=subgraphs[0][YAMLKeyword.input_shapes], output_shapes=subgraphs[0][YAMLKeyword.output_shapes], input_data_formats=subgraphs[0][YAMLKeyword.input_data_formats], output_data_formats=subgraphs[0][YAMLKeyword.output_data_formats], mace_model_dir=mace_model_dir, model_tag=model_name, device_type=DeviceType.GPU, running_round=0, restart_round=1, limit_opencl_kernel_time=model_config[ YAMLKeyword.limit_opencl_kernel_time], opencl_queue_window_size=model_config[ YAMLKeyword.opencl_queue_window_size], tuning=True, out_of_range_check=False, model_graph_format=model_graph_format, opencl_binary_file='', opencl_parameter_file='', libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, link_dynamic=link_dynamic, ) # pull opencl library self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME, '{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR)) # pull opencl parameter self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME, '{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR)) six.print_('Tuning done! \n') @staticmethod def get_layers(model_dir, model_name, layers): model_file = "%s/%s.pb" % (model_dir, model_name) output_dir = "%s/output_models/" % model_dir if os.path.exists(output_dir): sh.rm('-rf', output_dir) os.makedirs(output_dir) layers_validate.convert(model_file, output_dir, layers) output_configs_path = output_dir + "outputs.yml" with open(output_configs_path) as f: output_configs = yaml.load(f) output_configs = output_configs[YAMLKeyword.subgraphs] return output_configs def run_model(self, flags, configs, target_abi, model_name, output_config, runtime, tuning): library_name = configs[YAMLKeyword.library_name] embed_model_data = \ configs[YAMLKeyword.model_data_format] == ModelFormat.code build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) # get target name for run mace_lib_type = flags.mace_lib_type if mace_lib_type == MACELibType.static: target_name = MACE_RUN_STATIC_NAME else: target_name = MACE_RUN_DYNAMIC_NAME link_dynamic = mace_lib_type == MACELibType.dynamic if target_abi != ABIType.host: self.clear_data_dir() model_config = configs[YAMLKeyword.models][model_name] subgraphs = model_config[YAMLKeyword.subgraphs] model_output_base_dir, model_output_dir, mace_model_dir = \ get_build_model_dirs( library_name, model_name, target_abi, self, model_config[YAMLKeyword.model_file_path]) model_opencl_output_bin_path = '' model_opencl_parameter_path = '' if tuning: model_opencl_output_bin_path = \ '{}/{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR, CL_COMPILED_BINARY_FILE_NAME) model_opencl_parameter_path = \ '{}/{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR, CL_TUNED_PARAMETER_FILE_NAME) elif target_abi != ABIType.host and self.target_socs: model_opencl_output_bin_path = get_opencl_binary_output_path( library_name, target_abi, self ) model_opencl_parameter_path = get_opencl_parameter_output_path( library_name, target_abi, self ) # run for specified soc device_type = parse_device_type(runtime) self.tuning_run( abi=target_abi, target_dir=build_tmp_binary_dir, target_name=target_name, vlog_level=flags.vlog_level, embed_model_data=embed_model_data, model_output_dir=model_output_dir, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], output_nodes=output_config[ YAMLKeyword.output_tensors], input_shapes=subgraphs[0][YAMLKeyword.input_shapes], output_shapes=output_config[YAMLKeyword.output_shapes], input_data_formats=subgraphs[0][ YAMLKeyword.input_data_formats], output_data_formats=subgraphs[0][ YAMLKeyword.output_data_formats], mace_model_dir=mace_model_dir, model_tag=model_name, device_type=device_type, running_round=flags.round, restart_round=flags.restart_round, limit_opencl_kernel_time=model_config[ YAMLKeyword.limit_opencl_kernel_time], opencl_queue_window_size=model_config[ YAMLKeyword.opencl_queue_window_size], tuning=False, out_of_range_check=flags.gpu_out_of_range_check, model_graph_format=configs[ YAMLKeyword.model_graph_format], num_threads=flags.num_threads, cpu_affinity_policy=flags.cpu_affinity_policy, gpu_perf_hint=flags.gpu_perf_hint, gpu_priority_hint=flags.gpu_priority_hint, runtime_failure_ratio=flags.runtime_failure_ratio, address_sanitizer=flags.address_sanitizer, opencl_binary_file=model_opencl_output_bin_path, opencl_parameter_file=model_opencl_parameter_path, libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, link_dynamic=link_dynamic, quantize_stat=flags.quantize_stat, input_dir=flags.input_dir, output_dir=flags.output_dir, layers_validate_file=output_config[ YAMLKeyword.model_file_path], benchmark=flags.benchmark, ) def get_output_map(self, target_abi, output_nodes, output_shapes, model_output_dir): output_map = {} for i in range(len(output_nodes)): output_name = output_nodes[i] formatted_name = common.formatted_file_name( "model_out", output_name) if target_abi != "host": if os.path.exists("%s/%s" % (model_output_dir, formatted_name)): sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name)) self.pull_from_data_dir(formatted_name, model_output_dir) output_file_path = os.path.join(model_output_dir, formatted_name) output_shape = [ int(x) for x in common.split_shape(output_shapes[i])] output_map[output_name] = np.fromfile( output_file_path, dtype=np.float32).reshape(output_shape) return output_map def run_specify_abi(self, flags, configs, target_abi): if target_abi not in self.target_abis: six.print_('The device %s with soc %s do not support the abi %s' % (self.device_name, self.target_socs, target_abi)) return library_name = configs[YAMLKeyword.library_name] model_output_dirs = [] for model_name in configs[YAMLKeyword.models]: check_model_converted(library_name, model_name, configs[YAMLKeyword.model_graph_format], configs[YAMLKeyword.model_data_format], target_abi) if target_abi != ABIType.host: self.clear_data_dir() MaceLogger.header( StringFormatter.block( 'Run model {} on {}'.format(model_name, self.device_name))) model_config = configs[YAMLKeyword.models][model_name] model_runtime = model_config[YAMLKeyword.runtime] subgraphs = model_config[YAMLKeyword.subgraphs] model_output_base_dir, model_output_dir, mace_model_dir = \ get_build_model_dirs( library_name, model_name, target_abi, self, model_config[YAMLKeyword.model_file_path]) # clear temp model output dir if os.path.exists(model_output_dir): sh.rm('-rf', model_output_dir) os.makedirs(model_output_dir) tuning = False if not flags.address_sanitizer \ and target_abi != ABIType.host \ and (configs[YAMLKeyword.target_socs] or flags.target_socs) \ and self.target_socs \ and model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu] \ and not flags.disable_tuning: self.tuning(library_name, model_name, model_config, configs[YAMLKeyword.model_graph_format], configs[YAMLKeyword.model_data_format], target_abi, flags.mace_lib_type) model_output_dirs.append(model_output_dir) self.clear_data_dir() tuning = True accuracy_validation_script = \ subgraphs[0][YAMLKeyword.accuracy_validation_script] output_configs = [] if not accuracy_validation_script and flags.layers != "-1": mace_check(configs[YAMLKeyword.model_graph_format] == ModelFormat.file and configs[YAMLKeyword.model_data_format] == ModelFormat.file, "Device", "'--layers' only supports model format 'file'.") output_configs = self.get_layers(mace_model_dir, model_name, flags.layers) # run for specified soc if not subgraphs[0][YAMLKeyword.check_tensors]: output_nodes = subgraphs[0][YAMLKeyword.output_tensors] output_shapes = subgraphs[0][YAMLKeyword.output_shapes] else: output_nodes = subgraphs[0][YAMLKeyword.check_tensors] output_shapes = subgraphs[0][YAMLKeyword.check_shapes] model_path = "%s/%s.pb" % (mace_model_dir, model_name) output_config = {YAMLKeyword.model_file_path: model_path, YAMLKeyword.output_tensors: output_nodes, YAMLKeyword.output_shapes: output_shapes} output_configs.append(output_config) runtime_list = [] if target_abi == ABIType.host: runtime_list.append(RuntimeType.cpu) elif model_runtime == RuntimeType.cpu_gpu: runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) else: runtime_list.append(model_runtime) if accuracy_validation_script: flags.validate = False flags.report = False import imp accuracy_val_module = imp.load_source( 'accuracy_val_module', accuracy_validation_script) for runtime in runtime_list: accuracy_validator = \ accuracy_val_module.AccuracyValidator() sample_size = accuracy_validator.sample_size() val_batch_size = accuracy_validator.batch_size() for i in range(0, sample_size, val_batch_size): inputs = accuracy_validator.preprocess( i, i + val_batch_size) sh_commands.gen_input( model_output_dir, subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], input_data_types=subgraphs[0][YAMLKeyword.input_data_types], # noqa input_data_map=inputs) self.run_model(flags, configs, target_abi, model_name, output_configs[-1], runtime, tuning) accuracy_validator.postprocess( i, i + val_batch_size, self.get_output_map( target_abi, output_nodes, subgraphs[0][YAMLKeyword.output_shapes], model_output_dir)) accuracy_validator.result() else: sh_commands.gen_input( model_output_dir, subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], subgraphs[0][YAMLKeyword.validation_inputs_data], input_ranges=subgraphs[0][YAMLKeyword.input_ranges], input_data_types=subgraphs[0][YAMLKeyword.input_data_types] ) for runtime in runtime_list: device_type = parse_device_type(runtime) log_dir = mace_model_dir + "/" + runtime if os.path.exists(log_dir): sh.rm('-rf', log_dir) os.makedirs(log_dir) for output_config in output_configs: self.run_model(flags, configs, target_abi, model_name, output_config, runtime, tuning) if flags.validate: log_file = "" if flags.layers != "-1": log_file = log_dir + "/log.csv" model_file_path, weight_file_path = \ get_model_files( model_config[YAMLKeyword.model_file_path], model_config[ YAMLKeyword.model_sha256_checksum], BUILD_DOWNLOADS_DIR, model_config[YAMLKeyword.weight_file_path], model_config[ YAMLKeyword.weight_sha256_checksum]) validate_type = device_type if device_type in [DeviceType.CPU, DeviceType.GPU] and \ (model_config[YAMLKeyword.quantize] == 1 or model_config[YAMLKeyword.quantize_large_weights] == 1): # noqa validate_type = DeviceType.QUANTIZE dockerfile_path, docker_image_tag = \ get_dockerfile_info( model_config.get( YAMLKeyword.dockerfile_path), model_config.get( YAMLKeyword.dockerfile_sha256_checksum), # noqa model_config.get( YAMLKeyword.docker_image_tag) ) if YAMLKeyword.dockerfile_path \ in model_config \ else ("third_party/caffe", "lastest") sh_commands.validate_model( abi=target_abi, device=self, model_file_path=model_file_path, weight_file_path=weight_file_path, docker_image_tag=docker_image_tag, dockerfile_path=dockerfile_path, platform=model_config[YAMLKeyword.platform], device_type=device_type, input_nodes=subgraphs[0][ YAMLKeyword.input_tensors], output_nodes=output_config[ YAMLKeyword.output_tensors], input_shapes=subgraphs[0][ YAMLKeyword.input_shapes], output_shapes=output_config[ YAMLKeyword.output_shapes], input_data_formats=subgraphs[0][ YAMLKeyword.input_data_formats], output_data_formats=subgraphs[0][ YAMLKeyword.output_data_formats], model_output_dir=model_output_dir, input_data_types=subgraphs[0][ YAMLKeyword.input_data_types], caffe_env=flags.caffe_env, validation_threshold=subgraphs[0][ YAMLKeyword.validation_threshold][ validate_type], backend=subgraphs[0][YAMLKeyword.backend], validation_outputs_data=subgraphs[0][ YAMLKeyword.validation_outputs_data], log_file=log_file, ) if flags.round > 0: tuned = tuning and device_type == DeviceType.GPU self.report_run_statistics( target_abi=target_abi, model_name=model_name, device_type=device_type, flags=flags, tuned=tuned) if model_output_dirs: opencl_output_bin_path = get_opencl_binary_output_path( library_name, target_abi, self ) opencl_parameter_bin_path = get_opencl_parameter_output_path( library_name, target_abi, self ) # clear opencl output dir if os.path.exists(opencl_output_bin_path): sh.rm('-rf', opencl_output_bin_path) if os.path.exists(opencl_parameter_bin_path): sh.rm('-rf', opencl_parameter_bin_path) # merge all model's opencl binaries together sh_commands.merge_opencl_binaries( model_output_dirs, CL_COMPILED_BINARY_FILE_NAME, opencl_output_bin_path ) # merge all model's opencl parameter together sh_commands.merge_opencl_parameters( model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME, opencl_parameter_bin_path ) sh_commands.gen_opencl_binary_cpps( opencl_output_bin_path, opencl_parameter_bin_path, opencl_output_bin_path + '.cc', opencl_parameter_bin_path + '.cc') def report_run_statistics(self, target_abi, model_name, device_type, flags, tuned): metrics = [0] * 3 for line in self.stdout.split('\n'): line = line.strip() parts = line.split() if len(parts) == 5 and parts[0].startswith('time'): metrics[0] = str(float(parts[2])) metrics[1] = str(float(parts[3])) metrics[2] = str(float(parts[4])) break if flags.report: report_filename = flags.report_dir + '/report.csv' if not os.path.exists(report_filename): with open(report_filename, 'w') as f: f.write('model_name,device_name,soc,abi,runtime,' 'init(ms),warmup(ms),run_avg(ms),tuned\n') data_str = \ '{model_name},{device_name},{soc},{abi},{device_type},' \ '{init},{warmup},{run_avg},{tuned}\n'.format( model_name=model_name, device_name=self.device_name, soc=self.target_socs, abi=target_abi, device_type=device_type, init=metrics[0], warmup=metrics[1], run_avg=metrics[2], tuned=tuned) with open(report_filename, 'a') as f: f.write(data_str) self.report_to_dana(model_name, target_abi, device_type, { 'init': metrics[0], 'warmup': metrics[1], 'run-avg': metrics[2] }) def report_to_dana(self, model_name, target_abi, device_type, metrics): if not self._dana_util.service_available(): return for (key, value) in metrics.items(): value = float(value) self._dana_util.fast_report_benchmark( table_name="Models", base_name="%s_%s" % (key, model_name), device_name=self.device_name, soc=self.target_socs, target_abi=target_abi, runtime=device_type, value=value, trend=DanaTrend.SMALLER) def run(self, abi, host_bin_path, bin_name, args='', opencl_profiling=True, vlog_level=0, out_of_range_check=True, address_sanitizer=False, simpleperf=False): host_bin_full_path = '%s/%s' % (host_bin_path, bin_name) device_bin_full_path = '%s/%s' % (self.data_dir, bin_name) print( '================================================================' ) print('Trying to lock device %s' % self.address) with self.lock(): print('Run on device: %s, %s, %s' % (self.address, self.target_socs, self.device_name)) self.rm(self.data_dir) self.exec_command('mkdir -p %s' % self.data_dir) self.push(host_bin_full_path, device_bin_full_path) ld_preload = '' if address_sanitizer: self.push(sh_commands.find_asan_rt_library(abi), self.data_dir) ld_preload = 'LD_PRELOAD=%s/%s' % \ (self.data_dir, sh_commands.asan_rt_library_names(abi)) opencl_profiling = 1 if opencl_profiling else 0 out_of_range_check = 1 if out_of_range_check else 0 print('Run %s' % device_bin_full_path) stdout_buf = [] process_output = sh_commands.make_output_processor(stdout_buf) internal_storage_dir = self.create_internal_storage_dir() if simpleperf and self.system == SystemType.android: self.push(sh_commands.find_simpleperf_library(abi), self.data_dir) simpleperf_cmd = '%s/simpleperf' % self.data_dir exec_cmd = [ ld_preload, 'MACE_OUT_OF_RANGE_CHECK=%s' % out_of_range_check, 'MACE_OPENCL_PROFILING=%d' % opencl_profiling, 'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir, 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level, simpleperf_cmd, 'stat', '--group', 'raw-l1-dcache,raw-l1-dcache-refill', '--group', 'raw-l2-dcache,raw-l2-dcache-refill', '--group', 'raw-l1-dtlb,raw-l1-dtlb-refill', '--group', 'raw-l2-dtlb,raw-l2-dtlb-refill', device_bin_full_path, args, ] else: exec_cmd = [ ld_preload, 'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check, 'MACE_OPENCL_PROFILNG=%d' % opencl_profiling, 'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir, 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level, device_bin_full_path, args ] exec_cmd = ' '.join(exec_cmd) self.exec_command(exec_cmd, _tty_in=True, _out=process_output, _err_to_out=True) return ''.join(stdout_buf)