예제 #1
0
파일: device.py 프로젝트: lbqin/mace
    def run(self,
            abi,
            host_bin_path,
            bin_name,
            args='',
            opencl_profiling=True,
            vlog_level=0,
            out_of_range_check=True,
            address_sanitizer=False,
            simpleperf=False):
        """Push a binary to the device, execute it and return its output.

        While holding ``self.lock()`` the device data directory is removed
        and recreated, the binary is pushed into it, and the binary is
        started with the MACE_* settings passed as inline environment
        assignments on the command line.

        Args:
            abi: ABI name handed to the ``sh_commands`` helpers that locate
                the ASan runtime library and the simpleperf binary.
            host_bin_path: Host directory that contains the binary.
            bin_name: File name of the binary; the same name is used inside
                ``self.data_dir`` on the device.
            args: Extra command-line arguments, appended verbatim.
            opencl_profiling: Truthy exports MACE_OPENCL_PROFILING=1, else 0.
            vlog_level: Integer exported as MACE_CPP_MIN_VLOG_LEVEL.
            out_of_range_check: Truthy exports MACE_OUT_OF_RANGE_CHECK=1,
                else 0.
            address_sanitizer: When true, push the ASan runtime library and
                preload it via LD_PRELOAD.
            simpleperf: When true and ``self.system`` is Android, run the
                binary under ``simpleperf stat`` with the cache/TLB counter
                groups listed below.

        Returns:
            The contents of the output buffer joined into a single string
            (stderr is redirected into stdout for the executed command).
        """
        host_bin_full_path = '%s/%s' % (host_bin_path, bin_name)
        device_bin_full_path = '%s/%s' % (self.data_dir, bin_name)
        print(
            '================================================================')
        print('Trying to lock device %s' % self.address)
        with self.lock():
            print('Run on device: %s, %s, %s' %
                  (self.address, self.target_socs, self.device_name))
            # Start from a clean data directory on the device.
            self.rm(self.data_dir)
            self.exec_command('mkdir -p %s' % self.data_dir)
            self.push(host_bin_full_path, device_bin_full_path)

            ld_preload = ''
            if address_sanitizer:
                self.push(sh_commands.find_asan_rt_library(abi), self.data_dir)
                ld_preload = 'LD_PRELOAD=%s/%s' % \
                             (self.data_dir,
                              sh_commands.asan_rt_library_names(abi))

            # Normalise the boolean switches to the 0/1 the binary reads.
            opencl_profiling = 1 if opencl_profiling else 0
            out_of_range_check = 1 if out_of_range_check else 0
            print('Run %s' % device_bin_full_path)
            stdout_buf = []
            process_output = sh_commands.make_output_processor(stdout_buf)

            # Optional profiler wrapper placed between the environment
            # assignments and the binary itself.
            profiler_prefix = []
            if simpleperf and self.system == SystemType.android:
                self.push(sh_commands.find_simpleperf_library(abi),
                          self.data_dir)
                profiler_prefix = [
                    '%s/simpleperf' % self.data_dir,
                    'stat',
                    '--group',
                    'raw-l1-dcache,raw-l1-dcache-refill',
                    '--group',
                    'raw-l2-dcache,raw-l2-dcache-refill',
                    '--group',
                    'raw-l1-dtlb,raw-l1-dtlb-refill',
                    '--group',
                    'raw-l2-dtlb,raw-l2-dtlb-refill',
                ]

            # The environment prefix is built once for both launch modes so
            # the variable names cannot drift apart again (the plain launch
            # previously exported the misspelled MACE_OPENCL_PROFILNG, so
            # the profiling switch never reached the binary).
            # ld_preload may be '', which only yields a leading space.
            env_prefix = [
                ld_preload,
                'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check,
                'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
                'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
            ]
            exec_cmd = ' '.join(
                env_prefix + profiler_prefix + [device_bin_full_path, args])
            self.exec_command(exec_cmd,
                              _tty_in=True,
                              _out=process_output,
                              _err_to_out=True)
            return ''.join(stdout_buf)
예제 #2
0
파일: device.py 프로젝트: lbqin/mace
    def tuning_run(
        self,
        abi,
        target_dir,
        target_name,
        vlog_level,
        embed_model_data,
        model_output_dir,
        input_nodes,
        output_nodes,
        input_shapes,
        output_shapes,
        mace_model_dir,
        model_tag,
        device_type,
        running_round,
        restart_round,
        limit_opencl_kernel_time,
        tuning,
        out_of_range_check,
        model_graph_format,
        opencl_binary_file,
        opencl_parameter_file,
        libmace_dynamic_library_path,
        omp_num_threads=-1,
        cpu_affinity_policy=1,
        gpu_perf_hint=3,
        gpu_priority_hint=3,
        input_file_name='model_input',
        output_file_name='model_out',
        runtime_failure_ratio=0.0,
        address_sanitizer=False,
        link_dynamic=False,
        quantize_stat=False,
    ):
        """Run the model runner binary on the host or on a device.

        On ``SystemType.host`` the binary ``target_dir/target_name`` is
        started directly through ``subprocess``.  On Android / ARM Linux the
        inputs, model files and the binary are pushed into ``self.data_dir``,
        the full command line is written to a script file that is pushed to
        the device as well, and that script is executed with ``sh``.

        Args:
            abi: ABI name used to locate the ASan runtime and, for dynamic
                linking on Android, the dependent shared libraries.
            target_dir: Host directory containing the runner binary.
            target_name: File name of the runner binary.
            vlog_level: Value exported as MACE_CPP_MIN_VLOG_LEVEL.
            embed_model_data: When false, ``<model_tag>.data`` must exist in
                ``mace_model_dir`` and is pushed to the device.
            model_output_dir: Host directory holding the input files (and,
                on host, receiving the output files).
            input_nodes: Input node names; joined with ',' for --input_node.
            output_nodes: Output node names; joined with ',' for
                --output_node.
            input_shapes: Shape strings; joined with ':' for --input_shape.
            output_shapes: Shape strings; joined with ':' for --output_shape.
            mace_model_dir: Host directory with ``<model_tag>.pb`` / ``.data``.
            model_tag: Model name; also the stem of the model file names.
            device_type: Value for --device; compared against
                ``common.DeviceType.GPU`` to decide on pushing OpenCL files.
            running_round: Value for --round.
            restart_round: Value for --restart_round.
            limit_opencl_kernel_time: Exported as
                MACE_LIMIT_OPENCL_KERNEL_TIME (device run only).
            tuning: Converted with ``int()`` and exported as MACE_TUNING
                (device run only).
            out_of_range_check: Converted with ``int()`` and exported as
                MACE_OUT_OF_RANGE_CHECK (device run only).
            model_graph_format: When equal to ``ModelFormat.file`` the
                ``.pb`` graph file is passed (and pushed) via --model_file.
            opencl_binary_file: Host path of the OpenCL binary; pushed for
                GPU runs if it exists.
            opencl_parameter_file: Host path of the OpenCL parameter file;
                pushed for GPU runs if it exists.
            libmace_dynamic_library_path: Host path of the dynamic library;
                its directory becomes LD_LIBRARY_PATH on host, and the file
                is pushed to the device when ``link_dynamic`` is true.
            omp_num_threads: Value for --omp_num_threads.
            cpu_affinity_policy: Value for --cpu_affinity_policy.
            gpu_perf_hint: Value for --gpu_perf_hint.
            gpu_priority_hint: Value for --gpu_priority_hint.
            input_file_name: Base name of the input files.
            output_file_name: Base name of the output files.
            runtime_failure_ratio: Exported as MACE_RUNTIME_FAILURE_RATIO.
            address_sanitizer: On Android, push the ASan runtime and preload
                it through LD_PRELOAD.
            link_dynamic: Push the dynamic library (and, on Android, the
                libraries it depends on) to the device.
            quantize_stat: Truthy exports MACE_LOG_TENSOR_RANGE=1, else 0.

        Returns:
            ``self.stdout``: on host, stderr followed by stdout as returned
            by ``Popen.communicate()``; on a device, the joined contents of
            the output buffer.

        Raises:
            Exception: if ``self.system`` is not host, Android or ARM Linux.
        """
        six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
                   "out_of_range_check=%s, omp_num_threads=%s, "
                   "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
                   "gpu_priority_hint=%s" %
                   (model_tag, running_round, restart_round, str(tuning),
                    str(out_of_range_check), omp_num_threads,
                    cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
        # Stays empty unless the graph is shipped as a separate .pb file.
        mace_model_path = ""
        if model_graph_format == ModelFormat.file:
            mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
        if self.system == SystemType.host:
            libmace_dynamic_lib_path = \
                os.path.dirname(libmace_dynamic_library_path)
            # `env` applies the VAR=value entries before starting the binary.
            p = subprocess.Popen([
                "env",
                "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
                "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
                "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0),
                "%s/%s" % (target_dir, target_name),
                "--model_name=%s" % model_tag,
                "--input_node=%s" % ",".join(input_nodes),
                "--output_node=%s" % ",".join(output_nodes),
                "--input_shape=%s" % ":".join(input_shapes),
                "--output_shape=%s" % ":".join(output_shapes),
                "--input_file=%s/%s" % (model_output_dir, input_file_name),
                "--output_file=%s/%s" % (model_output_dir, output_file_name),
                "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
                "--device=%s" % device_type,
                "--round=%s" % running_round,
                "--restart_round=%s" % restart_round,
                "--omp_num_threads=%s" % omp_num_threads,
                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
                "--gpu_perf_hint=%s" % gpu_perf_hint,
                "--gpu_priority_hint=%s" % gpu_priority_hint,
                "--model_file=%s" % mace_model_path,
            ],
                                 stderr=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
            out, err = p.communicate()
            # NOTE(review): no text mode is requested, so under Python 3
            # this is bytes while the device branch stores str - confirm
            # what callers expect before normalising.
            self.stdout = err + out
            six.print_(self.stdout)
            six.print_("Running finished!\n")
        elif self.system in [SystemType.android, SystemType.arm_linux]:
            # Start from a clean data directory on the device.
            self.rm(self.data_dir)
            self.exec_command('mkdir -p {}'.format(self.data_dir))
            internal_storage_dir = self.create_internal_storage_dir()

            # One input file per input node.
            for input_name in input_nodes:
                formatted_name = common.formatted_file_name(
                    input_file_name, input_name)
                self.push("%s/%s" % (model_output_dir, formatted_name),
                          self.data_dir)
            if self.system == SystemType.android and address_sanitizer:
                self.push(sh_commands.find_asan_rt_library(abi), self.data_dir)

            if not embed_model_data:
                model_data_path = "%s/%s.data" % (mace_model_dir, model_tag)
                mace_check(
                    os.path.exists(model_data_path), "Device",
                    'model data file not found,'
                    ' please convert model first')
                self.push(model_data_path, self.data_dir)

            if device_type == common.DeviceType.GPU:
                if os.path.exists(opencl_binary_file):
                    self.push(opencl_binary_file, self.data_dir)
                if os.path.exists(opencl_parameter_file):
                    self.push(opencl_parameter_file, self.data_dir)

            # Pushed unconditionally, whatever device_type is requested.
            self.push("third_party/nnlib/libhexagon_controller.so",
                      self.data_dir)

            mace_model_phone_path = ""
            if model_graph_format == ModelFormat.file:
                mace_model_phone_path = "%s/%s.pb" % (self.data_dir, model_tag)
                self.push(mace_model_path, mace_model_phone_path)
            if link_dynamic:
                self.push(libmace_dynamic_library_path, self.data_dir)
                if self.system == SystemType.android:
                    sh_commands.push_depended_so_libs(
                        libmace_dynamic_library_path, abi, self.data_dir,
                        self.address)
            self.push("%s/%s" % (target_dir, target_name), self.data_dir)

            stdout_buff = []
            process_output = sh_commands.make_output_processor(stdout_buff)
            # Inline environment assignments that precede the binary.
            cmd = [
                "LD_LIBRARY_PATH=%s" % self.data_dir,
                "MACE_TUNING=%s" % int(tuning),
                "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
                "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir,
                "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
                "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
                "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
                "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0),
            ]
            if self.system == SystemType.android and address_sanitizer:
                cmd.extend([
                    "LD_PRELOAD=%s/%s" %
                    (self.data_dir, sh_commands.asan_rt_library_names(abi))
                ])
            cmd.extend([
                "%s/%s" % (self.data_dir, target_name),
                "--model_name=%s" % model_tag,
                "--input_node=%s" % ",".join(input_nodes),
                "--output_node=%s" % ",".join(output_nodes),
                "--input_shape=%s" % ":".join(input_shapes),
                "--output_shape=%s" % ":".join(output_shapes),
                "--input_file=%s/%s" % (self.data_dir, input_file_name),
                "--output_file=%s/%s" % (self.data_dir, output_file_name),
                "--model_data_file=%s/%s.data" % (self.data_dir, model_tag),
                "--device=%s" % device_type,
                "--round=%s" % running_round,
                "--restart_round=%s" % restart_round,
                "--omp_num_threads=%s" % omp_num_threads,
                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
                "--gpu_perf_hint=%s" % gpu_perf_hint,
                "--gpu_priority_hint=%s" % gpu_priority_hint,
                "--model_file=%s" % mace_model_phone_path,
                "--opencl_binary_file=%s/%s" %
                (self.data_dir, os.path.basename(opencl_binary_file)),
                "--opencl_parameter_file=%s/%s" %
                (self.data_dir, os.path.basename(opencl_parameter_file)),
            ])
            cmd = ' '.join(cmd)
            # The command is shipped as a script file and run with `sh`
            # instead of being passed on the command line directly.
            cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag,
                                          str(time.time()))
            cmd_file = "%s/%s" % (self.data_dir, cmd_file_name)
            tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
            try:
                with open(tmp_cmd_file, 'w') as cmd_fp:
                    cmd_fp.write(cmd)
                self.push(tmp_cmd_file, cmd_file)
            finally:
                # Remove the host-side copy even when writing or pushing
                # fails, so aborted runs do not leave files in /tmp.
                if os.path.exists(tmp_cmd_file):
                    os.remove(tmp_cmd_file)
            self.exec_command('sh {}'.format(cmd_file),
                              _tty_in=True,
                              _out=process_output,
                              _err_to_out=True)
            self.stdout = "".join(stdout_buff)
            if not sh_commands.stdout_success(self.stdout):
                common.MaceLogger.error("Mace Run", "Mace run failed.")

            six.print_("Running finished!\n")
        else:
            six.print_('Unsupported system %s' % self.system, file=sys.stderr)
            raise Exception('Wrong device')

        return self.stdout