def _nsight(self, cmd, profile_from_start):
    """Run ``cmd`` under ``nsys nvprof`` with output file ``tmp.qdrep``.

    Args:
        cmd: Command-line string appended to the profiler invocation.
        profile_from_start: When truthy, no extra option is added; when
            falsy, ``--profile-from-start=off`` is passed to the profiler
            (presumably the profiled script then decides when capture
            begins -- confirm against the benchmark scripts).

    Returns:
        Whatever ``system.run_command`` returns for the assembled command.
    """
    if profile_from_start:
        template = "nsys nvprof -o tmp.qdrep {}"
    else:
        template = "nsys nvprof --profile-from-start=off -o tmp.qdrep {}"
    return system.run_command(template.format(cmd))
def launch(benchmark_script, benchmark_script_args, with_nvprof=False):
    """
    Run a benchmark script with the current Python interpreter.

    The command executed is:
        python benchmark_script benchmark_script_args

    Args:
        benchmark_script: Path of the script to execute.
        benchmark_script_args: Iterable of argument strings, joined with
            single spaces to form the command line.
        with_nvprof: If True, the command is run through ``_nvprof`` and the
            GPU time parsed from its output is returned. If False, the
            command is run directly and its output is printed.

    Returns:
        The parsed gpu_time when profiling succeeds; otherwise 0.0.

    Note:
        In the non-profiling mode a non-zero exit code terminates the
        process via ``sys.exit`` with that code.
    """
    joined_args = " ".join(benchmark_script_args)
    cmd = "{} {} {}".format(sys.executable, benchmark_script, joined_args)

    # Plain run: echo the output and propagate a failing exit code.
    if not with_nvprof:
        stdout, exit_code = system.run_command(cmd)
        print(stdout)
        if exit_code != 0:
            sys.exit(exit_code)
        return 0.0

    # Profiled run: only a zero exit code AND a successful parse give a time.
    stdout, exit_code = _nvprof(cmd)
    if exit_code == 0:
        parse_status, gpu_time = _parse_nvprof_logs(stdout.split("\n"))
        if parse_status:
            return gpu_time

    print("Runing Error:\n {}".format(stdout))
    return 0.0
def is_ampere_gpu():
    """Report whether the first GPU listed by ``nvidia-smi -L`` is an A100.

    Only the first line of the ``nvidia-smi -L`` output is inspected, and
    only the substring "A100" is recognised.

    Returns:
        True if the command exits with code 0 and its first output line
        contains "A100"; False otherwise (including when the command fails).
    """
    stdout, exit_code = system.run_command("nvidia-smi -L")
    if exit_code != 0:
        return False

    # Expected line shape: "GPU 0: NVIDIA A100-SXM4-40GB (UUID: xxxx)".
    # str.split always yields at least one element, so [0] is safe.
    first_gpu = stdout.split("\n")[0]

    # Membership test instead of `find(...) > 0`, which would wrongly
    # reject a match located at index 0.
    return "A100" in first_gpu
def launch(benchmark_script,
           benchmark_script_args,
           task="speed",
           repeat=1,
           sync_interval=80,
           with_nvprof=False,
           profile_from_start=True):
    """
    Run a benchmark script, either plainly or under a GPU profiler.

    The base command is:
        python benchmark_script benchmark_script_args

    Args:
        benchmark_script: Path of the script to execute.
        benchmark_script_args: List of argument strings. When profiling, its
            ``--profiler`` entry is updated in place through ``_set_args``
            and set to "none" again once the profiled run is over.
        task: "speed" or "scheduling"; selects which runner is used when
            ``with_nvprof`` is True.
        repeat: Used for the "scheduling" task only; ``repeat + 1`` is
            forwarded to the runner.
        sync_interval: Used for the "scheduling" task only; forwarded to the
            runner unchanged.
        with_nvprof: If True, run under a profiler runner; if False, run the
            command directly and print its output.
        profile_from_start: For the "speed" task, forwarded to the runner.
            When False, ``--profiler nvprof`` is set on the script arguments
            before the command is built.

    Returns:
        * task "speed" with profiling: the value returned by the runner
          (named gpu_time in the original code).
        * task "scheduling" with profiling: the value returned by the
          runner (named scheduling_time_dict in the original code).
        * otherwise: 0.0.

    Note:
        Without profiling, a non-zero exit code terminates the process via
        ``sys.exit`` with that code.
    """
    if with_nvprof:
        if task == "speed" and not profile_from_start:
            _set_args(benchmark_script_args, "--profiler", "nvprof")
        elif task == "scheduling":
            _set_args(benchmark_script_args, "--profiler", "nvprof_nvtx")

    # Build the command only after the profiler argument has been adjusted.
    cmd = "{} {} {}".format(sys.executable, benchmark_script,
                            " ".join(benchmark_script_args))

    if not with_nvprof:
        stdout, exit_code = system.run_command(cmd)
        print(stdout)
        if exit_code != 0:
            sys.exit(exit_code)
        return 0.0

    if task not in ("speed", "scheduling"):
        # No runner is defined for other tasks; nothing was modified above.
        return 0.0

    try:
        if task == "speed":
            runner = NsightRunner() if is_ampere_gpu() else NvprofRunner()
            return runner.run(cmd, profile_from_start)

        runner = NsightRunnerForDynamicScheduling()
        # The literal 5 is forwarded positionally exactly as before; its
        # meaning is defined by NsightRunnerForDynamicScheduling.run.
        return runner.run(cmd, 5, repeat + 1, sync_interval)
    finally:
        # Always restore the caller's argument list, even if the runner
        # raises, so a later plain launch does not inherit profiler mode.
        _set_args(benchmark_script_args, "--profiler", "none")
def _nvprof(self, cmd, profile_from_start):
    """Run ``cmd`` under ``nvprof``.

    Args:
        cmd: Command-line string appended to the ``nvprof`` invocation.
        profile_from_start: When truthy, ``nvprof`` is invoked with no extra
            option; when falsy, ``--profile-from-start off`` is added.

    Returns:
        Whatever ``system.run_command`` returns for the assembled command.
    """
    template = ("nvprof {}"
                if profile_from_start else
                "nvprof --profile-from-start off {}")
    return system.run_command(template.format(cmd))
def _nsight_nvtx(self, cmd):
    """Run ``cmd`` under ``nsys profile`` with the cuda and nvtx trace options.

    The invocation passes ``-t cuda,nvtx``, ``--stats true``,
    ``-o tmp.qdrep`` and ``--force-overwrite true`` ahead of ``cmd``.

    Args:
        cmd: Command-line string appended to the profiler invocation.

    Returns:
        Whatever ``system.run_command`` returns for the assembled command.
    """
    profile_cmd = (
        "nsys profile -t cuda,nvtx --stats true -o tmp.qdrep --force-overwrite true {}"
    ).format(cmd)
    return system.run_command(profile_cmd)
def _nvprof(cmd):
    """Run ``cmd`` prefixed with ``nvprof``.

    Args:
        cmd: Command-line string to profile.

    Returns:
        Whatever ``system.run_command`` returns for ``"nvprof <cmd>"``.
    """
    profile_cmd = "nvprof {}".format(cmd)
    return system.run_command(profile_cmd)
def _nvprof(self, cmd):
    """Run ``cmd`` under ``nvprof`` with ``--profile-from-start off``.

    Args:
        cmd: Command-line string appended to the ``nvprof`` invocation.

    Returns:
        Whatever ``system.run_command`` returns for the assembled command.
    """
    profile_cmd = "nvprof --profile-from-start off {}".format(cmd)
    return system.run_command(profile_cmd)
def _nsight(self, cmd):
    """Run ``cmd`` under ``nsys nvprof`` with ``--profile-from-start=off``.

    The profiler output file is fixed to ``tmp.qdrep``.

    Args:
        cmd: Command-line string appended to the profiler invocation.

    Returns:
        Whatever ``system.run_command`` returns for the assembled command.
    """
    profile_cmd = "nsys nvprof --profile-from-start=off -o tmp.qdrep {}".format(
        cmd)
    return system.run_command(profile_cmd)