예제 #1
0
def check_nvidia_smi(exit_if_fail=False, debug=False):
    """
    Time one nvidia-smi invocation and report when it is too slow for
    GPU utilization sampling.

    :param exit_if_fail: When True, a slow nvidia-smi is logged as an error and
        the process exits with status 1; otherwise only a warning is logged.
    :param debug: Forwarded to run_nvidia_smi().
    :return: None
    """
    # Wall-clock duration of a single "$ nvidia-smi" call; its output is not needed.
    began_at = time.time()
    run_nvidia_smi(debug=debug)
    elapsed_sec = time.time() - began_at

    if not elapsed_sec > MAX_NVIDIA_SMI_TIME_SEC:
        return

    # Suggested remedy: $ sudo service nvidia-persistenced start
    template = textwrap.dedent("""
        RL-Scope WARNING: nvidia-smi takes a long time to run on your system.
        In particular, it took {sec} sec to run nvidia-smi (we would prefer < {limit_sec}).
        This will interfere with sampling GPU utilization.
        You can fix this by running the following command:
        
        # Start systemd nvidia-persistenced service (if it's not already running).
        $ sudo nvidia-persistenced --persistence-mode
        
        For more details see:
        https://devtalk.nvidia.com/default/topic/1011192/nvidia-smi-is-slow-on-ubuntu-16-04-/
        """)
    message = template.format(
        sec=elapsed_sec,
        limit_sec=MAX_NVIDIA_SMI_TIME_SEC,
    )

    if not exit_if_fail:
        logger.warning(message)
        return
    logger.error(message)
    sys.exit(1)
예제 #2
0
    def run_py(self):
        """
        Run the Python unit tests with ``<python> -m pytest`` from py_config.INSTALL_ROOT.

        Reads ``self.args.Werror`` (adds ``-Werror`` to the interpreter flags) and
        ``self.args.debug`` (adds ``--pdb -s`` to the pytest arguments).
        Exits the process with pytest's return code if the tests fail.
        """
        # TODO: run pytest with appropriate cmdline options.
        # Q: record output?
        args = self.args
        with with_chdir(py_config.INSTALL_ROOT):

            # 'python'
            cmd = [sys.executable]
            if args.Werror:
                cmd.append('-Werror')
            # '-Wignore:::_pytest.assertion.rewrite' Suppresses deprecation warnings
            # in pytest (up to at least version 6.1.1)
            #
            # https://github.com/pytest-dev/pytest/issues/1403#issuecomment-443533232
            cmd.extend(
                ['-Wignore:::_pytest.assertion.rewrite', '-m', 'pytest'])
            if args.debug:
                # extend (not append): each flag must be its own argv entry,
                # a nested list makes subprocess.run() fail.
                cmd.extend(['--pdb', '-s'])

            print_cmd(cmd)
            proc = subprocess.run(cmd)
            if proc.returncode != 0:
                logger.error("RL-Scope python unit tests failed")
                sys.exit(proc.returncode)
        logger.info("RL-Scope python unit tests PASSED")
예제 #3
0
 def _gpu_worker(self, gpu, *args, **kwargs):
     """
     Wrapper around self.gpu_worker() that turns failures into log messages.

     Ctrl-C is logged at debug level and the wrapper returns quietly; any other
     exception is logged with its indented traceback and the process exits
     with status 1.
     """
     try:
         self.gpu_worker(gpu, *args, **kwargs)
     except KeyboardInterrupt:
         logger.debug(f"GPU[{gpu}] worker saw Ctrl-C; exiting early")
     except Exception:
         template = textwrap.dedent("""\
         BUG: GPU[{gpu}] worker failed with unhandled exception:
         {error}
         """)
         indented_tb = textwrap.indent(traceback.format_exc(), prefix='  ')
         report = template.format(gpu=gpu, error=indented_tb)
         logger.error(report.rstrip())
         sys.exit(1)
예제 #4
0
 def run_cpp(self):
     """
     Run the C++ unit test binary named by py_config.CPP_UNIT_TEST_CMD.

     Exits with status 1 if the binary is not on PATH, and with the binary's
     return code if the tests fail. With ``self.args.debug`` the binary is
     launched under ``gdb --args``.
     """
     args = self.args
     test_bin = py_config.CPP_UNIT_TEST_CMD

     if shutil.which(test_bin) is None:
         not_found = "Didn't find C++ test binary ({bin}) on PATH; have you run build_rlscope yet?"
         logger.error(not_found.format(bin=test_bin))
         sys.exit(1)

     gdb_prefix = ['gdb', '--args'] if args.debug else []
     cmd = gdb_prefix + [test_bin]
     print_cmd(cmd)

     returncode = subprocess.run(cmd).returncode
     if returncode != 0:
         logger.error("RL-Scope C++ unit tests failed")
         sys.exit(returncode)
     logger.info("RL-Scope C++ unit tests PASSED")
예제 #5
0
def execve_rlscope_binary(binary):
    """
    Replace the current process with the RL-Scope binary ``binary``.

    The executable path is built with _j(CPP_BIN, binary); the new process
    receives this process's command-line arguments (sys.argv[1:]) and a copy of
    its environment. Exits with status 1 if the executable does not exist.

    :param binary: Name of the binary to locate under CPP_BIN.
    """
    exe_path = _j(CPP_BIN, binary)
    if not os.path.exists(exe_path):
        logger.error("Couldn't find {bin} binary @ {path}".format(bin=binary, path=exe_path))
        sys.exit(1)

    argv = [exe_path, *sys.argv[1:]]
    if DEBUG:
        print_cmd(argv)
    child_env = os.environ.copy()

    # Flush buffered output before exec so it is not lost when the process image is replaced.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    os.execve(exe_path, argv, child_env)
    # Shouldn't return from os.execve
    assert False
예제 #6
0
def main():
    """
    Command-line entry point for running the RL-Scope unit tests.

    Builds the argument parser (--debug, --Werror, --tests), verifies that
    pytest is importable (exits with status 1 and an install hint otherwise),
    then parses the command line and runs RLSUnitTests.
    """
    parser = argparse.ArgumentParser(
        description=textwrap.dedent(__doc__.strip()),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # TODO: add --pdb to break on failed python tests, and gdb on failed C++ tests.
    parser.add_argument(
        "--debug",
        action='store_true',
        help=textwrap.dedent("""\
                        Debug unit tests.
                        """),
    )
    parser.add_argument(
        "--Werror",
        action='store_true',
        help=textwrap.dedent("""\
                        Treat warnings as errors (pytest)
                        """),
    )
    parser.add_argument(
        "--tests",
        choices=['py', 'cpp', 'all'],
        default='all',
        help=textwrap.dedent("""\
                        Which unit tests to run:
                        py:
                          Just python unit tests.
                        cpp:
                          Just C++ unit tests (rls-test).
                        all:
                          Both python and C++ unit tests.
                        """),
    )

    # Fail early with an actionable message when pytest is not installed.
    try:
        import pytest  # noqa: F401  (imported only to check availability)
    except ModuleNotFoundError:
        install_hint = textwrap.dedent("""
        To run rls-unit-tests, you must install pytest:
          $ pip install "pytest >= 4.4.1"
        """).rstrip()
        logger.error(install_hint)
        sys.exit(1)

    RLSUnitTests(parser.parse_args()).run()
예제 #7
0
def main():
    """
    Command-line entry point: parse options, resolve the GPU list, and run RunExpr.

    Steps:
      1. Validate the host configuration (exit 1 on RLScopeConfigurationError).
      2. Parse the command line twice: once over the whole argv with
         parse_known_args() to pick up --sh / --rlscope-directory, and once over
         the portion gather_argv() attributes to this script.
      3. Validate option combinations and resolve --gpus into a list.
      4. Build a RunExpr from the parsed options and run it via run_with_pdb().

    Invalid option combinations are reported through error(); this function
    relies on error() not returning (NOTE(review): confirm against its definition).
    """
    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=textwrap.dedent(__doc__.lstrip().rstrip()),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--run",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Run the command as-is.
                        """))
    parser.add_argument("--append",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Append the command to --sh
                        """))
    parser.add_argument("--sh",
                        help=textwrap.dedent("""\
                        Shell file to append commands to (see --append).
                        """))
    parser.add_argument('--run-sh',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Run all the commands in --sh on the available --gpus
                        """))
    parser.add_argument('--rlscope-directory',
                        help=textwrap.dedent("""\
                        The output directory of the command being run.
                        This is where logfile.out will be output.
                        """))
    parser.add_argument("--verbosity",
                        choices=['progress', 'commands', 'output'],
                        default='progress',
                        help=textwrap.dedent("""\
                            Output information about running commands.
                            --verbosity progress (Default)
                                Only show high-level progress bar information.

                            --verbosity commands
                                Show the command-line of commands that are being run.

                            --verbosity output
                                Show the output of each analysis (not configuration) command on sys.stdout.
                                NOTE: This may cause interleaving of lines.
                            """))
    parser.add_argument('--line-numbers', action='store_true', help=textwrap.dedent("""\
    Show line numbers and timestamps in RL-Scope logging messages.
    """))
    parser.add_argument('--debug',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Debug
                        """))
    parser.add_argument('--skip-final-error-message',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Skip error message printed at the end if at least one command fails.
                        """))
    parser.add_argument("--retry",
                        type=int,
                        help=textwrap.dedent("""\
                            If a command fails, retry it up to --retry times.
                            Default: don't retry.
                            """))
    parser.add_argument("--tee",
                        action='store_true',
                        help=textwrap.dedent("""\
                        (debug)
                        tee output of parallel processes to stdout (prefix output with worker name)
                        """))
    parser.add_argument("--pdb",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Debug
                        """))
    parser.add_argument('--dry-run',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Dry run
                        """))
    parser.add_argument('--skip-errors',
                        action='store_true',
                        help=textwrap.dedent("""\
                        If a command fails, ignore the failure and continue running other commands.
                        """))
    parser.add_argument("--gpus",
                        default='all',
                        help=textwrap.dedent("""\
                        # Run on the first GPU only
                        --gpus 0
                        # Run on the first 2 GPUs
                        --gpus 0,1
                        # Run on all available GPUs
                        --gpus all
                        # Don't allow running with any GPUs (CUDA_VISIBLE_DEVICES="")
                        --gpus none
                        """))

    # First pass over the whole command line (including the wrapped command) so
    # that --sh and --rlscope-directory are seen wherever they appear.
    all_args, _ = parser.parse_known_args(sys.argv)
    ignore_opts = set()
    if all_args.sh is not None:
        ignore_opts.add(all_args.sh)
    # Second pass: only the arguments gather_argv() assigns to this script;
    # `cmd` is the remaining command to run.
    run_expr_argv, cmd = gather_argv(
        sys.argv[1:],
        ignore_opts=ignore_opts)
    args = parser.parse_args(run_expr_argv)

    if args.debug:
        logger.debug({
            'run_expr_argv': run_expr_argv,
            'cmd': cmd,
        })

    rlscope_logging.setup_logger(
        debug=args.debug,
        line_numbers=args.debug or args.line_numbers or py_config.is_development_mode(),
    )

    if args.sh is None and (args.run_sh or args.append):
        error("--sh is required when either --run-sh or --append are given", parser=parser)

    if args.run_sh and (args.append or args.run):
        error("When --run-sh is given, you cannot provide either --append or --run", parser=parser)

    # Resolve --gpus into the list handed to RunExpr.
    available_gpus = get_available_gpus()
    gpus_opt = args.gpus.lower()
    if gpus_opt == 'all':
        gpus = sorted(gpu['device_number'] for gpu in available_gpus)
    elif gpus_opt == 'none':
        # Single entry with no device number.
        # NOTE(review): presumably RunExpr treats None as "no GPU visible"; confirm.
        gpus = [None]
    else:
        try:
            gpus = sorted(int(gpu) for gpu in args.gpus.split(','))
        except ValueError:
            error("Failed to parse --gpus={gpus}".format(gpus=args.gpus), parser=parser)

    assert len(gpus) >= 1

    if args.run or args.append:
        if len(cmd) == 0:
            error("Expected cmd to run in arguments, but none was provided",
                  parser=parser)

        if shutil.which(cmd[0]) is None:
            error("Couldn't find {exec} on PATH".format(
                exec=cmd[0]), parser=parser)

    if all_args.rlscope_directory is None:
        # No --rlscope-directory argument; just use current directory?
        args.rlscope_directory = os.getcwd()
    else:
        # "Copy" the --rlscope-directory argument seen anywhere on the command line.
        args.rlscope_directory = all_args.rlscope_directory

    # Forward every parsed option to RunExpr except the raw --gpus string
    # (replaced by the resolved list) and --pdb (consumed by run_with_pdb).
    args_dict = dict(vars(args))
    args_dict.pop('gpus')
    args_dict.pop('pdb')
    obj = RunExpr(
        cmd=cmd,
        gpus=gpus,
        **args_dict,
    )

    def _run():
        obj.run_program()
    run_with_pdb(args, _run)
예제 #8
0
    def mode_run_sh(self):
        """
        Queue every command from self.run_commands(), start the GPU workers, and
        poll until they finish.

        The loop wakes every 2 seconds and, in order:
          - refreshes the progress bar (when self.should_show_progress),
          - exits 1 if self.worker_failed is set,
          - exits 1 if any dead worker has a negative exit code,
          - exits once no worker is alive: 1 if commands remain queued, else 0.

        Ctrl-C stops the workers and exits 1. This method always terminates the
        process through sys.exit(); it never returns normally.
        """
        # Fill queue with commands to run.
        run_commands = self.run_commands()
        for run_cmd in run_commands:
            logger.debug(f"Put: {run_cmd}")
            self.cmd_queue.put(run_cmd)

        self.start_gpu_workers()

        bar = None
        if self.should_show_progress:
            bar = progressbar.ProgressBar(max_value=len(run_commands))
        last_completed = None

        # Wait for workers to terminate
        try:
            while True:
                if self.should_show_progress:
                    # Commands no longer in the queue (includes ones still being run).
                    completed = len(run_commands) - self.cmd_queue.qsize()
                    if last_completed is None or completed > last_completed:
                        bar.update(completed)
                    last_completed = completed

                if self.worker_failed.is_set():
                    self.stop_workers()
                    # ; use --skip-errors to ignore failed commands.
                    if not self.skip_final_error_message:
                        logger.error("At least one command failed with non-zero exit status")
                    if self.should_show_progress:
                        bar.finish()
                    sys.exit(1)

                alive_workers = 0
                failed_workers = 0
                for gpu, worker in self.gpu_workers.items():
                    if worker.is_alive():
                        alive_workers += 1
                        continue

                    # NOTE(review): only negative exit codes count as failures here.
                    # If workers are multiprocessing.Process objects, a worker that
                    # calls sys.exit(1) has exitcode 1 and is not caught by this
                    # check; confirm that is intended.
                    if worker.exitcode < 0:
                        logger.error("GPU[{gpu}] worker failed with exitcode={ret} (unhandled exception)".format(
                            gpu=gpu,
                            ret=worker.exitcode,
                        ))
                        self.worker_failed.set()
                        failed_workers += 1

                if failed_workers > 0:
                    self.stop_workers()
                    if self.should_show_progress:
                        bar.finish()
                    sys.exit(1)

                if alive_workers == 0:
                    remaining = self.cmd_queue.qsize()
                    if remaining > 0:
                        logger.warning("GPU workers have finished with {len} remaining commands unfinished".format(
                            len=remaining
                        ))
                        # Close the progress bar like every other exit path does.
                        if self.should_show_progress:
                            bar.finish()
                        sys.exit(1)
                    logger.debug("GPU workers have finished successfully")
                    if self.should_show_progress:
                        bar.finish()
                    sys.exit(0)

                time.sleep(2)
        except KeyboardInterrupt:
            logger.info("Saw Ctrl-C; waiting for workers to terminate")
            self.stop_workers()
            logger.warning("{len} remaining commands went unprocessed".format(len=self.cmd_queue.qsize()))
            if self.should_show_progress:
                bar.finish()
            sys.exit(1)
예제 #9
0
def main():
    """
    Command-line entry point for generating the plot index file.

    Validates the host configuration (exit 1 on RLScopeConfigurationError),
    parses the command line, and runs GeneratePlotIndex. --out-dir defaults to
    --rlscope-directory. With --pdb, an unhandled exception drops into the
    post-mortem debugger before being re-raised.
    """
    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=textwrap.dedent(__doc__.lstrip().rstrip()),
        formatter_class=argparse.RawTextHelpFormatter)
    # Help texts name the real flag (--rlscope-directory); there is no --directory option.
    parser.add_argument('--rlscope-directory',
                        required=True,
                        help=textwrap.dedent("""\
    Look for *.venn_js.json rooted at this directory.
    The output file will be <directory>/rlscope_plot_index_data.py.
    All the venn_js_path's in the index will be relative to --rlscope-directory.
    """))
    parser.add_argument('--out-dir',
                        help=textwrap.dedent("""\
    The output file will be <out-dir>/rlscope_plot_index_data.py.
    Default: --rlscope-directory
    """))
    parser.add_argument('--debug',
                        action='store_true',
                        help=textwrap.dedent("""\
    Debug
    """))
    parser.add_argument('--dry-run',
                        action='store_true',
                        help=textwrap.dedent("""\
    Don't write file.
    """))
    parser.add_argument('--basename',
                        default='rlscope_plot_index_data.py',
                        help=textwrap.dedent("""\
    Name of python file to generate.
    """))
    parser.add_argument('--replace',
                        action='store_true',
                        help=textwrap.dedent("""\
    Replace if exists.
    """))
    parser.add_argument('--pdb',
                        action='store_true',
                        help=textwrap.dedent("""\
    Python debugger on unhandled exception.
    """))
    args = parser.parse_args()

    if args.out_dir is None:
        args.out_dir = args.rlscope_directory

    try:
        obj = GeneratePlotIndex(
            directory=args.rlscope_directory,
            out_dir=args.out_dir,
            basename=args.basename,
            debug=args.debug,
            replace=args.replace,
            dry_run=args.dry_run,
        )
        obj.run()
    except Exception as e:
        if not args.pdb:
            raise
        print("> RL-Scope: Detected exception:")
        print(e)
        print("> Entering pdb:")
        # Imported lazily: only needed on this debugging path.
        import pdb
        pdb.post_mortem()
        raise
예제 #10
0
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    rlscope_prof_argv, cmd_argv = gather_argv(sys.argv[1:])

    parser = argparse.ArgumentParser(
        description=
        "RL-Scope cross-stack profiler for reinforcement learning workloads.",
        formatter_class=argparse.RawTextHelpFormatter)
    # NOTE: these arguments must precede the executable (python some/script.py), otherwise they will be sent
    # to the training script, and not handled by this script (rls-prof).
    parser.add_argument('--debug', action='store_true')
    parser.add_argument("--verbosity",
                        choices=['progress', 'commands', 'output'],
                        default='progress',
                        help=textwrap.dedent("""\
                            Output information about running commands.
                            --verbosity progress (Default)
                                Only show high-level progress bar information.
                              
                            --verbosity commands
                                Show the command-line of commands that are being run.
                                
                            --verbosity output
                                Show the output of each analysis (not configuration) command on sys.stdout.
                                NOTE: This may cause interleaving of lines.
                            """))
    parser.add_argument('--line-numbers',
                        action='store_true',
                        help=textwrap.dedent("""\
    Show line numbers and timestamps in RL-Scope logging messages.
    """))
    parser.add_argument('--rlscope-debug', action='store_true')
    parser.add_argument('--rlscope-rm-traces-from',
                        help=textwrap.dedent("""\
    Delete traces rooted at this --rlscope-directory. 
    Useful if your training script has multiple training scripts, and you need to use --rlscope-skip-rm-traces 
    when launching the other scripts.
    """))
    # parser.add_argument('--rlscope-disable', action='store_true', help=textwrap.dedent("""\
    #     RL-Scope: Skip any profiling. Used for uninstrumented runs.
    #     Useful for ensuring minimal libcupti registration when we run --cuda-api-calls during config_uninstrumented.
    #
    #     Effect: sets "export RLSCOPE_DISABLE=1" for librlscope.so.
    # """))

    add_bool_arg(parser,
                 '--cuda-api-calls',
                 help=textwrap.dedent("""\
                        Trace CUDA API runtime/driver calls.
                        
                        i.e. total number of calls, and total time (usec) spent in a given API call.
                        
                        Effect: sets "export RLSCOPE_CUDA_API_CALLS=1" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--cuda-activities',
                 help=textwrap.dedent("""\
                        Trace CUDA activities (i.e. GPU kernel runtimes, memcpy's).
                        
                        Effect: sets "export RLSCOPE_CUDA_ACTIVITIES=yes" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--cuda-api-events',
                 help=textwrap.dedent("""\
                        Trace all the start/end timestamps of CUDA API calls.
                        Needed during instrumented runs so we know when to subtract profiling overheads.
                        
                        Effect: sets "export RLSCOPE_CUDA_API_EVENTS=yes" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--gpu-hw',
                 help=textwrap.dedent("""\
                        Collect GPU hardware counters.
                        
                        Effect: sets "export RLSCOPE_GPU_HW=yes" for librlscope.so.
                        """))

    # parser.add_argument('--fuzz-cuda-api', action='store_true',
    #                     help=textwrap.dedent("""\
    #                     Use libcupti to trace ALL CUDA runtime API calls (# of calls, and total time spent in them).
    #                     This is useful for determining which CUDA API's we need to "calibrate subtractions" for.
    #                     NOTE: this SHOULDN'T be used for finding profiling book-keeping "subtractions", since it
    #                     adds a LOT of overhead to add start/end callbacks to all CUDA API functions.
    #
    #                     Effect: sets "export RLSCOPE_FUZZ_CUDA_API=yes" for librlscope.so.
    #                     """))

    parser.add_argument('--pc-sampling',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Perform sample-profiling using CUDA's "PC Sampling" API.
                        
                        Currently, we're just going to record GPUSamplingState.is_gpu_active.
                        
                        Effect: sets "export RLSCOPE_PC_SAMPLING=1" for librlscope.so.
                        """))
    parser.add_argument('--trace-at-start',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Start tracing right at application startup.
                        
                        Effect: sets "export RLSCOPE_TRACE_AT_START=yes" for librlscope.so.
                        """))
    # parser.add_argument('--stream-sampling', action='store_true',
    #                     help=textwrap.dedent("""\
    #                     Poll cudaStreamQuery() to see if the GPU is being used.
    #
    #                     Effect: sets "export RLSCOPE_STREAM_SAMPLING=yes" for librlscope.so.
    #                     """))

    calibrate_help = textwrap.dedent("""\
    Perform multiple runs in order to calibrate for profiling overhead 
    specific to the workload being run.
    """).rstrip()
    parser.add_argument("--calibrate",
                        dest='calibrate',
                        action='store_true',
                        default=True,
                        help=calibrate_help)
    parser.add_argument("--no-calibrate",
                        dest='calibrate',
                        action='store_false',
                        help=calibrate_help)

    parser.add_argument("--re-calibrate",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Remove existing profiling overhead calibration files, and recompute them.
                            """))
    parser.add_argument("--re-plot",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Remove existing plots and remake them (NOTE: doesn't recompute analysis; see --re-calibrate).
                            """))

    parallel_runs_help = textwrap.dedent("""\
                            Parallelize running configurations across GPUs on this machine (assume no CPU interference). 
                            See --gpus.
                            """)
    parser.add_argument("--parallel-runs",
                        dest='parallel_runs',
                        action='store_true',
                        default=True,
                        help=parallel_runs_help)
    parser.add_argument("--no-parallel-runs",
                        dest='parallel_runs',
                        action='store_false',
                        help=parallel_runs_help)

    parser.add_argument("--retry",
                        type=int,
                        help=textwrap.dedent("""\
                            If a command fails, retry it up to --retry times.
                            Default: don't retry.
                            """))
    parser.add_argument("--dry-run",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Dry run
                            """))
    # parser.add_argument("--gpus",
    #                     action='store_true',
    #                     help=textwrap.dedent("""\
    #                         Parallelize running configurations across GPUs on this machine (assume no CPU inteference). See --rlscope-gpus
    #                         """))
    parser.add_argument("--gpus",
                        help=textwrap.dedent("""\
                        # Run on the first GPU only
                        --gpus 0
                        # Run on the first 2 GPUs
                        --gpus 0,1
                        # Run on all available GPUs
                        --gpus all
                        # Don't allow running with any GPUs (CUDA_VISIBLE_DEVICES="")
                        --gpus none
                        """))
    parser.add_argument(
        '--config',
        choices=[
            'interception',
            'no-interception',
            'gpu-activities',
            'gpu-activities-api-time',
            'no-gpu-activities',
            'full',
            'time-breakdown',
            'gpu-hw',
            'uninstrumented',
        ],
        # Detect if user provides --config or not.
        # By default, run with full RL-Scope instrumentation.
        # default=DEFAULT_CONFIG,
        help=textwrap.dedent("""\
                        For measuring LD_PRELOAD CUDA API interception overhead:
                            interception:
                                Enable LD_PRELOAD CUDA API interception.
                                $ rls-prof --debug --cuda-api-calls --cuda-api-events --rlscope-disable
                            no-interception:
                                Disable LD_PRELOAD CUDA API interception.
                                $ rls-prof --debug --rlscope-disable
                                
                        For measuring CUPTI GPU activity gathering overhead on a per CUDA API call basis.
                            gpu-activities:
                                Enable CUPTI GPU activity recording.
                                $ rls-prof --debug --cuda-api-calls --cuda-activities --rlscope-disable
                            no-gpu-activities:
                                Disable CUPTI GPU activity recording.
                                $ rls-prof --debug --cuda-api-calls --rlscope-disable
                                
                        Expect (for the above configurations):
                        You should run train.py with these arguments set
                        
                            # Since we are comparing total training time, 
                            # run each configuration with the same number of training loop steps.
                            --rlscope-max-passes $N
                            
                            # Disable any pyprof or old tfprof tracing code.
                            --rlscope-disable
                                
                        For collecting full RL-Scope traces for using with rls-run / rlscope-drill:
                            full:
                                Enable all of tfprof and pyprof collection.
                                $ rls-prof --cuda-api-calls --cuda-api-events --cuda-activities --rlscope-disable
                                NOTE: we still use --rlscope-disable to prevent "old" tfprof collection.
                                
                        gpu-hw:
                          ONLY collect GPU hardware counters
                        """))
    args = parser.parse_args(rlscope_prof_argv)

    is_debug = args.debug or args.rlscope_debug or is_env_true('RLSCOPE_DEBUG')
    rlscope_logging.setup_logger(
        debug=is_debug,
        line_numbers=is_debug or args.line_numbers
        or py_config.is_development_mode(),
    )

    if args.rlscope_rm_traces_from is not None:
        logger.info(
            "rls-prof: Delete trace-files rooted at --rlscope-directory = {dir}"
            .format(dir=args.rlscope_rm_traces_from))
        return

    rlscope_api.find_librlscope()
    so_path = rlscope_api.RLSCOPE_CLIB
    assert so_path is not None
    env = dict(os.environ)
    add_env = dict()
    add_env['LD_PRELOAD'] = "{ld}:{so_path}".format(ld=env.get(
        'LD_PRELOAD', ''),
                                                    so_path=so_path)
    # Q: I just want LD_LIBRARY_PATH to get printed...
    if 'LD_LIBRARY_PATH' in env:
        add_env['LD_LIBRARY_PATH'] = env['LD_LIBRARY_PATH']
    # if 'LD_LIBRARY_PATH' in env:
    #     add_env['LD_LIBRARY_PATH'] = env['LD_LIBRARY_PATH']

    def _set_if_none(attr, value):
        if getattr(args, attr) is None:
            setattr(args, attr, value)

    def maybe_remove(xs, x):
        if x in xs:
            xs.remove(x)

    if args.calibrate:
        if args.config is not None:
            logger.error(
                "Only --calibrate or --config should be provided for rls-prof."
            )
            parser.exit(1)
        # Run calibrate.py
        cmd = ['rls-calibrate', 'run']

        if args.gpu_hw:
            cmd.extend(['--gpu-hw'])
            maybe_remove(rlscope_prof_argv, '--gpu-hw')

        cmd.extend(['--verbosity', args.verbosity])

        if args.parallel_runs:
            cmd.extend(['--parallel-runs'])
            maybe_remove(rlscope_prof_argv, '--parallel-runs')
        else:
            cmd.extend(['--no-parallel-runs'])
            maybe_remove(rlscope_prof_argv, '--no-parallel-runs')

        if args.retry is not None:
            cmd.extend(['--retry', str(args.retry)])

        # Q: Can't we just pass this through?
        # if args.re_calibrate:
        #     cmd.extend(['--re-calibrate'])
        #     rlscope_prof_argv.remove('--re-calibrate')

        # if args.gpus is not None:
        #     cmd.extend(['--gpus', args.gpus])
        maybe_remove(rlscope_prof_argv, '--calibrate')
        cmd.extend(rlscope_prof_argv)
        cmd.extend(cmd_argv)
        # cmd.remove('--calibrate')
        print_cmd(cmd)
        try:
            proc = subprocess.run(cmd, check=False)
            sys.exit(proc.returncode)
        except KeyboardInterrupt:
            logger.info(
                "Saw Ctrl-C during calibration; aborting remaining runs.")
            sys.exit(1)

    if args.config is None:
        args.config = DEFAULT_CONFIG

    add_env['RLSCOPE_CONFIG'] = args.config
    if args.config == 'interception':
        "rls-prof --debug --cuda-api-calls --cuda-api-events"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
    elif args.config in ['no-interception', 'uninstrumented']:
        "rls-prof --debug"
        pass
    elif args.config == 'gpu-hw':
        "$ rls-prof --debug --gpu-hw"
        _set_if_none('cuda_api_calls', False)
        _set_if_none('cuda_api_events', False)
        _set_if_none('cuda_activities', False)
        _set_if_none('gpu_hw', True)
    elif args.config == 'no-gpu-activities':
        "$ rls-prof --debug --cuda-api-calls"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('gpu_hw', False)
    elif args.config == 'gpu-activities':
        "$ rls-prof --debug --cuda-api-calls --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    elif args.config == 'gpu-activities-api-time':
        "$ rls-prof --debug --cuda-api-calls --cuda-api-events --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    elif args.config is None or args.config in {'full', 'time-breakdown'}:
        "$ rls-prof --cuda-api-calls --cuda-api-events --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    else:
        raise NotImplementedError()

    # if args.fuzz_cuda_api and args.cuda_api_calls:
    #     parser.error("Can only run rls-prof with --fuzz-cuda-api or --cuda-api-calls, not both")

    if args.debug or args.rlscope_debug or is_env_true('RLSCOPE_DEBUG'):
        logger.info(
            "Detected debug mode; enabling C++ logging statements (export RLSCOPE_CPP_MIN_VLOG_LEVEL=1)"
        )
        add_env['RLSCOPE_CPP_MIN_VLOG_LEVEL'] = 1

    # if args.rlscope_disable:
    #     add_env['RLSCOPE_DISABLE'] = 'yes'

    def set_yes_no(attr, env_var):
        if getattr(args, attr):
            add_env[env_var] = 'yes'
        else:
            add_env[env_var] = 'no'

    set_yes_no('cuda_api_calls', 'RLSCOPE_CUDA_API_CALLS')

    set_yes_no('cuda_activities', 'RLSCOPE_CUDA_ACTIVITIES')

    set_yes_no('gpu_hw', 'RLSCOPE_GPU_HW')

    set_yes_no('pc_sampling', 'RLSCOPE_PC_SAMPLING')

    # set_yes_no('fuzz_cuda_api', 'RLSCOPE_FUZZ_CUDA_API')

    set_yes_no('cuda_api_events', 'RLSCOPE_CUDA_API_EVENTS')

    set_yes_no('gpu_hw', 'RLSCOPE_GPU_HW')

    set_yes_no('trace_at_start', 'RLSCOPE_TRACE_AT_START')

    # set_yes_no('stream_sampling', 'RLSCOPE_STREAM_SAMPLING')

    if len(cmd_argv) == 0:
        parser.print_usage()
        logger.error(
            "You must provide a command to execute after \"rls-prof\"")
        sys.exit(1)

    exe_path = shutil.which(cmd_argv[0])
    if exe_path is None:
        print(
            "RL-Scope ERROR: couldn't locate {exe} on $PATH; try giving a full path to {exe} perhaps?"
            .format(exe=cmd_argv[0], ))
        sys.exit(1)
    # cmd = argv
    cmd = [exe_path] + cmd_argv[1:]
    print_cmd(cmd, env=add_env)

    env.update(add_env)
    for k in list(env.keys()):
        env[k] = str(env[k])

    sys.stdout.flush()
    sys.stderr.flush()
    os.execve(exe_path, cmd, env)
    # os.execve shouldn't return.
    assert False
예제 #11
0
def find_librlscope():
    """
    Locate the RL-Scope shared library and cache the result in RLSCOPE_CLIB.

    The lookup uses ctypes.util.find_library(RLSCOPE_LIBNAME):

    1. First with the current LD_LIBRARY_PATH (mirrored into LIBRARY_PATH).
    2. If that fails, with each existing directory in the fallback list
       (currently just py_config.CPP_LIB) appended to LD_LIBRARY_PATH.
       On success the extended LD_LIBRARY_PATH / LIBRARY_PATH are left in ENV;
       on failure they are reset to the original LD_LIBRARY_PATH value.

    Does nothing if RLSCOPE_CLIB is already set.  If the library cannot be
    found, an error with instructions is logged and the process terminates
    via sys.exit(1).

    :return: None; the result is stored in the RLSCOPE_CLIB global.
    """
    global RLSCOPE_CLIB
    if RLSCOPE_CLIB is not None:
        return

    def _join_search_path(*entries):
        # Skip empty entries: a zero-length entry in LD_LIBRARY_PATH
        # (e.g. the leading ':' in ":/some/dir") is interpreted by the dynamic
        # loader as the current working directory, which we never intend to add.
        return ':'.join(entry for entry in entries if entry)

    # Older version of python (<=3.6) need 'LIBRARY_PATH' to be defined for find_library to work.

    # First, try to find librlscope.so using our current LD_LIBRARY_PATH.
    #
    # NOTE: This will succeed in development mode (i.e., "python setup.py develop")
    # since we set LD_LIBRARY_PATH in source_me.sh.
    # In "pip install rlscope" distribution mode, this will fail, since
    # librlscope.lib is packaged inside:
    #   rlscope/cpp/lib/librlscope.so
    ENV['LIBRARY_PATH'] = ENV.get('LD_LIBRARY_PATH', '')
    RLSCOPE_CLIB = ctypes.util.find_library(RLSCOPE_LIBNAME)

    if RLSCOPE_CLIB is None:
        orig_LD_LIBRARY_PATH = ENV.get('LD_LIBRARY_PATH', '')
        # Locations to search for librlscope.so
        # Currently, we just search for rlscope/cpp/lib/librlscope.so
        rlscope_lib_dirs = [py_config.CPP_LIB]
        for path in rlscope_lib_dirs:
            if not os.path.isdir(path):
                continue
            ENV['LD_LIBRARY_PATH'] = _join_search_path(orig_LD_LIBRARY_PATH, path)
            ENV['LIBRARY_PATH'] = ENV['LD_LIBRARY_PATH']
            RLSCOPE_CLIB = ctypes.util.find_library(RLSCOPE_LIBNAME)
            if RLSCOPE_CLIB is not None:
                # Keep the extended search path in ENV: it is what made the
                # library discoverable.
                break
            # This directory did not help; undo the change before trying the next one.
            ENV['LD_LIBRARY_PATH'] = orig_LD_LIBRARY_PATH
            ENV['LIBRARY_PATH'] = ENV['LD_LIBRARY_PATH']

    if RLSCOPE_CLIB is None:
        if py_config.is_development_mode():
            # RL-Scope has been installed using "python setup.py develop", and is being
            # run from a github repo checkout.
            #
            # Provide instructions on how to build librlscope.so from scratch, and add it to
            # the user's LD_LIBRARY_PATH so we can find it.
            logger.error(
                textwrap.dedent("""\
            RL-Scope ERROR: couldn't find RL-Scope library (lib{name}.so); to build it, do:
              $ cd {root}
              $ bash ./setup.sh
              # To modify your LD_LIBRARY_PATH to include lib{name}.so, run:
              $ source source_me.sh
            """).format(
                    name=RLSCOPE_LIBNAME,
                    root=py_config.ROOT,
                ).rstrip())
        else:
            # RL-Scope has been installed using "pip install rlscope".
            #
            # librlscope.so SHOULD be bundled with the installed python package;
            # if it isn't then this is a BUG.
            logger.error(
                textwrap.dedent("""\
            RL-Scope ERROR: couldn't find RL-Scope library (lib{name}.so) inside {lib_dir}.
            This looks like a BUG in RL-Scope; please report it at:
              https://github.com/UofT-EcoSystem/rlscope/issues
            """).format(
                    lib_dir=py_config.CPP_LIB,
                    name=RLSCOPE_LIBNAME,
                ).rstrip())
        sys.exit(1)
예제 #12
0
def main():
    """
    Entry point of the GPU/CPU utilization sampler.

    The command line is split into the sampler's own options and an optional
    command to run.  Depending on the options this either:

    - kills every other running rls-util-sampler process (--kill) and exits, or
    - samples utilization for the process given by --rlscope-root-pid, or
    - launches the given command as a child process and samples it.

    The process always terminates through sys.exit(); when sampling, the exit
    code is the sampler's exit_status.
    """
    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    rlscope_util_argv, cmd_argv = split_argv_on(sys.argv[1:])
    parser = get_util_sampler_parser(add_rlscope_root_pid=len(cmd_argv) == 0)

    # To make it easy to launch utilization sampler manually in certain code bases,
    # allow ignoring all the --rlscope-* arguments:
    #
    # e.g. in minigo's loop_main.sh shell script we do
    #   python3 -m scripts.utilization_sampler "$@" --rlscope-directory $BASE_DIR &
    # where $@ contains all the --rlscope-* args.
    #
    # NOTE: only the sampler's own portion of the command line is parsed here.
    # Parsing the whole of sys.argv would also consume options that belong to
    # the wrapped command in cmd_argv (e.g. a "--kill" meant for that command).
    args, _ignored_argv = parser.parse_known_args(rlscope_util_argv)

    # NOTE: During profiling, we depend on this being called from the root training script.
    if not args.skip_smi_check:
        nvidia_gpu_query.check_nvidia_smi()

    if args.kill:
        own_pid = os.getpid()
        for proc in psutil.process_iter():
            try:
                # as_dict() can raise NoSuchProcess if the process exits while we
                # are inspecting it, so it must be inside the try block as well.
                pinfo = proc.as_dict(attrs=['pid', 'username', 'cmdline'])
                pprint.pprint({'pinfo': pinfo})
                logger.info(pinfo['cmdline'])
                # 'cmdline' is None when psutil could not read it (as_dict()
                # substitutes its ad_value); treat that as an empty command line.
                cmdline = ' '.join(pinfo['cmdline'] or [])
                if re.search(r'rls-util-sampler', cmdline) and pinfo['pid'] != own_pid:
                    logger.info(
                        "> Kill rls-util-sampler: {proc}".format(proc=proc))
                    proc.kill()
            except psutil.NoSuchProcess:
                # Process already gone; nothing left to kill.
                pass
        sys.exit(0)

    if args.rlscope_directory is None:
        # This is a fatal usage error (we exit with status 1), so report it as one.
        logger.error(
            "--rlscope-directory is required: directory where trace-files are saved"
        )
        parser.print_help()
        sys.exit(1)

    os.makedirs(args.rlscope_directory, exist_ok=True)

    if args.rlscope_util_sample_frequency_sec < MIN_UTIL_SAMPLE_FREQUENCY_SEC:
        parser.error(
            "Need --rlscope-util-sample-frequency-sec={val} to be larger than minimum sample frequency ({min} sec)"
            .format(
                val=args.rlscope_util_sample_frequency_sec,
                min=MIN_UTIL_SAMPLE_FREQUENCY_SEC,
            ))

    rlscope_root_pid = None
    cmd_proc = None
    if len(cmd_argv) != 0:
        # A command was given: launch it ourselves and sample the child process.
        exe_path = shutil.which(cmd_argv[0])
        if exe_path is None:
            print(
                "RL-Scope ERROR: couldn't locate {exe} on $PATH; try giving a full path to {exe} perhaps?"
                .format(exe=cmd_argv[0], ))
            sys.exit(1)
        cmd = [exe_path] + cmd_argv[1:]
        print_cmd(cmd)

        # Flush our buffered output before the child starts writing to the same streams.
        sys.stdout.flush()
        sys.stderr.flush()
        cmd_proc = subprocess.Popen(cmd)
        rlscope_root_pid = cmd_proc.pid
    else:
        # No command: sample the already-running process named on the command line.
        rlscope_root_pid = args.rlscope_root_pid

    # NOTE: usually, we have rls-prof program signal us to terminate.
    # However if they provide a cmd, we would like to terminate sampler when cmd finishes, and return cmd's exit status.
    util_sampler = UtilizationSampler(
        directory=args.rlscope_directory,
        pid=rlscope_root_pid,
        async_process=cmd_proc,
        util_dump_frequency_sec=args.rlscope_util_dump_frequency_sec,
        util_sample_frequency_sec=args.rlscope_util_sample_frequency_sec,
        debug=args.rlscope_debug,
        debug_single_thread=args.rlscope_debug_single_thread,
    )
    util_sampler.run()
    sys.exit(util_sampler.exit_status)
예제 #13
0
def expr_run_cmd(cmd, to_file,
                 cwd=None,
                 env=None,
                 replace=False,
                 dry_run=False,
                 skip_error=False,
                 tee_output=True,
                 tee_cmd=None,
                 tee_prefix=None,
                 # extra_argv=[],
                 only_show_env=None,
                 debug=False,
                 raise_exception=False,
                 exception_class=None,
                 log_errors=True,
                 log_func=None):
    """
    Run an experiment command through tee(), unless it has already been run.

    Whether the experiment already ran is decided by expr_already_ran(to_file).
    After a successful (non dry-run) run, EXPERIMENT_SUCCESS_LINE is appended
    to to_file so later calls can detect the completed run.

    :param cmd: command forwarded to tee().
    :param to_file: log file; checked for a previous run and marked on success.
    :param cwd: forwarded to tee().
    :param env: forwarded to tee(); defaults to a copy of os.environ.
    :param replace: run the command even if it already ran.
    :param dry_run: forwarded to tee(); return code checks and the success
        marker are skipped.
    :param skip_error: pass check=False to tee() and report a non-zero return
        code through log_func instead of treating it as fatal.
    :param tee_output: forwarded to tee().
    :param tee_cmd: forwarded to tee().
    :param tee_prefix: forwarded to tee().
    :param only_show_env: forwarded to tee().
    :param debug: forwarded to expr_already_ran(); also logs a stack trace
        before exiting on failure.
    :param raise_exception: on subprocess.CalledProcessError, raise instead of
        calling sys.exit(1).
    :param exception_class: if given (with raise_exception), raised with the
        failure message; otherwise the original exception is re-raised.
    :param log_errors: whether failures are logged at all.
    :param log_func: logging callable used for the skip_error failure message;
        defaults to logger.error.
    :return: whatever tee() returned, or None if the command was not run.
    """
    report_failure = logger.error if log_func is None else log_func

    if env is None:
        # Make sure rls-run get RLSCOPE_POSTGRES_HOST
        env = dict(os.environ)

    # Guard clause: nothing to do when a previous run already succeeded.
    if not replace and expr_already_ran(to_file, debug=debug):
        return None

    run_result = None
    command_failed = False
    extra_tee_kwargs = {'check': False} if skip_error else {}
    try:
        run_result = tee(
            cmd=cmd,
            to_file=to_file,
            cwd=cwd,
            env=env,
            dry_run=dry_run,
            tee_output=tee_output,
            tee_cmd=tee_cmd,
            tee_prefix=tee_prefix,
            only_show_env=only_show_env,
            **extra_tee_kwargs,
        )
        if skip_error and not dry_run and run_result.returncode != 0:
            if log_errors:
                report_failure(
                    "Command failed; see {path}; continuing".format(
                        path=to_file,
                    ))
            command_failed = True
    except subprocess.CalledProcessError:
        failure_msg = "Command failed: see {path} for command and output.".format(
            path=to_file,
        )
        if log_errors:
            logger.error(failure_msg)
        if raise_exception:
            if exception_class is None:
                # Propagate the original CalledProcessError unchanged.
                raise
            raise exception_class(failure_msg)
        exit_code = 1
        if debug:
            logger.error("Exiting with ret={ret}\n{stack}".format(
                ret=exit_code,
                stack=get_stacktrace(),
            ))
        sys.exit(exit_code)

    if not (command_failed or dry_run):
        if run_result.returncode != 0:
            logger.error("BUG: saw returncode = {ret}, expected 0".format(
                ret=run_result.returncode))
            assert run_result.returncode == 0
        # Mark the log file so the next call recognizes the finished run.
        with open(to_file, 'a') as f:
            f.write("{success_line}\n".format(success_line=EXPERIMENT_SUCCESS_LINE))
        assert expr_already_ran(to_file, debug=debug)

    return run_result
예제 #14
0
def main():
    """
    Entry point of rls-run: a thin wrapper that forwards arguments to the
    luigi task runner in rlscope.parser.tasks.

    Only --pdb, --task, --workers and --help are interpreted here; every other
    argument is passed through to tasks.main() untouched.  When neither --task
    nor --help is given, the 'All' task is run.
    """
    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    description = textwrap.dedent("""\
        Process trace-files collected from running an ML script with the RL-Scope profiler.

        For task-specific help, provided task-name and --help, e.g.:
        $ rls-run --task OverlapStackedBarTask --help

        NOTE: 
        - This script is a thin usage/debugging wrapper around a "luigi" DAG execution script. 
          It just forwards arguments to it.
        - Any unparsed args are forward to the luigi script.
        """)
    task_names = [klass.__name__ for klass in tasks.RLSCOPE_TASKS]

    # add_help=False: --help is handled manually below so that it can be
    # forwarded to luigi for task-specific help.
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
        add_help=False,
    )
    parser.add_argument(
        '--pdb',
        action='store_true',
        help="Break into pdb when an exception occurs")
    parser.add_argument(
        '--task',
        choices=task_names,
        help="Name of a runnable IMLTask defined in rlscope.parser.tasks")
    # --workers is effectively DISABLED for now to prevent opening too many postgres
    # connections by accident; we parallelize internally instead
    # e.g. ResourceOverlap with 32 worker threads, each of which opens a SQL
    # connection.
    parser.add_argument(
        '--workers',
        type=int,
        default=1,
        help="Maximum number of parallel tasks to run (luigi parameter)")
    parser.add_argument('--help', '-h', action='store_true')

    # NOTE: the full sys.argv (program name included) is parsed, so the program
    # name ends up as the first element of luigi_argv; it is stripped again
    # when calling tasks.main() below.
    args, luigi_argv = parser.parse_known_args(sys.argv)

    if not args.task and args.help:
        # Print available tasks.
        parser.print_help()
        sys.exit(0)

    if not args.help and args.task is None:
        # If they just run this:
        # $ rls-run --rlscope-directory <dir>
        # Then run all the targets.
        args.task = 'All'

    luigi_argv += [
        '--module', 'rlscope.parser.tasks',
        '--local-scheduler',
        # Default log-level from luigi is DEBUG which is too noisy.
        # Make the default level INFO instead.
        '--log-level', 'INFO',
    ]
    if args.task:
        # Task needs to be the first argument after rls-run.
        luigi_argv.insert(1, args.task)

    if args.help:
        luigi_argv.append('--help')

    if args.workers > 1:
        logger.warning("Each overlap plot uses all the cores; forcing --workers=1")
        args.workers = 1

    if args.pdb:
        logger.debug("Registering pdb breakpoint (--pdb)")
        register_pdb_breakpoint()
        # Debugger is useless when multithreaded.
        args.workers = 1

    luigi_argv += ['--workers', str(args.workers)]

    # I don't really take much advantage of luigi's DFS scheduler and instead run things manually,
    # so silence the luigi warnings that result from that.
    ignored_luigi_warnings = (
        r'.*without outputs has no custom complete',
        r'Parameter.*with value "None" is not of type string',
    )
    with warnings.catch_warnings():
        for message_pattern in ignored_luigi_warnings:
            warnings.filterwarnings(
                'ignore',
                category=UserWarning,
                message=message_pattern,
                module=r'luigi')
        tasks.main(argv=luigi_argv[1:], should_exit=False)
예제 #15
0
 def pdb_breakpoint(task, ex):
     """
     Log an unhandled exception and enter the pdb post-mortem debugger.

     :param task: object whose class name is reported in the log message.
     :param ex: exception object whose class name is reported in the log message.
     """
     exception_name = ex.__class__.__name__
     task_name = task.__class__.__name__
     message = "> Detected unhandled exception {ex} in {task}; entering pdb".format(
         ex=exception_name,
         task=task_name,
     )
     logger.error(message)
     # No traceback is passed, so pdb inspects the exception currently being
     # handled; presumably this is invoked while that exception is still active.
     pdb.post_mortem()