Example #1
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=textwrap.dedent(__doc__.lstrip().rstrip()),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--run",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Run the command as-is.
                        """))
    parser.add_argument("--append",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Append the command to --sh
                        """))
    parser.add_argument("--sh",
                        help=textwrap.dedent("""\
                        Shell file to append commands to (see --append).
                        """))
    parser.add_argument('--run-sh',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Run all the commands in --sh on the available --gpus
                        """))
    parser.add_argument('--rlscope-directory',
                        help=textwrap.dedent("""\
                        The output directory of the command being run.
                        This is where logfile.out will be output.
                        """))
    parser.add_argument("--verbosity",
                        choices=['progress', 'commands', 'output'],
                        default='progress',
                        help=textwrap.dedent("""\
                            Output information about running commands.
                            --verbosity progress (Default)
                                Only show high-level progress bar information.
                              
                            --verbosity commands
                                Show the command-line of commands that are being run.
                                
                            --verbosity output
                                Show the output of each analysis (not configuration) command on sys.stdout.
                                NOTE: This may cause interleaving of lines.
                            """))
    parser.add_argument('--line-numbers', action='store_true', help=textwrap.dedent("""\
    Show line numbers and timestamps in RL-Scope logging messages.
    """))
    parser.add_argument('--debug',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Debug
                        """))
    parser.add_argument('--skip-final-error-message',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Skip error message printed at the end if at least one command fails.
                        """))
    parser.add_argument("--retry",
                        type=int,
                        help=textwrap.dedent("""\
                            If a command fails, retry it up to --retry times.
                            Default: don't retry.
                            """))
    parser.add_argument("--tee",
                        action='store_true',
                        help=textwrap.dedent("""\
                        (debug)
                        tee output of parallel processes to stdout (prefix output with worker name)
                        """))
    parser.add_argument("--pdb",
                        action='store_true',
                        help=textwrap.dedent("""\
                        Debug
                        """))
    parser.add_argument('--dry-run',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Dry run
                        """))
    parser.add_argument('--skip-errors',
                        action='store_true',
                        help=textwrap.dedent("""\
                        If a command fails, ignore the failure and continue running other commands.
                        """))
    parser.add_argument("--gpus",
                        default='all',
                        help=textwrap.dedent("""\
                        # Run on the first GPU only
                        --gpus 0
                        # Run on the first 2 GPUs
                        --gpus 0,1
                        # Run on all available GPUs
                        --gpus all
                        # Don't allow running with any GPUs (CUDA_VISIBLE_DEVICES="")
                        --gpus none
                        """))
    all_args, _ = parser.parse_known_args(sys.argv)
    ignore_opts = set()
    if all_args.sh is not None:
        ignore_opts.add(all_args.sh)
    run_expr_argv, cmd = gather_argv(
        sys.argv[1:],
        ignore_opts=ignore_opts)
    args = parser.parse_args(run_expr_argv)

    if args.debug:
        logger.debug({
            'run_expr_argv': run_expr_argv,
            'cmd': cmd,
        })

    rlscope_logging.setup_logger(
        debug=args.debug,
        line_numbers=args.debug or args.line_numbers or py_config.is_development_mode(),
    )

    if args.sh is None and ( args.run_sh or args.append ):
        error("--sh is required when either --run-sh or --append are given", parser=parser)

    if args.run_sh and ( args.append or args.run ):
        error("When --run-sh is given, you cannot provide either --append or --run", parser=parser)

    available_gpus = get_available_gpus()
    if args.gpus == 'all':
        gpus = sorted([gpu['device_number'] for gpu in available_gpus])
    elif args.gpus.lower() == 'none':
        gpus = [None]
    else:
        try:
            gpus = sorted([int(gpu) for gpu in re.split(r',', args.gpus)])
        except ValueError:
            error("Failed to parser --gpus={gpus}".format(gpus=args.gpus), parser=parser)

    assert len(gpus) >= 1

    if args.run or args.append:
        if len(cmd) == 0:
            error("Expected cmd to run in arguments, but none was provided",
                  parser=parser)

        if shutil.which(cmd[0]) is None:
            error("Couldn't find {exec} on PATH".format(
                exec=cmd[0]), parser=parser)

    if all_args.rlscope_directory is None:
        # No --rlscope-directory argument; default to the current working directory.
        args.rlscope_directory = os.getcwd()
    else:
        args.rlscope_directory = all_args.rlscope_directory
    # # error("\n  {cmd}".format(cmd=' '.join(cmd)))
    # error(textwrap.dedent("""\
    # --rlscope-directory must be provided so we know where to output logfile.out for cmd:
    #   > CMD:
    #     $ {cmd}
    #   """).format(
    #   cmd=' '.join(cmd),
    # ).rstrip())
    # # "Copy" --rlscope-directory argument from cmd.
    # args.rlscope_directory = all_args.rlscope_directory

    args_dict = dict(vars(args))
    args_dict.pop('gpus')
    args_dict.pop('pdb')
    obj = RunExpr(
        cmd=cmd,
        gpus=gpus,
        **args_dict,
    )

    def _run():
        obj.run_program()
    run_with_pdb(args, _run)
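
Note on Example #1: the wrapper above depends on a helper, gather_argv, to split its own flags from the command it should launch; that helper is not shown here. The following is only an illustrative sketch of one way such a split could be implemented (the real gather_argv in RL-Scope may differ), assuming the first non-option token that resolves to an executable on PATH marks the start of the wrapped command.

import shutil

def split_wrapper_argv(argv, ignore_opts=frozenset()):
    # Hypothetical stand-in for gather_argv: everything before the first token
    # that resolves to an executable on PATH is treated as wrapper arguments;
    # the rest is the command to run. Tokens in ignore_opts (e.g. the value
    # passed to --sh) are never treated as the start of the command.
    for i, arg in enumerate(argv):
        if arg in ignore_opts or arg.startswith('-'):
            continue
        if shutil.which(arg) is not None:
            return argv[:i], argv[i:]
    return argv, []

# e.g. split_wrapper_argv(['--verbosity', 'commands', 'python', 'train.py'])
#   -> (['--verbosity', 'commands'], ['python', 'train.py'])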
Example #2
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=textwrap.dedent(__doc__.lstrip().rstrip()),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--rlscope-directory',
                        required=True,
                        help=textwrap.dedent("""\
    Look for *.venn_js.json rooted at this directory.
    The output file will be <directory>/rlscope_plot_index_data.py.
    All the venn_js_path's in the index will be relative to --rlscope-directory.
    """))
    parser.add_argument('--out-dir',
                        help=textwrap.dedent("""\
    The output file will be <out-dir>/rlscope_plot_index_data.py.
    Default: --rlscope-directory
    """))
    parser.add_argument('--debug',
                        action='store_true',
                        help=textwrap.dedent("""\
    Debug
    """))
    parser.add_argument('--dry-run',
                        action='store_true',
                        help=textwrap.dedent("""\
    Don't write file.
    """))
    parser.add_argument('--basename',
                        default='rlscope_plot_index_data.py',
                        help=textwrap.dedent("""\
    Name of python file to generate.
    """))
    parser.add_argument('--replace',
                        action='store_true',
                        help=textwrap.dedent("""\
    Replace if exists.
    """))
    parser.add_argument('--pdb',
                        action='store_true',
                        help=textwrap.dedent("""\
    Python debugger on unhandled exception.
    """))
    args = parser.parse_args()

    if args.out_dir is None:
        args.out_dir = args.rlscope_directory

    try:
        obj = GeneratePlotIndex(
            directory=args.rlscope_directory,
            out_dir=args.out_dir,
            basename=args.basename,
            debug=args.debug,
            replace=args.replace,
            dry_run=args.dry_run,
        )
        obj.run()
    except Exception as e:
        if not args.pdb:
            raise
        print("> RL-Scope: Detected exception:")
        print(e)
        print("> Entering pdb:")
        import pdb
        pdb.post_mortem()
        raise
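
Note on Example #2: the try/except block at the end implements a common "post-mortem on --pdb" pattern. Below is a minimal, self-contained sketch of that pattern factored into a reusable helper (the run_with_pdb helper used in Example #1 is likely similar, but its actual implementation is not shown here).

import pdb
import sys
import traceback

def run_with_post_mortem(func, use_pdb):
    # Run func(); on an unhandled exception, optionally drop into pdb's
    # post-mortem debugger at the point of failure, then re-raise.
    try:
        return func()
    except Exception:
        if not use_pdb:
            raise
        traceback.print_exc()
        print("> Entering pdb:")
        pdb.post_mortem(sys.exc_info()[2])
        raise

# e.g. run_with_post_mortem(obj.run, use_pdb=args.pdb)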
Example #3
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    rlscope_util_argv, cmd_argv = split_argv_on(sys.argv[1:])
    parser = get_util_sampler_parser(add_rlscope_root_pid=len(cmd_argv) == 0)
    args = parser.parse_args(rlscope_util_argv)

    # To make it easy to launch the utilization sampler manually in certain code bases,
    # allow ignoring all the --rlscope-* arguments:
    #
    # e.g. in minigo's loop_main.sh shell script we do
    #   python3 -m scripts.utilization_sampler "$@" --rlscope-directory $BASE_DIR &
    # where $@ contains all the --rlscope-* args.
    args, extra_argv = parser.parse_known_args()
    # args = parser.parse_args()

    # NOTE: During profiling, we depend on this being called from the root training script.
    if not args.skip_smi_check:
        nvidia_gpu_query.check_nvidia_smi()

    if args.kill:
        for proc in psutil.process_iter():
            # if proc.name() == sys.argv[0]:
            # pinfo = proc.as_dict(attrs=['pid', 'name', 'username'])
            pinfo = proc.as_dict(attrs=['pid', 'username', 'cmdline'])
            pprint.pprint({'pinfo': pinfo})
            # cmdline = proc.cmdline()
            try:
                logger.info(pinfo['cmdline'])
                if re.search(r'rls-util-sampler', ' '.join(
                        pinfo['cmdline'])) and pinfo['pid'] != os.getpid():
                    logger.info(
                        "> Kill rls-util-sampler: {proc}".format(proc=proc))
                    proc.kill()
            except psutil.NoSuchProcess:
                pass
        sys.exit(0)

    if args.rlscope_directory is None:
        logger.info(
            "--rlscope-directory is required: directory where trace-files are saved"
        )
        parser.print_help()
        sys.exit(1)

    os.makedirs(args.rlscope_directory, exist_ok=True)

    # if args.measure_samples_per_sec:
    #     measure_samples_per_sec()
    #     return

    if args.rlscope_util_sample_frequency_sec < MIN_UTIL_SAMPLE_FREQUENCY_SEC:
        parser.error(
            "Need --rlscope-util-sample-frequency-sec={val} to be larger than minimum sample frequency ({min} sec)"
            .format(
                val=args.rlscope_util_sample_frequency_sec,
                min=MIN_UTIL_SAMPLE_FREQUENCY_SEC,
            ))

    rlscope_root_pid = None
    cmd_proc = None
    if len(cmd_argv) != 0:
        exe_path = shutil.which(cmd_argv[0])
        if exe_path is None:
            print(
                "RL-Scope ERROR: couldn't locate {exe} on $PATH; try giving a full path to {exe} perhaps?"
                .format(exe=cmd_argv[0], ))
            sys.exit(1)
        cmd = [exe_path] + cmd_argv[1:]
        print_cmd(cmd)

        sys.stdout.flush()
        sys.stderr.flush()
        cmd_proc = subprocess.Popen(cmd)
        rlscope_root_pid = cmd_proc.pid
    else:
        rlscope_root_pid = args.rlscope_root_pid

    # NOTE: usually, the rls-prof program signals us to terminate.
    # However, if a cmd is provided, we terminate the sampler when cmd finishes and return cmd's exit status.
    util_sampler = UtilizationSampler(
        directory=args.rlscope_directory,
        pid=rlscope_root_pid,
        async_process=cmd_proc,
        util_dump_frequency_sec=args.rlscope_util_dump_frequency_sec,
        util_sample_frequency_sec=args.rlscope_util_sample_frequency_sec,
        debug=args.rlscope_debug,
        debug_single_thread=args.rlscope_debug_single_thread,
    )
    util_sampler.run()
    sys.exit(util_sampler.exit_status)
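
Note on Example #3: the --kill branch scans all processes with psutil and kills any other rls-util-sampler instance. A stripped-down sketch of that pattern, assuming only that the psutil package is installed:

import os
import re
import psutil

def kill_matching_processes(pattern):
    # Kill every process (other than this one) whose command line matches `pattern`.
    regex = re.compile(pattern)
    for proc in psutil.process_iter():
        try:
            cmdline = ' '.join(proc.cmdline())
            if regex.search(cmdline) and proc.pid != os.getpid():
                proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass

# e.g. kill_matching_processes(r'rls-util-sampler')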
Example #4
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    rlscope_prof_argv, cmd_argv = gather_argv(sys.argv[1:])

    parser = argparse.ArgumentParser(
        description=
        "RL-Scope cross-stack profiler for reinforcement learning workloads.",
        formatter_class=argparse.RawTextHelpFormatter)
    # NOTE: these arguments must precede the executable (python some/script.py), otherwise they will be sent
    # to the training script, and not handled by this script (rls-prof).
    parser.add_argument('--debug', action='store_true')
    parser.add_argument("--verbosity",
                        choices=['progress', 'commands', 'output'],
                        default='progress',
                        help=textwrap.dedent("""\
                            Output information about running commands.
                            --verbosity progress (Default)
                                Only show high-level progress bar information.
                              
                            --verbosity commands
                                Show the command-line of commands that are being run.
                                
                            --verbosity output
                                Show the output of each analysis (not configuration) command on sys.stdout.
                                NOTE: This may cause interleaving of lines.
                            """))
    parser.add_argument('--line-numbers',
                        action='store_true',
                        help=textwrap.dedent("""\
    Show line numbers and timestamps in RL-Scope logging messages.
    """))
    parser.add_argument('--rlscope-debug', action='store_true')
    parser.add_argument('--rlscope-rm-traces-from',
                        help=textwrap.dedent("""\
    Delete traces rooted at this --rlscope-directory. 
    Useful if your workload launches multiple training scripts and you need to use --rlscope-skip-rm-traces 
    when launching the other scripts.
    """))
    # parser.add_argument('--rlscope-disable', action='store_true', help=textwrap.dedent("""\
    #     RL-Scope: Skip any profiling. Used for uninstrumented runs.
    #     Useful for ensuring minimal libcupti registration when we run --cuda-api-calls during config_uninstrumented.
    #
    #     Effect: sets "export RLSCOPE_DISABLE=1" for librlscope.so.
    # """))

    add_bool_arg(parser,
                 '--cuda-api-calls',
                 help=textwrap.dedent("""\
                        Trace CUDA API runtime/driver calls.
                        
                        i.e. total number of calls, and total time (usec) spent in a given API call.
                        
                        Effect: sets "export RLSCOPE_CUDA_API_CALLS=1" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--cuda-activities',
                 help=textwrap.dedent("""\
                        Trace CUDA activities (i.e. GPU kernel runtimes, memcpy's).
                        
                        Effect: sets "export RLSCOPE_CUDA_ACTIVITIES=yes" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--cuda-api-events',
                 help=textwrap.dedent("""\
                        Trace all the start/end timestamps of CUDA API calls.
                        Needed during instrumented runs so we know when to subtract profiling overheads.
                        
                        Effect: sets "export RLSCOPE_CUDA_API_EVENTS=yes" for librlscope.so.
                        """))
    add_bool_arg(parser,
                 '--gpu-hw',
                 help=textwrap.dedent("""\
                        Collect GPU hardware counters.
                        
                        Effect: sets "export RLSCOPE_GPU_HW=yes" for librlscope.so.
                        """))

    # parser.add_argument('--fuzz-cuda-api', action='store_true',
    #                     help=textwrap.dedent("""\
    #                     Use libcupti to trace ALL CUDA runtime API calls (# of calls, and total time spent in them).
    #                     This is useful for determining which CUDA API's we need to "calibrate subtractions" for.
    #                     NOTE: this SHOULDN'T be used for finding profiling book-keeping "subtractions", since it
    #                     adds a LOT of overhead to add start/end callbacks to all CUDA API functions.
    #
    #                     Effect: sets "export RLSCOPE_FUZZ_CUDA_API=yes" for librlscope.so.
    #                     """))

    parser.add_argument('--pc-sampling',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Perform sample-profiling using CUDA's "PC Sampling" API.
                        
                        Currently, we're just going to record GPUSamplingState.is_gpu_active.
                        
                        Effect: sets "export RLSCOPE_PC_SAMPLING=1" for librlscope.so.
                        """))
    parser.add_argument('--trace-at-start',
                        action='store_true',
                        help=textwrap.dedent("""\
                        Start tracing right at application startup.
                        
                        Effect: sets "export RLSCOPE_TRACE_AT_START=yes" for librlscope.so.
                        """))
    # parser.add_argument('--stream-sampling', action='store_true',
    #                     help=textwrap.dedent("""\
    #                     Poll cudaStreamQuery() to see if the GPU is being used.
    #
    #                     Effect: sets "export RLSCOPE_STREAM_SAMPLING=yes" for librlscope.so.
    #                     """))

    calibrate_help = textwrap.dedent("""\
    Perform multiple runs in order to calibrate for profiling overhead 
    specific to the workload being run.
    """).rstrip()
    parser.add_argument("--calibrate",
                        dest='calibrate',
                        action='store_true',
                        default=True,
                        help=calibrate_help)
    parser.add_argument("--no-calibrate",
                        dest='calibrate',
                        action='store_false',
                        help=calibrate_help)

    parser.add_argument("--re-calibrate",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Remove existing profiling overhead calibration files, and recompute them.
                            """))
    parser.add_argument("--re-plot",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Remove existing plots and remake them (NOTE: doesn't recompute analysis; see --re-calibrate).
                            """))

    parallel_runs_help = textwrap.dedent("""\
                            Parallelize running configurations across GPUs on this machine (assume no CPU interference). 
                            See --gpus.
                            """)
    parser.add_argument("--parallel-runs",
                        dest='parallel_runs',
                        action='store_true',
                        default=True,
                        help=parallel_runs_help)
    parser.add_argument("--no-parallel-runs",
                        dest='parallel_runs',
                        action='store_false',
                        help=parallel_runs_help)

    parser.add_argument("--retry",
                        type=int,
                        help=textwrap.dedent("""\
                            If a command fails, retry it up to --retry times.
                            Default: don't retry.
                            """))
    parser.add_argument("--dry-run",
                        action='store_true',
                        help=textwrap.dedent("""\
                            Dry run
                            """))
    # parser.add_argument("--gpus",
    #                     action='store_true',
    #                     help=textwrap.dedent("""\
    #                         Parallelize running configurations across GPUs on this machine (assume no CPU inteference). See --rlscope-gpus
    #                         """))
    parser.add_argument("--gpus",
                        help=textwrap.dedent("""\
                        # Run on the first GPU only
                        --gpus 0
                        # Run on the first 2 GPUs
                        --gpus 0,1
                        # Run on all available GPUs
                        --gpus all
                        # Don't allow running with any GPUs (CUDA_VISIBLE_DEVICES="")
                        --gpus none
                        """))
    parser.add_argument(
        '--config',
        choices=[
            'interception',
            'no-interception',
            'gpu-activities',
            'gpu-activities-api-time',
            'no-gpu-activities',
            'full',
            'time-breakdown',
            'gpu-hw',
            'uninstrumented',
        ],
        # Detect if user provides --config or not.
        # By default, run with full RL-Scope instrumentation.
        # default=DEFAULT_CONFIG,
        help=textwrap.dedent("""\
                        For measuring LD_PRELOAD CUDA API interception overhead:
                            interception:
                                Enable LD_PRELOAD CUDA API interception.
                                $ rls-prof --debug --cuda-api-calls --cuda-api-events --rlscope-disable
                            no-interception:
                                Disable LD_PRELOAD CUDA API interception.
                                $ rls-prof --debug --rlscope-disable
                                
                        For measuring CUPTI GPU activity gathering overhead on a per CUDA API call basis.
                            gpu-activities:
                                Enable CUPTI GPU activity recording.
                                $ rls-prof --debug --cuda-api-calls --cuda-activities --rlscope-disable
                            no-gpu-activities:
                                Disable CUPTI GPU activity recording.
                                $ rls-prof --debug --cuda-api-calls --rlscope-disable
                                
                        Expect (for the above configurations):
                        You should run train.py with these arguments set
                        
                            # Since we are comparing total training time, 
                            # run each configuration with the same number of training loop steps.
                            --rlscope-max-passes $N
                            
                            # Disable any pyprof or old tfprof tracing code.
                            --rlscope-disable
                                
                        For collecting full RL-Scope traces for use with rls-run / rlscope-drill:
                            full:
                                Enable all of tfprof and pyprof collection.
                                $ rls-prof --cuda-api-calls --cuda-api-events --cuda-activities --rlscope-disable
                                NOTE: we still use --rlscope-disable to prevent "old" tfprof collection.
                                
                        gpu-hw:
                          ONLY collect GPU hardware counters
                        """))
    args = parser.parse_args(rlscope_prof_argv)

    is_debug = args.debug or args.rlscope_debug or is_env_true('RLSCOPE_DEBUG')
    rlscope_logging.setup_logger(
        debug=is_debug,
        line_numbers=is_debug or args.line_numbers
        or py_config.is_development_mode(),
    )

    if args.rlscope_rm_traces_from is not None:
        logger.info(
            "rls-prof: Delete trace-files rooted at --rlscope-directory = {dir}"
            .format(dir=args.rlscope_rm_traces_from))
        return

    rlscope_api.find_librlscope()
    so_path = rlscope_api.RLSCOPE_CLIB
    assert so_path is not None
    env = dict(os.environ)
    add_env = dict()
    add_env['LD_PRELOAD'] = "{ld}:{so_path}".format(ld=env.get(
        'LD_PRELOAD', ''),
                                                    so_path=so_path)
    # Q: I just want LD_LIBRARY_PATH to get printed...
    if 'LD_LIBRARY_PATH' in env:
        add_env['LD_LIBRARY_PATH'] = env['LD_LIBRARY_PATH']
    # if 'LD_LIBRARY_PATH' in env:
    #     add_env['LD_LIBRARY_PATH'] = env['LD_LIBRARY_PATH']

    def _set_if_none(attr, value):
        if getattr(args, attr) is None:
            setattr(args, attr, value)

    def maybe_remove(xs, x):
        if x in xs:
            xs.remove(x)

    if args.calibrate:
        if args.config is not None:
            logger.error(
                "Only --calibrate or --config should be provided for rls-prof."
            )
            parser.exit(1)
        # Run calibrate.py
        cmd = ['rls-calibrate', 'run']

        if args.gpu_hw:
            cmd.extend(['--gpu-hw'])
            maybe_remove(rlscope_prof_argv, '--gpu-hw')

        cmd.extend(['--verbosity', args.verbosity])

        if args.parallel_runs:
            cmd.extend(['--parallel-runs'])
            maybe_remove(rlscope_prof_argv, '--parallel-runs')
        else:
            cmd.extend(['--no-parallel-runs'])
            maybe_remove(rlscope_prof_argv, '--no-parallel-runs')

        if args.retry is not None:
            cmd.extend(['--retry', str(args.retry)])

        # Q: Can't we just pass this through?
        # if args.re_calibrate:
        #     cmd.extend(['--re-calibrate'])
        #     rlscope_prof_argv.remove('--re-calibrate')

        # if args.gpus is not None:
        #     cmd.extend(['--gpus', args.gpus])
        maybe_remove(rlscope_prof_argv, '--calibrate')
        cmd.extend(rlscope_prof_argv)
        cmd.extend(cmd_argv)
        # cmd.remove('--calibrate')
        print_cmd(cmd)
        try:
            proc = subprocess.run(cmd, check=False)
            sys.exit(proc.returncode)
        except KeyboardInterrupt:
            logger.info(
                "Saw Ctrl-C during calibration; aborting remaining runs.")
            sys.exit(1)

    if args.config is None:
        args.config = DEFAULT_CONFIG

    add_env['RLSCOPE_CONFIG'] = args.config
    if args.config == 'interception':
        "rls-prof --debug --cuda-api-calls --cuda-api-events"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
    elif args.config in ['no-interception', 'uninstrumented']:
        "rls-prof --debug"
        pass
    elif args.config == 'gpu-hw':
        "$ rls-prof --debug --gpu-hw"
        _set_if_none('cuda_api_calls', False)
        _set_if_none('cuda_api_events', False)
        _set_if_none('cuda_activities', False)
        _set_if_none('gpu_hw', True)
    elif args.config == 'no-gpu-activities':
        "$ rls-prof --debug --cuda-api-calls"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('gpu_hw', False)
    elif args.config == 'gpu-activities':
        "$ rls-prof --debug --cuda-api-calls --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    elif args.config == 'gpu-activities-api-time':
        "$ rls-prof --debug --cuda-api-calls --cuda-api-events --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    elif args.config is None or args.config in {'full', 'time-breakdown'}:
        "$ rls-prof --cuda-api-calls --cuda-api-events --cuda-activities"
        _set_if_none('cuda_api_calls', True)
        _set_if_none('cuda_api_events', True)
        _set_if_none('cuda_activities', True)
        _set_if_none('gpu_hw', False)
    else:
        raise NotImplementedError()

    # if args.fuzz_cuda_api and args.cuda_api_calls:
    #     parser.error("Can only run rls-prof with --fuzz-cuda-api or --cuda-api-calls, not both")

    if args.debug or args.rlscope_debug or is_env_true('RLSCOPE_DEBUG'):
        logger.info(
            "Detected debug mode; enabling C++ logging statements (export RLSCOPE_CPP_MIN_VLOG_LEVEL=1)"
        )
        add_env['RLSCOPE_CPP_MIN_VLOG_LEVEL'] = 1

    # if args.rlscope_disable:
    #     add_env['RLSCOPE_DISABLE'] = 'yes'

    def set_yes_no(attr, env_var):
        if getattr(args, attr):
            add_env[env_var] = 'yes'
        else:
            add_env[env_var] = 'no'

    set_yes_no('cuda_api_calls', 'RLSCOPE_CUDA_API_CALLS')

    set_yes_no('cuda_activities', 'RLSCOPE_CUDA_ACTIVITIES')

    set_yes_no('gpu_hw', 'RLSCOPE_GPU_HW')

    set_yes_no('pc_sampling', 'RLSCOPE_PC_SAMPLING')

    # set_yes_no('fuzz_cuda_api', 'RLSCOPE_FUZZ_CUDA_API')

    set_yes_no('cuda_api_events', 'RLSCOPE_CUDA_API_EVENTS')

    set_yes_no('trace_at_start', 'RLSCOPE_TRACE_AT_START')

    # set_yes_no('stream_sampling', 'RLSCOPE_STREAM_SAMPLING')

    if len(cmd_argv) == 0:
        parser.print_usage()
        logger.error(
            "You must provide a command to execute after \"rls-prof\"")
        sys.exit(1)

    exe_path = shutil.which(cmd_argv[0])
    if exe_path is None:
        print(
            "RL-Scope ERROR: couldn't locate {exe} on $PATH; try giving a full path to {exe} perhaps?"
            .format(exe=cmd_argv[0], ))
        sys.exit(1)
    # cmd = argv
    cmd = [exe_path] + cmd_argv[1:]
    print_cmd(cmd, env=add_env)

    env.update(add_env)
    for k in list(env.keys()):
        env[k] = str(env[k])

    sys.stdout.flush()
    sys.stderr.flush()
    os.execve(exe_path, cmd, env)
    # os.execve shouldn't return.
    assert False
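
Note on Example #4: the --cuda-api-calls, --cuda-activities, --cuda-api-events and --gpu-hw flags are registered through an add_bool_arg helper that is not shown here. A common way to implement such a --flag / --no-flag pair is sketched below; the actual RL-Scope helper may differ (note it must default to None so the _set_if_none logic above can tell whether the user set the flag explicitly).

import argparse

def add_bool_arg(parser, flag, default=None, help=None):
    # Register a mutually exclusive --flag / --no-flag pair that both write to
    # the same destination. Leaving the default as None lets the caller detect
    # "user did not specify" (as _set_if_none does in the example above).
    dest = flag.lstrip('-').replace('-', '_')
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(flag, dest=dest, action='store_true', help=help)
    group.add_argument('--no-' + flag.lstrip('-'), dest=dest, action='store_false')
    parser.set_defaults(**{dest: default})

# e.g.
# parser = argparse.ArgumentParser()
# add_bool_arg(parser, '--cuda-api-calls')
# parser.parse_args([])                       # Namespace(cuda_api_calls=None)
# parser.parse_args(['--no-cuda-api-calls'])  # Namespace(cuda_api_calls=False)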
Example #5
def main():

    try:
        check_host.check_config()
    except RLScopeConfigurationError as e:
        logger.error(e)
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=textwrap.dedent("""\
        Process trace-files collected from running an ML script with the RL-Scope profiler.
        
        For task-specific help, provide a task name and --help, e.g.:
        $ rls-run --task OverlapStackedBarTask --help
        
        NOTE: 
        - This script is a thin usage/debugging wrapper around a "luigi" DAG execution script. 
          It just forwards arguments to it.
        - Any unparsed args are forwarded to the luigi script.
        """),
        formatter_class=argparse.RawTextHelpFormatter,
        add_help=False,
    )
    parser.add_argument('--pdb', action='store_true',
                        help="Break into pdb when an exception occurs")
    parser.add_argument('--task',
                        choices=[klass.__name__ for klass in tasks.RLSCOPE_TASKS],
                        help="Name of a runnable IMLTask defined in rlscope.parser.tasks")
    parser.add_argument('--workers',
                        type=int,
                        # DISABLE --workers for now to prevent opening too many postgres connections by accident;
                        # we parallelize internally instead
                        # e.g. ResourceOverlap with 32 worker threads, each of which opens a SQL
                        # connection.
                        # default=multiprocessing.cpu_count(),
                        default=1,
                        help="Maximum number of parallel tasks to run (luigi parameter)")
    parser.add_argument('--help', '-h',
                        action='store_true')
    args, luigi_argv = parser.parse_known_args(sys.argv)

    if args.help and not args.task:
        # Print available tasks.
        parser.print_help()
        sys.exit(0)

    if args.task is None and not args.help:
        # If they just run this:
        # $ rls-run --rlscope-directory <dir>
        # Then run all the targets.
        args.task = 'All'

    extra_argv = [
        '--module', 'rlscope.parser.tasks',
        '--local-scheduler',
        # Default log-level from luigi is DEBUG which is too noisy.
        # Make the default level INFO instead.
        '--log-level', 'INFO',
    ]
    luigi_argv.extend(extra_argv)
    if args.task:
        # Task needs to be the first argument after rls-run.
        luigi_argv.insert(1, args.task)

    if args.help:
        luigi_argv.extend(['--help'])

    if args.workers > 1:
        logger.warning("Each overlap plot uses all the cores; forcing --workers=1")
        args.workers = 1

    if args.pdb:
        logger.debug("Registering pdb breakpoint (--pdb)")
        register_pdb_breakpoint()
        # Debugger is useless when multithreaded.
        args.workers = 1

    luigi_argv.extend(['--workers', str(args.workers)])

    # logger.debug("Luigi arguments:\n{msg}".format(msg=textwrap.indent(pprint.pformat({
    #     'luigi_argv':luigi_argv,
    #     'sys.argv':sys.argv,
    # }), prefix='  ')))

    with warnings.catch_warnings():
        # I don't really take much advantage of luigi's DFS scheduler and instead run things manually.
        # Oh well.
        warnings.filterwarnings('ignore', category=UserWarning, message=r'.*without outputs has no custom complete', module=r'luigi')
        warnings.filterwarnings('ignore', category=UserWarning, message=r'Parameter.*with value "None" is not of type string', module=r'luigi')
        tasks.main(argv=luigi_argv[1:], should_exit=False)
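
Note on Example #5: the key trick is parse_known_args, which consumes only the wrapper's own flags and leaves everything else untouched so it can be forwarded to luigi verbatim. A minimal, self-contained sketch of that forwarding pattern (the option names here are illustrative only):

import argparse

def split_wrapper_and_passthrough(argv):
    # Parse only the wrapper's own flags; every unrecognized token is returned
    # unchanged (and in order) so it can be appended to the wrapped CLI's argv.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--task')
    parser.add_argument('--workers', type=int, default=1)
    args, passthrough_argv = parser.parse_known_args(argv)
    return args, passthrough_argv

# e.g. split_wrapper_and_passthrough(['--task', 'All', '--log-level', 'INFO'])
#   -> (Namespace(task='All', workers=1), ['--log-level', 'INFO'])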