Example 1
    def run_graph_manager(self, graph_manager: 'GraphManager',
                          args: argparse.Namespace):
        if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
            screen.error(
                "{} algorithm is not supported using distributed Coach.".
                format(graph_manager.agent_params.algorithm))

        if args.distributed_coach and args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
            screen.warning(
                "The --checkpoint_save_secs or -s argument will be ignored as SYNC distributed coach sync type is used. Checkpoint will be saved every training iteration."
            )

        if args.distributed_coach and not args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
            screen.error(
                "Distributed coach with ASYNC distributed coach sync type requires --checkpoint_save_secs or -s."
            )

        # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
        # This will not affect GPU runs.
        os.environ["OMP_NUM_THREADS"] = "1"

        # turn TF debug prints off
        if args.framework == Frameworks.tensorflow:
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

        # register the end-of-run experiment summary, unless it was disabled or distributed Coach is used
        if not args.no_summary and not args.distributed_coach:
            atexit.register(logger.summarize_experiment)
            screen.change_terminal_title(args.experiment_name)

        task_parameters = TaskParameters(
            framework_type=args.framework,
            evaluate_only=args.evaluate,
            experiment_path=args.experiment_path,
            seed=args.seed,
            use_cpu=args.use_cpu,
            checkpoint_save_secs=args.checkpoint_save_secs,
            checkpoint_restore_dir=args.checkpoint_restore_dir,
            checkpoint_save_dir=args.checkpoint_save_dir,
            export_onnx_graph=args.export_onnx_graph,
            apply_stop_condition=args.apply_stop_condition)

        # open dashboard
        if args.open_dashboard:
            open_dashboard(args.experiment_path)

        if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
            handle_distributed_coach_tasks(graph_manager, args,
                                           task_parameters)
            return

        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
            handle_distributed_coach_orchestrator(args)
            return

        # Single-threaded runs
        if args.num_workers == 1:
            self.start_single_threaded(task_parameters, graph_manager, args)
        else:
            self.start_multi_threaded(graph_manager, args)
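A minimal, self-contained sketch of the per-process environment setup performed above, with the Frameworks enum simplified to a plain string; the helper name and the callback below are illustrative placeholders, not part of the Coach API:

import atexit
import os


def configure_process_environment(framework: str, tf_verbosity: int, summarize_at_exit) -> None:
    # Intel-optimized TF tends to run much faster when limited to a single OMP thread;
    # this has no effect on GPU runs.
    os.environ["OMP_NUM_THREADS"] = "1"

    # silence TensorFlow's C++ logging according to the requested verbosity
    if framework == "tensorflow":
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(tf_verbosity)

    # print an experiment summary when the process exits
    atexit.register(summarize_at_exit)


# example: configure_process_environment("tensorflow", 3, lambda: print("experiment finished"))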
Example 2
    def expand_preset(self, preset):
        """
        Replace a short preset name with the full python path, and verify that it can be imported.
        """
        if preset.lower() in [p.lower() for p in list_all_presets()]:
            preset = "{}.py:graph_manager".format(
                os.path.join(get_base_dir(), 'presets', preset))
        else:
            preset = "{}".format(preset)
            # if a graph manager variable was not specified, try the default of :graph_manager
            if len(preset.split(":")) == 1:
                preset += ":graph_manager"

        # verify that the preset exists
        preset_path = preset.split(":")[0]
        if not os.path.exists(preset_path):
            screen.error(
                "The given preset ({}) cannot be found.".format(preset))

        # verify that the preset can be instantiated
        try:
            short_dynamic_import(preset, ignore_module_case=True)
        except TypeError as e:
            traceback.print_exc()
            screen.error('Internal Error: ' + str(e) +
                         "\n\nThe given preset ({}) cannot be instantiated.".
                         format(preset))

        return preset
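For illustration, a stripped-down version of the same expansion logic with the Coach-specific helpers (list_all_presets, get_base_dir, short_dynamic_import) replaced by explicit parameters; the function name and arguments below are placeholders, not part of the Coach API:

import os


def expand_preset_name(preset: str, presets_dir: str, known_presets: list) -> str:
    # short preset name -> full path to the preset file plus the default graph manager variable
    if preset.lower() in [p.lower() for p in known_presets]:
        return "{}.py:graph_manager".format(os.path.join(presets_dir, preset))
    # a path was given; fall back to the default variable name if none was specified
    if ":" not in preset:
        preset += ":graph_manager"
    return preset


# expand_preset_name("CartPole_DQN", "/opt/coach/presets", ["CartPole_DQN"])
#   -> '/opt/coach/presets/CartPole_DQN.py:graph_manager'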
Example 3
    def run_graph_manager(self, graph_manager: 'GraphManager', args: argparse.Namespace):
        if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
            screen.error("{} algorithm is not supported using distributed Coach.".format(graph_manager.agent_params.algorithm))

        # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
        # This will not affect GPU runs.
        os.environ["OMP_NUM_THREADS"] = "1"

        # turn TF debug prints off
        if args.framework == Frameworks.tensorflow:
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

        # register the end-of-run experiment summary, unless it was disabled or distributed Coach is used
        if not args.no_summary and not args.distributed_coach:
            atexit.register(logger.summarize_experiment)
            screen.change_terminal_title(args.experiment_name)

        # open dashboard
        if args.open_dashboard:
            open_dashboard(args.experiment_path)

        if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
            handle_distributed_coach_tasks(graph_manager, args)
            return

        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
            handle_distributed_coach_orchestrator(graph_manager, args)
            return

        # Single-threaded runs
        if args.num_workers == 1:
            self.start_single_threaded(graph_manager, args)
        else:
            self.start_multi_threaded(graph_manager, args)
Example 4
def test_preset_n_and_ew(preset_args,
                         clres,
                         start_time=time.time(),
                         time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - check the evaluation worker flag together with the number-of-workers flag.
    """

    ew_flag = ['-ew']
    n_flag = ['-n', Def.Flags.enw]
    p_valid_params = p_utils.validation_params(preset_args)

    run_cmd = [
        'python3',
        'rl_coach/coach.py',
        '-p',
        '{}'.format(preset_args),
        '-e',
        '{}'.format("ExpName_" + preset_args),
    ]

    # add flags to run command
    test_ew_flag = a_utils.add_one_flag_value(flag=ew_flag)
    test_n_flag = a_utils.add_one_flag_value(flag=n_flag)
    run_cmd.extend(test_ew_flag)
    run_cmd.extend(test_n_flag)

    print(str(run_cmd))

    try:
        proc = subprocess.Popen(run_cmd,
                                stdout=clres.stdout,
                                stderr=clres.stdout)

        try:
            a_utils.validate_arg_result(flag=test_ew_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres,
                                        process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)

            a_utils.validate_arg_result(flag=test_n_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres,
                                        process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)
        except AssertionError:
            # kill the process once the assertion fails
            proc.kill()
            # if test failed - print logs
            screen.error(open(clres.stdout.name).read(), crash=False)
            assert False

    except OSError as e:
        # if the test launch failed due to an OSError, skip the test
        pytest.skip(str(e))

    proc.kill()
Example 5
def test_preset_seed(preset_args_for_seed,
                     clres,
                     start_time=time.time(),
                     time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - the test will check the seed argument with all
    presets.
    """
    def close_processes():
        """
        close all of the processes that are still active in the process list
        """
        for i in range(seed_num):
            proc[i].kill()

    proc = []
    seed_num = 2
    flag = ["--seed", str(seed_num)]
    p_valid_params = p_utils.validation_params(preset_args_for_seed)

    run_cmd = [
        'python3',
        'rl_coach/coach.py',
        '-p',
        '{}'.format(preset_args_for_seed),
        '-e',
        '{}'.format("ExpName_" + preset_args_for_seed),
    ]

    if p_valid_params.trace_test_levels:
        lvl = ['-lvl', '{}'.format(p_valid_params.trace_test_levels[0])]
        run_cmd.extend(lvl)

    # add flags to run command
    test_flag = a_utils.add_one_flag_value(flag=flag)
    run_cmd.extend(test_flag)
    print(str(run_cmd))

    for _ in range(seed_num):
        proc.append(
            subprocess.Popen(run_cmd, stdout=clres.stdout,
                             stderr=clres.stdout))

    try:
        a_utils.validate_arg_result(flag=test_flag,
                                    p_valid_params=p_valid_params,
                                    clres=clres,
                                    process=proc,
                                    start_time=start_time,
                                    timeout=time_limit)
    except AssertionError:
        close_processes()
        # if test failed - print logs
        screen.error(open(clres.stdout.name).read(), crash=False)
        assert False

    close_processes()
Example 6
def test_preset_args(preset_args,
                     flag,
                     clres,
                     start_time=time.time(),
                     time_limit=Def.TimeOuts.test_time_limit):
    """ Test command arguments - the test will check all flags one-by-one."""

    p_valid_params = p_utils.validation_params(preset_args)

    run_cmd = [
        'python3',
        'rl_coach/coach.py',
        '-p',
        '{}'.format(preset_args),
        '-e',
        '{}'.format("ExpName_" + preset_args),
    ]

    if p_valid_params.reward_test_level:
        lvl = ['-lvl', '{}'.format(p_valid_params.reward_test_level)]
        run_cmd.extend(lvl)

    # add flags to run command
    test_flag = a_utils.add_one_flag_value(flag=flag)

    if flag[0] == "-cp":
        seed = ['--seed', '42']
        seed_flag = a_utils.add_one_flag_value(flag=seed)
        run_cmd.extend(seed_flag)

    run_cmd.extend(test_flag)
    print(str(run_cmd))

    try:
        proc = subprocess.Popen(run_cmd,
                                stdout=clres.stdout,
                                stderr=clres.stdout)

        try:
            a_utils.validate_arg_result(flag=test_flag,
                                        p_valid_params=p_valid_params,
                                        clres=clres,
                                        process=proc,
                                        start_time=start_time,
                                        timeout=time_limit)
        except AssertionError:
            # kill the process once the assertion fails
            proc.kill()
            # if test failed - print logs
            screen.error(open(clres.stdout.name).read(), crash=False)
            assert False

    except OSError as e:
        # if the test launch failed due to an OSError, skip the test
        pytest.skip(str(e))

    proc.kill()
Example 7
def training_worker(graph_manager, task_parameters, is_multi_node_test):
    """
    restore a checkpoint then perform rollouts using the restored model
    :param graph_manager: An instance of the graph manager
    :param task_parameters: An instance of task parameters
    :param is_multi_node_test: Whether this is a multi-node test instead of a normal run.
    """
    # initialize graph
    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0

    # evaluation offset
    eval_offset = 1

    graph_manager.setup_memory_backend()

    while steps < graph_manager.improve_steps.num_steps:

        graph_manager.phase = core_types.RunPhase.TRAIN
        if is_multi_node_test and graph_manager.get_current_episodes_count(
        ) > graph_manager.preset_validation_params.max_episodes_to_achieve_reward:
            # Test failed as it has not reached the required success rate
            graph_manager.flush_finished()
            screen.error(
                "Could not reach required success by {} episodes.".format(
                    graph_manager.preset_validation_params.
                    max_episodes_to_achieve_reward),
                crash=True)

        graph_manager.fetch_from_worker(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
        graph_manager.phase = core_types.RunPhase.UNDEFINED

        if graph_manager.should_train():
            steps += 1

            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.train()
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > graph_manager.steps_between_evaluation_periods.num_steps * eval_offset:
                eval_offset += 1
                if graph_manager.evaluate(graph_manager.evaluation_steps):
                    break

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                graph_manager.save_checkpoint()
            else:
                graph_manager.occasionally_save_checkpoint()
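The evaluation trigger inside the loop above is plain integer bookkeeping: an evaluation is due whenever the environment steps played so far cross the next multiple of steps_between_evaluation_periods. A standalone illustration with plain ints (no Coach objects involved):

def evaluation_due(train_steps: int, steps_per_train_iteration: int,
                   steps_between_evaluations: int, eval_offset: int) -> bool:
    # mirrors the condition used above to decide when to run an evaluation period
    return train_steps * steps_per_train_iteration > steps_between_evaluations * eval_offset


# with 2048 environment steps per training iteration and evaluations every 10000 steps,
# the 5th iteration (5 * 2048 = 10240 > 10000 * 1) triggers the first evaluation
assert evaluation_due(5, 2048, 10000, eval_offset=1)
assert not evaluation_due(4, 2048, 10000, eval_offset=1)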
Example 8
def test_preset_runs(preset):
    test_failed = False

    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"
    elif 'Lab' in preset:
        level = 'nav_maze_static_01'
    experiment_name = ".test-" + preset

    params = [
        sys.executable, "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name
    ]
    if level != "":
        params += ["-lvl", level]

    p = Popen(params)

    # allow 10 seconds of overhead for initialization, etc.
    time.sleep(10)
    return_value = p.poll()

    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)

    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
Example 9
def test_preset_runs(preset):
    test_failed = False

    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"

    experiment_name = ".test-" + preset

    # override the heatup steps with a small number (1000), so the heatup stage finishes quickly and training starts
    params = [
        "python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name, '-cp', 'heatup_steps=EnvironmentSteps(1000)'
    ]
    if level != "":
        params += ["-lvl", level]

    p = Popen(params)

    # allow 30 seconds of overhead for initialization and for finishing heatup
    time.sleep(30)
    return_value = p.poll()

    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)

    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
Example 10
def test_all_presets_are_running():
    # os.chdir("../../")
    test_failed = False
    all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets') if f.endswith('.py') and f != '__init__.py'])
    for preset in all_presets:
        print("Testing preset {}".format(preset))

        # TODO: this is a temporary workaround for presets which define more than a single available level.
        # we should probably do this in a more robust way
        level = ""
        if "Atari" in preset:
            level = "breakout"
        elif "Mujoco" in preset:
            level = "inverted_pendulum"
        elif "ControlSuite" in preset:
            level = "pendulum:swingup"
        params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
        if level != "":
            params += ["-lvl", level]

        p = Popen(params, stdout=DEVNULL)

        # allow 10 seconds of overhead for initialization, etc.
        time.sleep(10)
        return_value = p.poll()

        if return_value is None:
            screen.success("{} passed successfully".format(preset))
        else:
            test_failed = True
            screen.error("{} failed".format(preset), crash=False)

        p.kill()
        if os.path.exists("experiments/.test"):
            shutil.rmtree("experiments/.test")

    assert not test_failed
Example 11
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    win_size = 10

    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '-n {num_workers} '
           '--seed 0 '
           '-c '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               num_workers=preset_validation_params.num_workers,
               log_file_name=log_file_name,
               level='-lvl ' + preset_validation_params.reward_test_level
               if preset_validation_params.reward_test_level else '')

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, args)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
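The pass criterion above reduces to a windowed moving average of the evaluation rewards crossing a threshold. A simplified, self-contained sketch of that check (unlike the code above, it divides by the actual window length when fewer than win_size rewards are available):

import numpy as np


def reward_threshold_reached(rewards, min_reward_threshold, win_size=10):
    # drop NaNs (episodes without an evaluation reward) before averaging
    rewards = np.asarray(rewards, dtype=float)
    rewards = rewards[~np.isnan(rewards)]
    if len(rewards) == 0:
        return False
    window = min(len(rewards), win_size)
    averaged = np.convolve(rewards, np.ones(window) / window, mode='valid')
    return bool(np.any(averaged >= min_reward_threshold))


# reward_threshold_reached([50, 120, 180, 210], min_reward_threshold=150, win_size=3) -> True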
Example 12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t',
                        '--trace',
                        help="(flag) perform trace based testing",
                        action='store_true')
    parser.add_argument(
        '-p',
        '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl',
        '--time_limit',
        help="time limit for each test in minutes",
        default=40,  # the time limit is high because DDPG is very slow and its tests are long
        type=int)
    parser.add_argument(
        '-np',
        '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')
    parser.add_argument(
        '-ow',
        '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # presets_lists = list_all_classes_in_module(presets)
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f[-3:] == '.py' and not f == '__init__.py'
        ]

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            if not args.trace and not preset_validation_params.test:
                continue

            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(
                            args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(
                        args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                test_passed = perform_reward_based_tests(
                    args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
Example 13
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' +
                                                level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)

    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '--seed 42 '
           '-c '
           '--no_summary '
           '-cp {custom_param} '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               log_file_name=log_file_name,
               level='-lvl ' + level if level else '',
               custom_param='\"improve_steps=EnvironmentSteps({n});'
               'steps_between_evaluation_periods=EnvironmentSteps({n});'
               'evaluation_steps=EnvironmentSteps(1);'
               'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps))

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode),
                         crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join(
            './rl_coach', 'traces', preset_name + '_' +
            level.replace(':', '_') if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log(
                'No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(
                        trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
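At its core the trace test is an exact DataFrame comparison between the CSV produced by the fresh run and the stored trace. A minimal sketch of just that comparison step (clean_df is a Coach-specific helper and is omitted here):

import pandas as pd


def traces_match(new_csv_path: str, stored_trace_csv_path: str) -> bool:
    # DataFrame.equals requires identical shape, column dtypes and values
    new_df = pd.read_csv(new_csv_path)
    stored_df = pd.read_csv(stored_trace_csv_path)
    return new_df.equals(stored_df)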
Example 14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--preset',
        '--presets',
        help=
        "(string) Name of preset(s) to run (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl',
        '--time_limit',
        help="time limit for each test in minutes",
        default=60,  # the time limit is high because DDPG is very slow and its tests are long
        type=int)
    parser.add_argument(
        '-np',
        '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = all_presets()

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            print("Attempting to run Preset: %s" % preset_name)
            if not importable(preset_name):
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            if not has_test_parameters(preset_name):
                continue

            test_count += 1
            try:
                test_preset_reward(preset_name, args.no_progress_bar,
                                   args.time_limit, args.verbose)
            except Exception as e:
                fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
Example 15
def parse_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace:
    """
    Parse the arguments that the user entered
    :param parser: the argparse command line parser
    :return: the parsed arguments
    """
    args = parser.parse_args()

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        exit(0)

    # list available presets
    preset_names = list_all_presets()
    if args.list:
        screen.log_title("Available Presets:")
        for preset in sorted(preset_names):
            print(preset)
        sys.exit(0)

    # replace a short preset name with the full path
    if args.preset is not None:
        if args.preset.lower() in [p.lower() for p in preset_names]:
            args.preset = "{}.py:graph_manager".format(
                os.path.join(get_base_dir(), 'presets', args.preset))
        else:
            args.preset = "{}".format(args.preset)
            # if a graph manager variable was not specified, try the default of :graph_manager
            if len(args.preset.split(":")) == 1:
                args.preset += ":graph_manager"

        # verify that the preset exists
        preset_path = args.preset.split(":")[0]
        if not os.path.exists(preset_path):
            screen.error("The given preset ({}) cannot be found.".format(
                args.preset))

        # verify that the preset can be instantiated
        try:
            short_dynamic_import(args.preset, ignore_module_case=True)
        except TypeError as e:
            traceback.print_exc()
            screen.error('Internal Error: ' + str(e) +
                         "\n\nThe given preset ({}) cannot be instantiated.".
                         format(args.preset))

    # validate the checkpoints args
    if args.checkpoint_restore_dir is not None and not os.path.exists(
            args.checkpoint_restore_dir):
        screen.error(
            "The requested checkpoint folder to load from does not exist.")

    # no preset was given. check if the user requested to play some environment on its own
    if args.preset is None and args.play:
        if args.environment_type:
            args.agent_type = 'Human'
        else:
            screen.error(
                'When no preset is given for Coach to run, and the user requests human control over '
                'the environment, the user is expected to input the desired environment_type and level.'
                '\nAt least one of these parameters was not given.')
    elif args.preset and args.play:
        screen.error(
            "Both the --preset and the --play flags were set. These flags can not be used together. "
            "For human control, please use the --play flag together with the environment type flag (-et)"
        )
    elif args.preset is None and not args.play:
        screen.error(
            "Please choose a preset using the -p flag or use the --play flag together with choosing an "
            "environment type (-et) in order to play the game.")

    # get experiment name and path
    args.experiment_name = logger.get_experiment_name(args.experiment_name)
    args.experiment_path = logger.get_experiment_path(args.experiment_name)

    if args.play and args.num_workers > 1:
        screen.warning(
            "Playing the game as a human is only available with a single worker. "
            "The number of workers will be reduced to 1")
        args.num_workers = 1

    args.framework = Frameworks[args.framework.lower()]

    # checkpoints
    args.save_checkpoint_dir = os.path.join(
        args.experiment_path,
        'checkpoint') if args.save_checkpoint_secs is not None else None

    return args
Example 16
def training_worker(graph_manager, task_parameters, data_store,
                    is_multi_node_test):
    """
    restore a checkpoint then perform rollouts using the restored model

    :param graph_manager: An instance of the graph manager
    :param data_store: An instance of DataStore which can be used to communicate policies to roll out workers
    :param task_parameters: An instance of task parameters
    :param is_multi_node_test: Whether this is a multi-node test instead of a normal run.
    """
    # Load checkpoint if provided
    if task_parameters.checkpoint_restore_path:
        data_store_ckpt_load(data_store)

        # initialize graph
        graph_manager.create_graph(task_parameters)

    else:
        # initialize graph
        graph_manager.create_graph(task_parameters)

        # save randomly initialized graph
        data_store.save_policy(graph_manager)

    # training loop
    steps = 0

    # evaluation offset
    eval_offset = 1

    graph_manager.setup_memory_backend()
    graph_manager.signal_ready()

    while steps < graph_manager.improve_steps.num_steps:

        if is_multi_node_test and graph_manager.get_current_episodes_count(
        ) > graph_manager.preset_validation_params.max_episodes_to_achieve_reward:
            # Test failed as it has not reached the required success rate
            graph_manager.flush_finished()
            screen.error(
                "Could not reach required success by {} episodes.".format(
                    graph_manager.preset_validation_params.
                    max_episodes_to_achieve_reward),
                crash=True)

        graph_manager.fetch_from_worker(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps)

        if graph_manager.should_train():
            steps += 1

            graph_manager.train()

            if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > graph_manager.steps_between_evaluation_periods.num_steps * eval_offset:
                eval_offset += 1
                if graph_manager.evaluate(graph_manager.evaluation_steps):
                    break

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                data_store.save_policy(graph_manager)
            else:
                # NOTE: this implementation conflated occasionally saving checkpoints for later use
                # in production with checkpoints saved for communication to rollout workers.
                # TODO: this should be implemented with a new parameter: distributed_coach_synchronization_frequency or similar
                # graph_manager.occasionally_save_checkpoint()
                raise NotImplementedError()
Example 17
import random
import sys
from os import path, environ

from rl_coach.logger import screen
from rl_coach.filters.action.partial_discrete_action_space_map import PartialDiscreteActionSpaceMap
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter

try:
    if 'CARLA_ROOT' in environ:
        sys.path.append(path.join(environ.get('CARLA_ROOT'), 'PythonClient'))
    else:
        screen.error(
            "CARLA_ROOT was not defined. Please set it to point to the CARLA root directory and try again."
        )
    from carla.client import CarlaClient
    from carla.settings import CarlaSettings
    from carla.tcp import TCPConnectionError
    from carla.sensor import Camera
    from carla.client import VehicleControl
    from carla.planner.planner import Planner
    from carla.driving_benchmark.experiment_suites.experiment_suite import ExperimentSuite
except ImportError:
    from rl_coach.logger import failed_imports
    failed_imports.append("CARLA")

import logging
import subprocess
from rl_coach.environments.environment import Environment, EnvironmentParameters, LevelSelection
Example 18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', '--presets',
                        help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    parser.add_argument('-prl', '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument('-ut', '--update_traces',
                        help="(flag) update traces on repository",
                        action='store_true')
    parser.add_argument('-mt', '--max_threads',
                        help="(int) maximum number of threads to run in parallel",
                        default=multiprocessing.cpu_count()-2,
                        type=int)
    parser.add_argument(
        '-i', '--image', help="(string) Name of the testing image", type=str, default=None
    )
    parser.add_argument(
        '-mb', '--memory_backend', help="(string) Name of the memory backend", type=str, default="redispubsub"
    )
    parser.add_argument(
        '-e', '--endpoint', help="(string) Name of the s3 endpoint", type=str, default='s3.amazonaws.com'
    )
    parser.add_argument(
        '-cr', '--creds_file', help="(string) Path of the s3 creds file", type=str, default='.aws_creds'
    )
    parser.add_argument(
        '-b', '--bucket', help="(string) Name of the bucket for s3", type=str, default=None
    )

    args = parser.parse_args()

    if args.update_traces:
        if not args.bucket:
            print("bucket_name required for s3")
            exit(1)
        if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
            print("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars need to be set")
            exit(1)

        config_file = './tmp.cred'
        generate_config(args.image, args.memory_backend, args.endpoint, args.bucket, args.creds_file, config_file)

    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets')) if
                         f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully", crash=False)
Example 19
def wait_and_check(args, processes, force=False):
    if not force and len(processes) < args.max_threads:
        return None

    test_path = processes[0][0]
    test_name = test_path.split('/')[-1]
    log_file_name = processes[0][1]
    p = processes[0][2]
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except:
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed
Example 20
    def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters,
                 additional_simulator_parameters: Dict[str, Any] = None, seed: Union[None, int]=None,
                 human_control: bool=False, custom_reward_threshold: Union[int, float]=None,
                 random_initialization_steps: int=1, max_over_num_frames: int=1, **kwargs):
        super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                         visualization_parameters)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)

        """
        Load and initialize the environment.
        Environment ids can be defined in 3 ways:
        1. Native gym environments, for example BreakoutDeterministic-v0
        2. Custom gym environments written and installed as python packages.
           These environments should have a python module with a class inheriting from gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action spaces.
           For example: my_environment_package:MyEnvironmentClass will run the environment defined in the
           MyEnvironmentClass class.
        3. Custom gym environments written as an independent module which is not installed.
           These environments should have a python module with a class inheriting from gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action spaces.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run the
           environment defined in the MyEnvironmentClass class, which is located in the module at the relative path
           path_to_my_environment.sub_directory.my_module.
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in an absolute-path module written as a unix path, or in a relative-path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            if self.additional_simulator_parameters:
                self.env = env_class(**self.additional_simulator_parameters)
            else:
                self.env = env_class()
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = ['classic_control', 'mujoco', 'robotics']
        self.native_rendering = self.native_rendering or \
                                any([env in str(self.env.unwrapped.__class__)
                                     for env in environment_to_always_use_with_native_rendering])
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__)
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        self.timelimit_env_wrapper = self.env
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning("Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                               "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                               "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4.")
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(self.env,
                                                           frameskip=self.frame_skip,
                                                           max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space, gym.spaces.dict_space.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if len(observation_space.shape) == 3 and observation_space.shape[-1] == 3:
                # we assume gym image observations are RGB with values in the range 0-255
                self.state_space[observation_space_name] = ImageObservationSpace(
                    shape=np.array(observation_space.shape),
                    high=255,
                    channels_axis=-1
                )
            else:
                self.state_space[observation_space_name] = VectorObservationSpace(
                    shape=observation_space.shape[0],
                    low=observation_space.low,
                    high=observation_space.high
                )
        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high
            )
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description
            )

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                screen.error("Error: Environment {} does not support human control.".format(self.env), crash=True)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1]*scale, image.shape[0]*scale)

        # measurements
        if self.env.spec is not None:
            self.timestep_limit = self.env.spec.timestep_limit
        else:
            self.timestep_limit = None
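        # note: spec.timestep_limit assumes an older gym version; newer gym releases expose
        # this value as spec.max_episode_steps instead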

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(shape=len(self.info.keys()))

        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(1, reward_success_threshold=self.reward_success_threshold)
Example n. 21
0
def test_preset_n_and_ew_and_onnx(preset_args,
                                  clres,
                                  start_time=time.time(),
                                  time_limit=Def.TimeOuts.test_time_limit):
    """
    Test command arguments - check evaluation worker, number of workers and onnx.
    """

    ew_flag = ['-ew']
    n_flag = ['-n', Def.Flags.enw]
    onnx_flag = ['-onnx']
    s_flag = ['-s', Def.Flags.css]
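    # -ew adds an evaluation worker, -n sets the number of workers, -onnx exports an
    # ONNX graph, and -s sets the checkpoint save interval in seconds (values taken
    # from the Def.Flags test constants)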
    p_valid_params = p_utils.validation_params(preset_args)

    run_cmd = [
        'python3',
        'rl_coach/coach.py',
        '-p',
        '{}'.format(preset_args),
        '-e',
        '{}'.format("ExpName_" + preset_args),
    ]

    # add flags to run command
    test_ew_flag = a_utils.add_one_flag_value(flag=ew_flag)
    test_n_flag = a_utils.add_one_flag_value(flag=n_flag)
    test_onnx_flag = a_utils.add_one_flag_value(flag=onnx_flag)
    test_s_flag = a_utils.add_one_flag_value(flag=s_flag)

    run_cmd.extend(test_ew_flag)
    run_cmd.extend(test_n_flag)
    run_cmd.extend(test_onnx_flag)
    run_cmd.extend(test_s_flag)

    print(str(run_cmd))

    proc = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)

    try:
        # Check that csv files have been created
        a_utils.validate_arg_result(flag=test_ew_flag,
                                    p_valid_params=p_valid_params,
                                    clres=clres,
                                    process=proc,
                                    start_time=start_time,
                                    timeout=time_limit)

        # Check that the number of csv files matches the number of workers
        a_utils.validate_arg_result(flag=test_n_flag,
                                    p_valid_params=p_valid_params,
                                    clres=clres,
                                    process=proc,
                                    start_time=start_time,
                                    timeout=time_limit)

        # Check checkpoint files
        a_utils.validate_arg_result(flag=test_s_flag,
                                    p_valid_params=p_valid_params,
                                    clres=clres,
                                    process=proc,
                                    start_time=start_time,
                                    timeout=time_limit)

        # TODO: add onnx check; issue found #257

    except AssertionError:
        # kill the process once an assertion fails
        proc.kill()
        # print the logs if the test failed
        screen.error(open(clres.stdout.name).read(), crash=False)
        assert False

    proc.kill()
Example n. 22
0
# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
agent_params.memory.state_key_with_the_class_index = 'high_level_command'
agent_params.memory.num_classes = 4
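# the four classes presumably correspond to the CARLA CIL high-level commands
# (follow lane, turn left, turn right, go straight)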

# download dataset if it doesn't exist
if not os.path.exists(agent_params.memory.load_memory_from_file_path):
    screen.log_title(
        "The CARLA dataset is not present in the following path: {}".format(
            agent_params.memory.load_memory_from_file_path))
    result = screen.ask_yes_no("Do you want to download it now?")
    if result:
        create_dataset(None, "./datasets/carla_train_set_replay_buffer.p")
    else:
        screen.error(
            "Please update the path to the CARLA dataset in the CARLA_CIL preset",
            crash=True)

###############
# Environment #
###############
env_params = CarlaEnvironmentParameters()
env_params.cameras = ['CameraRGB']
env_params.camera_height = 600
env_params.camera_width = 800
env_params.separate_actions_for_throttle_and_brake = True
env_params.allow_braking = True
env_params.quality = CarlaEnvironmentParameters.Quality.EPIC
env_params.experiment_suite = CoRL2017('Town01')
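# CoRL2017 is CARLA's driving benchmark suite from the CoRL 2017 paper; Town01 is the town
# used for that benchmark's training conditions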

graph_manager = BasicRLGraphManager(agent_params=agent_params,
Example n. 23
0
def test_restore_checkpoint(preset_args,
                            clres,
                            framework,
                            timeout=Def.TimeOuts.test_time_limit):
    """
    Create checkpoints and restore them in a second run.
    :param preset_args: all presets that can be tested in the argument tests
    :param clres: logs and csv files
    :param framework: name of the framework under test
    :param timeout: max time for test
    """
    def _create_cmd_and_run(flag):
        """
        Create default command with given flag and run it
        :param flag: the tested flag; it will be appended to the run command line
        :return: active process
        """
        run_cmd = [
            'python3',
            'rl_coach/coach.py',
            '-p',
            '{}'.format(preset_args),
            '-e',
            '{}'.format("ExpName_" + preset_args),
            '--seed',
            '{}'.format(4),
            '-f',
            '{}'.format(framework),
        ]

        test_flag = a_utils.add_one_flag_value(flag=flag)
        run_cmd.extend(test_flag)
        print(str(run_cmd))
        p = subprocess.Popen(run_cmd, stdout=clres.stdout, stderr=clres.stdout)

        return p

    start_time = time.time()

    if framework == "mxnet":
        # update preset name - for mxnet framework we are using *_DQN
        preset_args = Def.Presets.mxnet_args_test[0]
        # update logs paths
        test_name = 'ExpName_{}'.format(preset_args)
        test_path = os.path.join(Def.Path.experiments, test_name)
        clres.experiment_path = test_path
        clres.stdout_path = 'test_log_{}.txt'.format(preset_args)

    p_valid_params = p_utils.validation_params(preset_args)
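    # the first run saves a checkpoint every 5 seconds so that there is something to restore later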
    create_cp_proc = _create_cmd_and_run(flag=['--checkpoint_save_secs', '5'])

    # wait for the experiment csv to appear, then derive the checkpoint directory from its path
    csv_list = a_utils.get_csv_path(clres=clres)
    assert len(csv_list) > 0
    exp_dir = os.path.dirname(csv_list[0])

    checkpoint_dir = os.path.join(exp_dir, Def.Path.checkpoint)

    checkpoint_test_dir = os.path.join(Def.Path.experiments, Def.Path.test_dir)
    if os.path.exists(checkpoint_test_dir):
        shutil.rmtree(checkpoint_test_dir)

    res = a_utils.is_reward_reached(csv_path=csv_list[0],
                                    p_valid_params=p_valid_params,
                                    start_time=start_time,
                                    time_limit=timeout)
    if not res:
        screen.error(open(clres.stdout.name).read(), crash=False)
        assert False

    entities = a_utils.get_files_from_dir(checkpoint_dir)

    assert len(entities) > 0
    assert any(".ckpt." in file for file in entities)

    # send CTRL+C to close experiment
    create_cp_proc.send_signal(signal.SIGINT)

    if os.path.isdir(checkpoint_dir):
        shutil.copytree(exp_dir, checkpoint_test_dir)
        shutil.rmtree(exp_dir)

    create_cp_proc.kill()
    checkpoint_test_dir = "{}/{}".format(checkpoint_test_dir,
                                         Def.Path.checkpoint)
    # run a second time with the checkpoint folder (restore)
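    # -crd is presumably the short form of --checkpoint_restore_dir; --evaluate runs the
    # restored model in evaluation-only mode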
    restore_cp_proc = _create_cmd_and_run(
        flag=['-crd', checkpoint_test_dir, '--evaluate'])

    new_csv_list = test_utils.get_csv_path(clres=clres)
    time.sleep(10)

    csv = pd.read_csv(new_csv_list[0])
    res = csv['Episode Length'].values[-1]
    expected_reward = 100
    assert res >= expected_reward, Def.Consts.ASSERT_MSG.format(
        str(expected_reward), str(res))
    restore_cp_proc.kill()

    test_folder = os.path.join(Def.Path.experiments, Def.Path.test_dir)
    if os.path.exists(test_folder):
        shutil.rmtree(test_folder)
Example n. 24
0
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 additional_simulator_parameters: Dict[str, Any] = {},
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 observation_space_type: ObservationSpaceType = None,
                 **kwargs):
        """
        :param level: (str)
            A string representing the gym level to run. This can also be a LevelSelection object.
            For example, BreakoutDeterministic-v0

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param additional_simulator_parameters: (Dict[str, Any])
            Any additional parameters that the user can pass to the Gym environment. These parameters should be
            accepted by the __init__ function of the implemented Gym environment.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward threshold that will be used to decide when the agent has succeeded in
            solving the environment. If not set, this value will be taken from the Gym environment definition.

        :param random_initialization_steps: (int)
            The number of random steps that will be taken in the environment after each reset.
            This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees.

        :param max_over_num_frames: (int)
            This value will be used for merging multiple frames into a single frame by taking the maximum value for each
            of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects
            can be seen in one frame but disappear in the next.

        :param observation_space_type:
            This value will be used for generating observation space. Allows a custom space. Should be one of
            ObservationSpaceType. If not specified, observation space is inferred from the number of dimensions
            of the observation: 1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space otherwise.
        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)
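        # (gym's logger levels mirror the logging module, so 40 == ERROR and warnings are suppressed)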
        """
        load and initialize environment
        environment ids can be defined in 3 ways:
        1. Native gym environments, e.g. BreakoutDeterministic-v0
        2. Custom gym environments written and installed as python packages.
           These environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space.
           For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
           MyEnvironmentClass class
        3. Custom gym environments written as an independent module which is not installed.
           These environments should have a python module with a class inheriting gym.Env, implementing the
           relevant functions (_reset, _step, _render) and defining the observation and action space.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an
           environment defined in the MyEnvironmentClass class, which is located in the module at the relative
           path path_to_my_environment.sub_directory.my_module
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # an environment in an absolute-path module written as a unix path, or in a relative-path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            try:
                self.env = env_class(**self.additional_simulator_parameters)
            except:
                screen.error(
                    "Failed to instantiate Gym environment class %s with arguments %s"
                    % (env_class, self.additional_simulator_parameters),
                    crash=False)
                raise
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = [
            'classic_control', 'mujoco', 'robotics'
        ]
        self.native_rendering = self.native_rendering or \
                                any([env in str(self.env.unwrapped.__class__)
                                     for env in environment_to_always_use_with_native_rendering])
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_roboschool_env = 'roboschool' in str(
            self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        if self.is_atari_env:
            self.env.unwrapped.frameskip = 1  # this accesses the atari env that is wrapped with a timelimit wrapper env
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(
                self.env,
                frameskip=self.frame_skip,
                max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space, gym.spaces.dict.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if observation_space_type == ObservationSpaceType.Tensor:
                # we consider arbitrary input tensor which does not necessarily represent images
                self.state_space[
                    observation_space_name] = TensorObservationSpace(
                        shape=np.array(observation_space.shape),
                        low=observation_space.low,
                        high=observation_space.high)
            elif observation_space_type == ObservationSpaceType.Image or len(
                    observation_space.shape) == 3:
                # we assume gym image observations (with an arbitrary number of channels) have values
                # within 0-255 and that the channel dimension is the last dimension
                if observation_space.shape[-1] in [1, 3]:
                    self.state_space[
                        observation_space_name] = ImageObservationSpace(
                            shape=np.array(observation_space.shape),
                            high=255,
                            channels_axis=-1)
                else:
                    # For any number of channels other than 1 or 3, use the generic PlanarMaps space
                    self.state_space[
                        observation_space_name] = PlanarMapsObservationSpace(
                            shape=np.array(observation_space.shape),
                            low=0,
                            high=255,
                            channels_axis=-1)
            elif observation_space_type == ObservationSpaceType.Vector or len(
                    observation_space.shape) == 1:
                self.state_space[
                    observation_space_name] = VectorObservationSpace(
                        shape=observation_space.shape[0],
                        low=observation_space.low,
                        high=observation_space.high)
            else:
                raise screen.error(
                    "Unsupported observation space type %s for Gym environment %s"
                    % (observation_space_type, self.env_id),
                    crash=True)

        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)
        else:
            raise screen.error((
                "Failed to instantiate gym environment {} due to unsupported "
                "action space {}. Expected BoxActionSpace or DiscreteActionSpace."
            ).format(self.env_id, self.env.action_space),
                               crash=True)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                screen.error(
                    "Error: Environment {} does not support human control.".
                    format(self.env),
                    crash=True)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(
            shape=len(self.info.keys()))

        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(
                1, reward_success_threshold=self.reward_success_threshold)

        self.target_success_rate = target_success_rate
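
A minimal preset-style usage sketch for the constructor above, assuming it belongs to rl_coach's GymEnvironment and that the GymVectorEnvironment parameters helper is available; the level and values are illustrative only:

from rl_coach.environments.gym_environment import GymVectorEnvironment

# hypothetical preset-style configuration; the graph manager would eventually call the
# __init__ shown above with these values
env_params = GymVectorEnvironment(level='CartPole-v0')
env_params.frame_skip = 1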
Example n. 25
0
def test_preset_reward(preset_name,
                       no_progress_bar=True,
                       time_limit=60 * 60,
                       verbose=True):
    preset_validation_params = validation_params(preset_name)

    win_size = 10

    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate process
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = [
        'python3', 'rl_coach/coach.py', '-p',
        '{preset_name}'.format(preset_name=preset_name), '-e',
        '{test_name}'.format(test_name=test_name), '-n',
        '{num_workers}'.format(
            num_workers=preset_validation_params.num_workers), '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += [
            '-lvl',
            '{level}'.format(level=preset_validation_params.reward_test_level)
        ]

    stdout = open(log_file_name, 'w')

    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, time_limit)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except Exception:
                # the csv may still be mid-write when we try to read it -> just try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
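                # rolling average over a window of up to win_size evaluation rewards
                # (mode='valid' keeps only fully covered positions)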
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time,
                               time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    # os.killpg(os.getpgid(p.pid), signal.SIGKILL)
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    if not test_passed:
        raise ValueError('golden test failed')
Example n. 26
0
    def get_config_args(self,
                        parser: argparse.ArgumentParser) -> argparse.Namespace:
        """
        Returns a Namespace object with all the user-specified configuration options needed to launch.
        This implementation uses argparse to take arguments from the CLI, but it can be overridden by
        another method that gets its configuration from elsewhere. Any equivalent method, however, must
        return an identically structured Namespace object, conforming to the structure defined by
        get_argument_parser.

        This method parses the arguments that the user entered, performs some basic validation, and
        expands user-specified short-form values to be more explicit.

        :param parser: a parser object which implicitly defines the format of the Namespace that
                       is expected to be returned.
        :return: the parsed arguments as a Namespace
        """
        args = parser.parse_args()

        if args.nocolor:
            screen.set_use_colors(False)

        # if no arguments were given, print the help and exit
        if len(sys.argv) == 1:
            parser.print_help()
            sys.exit(1)

        # list available presets
        if args.list:
            self.display_all_presets_and_exit()

        # Read args from config file for distributed Coach.
        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
            coach_config = ConfigParser({
                'image': '',
                'memory_backend': 'redispubsub',
                'data_store': 's3',
                's3_end_point': 's3.amazonaws.com',
                's3_bucket_name': '',
                's3_creds_file': ''
            })
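            # the distributed-coach config is an INI file with a [coach] section, e.g. (values illustrative):
            #   [coach]
            #   image = <docker image>
            #   memory_backend = redispubsub
            #   data_store = s3
            #   s3_end_point = s3.amazonaws.com
            #   s3_bucket_name = <bucket name>
            #   s3_creds_file = <path to credentials file>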
            try:
                coach_config.read(args.distributed_coach_config_path)
                args.image = coach_config.get('coach', 'image')
                args.memory_backend = coach_config.get('coach',
                                                       'memory_backend')
                args.data_store = coach_config.get('coach', 'data_store')
                if args.data_store == 's3':
                    args.s3_end_point = coach_config.get(
                        'coach', 's3_end_point')
                    args.s3_bucket_name = coach_config.get(
                        'coach', 's3_bucket_name')
                    args.s3_creds_file = coach_config.get(
                        'coach', 's3_creds_file')
            except Error as e:
                screen.error(
                    "Error when reading distributed Coach config file: {}".
                    format(e))

            if args.image == '':
                screen.error("Image cannot be empty.")

            data_store_choices = ['s3', 'nfs']
            if args.data_store not in data_store_choices:
                screen.warning("{} data store is unsupported.".format(
                    args.data_store))
                screen.error(
                    "Supported data stores are {}.".format(data_store_choices))

            memory_backend_choices = ['redispubsub']
            if args.memory_backend not in memory_backend_choices:
                screen.warning("{} memory backend is not supported.".format(
                    args.memory_backend))
                screen.error("Supported memory backends are {}.".format(
                    memory_backend_choices))

            if args.data_store == 's3':
                if args.s3_bucket_name == '':
                    screen.error("S3 bucket name cannot be empty.")
                if args.s3_creds_file == '':
                    args.s3_creds_file = None

        if args.play and args.distributed_coach:
            screen.error("Playing is not supported in distributed Coach.")

        # replace a short preset name with the full path
        if args.preset is not None:
            args.preset = self.expand_preset(args.preset)

        # validate the checkpoints args
        if args.checkpoint_restore_dir is not None and not os.path.exists(
                args.checkpoint_restore_dir):
            screen.error(
                "The requested checkpoint folder to load from does not exist.")

        # validate the checkpoint restore file arg
        if args.checkpoint_restore_file is not None and not glob(
                args.checkpoint_restore_file + '*'):
            screen.error(
                "The requested checkpoint file to load from does not exist.")

        # no preset was given. check if the user requested to play some environment on its own
        if args.preset is None and args.play and not args.environment_type:
            screen.error(
                'When no preset is given for Coach to run, and the user requests human control over '
                'the environment, the user is expected to input the desired environment_type and level.'
                '\nAt least one of these parameters was not given.')
        elif args.preset and args.play:
            screen.error(
                "Both the --preset and the --play flags were set. These flags can not be used together. "
                "For human control, please use the --play flag together with the environment type flag (-et)"
            )
        elif args.preset is None and not args.play:
            screen.error(
                "Please choose a preset using the -p flag or use the --play flag together with choosing an "
                "environment type (-et) in order to play the game.")

        # get experiment name and path
        args.experiment_name = logger.get_experiment_name(args.experiment_name)
        args.experiment_path = logger.get_experiment_path(args.experiment_name)

        if args.play and args.num_workers > 1:
            screen.warning(
                "Playing the game as a human is only available with a single worker. "
                "The number of workers will be reduced to 1")
            args.num_workers = 1

        args.framework = Frameworks[args.framework.lower()]

        # checkpoints
        args.checkpoint_save_dir = os.path.join(
            args.experiment_path,
            'checkpoint') if args.checkpoint_save_secs is not None else None

        if args.export_onnx_graph and not args.checkpoint_save_secs:
            screen.warning(
                "Exporting ONNX graphs requires setting the --checkpoint_save_secs flag. "
                "The --export_onnx_graph will have no effect.")

        return args
Example n. 27
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-ow',
        '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')
    parser.add_argument('-prl',
                        '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument(
        '-mt',
        '--max_threads',
        help="(int) maximum number of threads to run in parallel",
        default=multiprocessing.cpu_count() - 2,
        type=int)

    args = parser.parse_args()
    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f[-3:] == '.py' and not f == '__init__.py'
        ]

    fail_count = 0
    test_count = 0
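    # `processes` is assumed to be a module-level list (defined elsewhere in this file) that
    # collects (test_path, log_file, process) tuples for wait_and_check to poll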

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(
                            preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(
                        preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")