Example No. 1
    def evaluate(self,
                 steps: PlayingStepsType,
                 reset_before_eval: bool = True) -> bool:
        """Perform evaluation for several steps

        Args:
            steps (PlayingStepsType): the number of steps to evaluate for, given as a steps type and count
            reset_before_eval (bool): whether to reset the environment before evaluation.

        Returns:
            bool: True if the target reward and target success rate have been reached
        """
        self.verify_graph_was_created()

        if steps.num_steps > 0:
            with self.phase_context(RunPhase.TEST):
                # reset all the levels before starting to evaluate
                if reset_before_eval:
                    self.reset_internal_state(force_environment_reset=True)
                self.sync()

                # act for at least `steps`, though don't interrupt an episode
                count_end = self.current_step_counter + steps
                while self.current_step_counter < count_end:
                    self.act(EnvironmentEpisodes(1))
                    self.sync()
        if self.should_stop():
            self.flush_finished()
            screen.success("Reached required success rate. Exiting.")
            return True
        return False
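
For context, a minimal usage sketch follows (not part of the original source); it assumes a graph_manager object that was already created from a preset, e.g. by coach.py:

from rl_coach.core_types import EnvironmentSteps

def run_evaluation(graph_manager, num_steps: int = 1000) -> bool:
    # Evaluate an already-created graph manager for roughly `num_steps` environment steps.
    # reset_before_eval=True matches the default of the method shown above.
    return graph_manager.evaluate(EnvironmentSteps(num_steps), reset_before_eval=True)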
Example No. 2
    def evaluate(self,
                 steps: PlayingStepsType,
                 keep_networks_in_sync: bool = False) -> bool:
        """
        Perform evaluation for several steps
        :param steps: the number of steps to evaluate for, given as a steps type and count
        :param keep_networks_in_sync: whether to sync the network parameters with the global network before each episode
        :return: True if the target reward and target success rate have been reached, False otherwise
        """
        self.verify_graph_was_created()

        if steps.num_steps > 0:
            with self.phase_context(RunPhase.TEST):
                # reset all the levels before starting to evaluate
                self.reset_internal_state(force_environment_reset=True)
                self.sync()

                # act for at least `steps`, though don't interrupt an episode
                count_end = self.current_step_counter + steps
                while self.current_step_counter < count_end:
                    self.act(EnvironmentEpisodes(1))
                    self.sync()
        if self.should_stop():
            if self.task_parameters.checkpoint_save_dir:
                open(
                    os.path.join(self.task_parameters.checkpoint_save_dir,
                                 SyncFiles.FINISHED.value), 'w').close()
            if hasattr(self, 'data_store_params'):
                data_store = self.get_data_store(self.data_store_params)
                data_store.save_to_store()

            screen.success("Reached required success rate. Exiting.")
            return True
        return False
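
The other variants in this section call self.flush_finished() instead of inlining this shutdown logic. As a hedged sketch (an assumption about the helper, not the library's actual implementation), such a method might simply wrap the two blocks above:

import os

def flush_finished(self):
    # Drop a FINISHED marker file so other workers / the orchestrator know evaluation is done.
    # SyncFiles is assumed to be imported as in the surrounding module.
    if self.task_parameters.checkpoint_save_dir:
        open(os.path.join(self.task_parameters.checkpoint_save_dir,
                          SyncFiles.FINISHED.value), 'w').close()
    # Push pending checkpoints/artifacts to the configured data store, if one was set up.
    if hasattr(self, 'data_store_params'):
        self.get_data_store(self.data_store_params).save_to_store()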
Example No. 3
    def evaluate(self, steps: PlayingStepsType) -> bool:
        """
        Perform evaluation for several steps
        :param steps: the number of steps to evaluate for, given as a steps type and count
        :return: True if the target reward and target success rate have been reached, False otherwise
        """

        import smdebug.tensorflow as smd
        self.smdebug_hook.set_mode(smd.modes.EVAL)

        self.verify_graph_was_created()

        if steps.num_steps > 0:
            with self.phase_context(RunPhase.TEST):
                # reset all the levels before starting to evaluate
                self.reset_internal_state(force_environment_reset=True)
                self.sync()

                # act for at least `steps`, though don't interrupt an episode
                count_end = self.current_step_counter + steps
                while self.current_step_counter < count_end:
                    self.act(EnvironmentEpisodes(1))
                    self.sync()
        if self.should_stop():
            self.flush_finished()
            screen.success("Reached required success rate. Exiting.")
            return True
        return False
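
This variant assumes an smdebug_hook attribute was attached to the graph manager beforehand. A hedged setup sketch (the out_dir value and the attribute name are assumptions) could look like:

import smdebug.tensorflow as smd

def attach_smdebug_hook(graph_manager, out_dir='/opt/ml/output/tensors'):
    # Create a SessionHook that records tensors to out_dir; the evaluate() above
    # switches the hook's mode to EVAL before acting.
    hook = smd.SessionHook(out_dir=out_dir)
    graph_manager.smdebug_hook = hook
    return hook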
Example No. 4
    def evaluate(self, steps: PlayingStepsType) -> bool:
        """
        Perform evaluation for several steps
        :param steps: the number of steps to evaluate for, given as a steps type and count
        :return: True if the target reward and target success rate have been reached, False otherwise
        """
        self.verify_graph_was_created()

        if steps.num_steps > 0:
            with self.phase_context(RunPhase.TEST):
                # reset all the levels before starting to evaluate
                self.reset_internal_state(force_environment_reset=True)
                self.sync()

                # act for at least `steps`, though don't interrupt an episode
                count_end = self.current_step_counter + steps
                while self.current_step_counter < count_end:
                    # In case of an evaluation-only worker, fake a phase transition before and after every
                    # episode to make sure results are logged correctly
                    if self.task_parameters.evaluate_only is not None:
                        self.phase = RunPhase.TEST
                    self.act(EnvironmentEpisodes(1))
                    self.sync()
                    if self.task_parameters.evaluate_only is not None:
                        self.phase = RunPhase.TRAIN
        if self.should_stop():
            self.flush_finished()
            screen.success("Reached required success rate. Exiting.")
            return True
        return False
Example No. 5
def test_preset_runs(preset):
    test_failed = False

    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"

    experiment_name = ".test-" + preset

    # override heatup to a small number of steps (1000) so the heatup stage finishes quickly and training starts
    params = [
        "python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name, '-cp', 'heatup_steps=EnvironmentSteps(1000)'
    ]
    if level != "":
        params += ["-lvl", level]

    p = Popen(params)

    # wait 30 seconds to cover initialization overhead and the heatup stage.
    time.sleep(30)
    return_value = p.poll()

    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)

    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
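
A hedged sketch of how this test might be parametrized with pytest (the discovery helper below is an assumption, not part of the original test module):

import os
import pytest

def _all_preset_names(presets_dir=os.path.join('rl_coach', 'presets')):
    # Collect every preset module name, mirroring the listdir logic used elsewhere in this section.
    return sorted(f[:-3] for f in os.listdir(presets_dir)
                  if f.endswith('.py') and f != '__init__.py')

@pytest.mark.parametrize("preset", _all_preset_names())
def test_preset_runs_parametrized(preset):
    test_preset_runs(preset)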
Example No. 6
def test_preset_runs(preset):
    test_failed = False

    print("Testing preset {}".format(preset))

    # TODO: this is a temporary workaround for presets which define more than a single available level.
    # we should probably do this in a more robust way
    level = ""
    if "Atari" in preset:
        level = "breakout"
    elif "Mujoco" in preset:
        level = "inverted_pendulum"
    elif "ControlSuite" in preset:
        level = "pendulum:swingup"
    elif 'Lab' in preset:
        level = 'nav_maze_static_01'
    experiment_name = ".test-" + preset

    params = [
        sys.executable, "rl_coach/coach.py", "-p", preset, "-ns", "-e",
        experiment_name
    ]
    if level != "":
        params += ["-lvl", level]

    p = Popen(params)

    # wait 10 seconds to cover initialization overhead etc.
    time.sleep(10)
    return_value = p.poll()

    if return_value is None:
        screen.success("{} passed successfully".format(preset))
    else:
        test_failed = True
        screen.error("{} failed".format(preset), crash=False)

    p.kill()
    if os.path.exists("experiments/" + experiment_name):
        shutil.rmtree("experiments/" + experiment_name)

    assert not test_failed
Example No. 7
def test_all_presets_are_running():
    # os.chdir("../../")
    test_failed = False
    all_presets = sorted([f.split('.')[0] for f in os.listdir('rl_coach/presets') if f.endswith('.py') and f != '__init__.py'])
    for preset in all_presets:
        print("Testing preset {}".format(preset))

        # TODO: this is a temporary workaround for presets which define more than a single available level.
        # we should probably do this in a more robust way
        level = ""
        if "Atari" in preset:
            level = "breakout"
        elif "Mujoco" in preset:
            level = "inverted_pendulum"
        elif "ControlSuite" in preset:
            level = "pendulum:swingup"
        params = ["python3", "rl_coach/coach.py", "-p", preset, "-ns", "-e", ".test"]
        if level != "":
            params += ["-lvl", level]

        p = Popen(params, stdout=DEVNULL)

        # wait 10 seconds to cover initialization overhead etc.
        time.sleep(10)
        return_value = p.poll()

        if return_value is None:
            screen.success("{} passed successfully".format(preset))
        else:
            test_failed = True
            screen.error("{} failed".format(preset), crash=False)

        p.kill()
        if os.path.exists("experiments/.test"):
            shutil.rmtree("experiments/.test")

    assert not test_failed
Example No. 8
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    win_size = 10

    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '-n {num_workers} '
           '--seed 0 '
           '-c '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               num_workers=preset_validation_params.num_workers,
               log_file_name=log_file_name,
               level='-lvl ' + preset_validation_params.reward_test_level
               if preset_validation_params.reward_test_level else '')

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, args)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
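
read_csv_paths() is used here but not defined in this section; a hypothetical sketch (the real helper may poll, retry, or sort differently) is:

import glob
import os

def read_csv_paths(test_path, filename_pattern='*.csv'):
    # Recursively collect the experiment's result csv files under test_path.
    return sorted(glob.glob(os.path.join(test_path, '**', filename_pattern),
                            recursive=True))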
Example No. 9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t',
                        '--trace',
                        help="(flag) perform trace based testing",
                        action='store_true')
    parser.add_argument(
        '-p',
        '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl',
        '--time_limit',
        help="time limit for each test in minutes",
        default=
        40,  # the time limit is high because DDPG is very slow and its tests run long
        type=int)
    parser.add_argument(
        '-np',
        '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')
    parser.add_argument(
        '-ow',
        '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        # presets_lists = list_all_classes_in_module(presets)
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f[-3:] == '.py' and not f == '__init__.py'
        ]

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            if not args.trace and not preset_validation_params.test:
                continue

            if args.trace:
                num_env_steps = preset_validation_params.trace_max_env_steps
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_passed = perform_trace_based_tests(
                            args, preset_name, num_env_steps, level)
                        if not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_passed = perform_trace_based_tests(
                        args, preset_name, num_env_steps)
                    if not test_passed:
                        fail_count += 1
            else:
                test_passed = perform_reward_based_tests(
                    args, preset_validation_params, preset_name)
                if not test_passed:
                    fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
Example No. 10
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' +
                                                level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)

    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '--seed 42 '
           '-c '
           '--no_summary '
           '-cp {custom_param} '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               log_file_name=log_file_name,
               level='-lvl ' + level if level else '',
               custom_param='\"improve_steps=EnvironmentSteps({n});'
               'steps_between_evaluation_periods=EnvironmentSteps({n});'
               'evaluation_steps=EnvironmentSteps(1);'
               'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps))

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode),
                         crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join(
            './rl_coach', 'traces', preset_name + '_' +
            level.replace(':', '_') if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log(
                'No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(
                        trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
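
clean_df() is also referenced without being shown. A hypothetical sketch of its intent, dropping run-dependent columns so two seeded runs compare equal, might be:

def clean_df(df):
    # The column names listed here are an assumption; the point is to remove
    # wall-clock and other non-deterministic fields before comparing traces.
    for col in ('Wall-Clock Time',):
        if col in df.columns:
            df = df.drop(columns=[col])
    return df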
Example No. 11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--preset',
        '--presets',
        help=
        "(string) Name of preset(s) to run (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-tl',
        '--time_limit',
        help="time limit for each test in minutes",
        default=
        60,  # the time limit is high because DDPG is very slow and its tests run long
        type=int)
    parser.add_argument(
        '-np',
        '--no_progress_bar',
        help=
        "(flag) Don't print the progress bar (makes jenkins logs more readable)",
        action='store_true')

    args = parser.parse_args()
    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = all_presets()

    fail_count = 0
    test_count = 0

    args.time_limit = 60 * args.time_limit

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []
    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            print("Attempting to run Preset: %s" % preset_name)
            if not importable(preset_name):
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            if not has_test_parameters(preset_name):
                continue

            test_count += 1
            try:
                test_preset_reward(preset_name, args.no_progress_bar,
                                   args.time_limit, args.verbose)
            except Exception as e:
                fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
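
This main() relies on several small helpers (all_presets, importable, has_test_parameters, validation_params) that are not shown. Hypothetical sketches, consistent with how the other examples load presets, might look like:

import os
from importlib import import_module

def all_presets(presets_dir=os.path.join('rl_coach', 'presets')):
    return [f[:-3] for f in os.listdir(presets_dir)
            if f.endswith('.py') and f != '__init__.py']

def importable(preset_name):
    try:
        import_module('rl_coach.presets.{}'.format(preset_name))
        return True
    except Exception:
        return False

def validation_params(preset_name):
    preset = import_module('rl_coach.presets.{}'.format(preset_name))
    return preset.graph_manager.preset_validation_params

def has_test_parameters(preset_name):
    # A preset only takes part in the reward test if its validation params mark it as testable.
    return bool(validation_params(preset_name).test)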
Example No. 12
def test_preset_reward(preset_name,
                       no_progress_bar=True,
                       time_limit=60 * 60,
                       verbose=True):
    preset_validation_params = validation_params(preset_name)

    win_size = 10

    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = [
        'python3', 'rl_coach/coach.py', '-p',
        '{preset_name}'.format(preset_name=preset_name), '-e',
        '{test_name}'.format(test_name=test_name), '-n',
        '{num_workers}'.format(
            num_workers=preset_validation_params.num_workers), '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += [
            '-lvl',
            '{level}'.format(level=preset_validation_params.reward_test_level)
        ]

    stdout = open(log_file_name, 'w')

    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, time_limit)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time,
                               time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    # os.killpg(os.getpgid(p.pid), signal.SIGKILL)
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    if not test_passed:
        raise ValueError('golden test failed')
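
A hedged sketch of calling this golden test from pytest (the preset name is only an example):

import pytest

@pytest.mark.parametrize("preset_name", ["CartPole_DQN"])
def test_golden_reward(preset_name):
    # test_preset_reward raises ValueError, and therefore fails the test, if the
    # reward threshold is not reached within the time limit.
    test_preset_reward(preset_name, no_progress_bar=True, time_limit=60 * 60, verbose=True)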
Example No. 13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset', '--presets',
                        help="(string) Name of preset(s) to run (comma separated, as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-ip', '--ignore_presets',
                        help="(string) Name of preset(s) to ignore (comma separated, and as configured in presets.py)",
                        default=None,
                        type=str)
    parser.add_argument('-v', '--verbose',
                        help="(flag) display verbose logs in the event of an error",
                        action='store_true')
    parser.add_argument('--stop_after_first_failure',
                        help="(flag) stop executing tests after the first error",
                        action='store_true')
    parser.add_argument('-ow', '--overwrite',
                        help="(flag) overwrite old trace with new ones in trace testing mode",
                        action='store_true')
    parser.add_argument('-prl', '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument('-ut', '--update_traces',
                        help="(flag) update traces on repository",
                        action='store_true')
    parser.add_argument('-mt', '--max_threads',
                        help="(int) maximum number of threads to run in parallel",
                        default=multiprocessing.cpu_count()-2,
                        type=int)
    parser.add_argument(
        '-i', '--image', help="(string) Name of the testing image", type=str, default=None
    )
    parser.add_argument(
        '-mb', '--memory_backend', help="(string) Name of the memory backend", type=str, default="redispubsub"
    )
    parser.add_argument(
        '-e', '--endpoint', help="(string) Name of the s3 endpoint", type=str, default='s3.amazonaws.com'
    )
    parser.add_argument(
        '-cr', '--creds_file', help="(string) Path of the s3 creds file", type=str, default='.aws_creds'
    )
    parser.add_argument(
        '-b', '--bucket', help="(string) Name of the bucket for s3", type=str, default=None
    )

    args = parser.parse_args()

    if args.update_traces:
        if not args.bucket:
            print("bucket_name required for s3")
            exit(1)
        if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
            print("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars need to be set")
            exit(1)

        config_file = './tmp.cred'
        generate_config(args.image, args.memory_backend, args.endpoint, args.bucket, args.creds_file, config_file)

    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = args.preset.split(',')
    else:
        presets_lists = [f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets')) if
                         f[-3:] == '.py' and not f == '__init__.py']

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module('rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name), crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) + " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" + str(test_count) + " tests passed successfully", crash=False)
Example No. 14
def wait_and_check(args, processes, force=False):
    if not force and len(processes) < args.max_threads:
        return None

    test_path = processes[0][0]
    test_name = test_path.split('/')[-1]
    log_file_name = processes[0][1]
    p = processes[0][2]
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except:
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed
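
run_trace_based_test() is called by the surrounding main() functions but not defined in this section. A hypothetical sketch, mirroring the command built in Example No. 10 and returning the handles wait_and_check() expects, is:

import os
import subprocess

def run_trace_based_test(preset_name, num_env_steps, level=None):
    test_name = '__test_trace_{}{}'.format(
        preset_name, '_' + level.replace(':', '_') if level else '')
    test_path = os.path.join('./experiments', test_name)
    log_file_name = 'test_log_{}.txt'.format(preset_name)
    custom_param = ('"improve_steps=EnvironmentSteps({n});'
                    'steps_between_evaluation_periods=EnvironmentSteps({n});'
                    'evaluation_steps=EnvironmentSteps(1);'
                    'heatup_steps=EnvironmentSteps(1024)"').format(n=num_env_steps)
    cmd = ('python3 rl_coach/coach.py -p {p} -e {e} --seed 42 -c --no_summary '
           '-cp {cp} {lvl} &> {log}').format(
               p=preset_name, e=test_name, cp=custom_param,
               lvl='-lvl ' + level if level else '', log=log_file_name)
    # Run in its own process group so the caller can kill the whole tree if needed.
    p = subprocess.Popen(cmd, shell=True, executable='/bin/bash', preexec_fn=os.setsid)
    return test_path, log_file_name, p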
Example No. 15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--preset',
        help="(string) Name of a preset to run (as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-ip',
        '--ignore_presets',
        help=
        "(string) Name of a preset(s) to ignore (comma separated, and as configured in presets.py)",
        default=None,
        type=str)
    parser.add_argument(
        '-v',
        '--verbose',
        help="(flag) display verbose logs in the event of an error",
        action='store_true')
    parser.add_argument(
        '--stop_after_first_failure',
        help="(flag) stop executing tests after the first error",
        action='store_true')
    parser.add_argument(
        '-ow',
        '--overwrite',
        help="(flag) overwrite old trace with new ones in trace testing mode",
        action='store_true')
    parser.add_argument('-prl',
                        '--parallel',
                        help="(flag) run tests in parallel",
                        action='store_true')
    parser.add_argument(
        '-mt',
        '--max_threads',
        help="(int) maximum number of threads to run in parallel",
        default=multiprocessing.cpu_count() - 2,
        type=int)

    args = parser.parse_args()
    if not args.parallel:
        args.max_threads = 1

    if args.preset is not None:
        presets_lists = [args.preset]
    else:
        presets_lists = [
            f[:-3] for f in os.listdir(os.path.join('rl_coach', 'presets'))
            if f[-3:] == '.py' and not f == '__init__.py'
        ]

    fail_count = 0
    test_count = 0

    if args.ignore_presets is not None:
        presets_to_ignore = args.ignore_presets.split(',')
    else:
        presets_to_ignore = []

    for idx, preset_name in enumerate(sorted(presets_lists)):
        if args.stop_after_first_failure and fail_count > 0:
            break
        if preset_name not in presets_to_ignore:
            try:
                preset = import_module(
                    'rl_coach.presets.{}'.format(preset_name))
            except:
                screen.error("Failed to load preset <{}>".format(preset_name),
                             crash=False)
                fail_count += 1
                test_count += 1
                continue

            preset_validation_params = preset.graph_manager.preset_validation_params
            num_env_steps = preset_validation_params.trace_max_env_steps
            if preset_validation_params.test_using_a_trace_test:
                if preset_validation_params.trace_test_levels:
                    for level in preset_validation_params.trace_test_levels:
                        test_count += 1
                        test_path, log_file, p = run_trace_based_test(
                            preset_name, num_env_steps, level)
                        processes.append((test_path, log_file, p))
                        test_passed = wait_and_check(args, processes)
                        if test_passed is not None and not test_passed:
                            fail_count += 1
                else:
                    test_count += 1
                    test_path, log_file, p = run_trace_based_test(
                        preset_name, num_env_steps)
                    processes.append((test_path, log_file, p))
                    test_passed = wait_and_check(args, processes)
                    if test_passed is not None and not test_passed:
                        fail_count += 1

    while len(processes) > 0:
        test_passed = wait_and_check(args, processes, force=True)
        if test_passed is not None and not test_passed:
            fail_count += 1

    screen.separator()
    if fail_count == 0:
        screen.success(" Summary: " + str(test_count) + "/" + str(test_count) +
                       " tests passed successfully")
    else:
        screen.error(" Summary: " + str(test_count - fail_count) + "/" +
                     str(test_count) + " tests passed successfully")
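
Both this main() and the one in Example No. 13 append to a processes list that never appears inside the function; a sketch of the assumed module-level state and entry point:

# Shared module-level list of (test_path, log_file, Popen) tuples consumed by wait_and_check().
processes = []

if __name__ == '__main__':
    main()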