# Example #1
    # NOTE(review): fragment — the enclosing `def` (a CLI entry point, judging
    # by the argparse usage) is outside this view; semantics below are read
    # from the visible calls only.
    parser = argparse.ArgumentParser()
    # --dir is validated for existence; --param_names is parsed/validated by
    # check_param_names (both validators defined elsewhere in the project).
    parser.add_argument("--dir", type=check_file_existence, required=True)
    parser.add_argument("--param_names", type=check_param_names, required=True)
    parser.add_argument("--algo_name", choices=SUPPORTED_ALGOS, required=True)
    parser.add_argument("--env_name", choices=SUPPORTED_ENVS, required=True)

    args = parser.parse_args()

    logger = Log("analyze_number_of_executions_skipped")
    # Mirror all log records into a text file inside the analyzed directory.
    logging.basicConfig(
        filename=os.path.join(args.dir, "analyze_number_of_executions_skipped.txt"), filemode="w", level=logging.DEBUG
    )

    env_variables = instantiate_env_variables(
        algo_name=args.algo_name,
        discrete_action_space=False,  # I do not care about this parameter in this case
        env_name=args.env_name,
        param_names=args.param_names,
    )

    # Collect the per-iteration result directories (n_iterations_*), drop
    # resampling artifacts, and sort them by iteration number.
    iterations_dirs = glob.glob(os.path.join(args.dir, "n_iterations_*"))
    iterations_dirs = filter_resampling_artifacts(files=iterations_dirs)
    iterations_dirs_sorted = sorted(iterations_dirs, key=get_result_dir_iteration_number)

    executions_skipped_dict = dict()

    # Accumulators filled by the (not shown) remainder of this function.
    all_search_points = []
    time_elapsed_per_run = []
    time_taken_per_repetition = []
    regression_time_per_repetition = []
    all_frontier_points = []
    executions_skipped = []
def get_binary_search_candidate(
    t_env_variables: EnvVariables,
    f_env_variables: EnvVariables,
    algo_name: str,
    env_name: str,
    param_names,
    discrete_action_space: bool,
    buffer_env_predicate_pairs: BufferEnvPredicatePairs,
) -> EnvVariables:
    """Bisect between a passing env (t) and a failing env (f) to get a new candidate.

    For every parameter whose value differs between ``t_env_variables`` and
    ``f_env_variables`` the midpoint of the two values is proposed, while every
    other parameter takes the value of a randomly chosen endpoint. The
    cartesian product of these per-parameter choices yields the candidate
    envs. Candidates equal to either endpoint are discarded. If every
    remaining candidate was already evaluated (per
    ``buffer_env_predicate_pairs``), one of them replaces the randomly chosen
    endpoint and the search repeats, for at most 50 iterations.

    :return: a not-yet-evaluated ``EnvVariables`` strictly between the endpoints.
    :raises AssertionError: if the iteration budget is exhausted or no
        candidate distinct from both endpoints can be built.
    """
    original_max_iterations = 50
    logger = Log("get_binary_search_candidate")
    max_number_iterations = original_max_iterations

    candidate_new_env_variables = copy.deepcopy(t_env_variables)

    while True:

        # Randomly choose which endpoint supplies the values of the params
        # that are NOT bisected; the boolean tags the choice so we know which
        # endpoint to move when every candidate turns out to be stale.
        candidates_dict = dict()
        t_f_env_variables = random.choice([(t_env_variables, True),
                                           (f_env_variables, False)])

        # compute all possible combinations of environments
        for i in range(len(t_env_variables.get_params())):
            new_value = (
                t_env_variables.get_param(index=i).get_current_value() +
                f_env_variables.get_param(index=i).get_current_value()) / 2
            if i not in candidates_dict:
                candidates_dict[i] = []
            # Only bisect a param whose endpoint values actually differ.
            if (t_env_variables.get_param(index=i).get_current_value() !=
                    f_env_variables.get_param(index=i).get_current_value()):
                candidates_dict[i].append(new_value)
            for index in range(len(t_env_variables.get_params())):
                if index not in candidates_dict:
                    candidates_dict[index] = []
                if index != i:
                    candidates_dict[index].append(
                        t_f_env_variables[0].get_values()[index])

        all_candidates = list(
            itertools.product(*list(candidates_dict.values())))
        logger.info("t_env: {}, f_env: {}".format(
            t_env_variables.get_params_string(),
            f_env_variables.get_params_string()))
        logger.info("all candidates binary search: {}".format(all_candidates))
        all_candidates_env_variables_filtered = []
        all_candidates_env_variables = []
        for candidate_values in all_candidates:
            env_values = dict()
            for i in range(len(t_f_env_variables[0].get_params())):
                param_name = t_f_env_variables[0].get_param(index=i).get_name()
                env_values[param_name] = candidate_values[i]
            candidate_env_variables = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=discrete_action_space,
                env_name=env_name,
                param_names=param_names,
                env_values=env_values,
            )
            # do not consider candidate = t_f_env_variables
            if not candidate_env_variables.is_equal(
                    t_env_variables) and not candidate_env_variables.is_equal(
                        f_env_variables):
                if not buffer_env_predicate_pairs.is_already_evaluated(
                        candidate_env_variables=candidate_env_variables):
                    all_candidates_env_variables_filtered.append(
                        candidate_env_variables)
                all_candidates_env_variables.append(candidate_env_variables)

        if len(all_candidates_env_variables_filtered) > 0:
            # A fresh (never-evaluated) candidate exists: pick one and stop.
            candidate_new_env_variables = random.choice(
                all_candidates_env_variables_filtered)
            break
        else:
            assert len(
                all_candidates_env_variables
            ) > 0, "there must be at least one candidate env for binary search"
            # BUG FIX: the original sampled from
            # all_candidates_env_variables_filtered, which is necessarily
            # empty in this branch (that is why we are here), so random.choice
            # always raised IndexError. Sample instead from the
            # already-evaluated candidates and move the randomly chosen
            # endpoint towards the sampled env, then retry.
            candidate_env_variables_already_evaluated = random.choice(
                all_candidates_env_variables)
            if t_f_env_variables[1]:
                t_env_variables = copy.deepcopy(
                    candidate_env_variables_already_evaluated)
            else:
                f_env_variables = copy.deepcopy(
                    candidate_env_variables_already_evaluated)

        max_number_iterations -= 1

        if max_number_iterations == 0:
            break

    # Reaching 0 means the loop exhausted its budget without a fresh candidate.
    assert max_number_iterations > 0, "Could not binary mutate any param of envs {} and {} in {} steps".format(
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(), str(original_max_iterations))

    assert not candidate_new_env_variables.is_equal(
        t_env_variables
    ) and not candidate_new_env_variables.is_equal(
        f_env_variables
    ), "candidate_env_variables {} must be different than t_env_variables {} and f_env_variables {}".format(
        candidate_new_env_variables.get_params_string(),
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(),
    )

    return candidate_new_env_variables
# Example #3
    # NOTE(review): fragment — the enclosing `def` and the parser construction
    # are outside this view; moreover two distinct snippets appear to have
    # been fused here (see the dangling `agent = Agent(` call below).
    parser.add_argument("--num_threads", type=int, default=0)
    parser.add_argument("--sb_version", type=str, default="sb2")

    parser.add_argument("--frontier_path", type=str, default=None)
    parser.add_argument("--runs_for_probability_estimation", type=int, default=1)
    args = parser.parse_args()

    # Either draw a random seed in [0, 2**32 - 1) (int64 dtype so numpy does
    # not overflow on platforms whose default int is 32-bit) or fix it to 0.
    if args.random_seed:
        args.seed = np.random.randint(2 ** 32 - 1, dtype="int64").item()
    else:
        args.seed = 0

    env_kwargs = instantiate_env_variables(
        algo_name=args.algo_name,
        discrete_action_space=args.discrete_action_space,
        env_name=args.env_name,
        env_values=args.env_values,
        param_names=args.param_names,
    )
    env_eval_callback = None
    if args.instantiate_eval_callback:
        env_eval_callback = instantiate_eval_callback(env_name=args.env_name)

    # Cap torch's intra- and inter-op thread pools when requested
    # (0, the default, leaves torch's own defaults untouched).
    if args.num_threads:
        print(f"Setting torch.num_threads to {args.num_threads}")
        th.set_num_threads(args.num_threads)
        th.set_num_interop_threads(args.num_threads)

    logger = Log("main")

    # NOTE(review): truncated line — the argument list of this `Agent(` call
    # is missing; everything from `param_names = None` onward seems to belong
    # to a different snippet pasted over it.
    agent = Agent(
    param_names = None
    if args.param_names:
        try:
            # Exactly two comma-separated parameter names are expected.
            param_names = args.param_names.split(sep=",")
            if len(param_names) != 2:
                raise SyntaxError("2 param names must be specified: {}".format(
                    args.param_names))
        # NOTE(review): this broad handler also catches the SyntaxError raised
        # just above and re-raises it with the "comma separated" message —
        # probably unintended; confirm before narrowing.
        except Exception:
            raise SyntaxError("param names must be comma separated: {}".format(
                args.param_names))

    env_variables = instantiate_env_variables(
        algo_name=args.algo_name,
        discrete_action_space=args.discrete_action_space,
        env_name=args.env_name,
        param_names=param_names,
        model_suffix=args.model_suffix,
    )
    env_eval_callback = instantiate_eval_callback(env_name=args.env_name)

    # Build the real agent unless a stub was requested on the command line.
    # NOTE(review): this call is cut off at the end of the visible fragment.
    if not args.stub_agent:
        agent = Agent(
            algo_name=args.algo_name,
            env_name=args.env_name,
            log_to_tensorboard=args.log_to_tensorboard,
            tb_log_name=args.tb_log_name,
            train_total_timesteps=args.train_total_timesteps,
            n_eval_episodes=args.n_eval_episodes,
            render=args.render,
            num_envs=args.num_envs,
# Example #5
    def __init__(
        self,
        agent: AbstractAgent,
        num_iterations: int,
        algo_name: str,
        env_name: str,
        tb_log_name: str,
        continue_learning_suffix: str,
        env_variables: EnvVariables,
        param_names=None,
        runs_for_probability_estimation: int = 1,
        buffer_file: str = None,
        archive_file: str = None,
        executions_skipped_file: str = None,
        parallelize_search: bool = False,
        # NOTE(review): originally annotated bool, but compared with -1 and
        # > 0 below — it is effectively an int ("monitor every N iterations").
        monitor_search_every: int = False,
        binary_search_epsilon: float = 0.05,
        start_search_time: float = None,
        starting_progress_report_number: int = 0,
        stop_at_first_iteration: bool = False,
        exp_suffix: str = None,
    ):
        """Initialize a random-search session over environment parameters.

        When ``buffer_file`` is given, a previous execution is restored: the
        buffer of (env, predicate) pairs, the archive of frontier pairs and
        the executions-skipped buffer are rebuilt from the saved result files
        (``archive_file`` is then mandatory). Otherwise a fresh save directory
        ``HOME/random/<env_name>/<algo_name>/n_iterations_*`` is created and
        seeded with the initial env, which is assumed to satisfy the predicate
        of adequate performance.
        """
        assert agent, "agent should have a value: {}".format(agent)
        assert algo_name, "algo_name should have a value: {}".format(algo_name)
        assert env_name, "env_name should have a value: {}".format(env_name)

        self.agent = agent
        self.num_iterations = num_iterations
        self.init_env_variables = env_variables
        self.previous_num_iterations = None
        self.start_time = time.time()
        self.logger = Log("Random")
        self.param_names = param_names
        # Concrete param dict of the initial env; also carries the
        # "discrete_action_space" flag read during the restore paths below.
        self.all_params = env_variables.instantiate_env()
        self.runs_for_probability_estimation = runs_for_probability_estimation
        self.buffer_file = buffer_file
        self.archive_file = archive_file
        self.parallelize_search = parallelize_search
        self.stop_at_first_iteration = stop_at_first_iteration
        self.exp_suffix = exp_suffix

        # NOTE(review): only set when param_names is truthy, yet read
        # unconditionally by MonitorProgress at the end of __init__ — confirm
        # monitoring is never enabled without param_names.
        if param_names:
            self.param_names_string = "_".join(param_names)

        # TODO: refactor buffer restoring in abstract class extended by search algo
        #  (for now only random search and alphatest)
        if buffer_file:
            # ---- restore a previous execution from saved result files ----
            previously_saved_buffer = read_saved_buffer(
                buffer_file=buffer_file)
            # The save dir is the directory containing the buffer file.
            index_last_slash = buffer_file.rindex("/")

            self.algo_save_dir = buffer_file[:index_last_slash]
            self.logger.debug(
                "Algo save dir from restored execution: {}".format(
                    self.algo_save_dir))
            self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
                save_dir=self.algo_save_dir)
            self.archive = Archive(save_dir=self.algo_save_dir,
                                   epsilon=binary_search_epsilon)

            # restore buffer
            for buffer_item in previously_saved_buffer:
                previous_env_variables = instantiate_env_variables(
                    algo_name=algo_name,
                    discrete_action_space=self.
                    all_params["discrete_action_space"],
                    env_name=env_name,
                    param_names=param_names,
                    env_values=buffer_item.get_env_values(),
                )
                self.buffer_env_predicate_pairs.append(
                    EnvPredicatePair(
                        env_variables=previous_env_variables,
                        pass_probability=buffer_item.get_pass_probability(),
                        predicate=buffer_item.is_predicate(),
                        regression_probability=buffer_item.
                        get_regression_probability(),
                        probability_estimation_runs=buffer_item.
                        get_probability_estimation_runs(),
                        regression_estimation_runs=buffer_item.
                        get_regression_estimation_runs(),
                        model_dirs=buffer_item.get_model_dirs(),
                    ))
            assert archive_file, (
                "when buffer file is available so needs to be the archive file to "
                "restore a previous execution")
            # The buffer and archive filenames encode the iteration number;
            # both must agree for the restore to be consistent.
            try:
                previous_num_iterations_buffer = get_result_file_iteration_number(
                    filename=buffer_file)
                previous_num_iterations_archive = get_result_file_iteration_number(
                    filename=archive_file)
                assert (previous_num_iterations_buffer ==
                        previous_num_iterations_archive
                        ), "The two nums must coincide: {}, {}".format(
                            previous_num_iterations_buffer,
                            previous_num_iterations_archive)
                previous_num_iterations = previous_num_iterations_buffer + 1
            # NOTE(review): re-wrapping the ValueError adds nothing — a bare
            # `raise` would preserve the traceback.
            except ValueError as e:
                raise ValueError(e)

            self.previous_num_iterations = previous_num_iterations
            self.logger.info(
                "Restore previous execution of {} iterations.".format(
                    previous_num_iterations))

            # restore archive: saved entries alternate true/false envs; each
            # completed (t, f) pair is appended to the archive.
            previously_saved_archive = read_saved_archive(
                archive_file=archive_file)
            t_env_variables = None
            f_env_variables = None
            for env_values, predicate in previously_saved_archive:
                all_params = env_variables.instantiate_env()
                previous_env_variables = instantiate_env_variables(
                    algo_name=algo_name,
                    discrete_action_space=all_params["discrete_action_space"],
                    env_name=env_name,
                    param_names=param_names,
                    env_values=env_values,
                )
                if predicate:
                    t_env_variables = previous_env_variables
                else:
                    f_env_variables = previous_env_variables

                if t_env_variables and f_env_variables:
                    self.archive.append(t_env_variables=t_env_variables,
                                        f_env_variables=f_env_variables)
                    t_env_variables = None
                    f_env_variables = None

                # restore executions skipped
                # NOTE(review): this whole block is indented inside the
                # archive-restore loop, so it re-runs once per archive entry;
                # it likely belongs one level out. Also
                # self.buffer_executions_skipped is only assigned in the else
                # branch below, so the append here would raise AttributeError
                # on this restore path — confirm against callers.
                previously_saved_executions_skipped = read_saved_buffer_executions_skipped(
                    buffer_executions_skipped_file=executions_skipped_file)
                for buffer_executions_skipped_item in previously_saved_executions_skipped:
                    previous_env_variables_skipped = instantiate_env_variables(
                        algo_name=algo_name,
                        discrete_action_space=self.
                        all_params["discrete_action_space"],
                        env_name=env_name,
                        param_names=param_names,
                        env_values=buffer_executions_skipped_item.
                        env_values_skipped,
                    )
                    env_predicate_pair_skipped = EnvPredicatePair(
                        env_variables=previous_env_variables_skipped,
                        predicate=buffer_executions_skipped_item.predicate)
                    previous_env_variables_executed = instantiate_env_variables(
                        algo_name=algo_name,
                        discrete_action_space=self.
                        all_params["discrete_action_space"],
                        env_name=env_name,
                        param_names=param_names,
                        env_values=buffer_executions_skipped_item.
                        env_values_executed,
                    )
                    env_predicate_pair_executed = EnvPredicatePair(
                        env_variables=previous_env_variables_executed,
                        predicate=buffer_executions_skipped_item.predicate)
                    self.buffer_executions_skipped.append(
                        ExecutionSkipped(
                            env_predicate_pair_skipped=
                            env_predicate_pair_skipped,
                            env_predicate_pair_executed=
                            env_predicate_pair_executed,
                            search_component=buffer_executions_skipped_item.
                            search_component,
                        ))
        else:
            # ---- fresh run: create a unique save directory ----
            attempt = 0

            suffix = "n_iterations_"
            if self.param_names:
                suffix += self.param_names_string + "_"
            if self.exp_suffix:
                suffix += self.exp_suffix + "_"
            suffix += str(num_iterations)

            algo_save_dir = os.path.abspath(HOME + "/random/" + env_name +
                                            "/" + algo_name + "/" + suffix +
                                            "_" + str(attempt))
            # Bump the trailing attempt counter until the directory is new.
            # NOTE(review): `algo_save_dir[:-1]` drops one char, so this only
            # composes correctly while attempt stays single-digit — confirm.
            _algo_save_dir = algo_save_dir
            while os.path.exists(_algo_save_dir):
                attempt += 1
                _algo_save_dir = algo_save_dir[:-1] + str(attempt)
            self.algo_save_dir = _algo_save_dir
            os.makedirs(self.algo_save_dir)
            self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
                save_dir=self.algo_save_dir)
            # assuming initial env_variables satisfies the predicate of adequate performance
            if self.runs_for_probability_estimation:
                env_predicate_pair = EnvPredicatePair(
                    env_variables=self.init_env_variables,
                    predicate=True,
                    probability_estimation_runs=[True] *
                    self.runs_for_probability_estimation,
                )
            else:
                env_predicate_pair = EnvPredicatePair(
                    env_variables=self.init_env_variables, predicate=True)
            self.buffer_env_predicate_pairs.append(env_predicate_pair)
            self.buffer_executions_skipped = BufferExecutionsSkipped(
                save_dir=self.algo_save_dir)
            self.archive = Archive(save_dir=self.algo_save_dir,
                                   epsilon=binary_search_epsilon)

        self.env_name = env_name
        self.algo_name = algo_name
        self.tb_log_name = tb_log_name
        self.continue_learning_suffix = continue_learning_suffix
        self.binary_search_epsilon = binary_search_epsilon

        self.runner = Runner(
            agent=self.agent,
            runs_for_probability_estimation=self.
            runs_for_probability_estimation,
        )

        self.monitor_search_every = monitor_search_every
        self.monitor_progress = None
        # NOTE(review): the `> 0` test already implies `!= -1`; kept as-is.
        if self.monitor_search_every != -1 and self.monitor_search_every > 0:
            self.monitor_progress = MonitorProgress(
                algo_name=self.algo_name,
                env_name=standardize_env_name(env_name=self.env_name),
                results_dir=self.algo_save_dir,
                param_names_string=self.param_names_string,
                search_type="random",
                start_search_time=start_search_time,
                starting_progress_report_number=starting_progress_report_number,
            )