Beispiel #1
0
 def get_predicate_of_evaluated_env(self,
                                    evaluated_env: EnvVariables) -> bool:
     """Return the predicate stored for ``evaluated_env``.

     The pairs in ``self.env_predicate_pairs`` are scanned in order and the
     first one whose env variables compare equal (via ``is_equal``) to
     ``evaluated_env`` supplies the result.

     Raises:
         AttributeError: if no stored pair matches ``evaluated_env``.
     """
     for stored_pair in self.env_predicate_pairs:
         if not stored_pair.get_env_variables().is_equal(evaluated_env):
             continue
         return stored_pair.is_predicate()

     message = "{} must be evaluated".format(
         evaluated_env.get_params_string())
     raise AttributeError(message)
Beispiel #2
0
 def is_already_evaluated(self,
                          candidate_env_variables: EnvVariables) -> bool:
     """Tell whether ``candidate_env_variables`` matches a stored pair.

     Scans ``self.env_predicate_pairs`` in order; on the first pair whose env
     variables compare equal (via ``is_equal``) to the candidate, a debug
     message is logged and ``True`` is returned. Returns ``False`` when no
     pair matches.
     """
     for stored_pair in self.env_predicate_pairs:
         if not stored_pair.get_env_variables().is_equal(
                 candidate_env_variables):
             continue
         description = candidate_env_variables.get_params_string()
         self.logger.debug(
             "Env {} was already evaluated".format(description))
         return True
     return False
def execute_train(
    agent: AbstractAgent,
    current_iteration: int,
    search_suffix: str,
    current_env_variables: EnvVariables,
    _start_time: float,
    random_search: bool = False,
) -> Tuple[EnvPredicatePair, float, float]:
    """Run ``agent.train`` in a worker thread and collect the results it reports.

    The worker is handed a queue; every item read from that queue is logged,
    turned into an ``EnvPredicatePair`` and its training / regression times
    are accumulated. Reading stops at the first item whose
    ``is_task_completed()`` is true, after which the worker thread is joined.

    Args:
        agent: object whose ``train`` method is executed in the worker thread.
        current_iteration: forwarded to ``agent.train``.
        search_suffix: forwarded to ``agent.train`` and stored as the only
            entry of ``model_dirs`` in each pair.
        current_env_variables: environment configuration being trained on.
        _start_time: ``time.time()``-style timestamp, used only to log the
            elapsed time.
        random_search: forwarded to ``agent.train``.

    Returns:
        A tuple ``(pair, training_time, regression_time)`` where ``pair`` is
        built from the last item read from the queue and the two floats are
        the sums over all items read.
    """
    communication_queue = Queue()
    logger = Log("execute_train")

    # agent.train sets seed globally (for tf, np and random)
    seed = np.random.randint(2 ** 32 - 1)

    # order of argument matters in the args param; must match the order of args in the train method of agent
    thread = threading.Thread(
        target=agent.train,
        args=(seed, communication_queue, current_iteration, search_suffix, current_env_variables, random_search,),
    )
    thread.start()

    # Only the most recent pair is returned, so there is no need to keep the
    # earlier ones around.
    last_env_predicate_pair = None
    sum_training_time = 0.0
    sum_regression_time = 0.0
    while True:
        # NOTE(review): this get() blocks without a timeout; if agent.train
        # exits without ever queueing a result whose is_task_completed() is
        # true, this loop never terminates.
        data: ExecutionResult = communication_queue.get()  # blocking code
        logger.debug(
            "Env: {}, evaluates to {}".format(current_env_variables.get_params_string(), data.is_adequate_performance(),)
        )
        logger.debug("Info: {}".format(data.get_info()))
        last_env_predicate_pair = EnvPredicatePair(
            env_variables=current_env_variables,
            predicate=data.is_adequate_performance(),
            regression=data.is_regression(),
            execution_info=data.get_info(),
            model_dirs=[search_suffix],
        )
        sum_regression_time += data.get_regression_time()
        sum_training_time += data.get_training_time()
        if data.is_task_completed():
            break

    # join() returns as soon as the worker finishes instead of polling
    # is_alive() once per second.
    thread.join()

    logger.info("TIME ELAPSED: {}".format(str(datetime.timedelta(seconds=(time.time() - _start_time)))))

    return last_env_predicate_pair, sum_training_time, sum_regression_time
Beispiel #4
0
 def append(self, t_env_variables: EnvVariables,
            f_env_variables: EnvVariables) -> bool:
     """Store the pair ``(t_env_variables, f_env_variables)`` unless it is a duplicate.

     The pair must satisfy ``is_frontier_pair`` for ``self.epsilon``
     (checked with an assertion). If an equal ``FrontierPair`` is already in
     ``self.frontier_pairs`` nothing is stored and ``False`` is returned;
     otherwise the new pair is logged, appended and ``True`` is returned.
     """
     assert is_frontier_pair(
         t_env_variables=t_env_variables,
         f_env_variables=f_env_variables,
         epsilon=self.epsilon,
     ), "The pair t_env: {} - f_env: {} is not a frontier pair since its distance {} is > {}".format(
         t_env_variables.get_params_string(),
         f_env_variables.get_params_string(),
         compute_dist(t_env_variables=t_env_variables,
                      f_env_variables=f_env_variables),
         self.epsilon,
     )

     new_pair = FrontierPair(t_env_variables, f_env_variables)
     if any(known_pair.is_equal(new_pair)
            for known_pair in self.frontier_pairs):
         return False

     t_description = t_env_variables.get_params_string()
     f_description = f_env_variables.get_params_string()
     self.logger.info(
         "New frontier pair found. t_env: {}, f_env: {}".format(
             t_description, f_description))
     self.frontier_pairs.append(new_pair)
     return True
Beispiel #5
0
    def test_with_callback(self, seed, env_variables: EnvVariables, n_eval_episodes: int = None) -> EnvPredicatePair:
        """Evaluate a model on ``env_variables`` through ``self.env_eval_callback``.

        Sets the global seed, resolves the hyperparameters, builds an
        evaluation environment and a model, and asks the callback to evaluate
        the model on that environment.

        Args:
            seed: seed passed to ``_set_global_seed``, ``make_custom_env`` and
                ``create_model``.
            env_variables: environment configuration to evaluate.
            n_eval_episodes: number of evaluation episodes; when falsy
                (``None`` or ``0``) ``self.n_eval_episodes`` is used instead.

        Returns:
            An ``EnvPredicatePair`` holding ``env_variables``, the adequacy
            verdict returned by the callback and its info object.

        Raises:
            AssertionError: if ``self.env_eval_callback`` is not set.
        """
        assert self.env_eval_callback, "env_eval_callback should be instantiated"

        self._set_global_seed(seed=seed)

        self.logger.debug("env_variables: {}".format(env_variables.get_params_string()))

        best_model_save_path, tensorboard_log_dir = self._preprocess_storage_dirs()

        if self.algo_hyperparams:
            # NOTE(review): eval() executes arbitrary code; algo_hyperparams
            # must only ever come from a trusted source.
            # Evaluate once and reuse the result for both the log line and
            # the actual hyperparameters.
            hyperparams = eval(self.algo_hyperparams)
            self.logger.debug("Overriding file specified hyperparams with {}".format(hyperparams))
        else:
            hyperparams = load_hyperparams(algo_name=self.algo_name, env_name=self.env_name)

        normalize_kwargs = _parse_normalize(dictionary=hyperparams)

        eval_env = make_custom_env(
            seed=seed,
            sb_version=self.sb_version,
            env_kwargs=env_variables,
            algo_name=self.algo_name,
            env_name=self.env_name,
            normalize_kwargs=normalize_kwargs,
            log_dir=best_model_save_path,
            evaluate=True,
            continue_learning_suffix=self.continue_learning_suffix,
        )
        model = self.create_model(
            seed=seed,
            algo_name=self.algo_name,
            env=eval_env,
            tensorboard_log_dir=tensorboard_log_dir,
            hyperparams=hyperparams,
            best_model_save_path=best_model_save_path,
            model_to_load=self.model_to_load,
            env_name=self.env_name,
        )

        n_eval_episodes_to_run = n_eval_episodes if n_eval_episodes else self.n_eval_episodes
        adequate_performance, info = self.env_eval_callback.evaluate_env(
            model=model, env=eval_env, n_eval_episodes=n_eval_episodes_to_run, sb_version=self.sb_version,
        )
        return EnvPredicatePair(env_variables=env_variables, predicate=adequate_performance, execution_info=info,)
def get_binary_search_candidate(
    t_env_variables: EnvVariables,
    f_env_variables: EnvVariables,
    algo_name: str,
    env_name: str,
    param_names,
    discrete_action_space: bool,
    buffer_env_predicate_pairs: BufferEnvPredicatePairs,
) -> EnvVariables:
    """Pick a not-yet-evaluated environment between two endpoint environments.

    Each round works as follows:

    1. One of the two endpoints is chosen at random as the "base".
    2. For every parameter a pool of values is built: the midpoint of the two
       endpoint values (only when they differ) plus, once for every *other*
       parameter, the base endpoint's own value. Candidates are the Cartesian
       product of these pools.
    3. Candidates equal to either endpoint are discarded. Those that
       ``buffer_env_predicate_pairs`` does not report as already evaluated
       are eligible; if there is at least one, a random eligible candidate is
       returned.
    4. Otherwise a random already-evaluated candidate replaces the base
       endpoint and the search repeats, for at most 50 rounds.

    Args:
        t_env_variables: first endpoint of the search interval.
        f_env_variables: second endpoint of the search interval.
        algo_name: forwarded to ``instantiate_env_variables``.
        env_name: forwarded to ``instantiate_env_variables``.
        param_names: forwarded to ``instantiate_env_variables``.
        discrete_action_space: forwarded to ``instantiate_env_variables``.
        buffer_env_predicate_pairs: buffer queried through
            ``is_already_evaluated``.

    Returns:
        The selected candidate environment.

    Raises:
        AssertionError: if a round produces no usable candidate at all, if
            the round budget is exhausted, or if the selected candidate
            equals one of the (possibly narrowed) endpoints.
    """
    original_max_iterations = 50
    logger = Log("get_binary_search_candidate")
    max_number_iterations = original_max_iterations

    candidate_new_env_variables = copy.deepcopy(t_env_variables)

    while True:
        # Randomly decide which endpoint supplies the non-midpoint values and
        # gets replaced if this round has to narrow the interval.
        base_env_variables, base_is_t_env = random.choice(
            [(t_env_variables, True), (f_env_variables, False)])

        # compute all possible combinations of environments
        num_params = len(t_env_variables.get_params())
        candidates_dict = {index: [] for index in range(num_params)}
        for i in range(num_params):
            t_value = t_env_variables.get_param(index=i).get_current_value()
            f_value = f_env_variables.get_param(index=i).get_current_value()
            if t_value != f_value:
                candidates_dict[i].append((t_value + f_value) / 2)
            for index in range(num_params):
                if index != i:
                    candidates_dict[index].append(
                        base_env_variables.get_values()[index])

        all_candidates = list(itertools.product(*candidates_dict.values()))
        logger.info("t_env: {}, f_env: {}".format(
            t_env_variables.get_params_string(),
            f_env_variables.get_params_string()))
        logger.info("all candidates binary search: {}".format(all_candidates))

        all_candidates_env_variables_filtered = []
        all_candidates_env_variables = []
        for candidate_values in all_candidates:
            env_values = {
                base_env_variables.get_param(index=i).get_name(): candidate_values[i]
                for i in range(len(base_env_variables.get_params()))
            }
            candidate_env_variables = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=discrete_action_space,
                env_name=env_name,
                param_names=param_names,
                env_values=env_values,
            )
            # do not consider candidates equal to one of the endpoints
            if candidate_env_variables.is_equal(t_env_variables):
                continue
            if candidate_env_variables.is_equal(f_env_variables):
                continue
            if not buffer_env_predicate_pairs.is_already_evaluated(
                    candidate_env_variables=candidate_env_variables):
                all_candidates_env_variables_filtered.append(
                    candidate_env_variables)
            all_candidates_env_variables.append(candidate_env_variables)

        if all_candidates_env_variables_filtered:
            candidate_new_env_variables = random.choice(
                all_candidates_env_variables_filtered)
            break

        # Every remaining candidate was already evaluated. The filtered list
        # is empty at this point, so the replacement endpoint has to be drawn
        # from the unfiltered list (random.choice on an empty list raises
        # IndexError).
        assert len(
            all_candidates_env_variables
        ) > 0, "there must be at least one candidate env for binary search"
        candidate_env_variables_already_evaluated = random.choice(
            all_candidates_env_variables)
        if base_is_t_env:
            t_env_variables = copy.deepcopy(
                candidate_env_variables_already_evaluated)
        else:
            f_env_variables = copy.deepcopy(
                candidate_env_variables_already_evaluated)

        max_number_iterations -= 1

        if max_number_iterations == 0:
            break

    assert max_number_iterations > 0, "Could not binary mutate any param of envs {} and {} in {} steps".format(
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(), str(original_max_iterations))

    assert not candidate_new_env_variables.is_equal(
        t_env_variables
    ) and not candidate_new_env_variables.is_equal(
        f_env_variables
    ), "candidate_env_variables {} must be different than t_env_variables {} and f_env_variables {}".format(
        candidate_new_env_variables.get_params_string(),
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(),
    )

    return candidate_new_env_variables
Beispiel #7
0
    def dominance_analysis(
            self,
            candidate_env_variables: EnvVariables,
            predicate_to_consider: bool = True
    ) -> Union[EnvPredicatePair, None]:
        """Look for a stored pair that is in a dominance relation with the candidate.

        With ``predicate_to_consider`` truthy, stored pairs whose predicate is
        truthy are examined and a pair matches when, for every parameter, its
        value is not smaller than the candidate's (starting multiplier > 1.0)
        or not larger than the candidate's (starting multiplier < 1.0).

        With ``predicate_to_consider`` falsy, stored pairs whose predicate is
        falsy are examined and the comparison is mirrored: the candidate's
        value must be not smaller (multiplier > 1.0) or not larger
        (multiplier < 1.0) than the stored one.

        Parameters whose starting multiplier is exactly 1.0 are not compared.
        Every matching pair is logged at debug level.

        Returns:
            The last matching pair in ``self.env_predicate_pairs`` order, or
            ``None`` when no pair matches.

        Raises:
            AssertionError: if the candidate is already evaluated, or if a
                parameter of an examined pair has a direction other than
                ``"positive"``.
        """
        assert not self.is_already_evaluated(
            candidate_env_variables=candidate_env_variables
        ), "Env {} must not be evaluated".format(
            candidate_env_variables.get_params_string())

        looking_for_true = bool(predicate_to_consider)
        if looking_for_true:
            # searching for an executed env that evaluates to True that dominates the env passed as parameter
            log_template = "candidate {} dominated by executed env {} that evaluates to {}"
        else:
            # searching for an executed env that evaluates to False that is dominated by the env passed as parameter
            log_template = "candidate {} dominates executed env {} that evaluates to {}"

        last_match = None
        for executed_pair in self.env_predicate_pairs:
            predicate = executed_pair.is_predicate()
            if bool(predicate) != looking_for_true:
                continue

            executed_env = executed_pair.get_env_variables()
            relation_holds = True
            for position in range(len(executed_env.get_params())):
                executed_param = executed_env.get_param(index=position)
                direction = executed_param.get_direction()
                starting_multiplier = executed_param.get_starting_multiplier()
                assert direction == "positive", "unknown and negative direction is not supported"
                executed_value = executed_param.get_current_value()
                candidate_value = candidate_env_variables.get_param(
                    index=position).get_current_value()

                if direction != "positive":
                    # Only reachable when assertions are disabled; such
                    # parameters are not compared.
                    continue

                if starting_multiplier > 1.0:
                    if looking_for_true:
                        violated = executed_value < candidate_value
                    else:
                        violated = candidate_value < executed_value
                elif starting_multiplier < 1.0:
                    if looking_for_true:
                        violated = executed_value > candidate_value
                    else:
                        violated = candidate_value > executed_value
                else:
                    violated = False

                # No early exit: the remaining parameters are still visited
                # so that their direction is checked as well.
                if violated:
                    relation_holds = False

            if not relation_holds:
                continue

            last_match = executed_pair
            logged_predicate = predicate if looking_for_true else not predicate
            self.logger.debug(
                log_template.format(
                    candidate_env_variables.get_params_string(),
                    executed_env.get_params_string(),
                    logged_predicate,
                ))

        return last_match