def is_frontier_pair(t_env_variables: EnvVariables, f_env_variables: EnvVariables, epsilon: float, dist: float = None) -> bool:
    """Decide whether two envs form a frontier pair.

    Thin wrapper: extracts the raw parameter values from both envs and
    delegates the actual check to ``is_frontier_pair_values``. An optional
    precomputed distance ``dist`` is forwarded unchanged.
    """
    t_values = t_env_variables.get_values()
    f_values = f_env_variables.get_values()
    return is_frontier_pair_values(
        t_env_values=t_values,
        f_env_values=f_values,
        epsilon=epsilon,
        dist=dist,
    )
def compute_inverse_dist_random_search(env_variables: EnvVariables, index_param: int, epsilon: float) -> List[float]:
    """Return the two closed-form parameter values at normalized distance
    ``epsilon`` from the current value of param ``index_param``.

    NOTE(review): the two roots were derived from the original expressions;
    presumably they invert the normalized distance used by the random search —
    confirm against the distance metric definition.
    """
    n = len(env_variables.get_params())
    v = env_variables.get_param(index=index_param).get_current_value()
    eps_n = epsilon * n
    # One solution on each side of the current value v.
    lower = (v * (-epsilon) * n - 2 * v) / (eps_n - 2)
    upper = (2 * v - v * epsilon * n) / (eps_n + 2)
    return [lower, upper]
def get_predicate_of_evaluated_env(self, evaluated_env: EnvVariables) -> bool:
    """Return the stored predicate for an env that was already evaluated.

    :raises AttributeError: when no stored env-predicate pair matches
        ``evaluated_env``.
    """
    match = next(
        (pair for pair in self.env_predicate_pairs
         if pair.get_env_variables().is_equal(evaluated_env)),
        None,
    )
    if match is None:
        raise AttributeError("{} must be evaluated".format(
            evaluated_env.get_params_string()))
    return match.is_predicate()
def is_already_evaluated(self, candidate_env_variables: EnvVariables) -> bool:
    """Return True (and log at debug level) iff this exact env configuration
    already appears in the buffer of evaluated env-predicate pairs."""
    already_seen = any(
        pair.get_env_variables().is_equal(candidate_env_variables)
        for pair in self.env_predicate_pairs
    )
    if already_seen:
        self.logger.debug("Env {} was already evaluated".format(
            candidate_env_variables.get_params_string()))
    return already_seen
def _find_closest_env(possible_envs_dict: Dict, env_to_search: EnvVariables) -> EnvExecDetails: min_distance = np.inf closest_env = None for possible_env in list(itertools.chain(*possible_envs_dict.values())): dist = np.linalg.norm( np.asarray(possible_env.get_env_values()) - np.asarray(env_to_search.get_values())) if dist < min_distance: closest_env = copy.deepcopy(possible_env) min_distance = dist return closest_env
def execute_train(
    agent: AbstractAgent,
    current_iteration: int,
    search_suffix: str,
    current_env_variables: EnvVariables,
    _start_time: float,
    random_search: bool = False,
) -> Tuple[EnvPredicatePair, float, float]:
    """Run ``agent.train`` on a background thread and collect its results.

    The training thread reports one or more ExecutionResult objects through a
    queue; the loop drains it until a result is flagged task-completed. The
    last env-predicate pair is returned together with the accumulated
    training and regression times.
    """
    logger = Log("execute_train")
    results = []
    queue_to_train = Queue()
    # agent.train sets seed globally (for tf, np and random)
    train_seed = np.random.randint(2 ** 32 - 1)
    # order of argument matters in the args param; must match the order of args in the train method of agent
    train_thread = threading.Thread(
        target=agent.train,
        args=(train_seed, queue_to_train, current_iteration, search_suffix, current_env_variables, random_search,),
    )
    train_thread.start()
    total_training_time = 0.0
    total_regression_time = 0.0
    finished = False
    while not finished:
        data: ExecutionResult = queue_to_train.get()  # blocking code
        logger.debug(
            "Env: {}, evaluates to {}".format(current_env_variables.get_params_string(), data.is_adequate_performance(),)
        )
        logger.debug("Info: {}".format(data.get_info()))
        results.append(
            EnvPredicatePair(
                env_variables=current_env_variables,
                predicate=data.is_adequate_performance(),
                regression=data.is_regression(),
                execution_info=data.get_info(),
                model_dirs=[search_suffix],
            )
        )
        total_regression_time += data.get_regression_time()
        total_training_time += data.get_training_time()
        finished = data.is_task_completed()
    # let the training thread terminate cleanly before reporting
    while train_thread.is_alive():
        time.sleep(1.0)
    logger.info("TIME ELAPSED: {}".format(str(datetime.timedelta(seconds=(time.time() - _start_time)))))
    return results[-1], total_training_time, total_regression_time
def append(self, t_env_variables: EnvVariables, f_env_variables: EnvVariables) -> bool:
    """Add a frontier pair to the archive; return False when it is a duplicate.

    Precondition (asserted): the two envs are within ``self.epsilon`` of each
    other, i.e. they really form a frontier pair.
    """
    assert is_frontier_pair(
        t_env_variables=t_env_variables, f_env_variables=f_env_variables, epsilon=self.epsilon
    ), "The pair t_env: {} - f_env: {} is not a frontier pair since its distance {} is > {}".format(
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(),
        compute_dist(t_env_variables=t_env_variables, f_env_variables=f_env_variables),
        self.epsilon,
    )
    new_pair = FrontierPair(t_env_variables, f_env_variables)
    if any(existing.is_equal(new_pair) for existing in self.frontier_pairs):
        return False
    self.logger.info(
        "New frontier pair found. t_env: {}, f_env: {}".format(
            t_env_variables.get_params_string(), f_env_variables.get_params_string()))
    self.frontier_pairs.append(new_pair)
    return True
def test_with_callback(self, seed, env_variables: EnvVariables, n_eval_episodes: int = None) -> EnvPredicatePair:
    """Evaluate a previously trained model on ``env_variables``.

    Builds the evaluation env, loads the model and delegates the actual
    performance judgement to ``self.env_eval_callback``.

    :param seed: global seed (tf, np, random) set before evaluation
    :param env_variables: environment parametrization to evaluate on
    :param n_eval_episodes: overrides ``self.n_eval_episodes`` when given
    :return: EnvPredicatePair carrying the adequate-performance predicate
        and the callback's execution info
    """
    assert self.env_eval_callback, "env_eval_callback should be instantiated"
    self._set_global_seed(seed=seed)
    self.logger.debug("env_variables: {}".format(env_variables.get_params_string()))
    best_model_save_path, tensorboard_log_dir = self._preprocess_storage_dirs()
    if self.algo_hyperparams:
        # SECURITY NOTE(review): eval() executes arbitrary code; acceptable
        # only because algo_hyperparams comes from our own experiment configs.
        # Fix: evaluate once — the original called eval() twice (once for the
        # log line, once for the assignment).
        hyperparams = eval(self.algo_hyperparams)
        self.logger.debug("Overriding file specified hyperparams with {}".format(hyperparams))
    else:
        hyperparams = load_hyperparams(algo_name=self.algo_name, env_name=self.env_name)
    normalize_kwargs = _parse_normalize(dictionary=hyperparams)
    eval_env = make_custom_env(
        seed=seed,
        sb_version=self.sb_version,
        env_kwargs=env_variables,
        algo_name=self.algo_name,
        env_name=self.env_name,
        normalize_kwargs=normalize_kwargs,
        log_dir=best_model_save_path,
        evaluate=True,
        continue_learning_suffix=self.continue_learning_suffix,
    )
    model = self.create_model(
        seed=seed,
        algo_name=self.algo_name,
        env=eval_env,
        tensorboard_log_dir=tensorboard_log_dir,
        hyperparams=hyperparams,
        best_model_save_path=best_model_save_path,
        model_to_load=self.model_to_load,
        env_name=self.env_name,
    )
    n_eval_episodes_to_run = n_eval_episodes if n_eval_episodes else self.n_eval_episodes
    adequate_performance, info = self.env_eval_callback.evaluate_env(
        model=model,
        env=eval_env,
        n_eval_episodes=n_eval_episodes_to_run,
        sb_version=self.sb_version,
    )
    return EnvPredicatePair(env_variables=env_variables, predicate=adequate_performance, execution_info=info,)
def make_custom_env(
    seed,
    sb_version,
    env_kwargs: EnvVariables = None,
    env_name="CartPole-v1",
    continue_learning=False,
    log_dir=None,
    algo_name="ppo2",
    evaluate=False,
    evaluate_during_learning=False,
    normalize_kwargs=None,
    continue_learning_suffix="continue_learning",
):
    """Build a (possibly wrapped) gym env for training or evaluation.

    Known env names get a parametrized wrapper + TimeLimit; anything else
    falls back to plain ``gym.make``. Depending on the flags the env is then
    monitored, normalized and/or wrapped in a DummyVecEnv.

    NOTE(review): for the wrapped envs below, ``env_kwargs`` must not be
    None or ``instantiate_env()`` will raise — confirm callers always pass it.
    """
    orig_log_dir = log_dir
    if continue_learning and log_dir:
        log_dir = log_dir + "_" + continue_learning_suffix + "/"
    if normalize_kwargs is None:
        normalize_kwargs = {}
    info_keywords = ()
    if env_name == "CartPole-v1":
        env = CartPoleEnvWrapper(**env_kwargs.instantiate_env())
        env = TimeLimit(env, max_episode_steps=500)
    elif env_name == "Pendulum-v0":
        env = PendulumEnvWrapper(**env_kwargs.instantiate_env())
        env = TimeLimit(env, max_episode_steps=200)
    elif env_name == "MountainCar-v0":
        # merged the two identical MountainCar branches; only the time limit
        # differs (sac gets a longer horizon)
        env = MountainCarEnvWrapper(**env_kwargs.instantiate_env())
        env = TimeLimit(env, max_episode_steps=999 if algo_name == "sac" else 200)
    elif env_name == "Acrobot-v1":
        env = AcrobotEnvWrapper(**env_kwargs.instantiate_env())
        env = TimeLimit(env, max_episode_steps=500)
    else:
        env = gym.make(env_name)
    if log_dir is not None and not evaluate:
        log_file = os.path.join(log_dir, "0")
        logger.debug("Saving monitor files in {}".format(log_file))
        env = Monitor(env, log_file, info_keywords=info_keywords)
    if len(normalize_kwargs) > 0:
        env = normalize_env(
            env=env,
            sb_version=sb_version,
            orig_log_dir=orig_log_dir,
            continue_learning=continue_learning,
            evaluate=evaluate,
            evaluate_during_learning=evaluate_during_learning,
            normalize_kwargs=normalize_kwargs,
        )
    if (len(normalize_kwargs) == 0 and not evaluate_during_learning
            and ((evaluate or continue_learning) and algo_name == "ppo2")):
        # BUG FIX: bind the current env as a default argument. The original
        # `lambda: env` late-binds `env`, which is rebound to the DummyVecEnv
        # on the same line — it only worked because DummyVecEnv happens to call
        # its env_fns during construction.
        env = DummyVecEnv([lambda inner_env=env: inner_env])
    env.seed(seed)
    return env
def get_binary_search_candidate(
    t_env_variables: EnvVariables,
    f_env_variables: EnvVariables,
    algo_name: str,
    env_name: str,
    param_names,
    discrete_action_space: bool,
    buffer_env_predicate_pairs: BufferEnvPredicatePairs,
) -> EnvVariables:
    """Bisect between a passing (t) and a failing (f) env to get a new candidate.

    Each round builds the midpoint combinations of the two envs' parameters
    and picks a not-yet-evaluated candidate at random. When every candidate
    was already evaluated, one endpoint is replaced by an evaluated candidate
    and the search continues, for at most 50 rounds.

    :raises AssertionError: when no fresh candidate is found within the budget.
    """
    original_max_iterations = 50
    logger = Log("get_binary_search_candidate")
    max_number_iterations = original_max_iterations
    candidate_new_env_variables = copy.deepcopy(t_env_variables)
    while True:
        # compute all possible combinations of environments
        candidates_dict = dict()
        # randomly anchor the non-bisected params to either the t or the f env
        t_f_env_variables = random.choice([(t_env_variables, True), (f_env_variables, False)])
        for i in range(len(t_env_variables.get_params())):
            new_value = (t_env_variables.get_param(index=i).get_current_value()
                         + f_env_variables.get_param(index=i).get_current_value()) / 2
            if i not in candidates_dict:
                candidates_dict[i] = []
            # only bisect params on which the two envs actually differ
            if (t_env_variables.get_param(index=i).get_current_value()
                    != f_env_variables.get_param(index=i).get_current_value()):
                candidates_dict[i].append(new_value)
            for index in range(len(t_env_variables.get_params())):
                if index not in candidates_dict:
                    candidates_dict[index] = []
                if index != i:
                    candidates_dict[index].append(t_f_env_variables[0].get_values()[index])
        all_candidates = list(itertools.product(*list(candidates_dict.values())))
        logger.info("t_env: {}, f_env: {}".format(
            t_env_variables.get_params_string(), f_env_variables.get_params_string()))
        logger.info("all candidates binary search: {}".format(all_candidates))
        all_candidates_env_variables_filtered = []
        all_candidates_env_variables = []
        for candidate_values in all_candidates:
            env_values = dict()
            for i in range(len(t_f_env_variables[0].get_params())):
                param_name = t_f_env_variables[0].get_param(index=i).get_name()
                env_values[param_name] = candidate_values[i]
            candidate_env_variables = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=discrete_action_space,
                env_name=env_name,
                param_names=param_names,
                env_values=env_values,
            )
            # do not consider candidate = t_f_env_variables
            if (not candidate_env_variables.is_equal(t_env_variables)
                    and not candidate_env_variables.is_equal(f_env_variables)):
                if not buffer_env_predicate_pairs.is_already_evaluated(
                        candidate_env_variables=candidate_env_variables):
                    all_candidates_env_variables_filtered.append(candidate_env_variables)
                all_candidates_env_variables.append(candidate_env_variables)
        if len(all_candidates_env_variables_filtered) > 0:
            candidate_new_env_variables = random.choice(all_candidates_env_variables_filtered)
            break
        else:
            assert len(all_candidates) > 0, "there must be at least one candidate env for binary search"
            # BUG FIX: the original drew from the *filtered* list, which is
            # guaranteed to be empty in this branch (random.choice would raise
            # IndexError); draw from the already-evaluated candidates instead.
            candidate_env_variables_already_evaluated = random.choice(all_candidates_env_variables)
            if t_f_env_variables[1]:
                t_env_variables = copy.deepcopy(candidate_env_variables_already_evaluated)
            else:
                f_env_variables = copy.deepcopy(candidate_env_variables_already_evaluated)
            max_number_iterations -= 1
            if max_number_iterations == 0:
                break
    assert max_number_iterations > 0, "Could not binary mutate any param of envs {} and {} in {} steps".format(
        t_env_variables.get_params_string(), f_env_variables.get_params_string(), str(original_max_iterations))
    assert not candidate_new_env_variables.is_equal(
        t_env_variables
    ) and not candidate_new_env_variables.is_equal(
        f_env_variables
    ), "candidate_env_variables {} must be different than t_env_variables {} and f_env_variables {}".format(
        candidate_new_env_variables.get_params_string(),
        t_env_variables.get_params_string(),
        f_env_variables.get_params_string(),
    )
    return candidate_new_env_variables
def compute_dist(t_env_variables: EnvVariables, f_env_variables: EnvVariables) -> float:
    """Distance between two envs; delegates to the value-based helper."""
    t_values = t_env_variables.get_values()
    f_values = f_env_variables.get_values()
    return compute_dist_values(t_env_values=t_values, f_env_values=f_values)
def __init__(
    self,
    agent: AbstractAgent,
    num_iterations: int,
    algo_name: str,
    env_name: str,
    tb_log_name: str,
    continue_learning_suffix: str,
    env_variables: EnvVariables,
    param_names=None,
    runs_for_probability_estimation: int = 1,
    buffer_file: str = None,
    archive_file: str = None,
    executions_skipped_file: str = None,
    parallelize_search: bool = False,
    monitor_search_every: bool = False,
    binary_search_epsilon: float = 0.05,
    start_search_time: float = None,
    starting_progress_report_number: int = 0,
    stop_at_first_iteration: bool = False,
    exp_suffix: str = None,
):
    """Set up a random-search run.

    Two modes:
    - restore mode (``buffer_file`` given): the evaluated-envs buffer, the
      frontier-pair archive and the skipped-executions buffer are rebuilt
      from the saved result files;
    - fresh mode: a new save directory is created and the buffer is seeded
      with the initial env, assumed to satisfy the adequate-performance
      predicate.

    NOTE(review): ``monitor_search_every`` is annotated ``bool`` but is
    compared to -1 and 0 below, so callers presumably pass an int — confirm.
    """
    assert agent, "agent should have a value: {}".format(agent)
    assert algo_name, "algo_name should have a value: {}".format(algo_name)
    assert env_name, "env_name should have a value: {}".format(env_name)
    self.agent = agent
    self.num_iterations = num_iterations
    self.init_env_variables = env_variables
    self.previous_num_iterations = None
    self.start_time = time.time()
    self.logger = Log("Random")
    self.param_names = param_names
    self.all_params = env_variables.instantiate_env()
    self.runs_for_probability_estimation = runs_for_probability_estimation
    self.buffer_file = buffer_file
    self.archive_file = archive_file
    self.parallelize_search = parallelize_search
    self.stop_at_first_iteration = stop_at_first_iteration
    self.exp_suffix = exp_suffix
    if param_names:
        self.param_names_string = "_".join(param_names)
    # TODO: refactor buffer restoring in abstract class extended by search algo
    # (for now only random search and alphatest)
    if buffer_file:
        # ---- restore mode: rebuild state from previous result files ----
        previously_saved_buffer = read_saved_buffer(
            buffer_file=buffer_file)
        # the save dir is the directory that contains the buffer file
        index_last_slash = buffer_file.rindex("/")
        self.algo_save_dir = buffer_file[:index_last_slash]
        self.logger.debug(
            "Algo save dir from restored execution: {}".format(
                self.algo_save_dir))
        self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
            save_dir=self.algo_save_dir)
        self.archive = Archive(save_dir=self.algo_save_dir,
                               epsilon=binary_search_epsilon)
        # restore buffer
        for buffer_item in previously_saved_buffer:
            previous_env_variables = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=self.all_params["discrete_action_space"],
                env_name=env_name,
                param_names=param_names,
                env_values=buffer_item.get_env_values(),
            )
            self.buffer_env_predicate_pairs.append(
                EnvPredicatePair(
                    env_variables=previous_env_variables,
                    pass_probability=buffer_item.get_pass_probability(),
                    predicate=buffer_item.is_predicate(),
                    regression_probability=buffer_item.get_regression_probability(),
                    probability_estimation_runs=buffer_item.get_probability_estimation_runs(),
                    regression_estimation_runs=buffer_item.get_regression_estimation_runs(),
                    model_dirs=buffer_item.get_model_dirs(),
                ))
        assert archive_file, (
            "when buffer file is available so needs to be the archive file to "
            "restore a previous execution")
        # both result files must come from the same iteration count
        try:
            previous_num_iterations_buffer = get_result_file_iteration_number(
                filename=buffer_file)
            previous_num_iterations_archive = get_result_file_iteration_number(
                filename=archive_file)
            assert (previous_num_iterations_buffer
                    == previous_num_iterations_archive
                    ), "The two nums must coincide: {}, {}".format(
                        previous_num_iterations_buffer,
                        previous_num_iterations_archive)
            # resume from the iteration after the last saved one
            previous_num_iterations = previous_num_iterations_buffer + 1
        except ValueError as e:
            raise ValueError(e)
        self.previous_num_iterations = previous_num_iterations
        self.logger.info(
            "Restore previous execution of {} iterations.".format(
                previous_num_iterations))
        # restore archive
        # archive entries alternate: a True-predicate env paired with a
        # False-predicate env forms one frontier pair
        previously_saved_archive = read_saved_archive(
            archive_file=archive_file)
        t_env_variables = None
        f_env_variables = None
        for env_values, predicate in previously_saved_archive:
            all_params = env_variables.instantiate_env()
            previous_env_variables = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=all_params["discrete_action_space"],
                env_name=env_name,
                param_names=param_names,
                env_values=env_values,
            )
            if predicate:
                t_env_variables = previous_env_variables
            else:
                f_env_variables = previous_env_variables
            if t_env_variables and f_env_variables:
                self.archive.append(t_env_variables=t_env_variables,
                                    f_env_variables=f_env_variables)
                t_env_variables = None
                f_env_variables = None
        # restore executions skipped
        # NOTE(review): self.buffer_executions_skipped is only assigned in the
        # fresh-run branch below; this append presumably relies on an
        # assignment made elsewhere — confirm, otherwise this raises
        # AttributeError on restore.
        previously_saved_executions_skipped = read_saved_buffer_executions_skipped(
            buffer_executions_skipped_file=executions_skipped_file)
        for buffer_executions_skipped_item in previously_saved_executions_skipped:
            previous_env_variables_skipped = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=self.all_params["discrete_action_space"],
                env_name=env_name,
                param_names=param_names,
                env_values=buffer_executions_skipped_item.env_values_skipped,
            )
            env_predicate_pair_skipped = EnvPredicatePair(
                env_variables=previous_env_variables_skipped,
                predicate=buffer_executions_skipped_item.predicate)
            previous_env_variables_executed = instantiate_env_variables(
                algo_name=algo_name,
                discrete_action_space=self.all_params["discrete_action_space"],
                env_name=env_name,
                param_names=param_names,
                env_values=buffer_executions_skipped_item.env_values_executed,
            )
            env_predicate_pair_executed = EnvPredicatePair(
                env_variables=previous_env_variables_executed,
                predicate=buffer_executions_skipped_item.predicate)
            self.buffer_executions_skipped.append(
                ExecutionSkipped(
                    env_predicate_pair_skipped=env_predicate_pair_skipped,
                    env_predicate_pair_executed=env_predicate_pair_executed,
                    search_component=buffer_executions_skipped_item.search_component,
                ))
    else:
        # ---- fresh mode: create a new save dir and seed the buffer ----
        attempt = 0
        suffix = "n_iterations_"
        if self.param_names:
            suffix += self.param_names_string + "_"
        if self.exp_suffix:
            suffix += self.exp_suffix + "_"
        suffix += str(num_iterations)
        algo_save_dir = os.path.abspath(HOME + "/random/" + env_name + "/" +
                                        algo_name + "/" + suffix + "_" +
                                        str(attempt))
        _algo_save_dir = algo_save_dir
        # bump the trailing attempt counter until the dir name is free
        while os.path.exists(_algo_save_dir):
            attempt += 1
            _algo_save_dir = algo_save_dir[:-1] + str(attempt)
        self.algo_save_dir = _algo_save_dir
        os.makedirs(self.algo_save_dir)
        self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
            save_dir=self.algo_save_dir)
        # assuming initial env_variables satisfies the predicate of adequate performance
        if self.runs_for_probability_estimation:
            env_predicate_pair = EnvPredicatePair(
                env_variables=self.init_env_variables,
                predicate=True,
                probability_estimation_runs=[True] *
                self.runs_for_probability_estimation,
            )
        else:
            env_predicate_pair = EnvPredicatePair(
                env_variables=self.init_env_variables, predicate=True)
        self.buffer_env_predicate_pairs.append(env_predicate_pair)
        self.buffer_executions_skipped = BufferExecutionsSkipped(
            save_dir=self.algo_save_dir)
        self.archive = Archive(save_dir=self.algo_save_dir,
                               epsilon=binary_search_epsilon)
    self.env_name = env_name
    self.algo_name = algo_name
    self.tb_log_name = tb_log_name
    self.continue_learning_suffix = continue_learning_suffix
    self.binary_search_epsilon = binary_search_epsilon
    self.runner = Runner(
        agent=self.agent,
        runs_for_probability_estimation=self.runs_for_probability_estimation,
    )
    self.monitor_search_every = monitor_search_every
    self.monitor_progress = None
    # the `> 0` check makes the `!= -1` redundant; kept as in the original
    if self.monitor_search_every != -1 and self.monitor_search_every > 0:
        self.monitor_progress = MonitorProgress(
            algo_name=self.algo_name,
            env_name=standardize_env_name(env_name=self.env_name),
            results_dir=self.algo_save_dir,
            param_names_string=self.param_names_string,
            search_type="random",
            start_search_time=start_search_time,
            starting_progress_report_number=starting_progress_report_number,
        )
def dominance_analysis(
    self, candidate_env_variables: EnvVariables, predicate_to_consider: bool = True
) -> Union[EnvPredicatePair, None]:
    """Look for an already-executed env in a dominance relation with the candidate.

    With ``predicate_to_consider=True``: find an executed True-predicate env
    whose every param dominates (is at least as hard as) the candidate's.
    With ``False``: find an executed False-predicate env dominated by the
    candidate. Returns the matching EnvPredicatePair or None.

    NOTE(review): the scan does not break on the first match, so the *last*
    dominating/dominated pair wins — presumably any match is acceptable;
    confirm this is intentional.

    :raises AssertionError: if the candidate was already evaluated, or if any
        param has a direction other than "positive".
    """
    assert not self.is_already_evaluated(
        candidate_env_variables=candidate_env_variables
    ), "Env {} must not be evaluated".format(
        candidate_env_variables.get_params_string())
    executed_env_dominate = None
    if predicate_to_consider:
        # searching for an executed env that evaluates to True that dominates the env passed as parameter
        for env_predicate_pair in self.env_predicate_pairs:
            predicate = env_predicate_pair.is_predicate()
            if predicate:
                dominates = True
                for i in range(
                        len(env_predicate_pair.get_env_variables().
                            get_params())):
                    direction = env_predicate_pair.get_env_variables(
                    ).get_param(index=i).get_direction()
                    # starting_multiplier > 1.0 means "harder as the value
                    # grows"; < 1.0 means "harder as the value shrinks"
                    starting_multiplier = (
                        env_predicate_pair.get_env_variables().get_param(
                            index=i).get_starting_multiplier())
                    assert direction == "positive", "unknown and negative direction is not supported"
                    env_value = env_predicate_pair.get_env_variables(
                    ).get_param(index=i).get_current_value()
                    other_env_value = candidate_env_variables.get_param(
                        index=i).get_current_value()
                    if direction == "positive" and starting_multiplier > 1.0:
                        if env_value < other_env_value:
                            dominates = False
                    elif direction == "positive" and starting_multiplier < 1.0:
                        if env_value > other_env_value:
                            dominates = False
                if dominates:
                    executed_env_dominate = env_predicate_pair
                    self.logger.debug(
                        "candidate {} dominated by executed env {} that evaluates to {}"
                        .format(
                            candidate_env_variables.get_params_string(),
                            env_predicate_pair.get_env_variables().
                            get_params_string(),
                            predicate,
                        ))
    else:
        # searching for an executed env that evaluates to False that is dominated by the env passed as parameter
        for env_predicate_pair in self.env_predicate_pairs:
            predicate = env_predicate_pair.is_predicate()
            if not predicate:
                is_dominated = True
                for i in range(
                        len(env_predicate_pair.get_env_variables().
                            get_params())):
                    direction = env_predicate_pair.get_env_variables(
                    ).get_param(index=i).get_direction()
                    starting_multiplier = (
                        env_predicate_pair.get_env_variables().get_param(
                            index=i).get_starting_multiplier())
                    assert direction == "positive", "unknown and negative direction is not supported"
                    env_value = env_predicate_pair.get_env_variables(
                    ).get_param(index=i).get_current_value()
                    other_env_value = candidate_env_variables.get_param(
                        index=i).get_current_value()
                    # mirror of the branch above with the comparison flipped
                    if direction == "positive" and starting_multiplier > 1.0:
                        if other_env_value < env_value:
                            is_dominated = False
                    elif direction == "positive" and starting_multiplier < 1.0:
                        if other_env_value > env_value:
                            is_dominated = False
                if is_dominated:
                    executed_env_dominate = env_predicate_pair
                    self.logger.debug(
                        "candidate {} dominates executed env {} that evaluates to {}"
                        .format(
                            candidate_env_variables.get_params_string(),
                            env_predicate_pair.get_env_variables().
                            get_params_string(),
                            not predicate,
                        ))
    return executed_env_dominate