def test_chain_init(num_states):
    environment = get_environment(env_info["env"])
    env_info["num_states"] = num_states
    agent_info["num_states"] = num_states
    agent_info["num_dims"] = num_states
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
    (last_state, _) = rl_glue.rl_start()
    assert last_state == num_states // 2
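# The tests in this file read module-level env_info / agent_info dicts and a
# shared `environment`, and receive `num_states` / `algorithm` via pytest
# parametrization. The setup below is a sketch of that assumed scaffolding;
# the concrete parameter values are illustrative, not taken from the original
# module.
import pytest

env_info = {"env": "RandomWalk", "seed": 0, "log_episodes": 0}
agent_info = {
    "algorithm": "TD",
    "representations": "TA",
    "discount_rate": 1.0,
    "trace_decay": 0.0,
    "step_size": 0.1,
    "seed": 0,
    "interest": "UI",
}
environment = get_environment(env_info["env"])


def pytest_generate_tests(metafunc):
    # Parametrize any test that declares these arguments.
    if "num_states" in metafunc.fixturenames:
        metafunc.parametrize("num_states", [5, 19])
    if "algorithm" in metafunc.fixturenames:
        metafunc.parametrize("algorithm", ["TD", "ETD"])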
def test_same_walks_per_run_for_each_algorithm(algorithm):
    runs_with_episodes = {
        0: [[2, 1, 2, 3, 2, 3, 4], [2, 3, 4], [2, 3, 2, 1, 2, 1, 0]],
        1: [
            [2, 3, 4, 3, 2, 3, 4],
            [2, 3, 4, 3, 2, 3, 2, 3, 4, 3, 2, 3, 2, 1, 0, 1, 0],
            [2, 3, 2, 1, 0, 1, 0],
        ],
    }
    env_info["log_episodes"] = 1
    env_info["num_states"] = 5
    agent_info["num_states"] = 5
    agent_info["num_dims"] = 5
    num_runs = len(runs_with_episodes)
    for i in range(num_runs):
        agent_info["algorithm"] = algorithm
        agent_info["seed"] = i
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        rl_glue.rl_init(agent_info, env_info)
        num_episodes = len(runs_with_episodes[i])
        for j in range(num_episodes):
            rl_glue.rl_episode(0)
            assert np.array_equiv(
                runs_with_episodes[i][j],
                np.array(rl_glue.rl_env_message("get episode")).squeeze(),
            )
def test_emphasis_reset_at_start_of_episode(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
    rl_glue.rl_start()
    assert rl_glue.rl_agent_message("get emphasis trace") == 0.0
def test_eligibility_trace_reset_at_start_of_episode(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
    rl_glue.rl_start()
    e = rl_glue.rl_agent_message("get eligibility trace")
    assert np.allclose(e, np.zeros(e.shape[0]))
def test_increasing_steps_over_episodes(algorithm):
    environment = get_environment(env_info["env"])
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_info, env_info)
    for episode in range(1, 10):
        total_timesteps_before_episode = rl_glue.rl_agent_message("get steps")
        rl_glue.rl_episode(0)
        total_timesteps_after_episode = rl_glue.rl_agent_message("get steps")
        assert total_timesteps_after_episode - total_timesteps_before_episode > 0
def test_agent_start(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_info, env_info)
    rl_glue.rl_start()
    z = rl_glue.rl_agent_message("get eligibility trace")
    w = rl_glue.rl_agent_message("get weight vector")
    try:
        # Emphatic agents additionally expose an emphasis trace and a
        # followon trace; both should be zero right after agent start.
        # Non-emphatic agents raise on these messages and are skipped.
        M = rl_glue.rl_agent_message("get emphasis trace")
        F = rl_glue.rl_agent_message("get followon trace")
        assert F == 0.0
        assert M == 0.0
    except Exception:
        pass
    assert np.array_equal(z, np.zeros(z.shape[0]))
    assert np.array_equal(w, np.zeros(w.shape[0]))
def test_constant_emphasis():
    agent_info["discount_rate"] = 1.0
    agent_info["trace_decay"] = 1.0
    agent_info["interest"] = "UI"
    agent = get_agent("ETDTileCoding")
    rl_glue = RLGlue(environment, agent)
    for episode in range(1, 3):
        rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
        rl_glue.rl_episode(0)
        assert rl_glue.rl_agent_message("get emphasis trace") == 1.0
def test_linear_followon_trace():
    agent_info["discount_rate"] = 1.0
    agent_info["trace_decay"] = 0.0
    agent_info["interest"] = "UI"
    agent = get_agent("ETDTileCoding")
    rl_glue = RLGlue(environment, agent)
    for episode in range(1, 3):
        rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
        rl_glue.rl_episode(0)
        assert (
            rl_glue.rl_agent_message("get followon trace") - 1 == rl_glue.num_steps
        )
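# The two emphasis tests above follow from the (on-policy, unit-interest)
# emphatic-TD trace recursions F_t = gamma * F_{t-1} + I_t and
# M_t = lambda * I_t + (1 - lambda) * F_t. With gamma = 1 and lambda = 1 the
# emphasis is pinned at the interest (1.0); with lambda = 0 the followon
# trace grows by exactly one per update, which is what the followon-trace
# check relies on. A standalone sketch of the recursions, for illustration
# only (the agents' actual update code lives elsewhere):
def run_traces(num_updates, discount_rate, trace_decay, interest=1.0):
    F, M = 0.0, 0.0
    for _ in range(num_updates):
        F = discount_rate * F + interest
        M = trace_decay * interest + (1.0 - trace_decay) * F
    return F, M


F, M = run_traces(10, discount_rate=1.0, trace_decay=1.0)
assert M == 1.0  # emphasis stays constant at the unit interest
F, M = run_traces(10, discount_rate=1.0, trace_decay=0.0)
assert F == 10.0  # followon trace grows linearly with the number of updates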
def simulate_on_policy(**kwargs):
    env_id = kwargs.get("env")
    steps = kwargs.get("steps")
    policy_name = kwargs.get("policy_name")
    save_rootpath = kwargs.get("save_rootpath")
    discount_rate = kwargs.get("discount_rate")
    n_samples = kwargs.get("num_obs")

    agent_info = {
        "algorithm": "TDTileCoding",
        "representations": "TC",
        "max_x": "0.6,0.07",
        "min_x": "-1.2,-0.07",
        "tiles_per_dim": "4,4",
        "tilings": 5,
        "discount_rate": discount_rate,
        "trace_decay": 0.0,
        "step_size": 0.0001,
        "seed": 0,
        "interest": "UI",
        "policy": policy_name,
    }
    env_info = {"env": env_id, "seed": 0}

    agent = agents.get_agent(agent_info.get("algorithm"))
    env = envs.get_environment(env_info.get("env"))
    rl_glue = RLGlue(env, agent)
    rl_glue.rl_init(agent_info, env_info)

    last_state, _ = rl_glue.rl_start()
    states = []
    for _ in tqdm(range(steps)):
        states.append(last_state)
        reward, last_state, last_action, term = rl_glue.rl_step()
        if term:
            last_state, _ = rl_glue.rl_start()
    states = np.vstack(states)

    rand_generator = np.random.RandomState(0)
    idxs = rand_generator.choice(
        np.arange(steps // 2, steps), size=(n_samples,), replace=False
    )
    states = states[idxs, :]
    np.save(save_rootpath / "S", states)
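# Example invocation of simulate_on_policy. The tile-coder bounds above match
# Mountain Car's position/velocity ranges, so a Mountain Car environment id
# and an energy-pumping policy are assumed here purely for illustration, as
# is the save path.
if __name__ == "__main__":
    from pathlib import Path

    simulate_on_policy(
        env="MountainCar",
        steps=200000,
        policy_name="energy-pumping",
        save_rootpath=Path("~/scratch/mountain_car").expanduser(),
        discount_rate=0.99,
        num_obs=500,
    )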
def test_same_feature_representation_for_one_trial(representations):
    agent_info = {
        "num_states": 19,
        "algorithm": "ETD",
        "representations": representations,
        "num_features": 18,
        "num_ones": 10,
        "discount_rate": 0.95,
        "trace_decay": 0.5,
        "step_size": 0.0001,
        "interest": "UI",
        "policy": "random-chain",
    }
    env_info = {"env": "RandomWalk", "num_states": 19}
    num_states = agent_info.get("num_states")
    for seed in np.arange(10):
        agent_info["seed"] = seed
        states = np.arange(num_states).reshape(-1, 1)
        RF = get_representation(agent_info.get("representations"), **agent_info)
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        random_features = np.vstack([RF[states[i]] for i in range(num_states)])
        rl_glue.rl_init(agent_info, env_info)
        max_steps_this_episode = 0
        for i in range(10):
            is_terminal = False
            rl_glue.rl_start()
            while (not is_terminal) and (
                (max_steps_this_episode == 0)
                or (rl_glue.num_steps < max_steps_this_episode)
            ):
                # rl_step returns (reward, last_state, last_action, term).
                rl_step_result = rl_glue.rl_step()
                is_terminal = rl_step_result[3]
                last_state = rl_step_result[1]
                # The features the agent sees for the visited state must match
                # the representation built up front for the same seed.
                assert np.array_equiv(
                    rl_glue.agent.FR[last_state], random_features[last_state]
                )
def calculate_state_distribution(N):
    agent_info = {
        "num_states": N,
        "algorithm": "TD",
        "representations": "TA",
        "discount_rate": 1,
        "trace_decay": 0,
        "step_size": 0.125,
        "seed": 0,
        "interest": "UI",
    }
    env_info = {"env": "RandomWalk", "num_states": N}
    exp_info = {
        "max_timesteps_episode": 1000000,
        "episode_eval_freq": 1,
        "n_episodes": 1,
    }
    rl_glue = RLGlue(
        envs.get_environment(env_info["env"]),
        agents.get_agent(agent_info["algorithm"]),
    )
    rl_glue.rl_init(agent_info, env_info)

    eta = np.zeros(env_info["num_states"])
    last_state, _ = rl_glue.rl_start()
    for _ in tqdm(range(1, int(exp_info["max_timesteps_episode"]) + 1)):
        eta[last_state] += 1
        _, last_state, _, term = rl_glue.rl_step()
        if term:
            last_state, _ = rl_glue.rl_start()
    state_distribution = eta / np.sum(eta)

    return state_distribution
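# Example use of calculate_state_distribution (a sketch; the output filename
# is an assumption): estimate the on-policy state-visitation distribution of
# the 19-state random walk from a long rollout and save it alongside the
# other experiment inputs.
if __name__ == "__main__":
    d_mu = calculate_state_distribution(19)
    np.save("state_distribution_19", d_mu)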
class RandomWalkExp(BaseExperiment):
    def __init__(self, agent_info, env_info, experiment_info):
        super().__init__()
        self.agent_info = agent_info
        self.env_info = env_info
        self.experiment_info = experiment_info
        self.agent = agents.get_agent(agent_info.get("algorithm"))
        self.N = env_info["num_states"]
        self.env = envs.get_environment(env_info.get("env"))
        self.n_episodes = experiment_info.get("n_episodes")
        self.episode_eval_freq = experiment_info.get("episode_eval_freq")
        self.id = experiment_info.get("id")
        self.max_episode_steps = experiment_info.get("max_episode_steps")
        self.output_dir = Path(experiment_info.get("output_dir")).expanduser()
        self.initial_seed = experiment_info.get("seed")

        path_exists(self.output_dir)
        path_exists(self.output_dir / "logs")
        self.logger = get_simple_logger(
            __name__, self.output_dir / "logs" / f"{self.id}.txt"
        )
        self.logger.info(
            json.dumps(
                [self.agent_info, self.env_info, self.experiment_info], indent=4
            )
        )

        path = self.output_dir.parents[0] / (
            f"true_v_{self.N}_{self.agent_info.get('discount_rate')}".replace(".", "-")
        )
        self.true_values = np.load(f"{path}.npy")
        self.states = np.arange(self.N).reshape((-1, 1))
        self.state_distribution = np.ones_like(self.true_values) * 1 / len(self.states)
        self.msve_error = np.zeros((self.n_episodes // self.episode_eval_freq + 1,))
        self.error = get_objective(
            "RMSVE",
            self.true_values,
            self.state_distribution,
            np.ones(len(self.true_values)),
        )
        self.timesteps = []

    def init(self):
        FR = get_representation(
            name=self.agent_info.get("representations"), **self.agent_info
        )
        self.representations = np.array(
            [FR[self.states[i]] for i in range(len(self.states))]
        ).reshape(len(self.states), FR.num_features)
        if self.experiment_info.get("save_representations"):
            path = path_exists(self.output_dir / "representations")
            self.save(path / f"repr_{self.id}", self.representations)
        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)

    def run(self):
        for i in range(self.experiment_info.get("runs")):
            self.agent_info["seed"] = i + self.initial_seed
            self.env_info["seed"] = i + self.initial_seed
            self.init()
            self.learn()
            self.save(self.output_dir / f"{self.id}", self.msve_error)

    def learn(self):
        estimated_state_values = self.message("get state value")
        self.msve_error[0] = self.error.value(estimated_state_values)
        for episode in range(1, self.n_episodes + 1):
            self._learn(episode)

    def _learn(self, episode):
        self.rl_glue.rl_episode(self.max_episode_steps)
        if episode % self.episode_eval_freq == 0:
            estimated_state_values = self.message("get state value")
            self.msve_error[episode // self.episode_eval_freq] = self.error.value(
                estimated_state_values
            )

    def save(self, path, data):
        np.save(path, data)

    def cleanup(self):
        pass

    def message(self, message):
        if message == "get state value":
            current_theta = self.rl_glue.rl_agent_message("get weight vector")
            current_approx_v = np.dot(self.representations, current_theta)
            return current_approx_v
        raise Exception("Unexpected message given.")
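# Minimal driver for RandomWalkExp, with illustrative hyperparameters (the
# concrete values and output_dir are assumptions, not taken from the
# original). Note that __init__ expects the precomputed true values, here
# true_v_19_1-0.npy, to sit one directory above output_dir.
if __name__ == "__main__":
    agent_info = {
        "algorithm": "TD",
        "representations": "TA",
        "num_states": 19,
        "discount_rate": 1.0,
        "trace_decay": 0.0,
        "step_size": 0.125,
        "interest": "UI",
        "policy": "random-chain",
    }
    env_info = {"env": "RandomWalk", "num_states": 19}
    experiment_info = {
        "id": 0,
        "runs": 1,
        "seed": 0,
        "n_episodes": 100,
        "episode_eval_freq": 1,
        "max_episode_steps": 0,
        "output_dir": "~/scratch/random_walk/sweep",
        "save_representations": False,
    }
    exp = RandomWalkExp(agent_info, env_info, experiment_info)
    exp.run()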
class Exp(BaseExperiment):
    def __init__(self, agent_info, env_info, experiment_info):
        super().__init__()
        self.agent_info = agent_info
        self.env_info = env_info
        self.experiment_info = experiment_info
        self.agent = agents.get_agent(agent_info.get("algorithm"))
        self.env = envs.get_environment(env_info.get("env"))
        self.num_episodes = experiment_info.get("n_episodes")
        self.episode_eval_freq = experiment_info.get("episode_eval_freq")
        self.id = experiment_info.get("id")
        self.max_episode_steps = experiment_info.get("max_episode_steps")
        self.output_dir = Path(experiment_info.get("output_dir")).expanduser()
        self.output_dir = path_exists(self.output_dir)
        self.true_values = np.load(self.output_dir / "true_values.npy")
        self.obs = np.load(self.output_dir / "states.npy")
        self.num_obs = len(self.obs)
        self.on_policy_dist = np.ones(self.num_obs) * 1 / self.num_obs
        self.msve_error = np.zeros((self.num_episodes // self.episode_eval_freq + 1,))
        self.log_episodes = self.env_info.get("log_episodes")
        if self.log_episodes:
            self.episodes = [[] for i in range(self.experiment_info.get("runs"))]
        self.objective = get_objective(
            "RMSVE",
            self.true_values,
            self.on_policy_dist,
            np.ones(self.num_obs),
        )

    def init(self):
        feature_representation = get_representation(
            name=self.agent_info.get("representations"), **self.agent_info
        )
        self.state_features = np.array(
            [feature_representation[self.obs[i]] for i in range(self.num_obs)]
        ).reshape(self.num_obs, -1)
        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)

    def run(self):
        self.init()
        self.learn()
        self.save(self.output_dir / f"{self.id}", self.msve_error)

    def learn(self):
        estimated_state_values = self.message("get approx value")
        self.msve_error[0] = self.objective.value(estimated_state_values)
        for episode in range(1, self.num_episodes + 1):
            self._learn(episode)
            if self.log_episodes:
                self.episodes[0].append(self.rl_glue.rl_env_message("get episode"))

    def _learn(self, episode):
        self.rl_glue.rl_episode(self.max_episode_steps)
        if episode % self.episode_eval_freq == 0:
            estimated_state_values = self.message("get approx value")
            self.msve_error[episode // self.episode_eval_freq] = self.objective.value(
                estimated_state_values
            )

    def save(self, path, data):
        np.save(path, data)

    def message(self, message):
        if message == "get approx value":
            current_theta = self.rl_glue.rl_agent_message("get weight vector")
            if self.agent_info.get("representations") == "TC":
                current_approx_v = np.sum(current_theta[self.state_features], axis=1)
            else:
                current_approx_v = np.dot(self.state_features, current_theta)
            return current_approx_v
        raise Exception("Unexpected message given.")
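# Minimal driver for Exp, assuming the on-policy artifacts already exist in
# output_dir: true_values.npy and states.npy (the latter corresponds to the
# sample states produced by simulate_on_policy above; the exact file naming
# link is an assumption). The Mountain Car env id and the hyperparameter
# values below are illustrative only.
if __name__ == "__main__":
    agent_info = {
        "algorithm": "TDTileCoding",
        "representations": "TC",
        "max_x": "0.6,0.07",
        "min_x": "-1.2,-0.07",
        "tiles_per_dim": "4,4",
        "tilings": 5,
        "discount_rate": 0.99,
        "trace_decay": 0.0,
        "step_size": 0.0001,
        "seed": 0,
        "interest": "UI",
    }
    env_info = {"env": "MountainCar", "seed": 0, "log_episodes": 0}
    experiment_info = {
        "id": 0,
        "runs": 1,
        "n_episodes": 500,
        "episode_eval_freq": 1,
        "max_episode_steps": 1000,
        "output_dir": "~/scratch/mountain_car",
    }
    exp = Exp(agent_info, env_info, experiment_info)
    exp.run()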