Example no. 1
    def init(self):
        feature_representation = get_representation(
            name=self.agent_info.get("representations"), **self.agent_info)
        self.state_features = np.array([
            feature_representation[self.obs[i]] for i in range(self.num_obs)
        ]).reshape(self.num_obs, -1)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)
Example no. 2
    def init(self):
        FR = get_representation(name=self.agent_info.get("representations"),
                                **self.agent_info)
        self.representations = np.array([
            FR[self.states[i]] for i in range(len(self.states))
        ]).reshape(len(self.states), FR.num_features)
        if self.experiment_info.get("save_representations"):
            path = path_exists(self.output_dir / "representations")
            self.save(path / f"repr_{self.id}", self.representations)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)
Example no. 3
def test_chain_init(num_states):
    environment = get_environment(env_info["env"])
    env_info["num_states"] = num_states
    agent_info["num_states"] = num_states
    agent_info["num_dims"] = num_states
    agent = get_agent(agent_info["algorithm"])
    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)

    (last_state, _) = rl_glue.rl_start()

    assert last_state == num_states // 2
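
The chain test above relies on RL-Glue's start/step protocol. As a minimal sketch (not the project's fixtures: the env_info and agent_info values below are illustrative placeholders, and the same module-level imports as the surrounding examples are assumed), one episode could be driven like this, with rl_step assumed to return (reward, state, action, terminal) as it is unpacked in the simulation examples further down:
# Illustrative driver loop; dict values are placeholders, not the real test fixtures.
env_info = {"env": "RandomWalk", "num_states": 5, "seed": 0}
agent_info = {
    "algorithm": "TD",
    "representations": "TA",
    "num_states": 5,
    "num_dims": 5,
    "discount_rate": 1,
    "trace_decay": 0,
    "step_size": 0.125,
    "seed": 0,
    "interest": "UI",
}

rl_glue = RLGlue(
    get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
)
rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)

last_state, _ = rl_glue.rl_start()
is_terminal = False
while not is_terminal:
    # rl_step is assumed to return (reward, state, action, terminal),
    # matching the unpacking used in simulate_on_policy below.
    reward, last_state, last_action, is_terminal = rl_glue.rl_step()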
Example no. 4
def test_same_walks_per_run_for_each_algorithm(algorithm):
    runs_with_episodes = {
        0: [[2, 1, 2, 3, 2, 3, 4], [2, 3, 4], [2, 3, 2, 1, 2, 1, 0]],
        1: [
            [2, 3, 4, 3, 2, 3, 4],
            [2, 3, 4, 3, 2, 3, 2, 3, 4, 3, 2, 3, 2, 1, 0, 1, 0],
            [2, 3, 2, 1, 0, 1, 0],
        ],
    }

    env_info["log_episodes"] = 1
    env_info["num_states"] = 5
    agent_info["num_states"] = 5
    agent_info["num_dims"] = 5
    num_runs = len(runs_with_episodes)
    for i in range(num_runs):
        agent_info["algorithm"] = algorithm
        agent_info["seed"] = i
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        rl_glue.rl_init(agent_info, env_info)
        num_episodes = len(runs_with_episodes[i])
        for j in range(num_episodes):
            rl_glue.rl_episode(0)
            assert np.array_equiv(
                runs_with_episodes[i][j],
                np.array(rl_glue.rl_env_message("get episode")).squeeze(),
            )
Example no. 5
def test_emphasis_reset_at_start_of_episode(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])

    rl_glue = RLGlue(environment, agent)

    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
    rl_glue.rl_start()
    assert rl_glue.rl_agent_message("get emphasis trace") == 0.0
Example no. 6
def test_eligibility_trace_reset_at_start_of_episode(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])

    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
    rl_glue.rl_start()
    e = rl_glue.rl_agent_message("get eligibility trace")
    assert np.allclose(e, np.zeros(e.shape[0]))
Example no. 7
def test_increasing_steps_over_episodes(algorithm):
    environment = get_environment(env_info["env"])
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])

    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_info, env_info)

    for episode in range(1, 10):
        total_timesteps_before_episode = rl_glue.rl_agent_message("get steps")
        rl_glue.rl_episode(0)
        total_timesteps_after_episode = rl_glue.rl_agent_message("get steps")
        assert total_timesteps_after_episode - total_timesteps_before_episode > 0
Example no. 8
def test_agent_start(algorithm):
    agent_info["algorithm"] = algorithm
    agent = get_agent(agent_info["algorithm"])

    rl_glue = RLGlue(environment, agent)
    rl_glue.rl_init(agent_info, env_info)
    rl_glue.rl_start()

    z = rl_glue.rl_agent_message("get eligibility trace")
    w = rl_glue.rl_agent_message("get weight vector")

    try:
        # Emphatic algorithms should also start with zero emphasis and followon traces.
        M = rl_glue.rl_agent_message("get emphasis vector")
        F = rl_glue.rl_agent_message("get followon trace")
        assert F == 0.0
        assert M == 0.0
    except Exception:
        # Non-emphatic algorithms do not answer these messages.
        pass

    assert np.array_equal(z, np.zeros(z.shape[0]))

    assert np.array_equal(w, np.zeros(w.shape[0]))
Example no. 9
def test_constant_emphasis():
    agent_info["discount_rate"] = 1.0
    agent_info["trace_decay"] = 1.0
    agent_info["interest"] = "UI"
    agent = get_agent("ETDTileCoding")

    rl_glue = RLGlue(environment, agent)

    for episode in range(1, 3):
        rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
        rl_glue.rl_episode(0)
        assert rl_glue.rl_agent_message("get emphasis trace") == 1.0
Example no. 10
def test_linear_followon_trace():
    agent_info["discount_rate"] = 1.0
    agent_info["trace_decay"] = 0.0
    agent_info["interest"] = "UI"
    agent = get_agent("ETDTileCoding")

    rl_glue = RLGlue(environment, agent)

    for episode in range(1, 3):
        rl_glue.rl_init(agent_init_info=agent_info, env_init_info=env_info)
        rl_glue.rl_episode(0)
        assert rl_glue.rl_agent_message(
            "get followon trace") - 1 == rl_glue.num_steps
Example no. 11
def simulate_on_policy(**kwargs):
    env_id = kwargs.get("env")
    steps = kwargs.get("steps")
    policy_name = kwargs.get("policy_name")
    save_rootpath = kwargs.get("save_rootpath")
    discount_rate = kwargs.get("discount_rate")
    n_samples = kwargs.get("num_obs")

    agent_info = {
        "algorithm": "TDTileCoding",
        "representations": "TC",
        "max_x": "0.6,0.07",
        "min_x": "-1.2,-0.07",
        "tiles_per_dim": "4,4",
        "tilings": 5,
        "discount_rate": discount_rate,
        "trace_decay": 0.0,
        "step_size": 0.0001,
        "seed": 0,
        "interest": "UI",
        "policy": policy_name,
    }

    env_info = {"env": env_id, "seed": 0}

    agent = agents.get_agent(agent_info.get("algorithm"))
    env = envs.get_environment(env_info.get("env"))

    rl_glue = RLGlue(env, agent)
    rl_glue.rl_init(agent_info, env_info)
    last_state, _ = rl_glue.rl_start()
    states = []
    for _ in tqdm(range(steps)):
        states.append(last_state)
        reward, last_state, last_action, term = rl_glue.rl_step()
        if term:
            last_state, _ = rl_glue.rl_start()
    states = np.vstack(states)

    rand_generator = np.random.RandomState(0)
    idxs = rand_generator.choice(
        np.arange(steps // 2, steps), size=(n_samples,), replace=False
    )
    states = states[idxs, :]
    np.save(save_rootpath / "S", states)
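
A hypothetical invocation of simulate_on_policy, for orientation only: the environment id and policy name strings below are assumptions (the tile-coder bounds hard-coded in the function suggest a Mountain Car style task), and save_rootpath can be any existing directory.
from pathlib import Path

save_rootpath = Path("~/scratch/on_policy").expanduser()
save_rootpath.mkdir(parents=True, exist_ok=True)

simulate_on_policy(
    env="MountainCar",        # assumed environment id
    steps=100_000,            # length of the behaviour trajectory
    policy_name="random",     # assumed policy name
    save_rootpath=save_rootpath,
    discount_rate=0.99,
    num_obs=500,              # number of sampled states saved to S.npy
)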
Example no. 12
def test_same_feature_representation_for_one_trial(representations):
    agent_info = {
        "num_states": 19,
        "algorithm": "ETD",
        "representations": representations,
        "num_features": 18,
        "num_ones": 10,
        "discount_rate": 0.95,
        "trace_decay": 0.5,
        "step_size": 0.0001,
        "interest": "UI",
        "policy": "random-chain",
    }
    env_info = {"env": "RandomWalk", "num_states": 19}
    num_states = agent_info.get("num_states")
    for seed in np.arange(10):
        agent_info["seed"] = seed
        states = np.arange(num_states).reshape(-1, 1)
        RF = get_representation(agent_info.get("representations"), **agent_info)
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        random_features = np.vstack([RF[states[i]] for i in range(num_states)])
        rl_glue.rl_init(agent_info, env_info)
        max_steps_this_episode = 0
        for i in range(10):
            is_terminal = False

            rl_glue.rl_start()

            while (not is_terminal) and (
                (max_steps_this_episode == 0)
                or (rl_glue.num_steps < max_steps_this_episode)
            ):
                rl_step_result = rl_glue.rl_step()
                is_terminal = rl_step_result[3]
                last_state = rl_step_result[2]
                assert np.array_equiv(
                    rl_glue.agent.FR[last_state], random_features[last_state]
                )
Example no. 13
def calculate_state_distribution(N):
    agent_info = {
        "num_states": N,
        "algorithm": "TD",
        "representations": "TA",
        "discount_rate": 1,
        "trace_decay": 0,
        "step_size": 0.125,
        "seed": 0,
        "interest": "UI",
    }

    env_info = {"env": "RandomWalk", "num_states": N}

    exp_info = {
        "max_timesteps_episode": 1000000,
        "episode_eval_freq": 1,
        "n_episodes": 1,
    }

    rl_glue = RLGlue(envs.get_environment(env_info["env"]),
                     agents.get_agent(agent_info["algorithm"]))

    rl_glue.rl_init(agent_info, env_info)

    eta = np.zeros(env_info["num_states"])
    last_state, _ = rl_glue.rl_start()
    for _ in tqdm(range(1, int(exp_info["max_timesteps_episode"]) + 1)):
        eta[last_state] += 1
        _, last_state, _, term = rl_glue.rl_step()
        if term:
            last_state, _ = rl_glue.rl_start()

    state_distribution = eta / np.sum(eta)

    return state_distribution
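
Usage sketch for calculate_state_distribution: the returned vector sums to one and can serve as the state weighting of the RMSVE objective, in the same way RandomWalkExp below builds its objective with a uniform weighting (the true-values filename here is only illustrative).
state_dist = calculate_state_distribution(19)
assert np.isclose(state_dist.sum(), 1.0)

# Weight the RMSVE objective by the visitation distribution instead of the
# uniform distribution used in RandomWalkExp below.
true_values = np.load("true_v_19_1-0.npy")  # illustrative filename
error = get_objective("RMSVE", true_values, state_dist, np.ones(len(true_values)))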
Example no. 14
class RandomWalkExp(BaseExperiment):
    def __init__(self, agent_info, env_info, experiment_info):
        super().__init__()
        self.agent_info = agent_info
        self.env_info = env_info
        self.experiment_info = experiment_info
        self.agent = agents.get_agent(agent_info.get("algorithm"))
        self.N = env_info["num_states"]
        self.env = envs.get_environment(env_info.get("env"))
        self.n_episodes = experiment_info.get("n_episodes")
        self.episode_eval_freq = experiment_info.get("episode_eval_freq")
        self.id = experiment_info.get("id")
        self.max_episode_steps = experiment_info.get("max_episode_steps")
        self.output_dir = Path(experiment_info.get("output_dir")).expanduser()
        self.initial_seed = experiment_info.get("seed")
        path_exists(self.output_dir)
        path_exists(self.output_dir / "logs")
        self.logger = get_simple_logger(
            __name__, self.output_dir / "logs" / f"{self.id}.txt")
        self.logger.info(
            json.dumps([self.agent_info, self.env_info, self.experiment_info],
                       indent=4))
        discount = self.agent_info.get("discount_rate")
        true_values_filename = f"true_v_{self.N}_{discount}".replace(".", "-")
        path = self.output_dir.parents[0] / true_values_filename
        self.true_values = np.load(f"{path}.npy")
        self.states = np.arange(self.N).reshape((-1, 1))
        self.state_distribution = np.ones_like(self.true_values) / len(self.states)
        self.msve_error = np.zeros(self.n_episodes // self.episode_eval_freq + 1)

        self.error = get_objective(
            "RMSVE",
            self.true_values,
            self.state_distribution,
            np.ones(len(self.true_values)),
        )
        self.timesteps = []

    def init(self):
        FR = get_representation(name=self.agent_info.get("representations"),
                                **self.agent_info)
        self.representations = np.array([
            FR[self.states[i]] for i in range(len(self.states))
        ]).reshape(len(self.states), FR.num_features)
        if self.experiment_info.get("save_representations"):
            path = path_exists(self.output_dir / "representations")
            self.save(path / f"repr_{self.id}", self.representations)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)

    def run(self):
        for i in range(self.experiment_info.get("runs")):
            self.agent_info["seed"] = i + self.initial_seed
            self.env_info["seed"] = i + self.initial_seed
            self.init()
            self.learn()
        self.save(self.output_dir / f"{self.id}", self.msve_error)

    def learn(self):
        estimated_state_values = self.message("get state value")
        self.msve_error[0] = self.error.value(estimated_state_values)

        for episode in range(1, self.n_episodes + 1):
            self._learn(episode)

    def _learn(self, episode):
        self.rl_glue.rl_episode(self.max_episode_steps)

        if episode % self.episode_eval_freq == 0:
            estimated_state_values = self.message("get state value")
            self.msve_error[episode // self.episode_eval_freq] = self.error.value(
                estimated_state_values)

    def save(self, path, data):
        np.save(path, data)

    def cleanup(self):
        pass

    def message(self, message):
        if message == "get state value":
            current_theta = self.rl_glue.rl_agent_message("get weight vector")
            current_approx_v = np.dot(self.representations, current_theta)
            return current_approx_v
        raise Exception("Unexpected message given.")
Example no. 15
class Exp(BaseExperiment):
    def __init__(self, agent_info, env_info, experiment_info):
        super().__init__()
        self.agent_info = agent_info
        self.env_info = env_info
        self.experiment_info = experiment_info
        self.agent = agents.get_agent(agent_info.get("algorithm"))
        self.env = envs.get_environment(env_info.get("env"))
        self.num_episodes = experiment_info.get("n_episodes")
        self.episode_eval_freq = experiment_info.get("episode_eval_freq")
        self.id = experiment_info.get("id")
        self.max_episode_steps = experiment_info.get("max_episode_steps")
        self.output_dir = Path(experiment_info.get("output_dir")).expanduser()
        self.output_dir = path_exists(self.output_dir)
        self.true_values = np.load(self.output_dir / "true_values.npy")
        self.obs = np.load(self.output_dir / "states.npy")
        self.num_obs = len(self.obs)
        self.on_policy_dist = np.ones(self.num_obs) * 1 / self.num_obs
        self.msve_error = np.zeros(self.num_episodes // self.episode_eval_freq + 1)
        self.log_episodes = self.env_info.get("log_episodes")
        if self.log_episodes:
            self.episodes = [[] for _ in range(self.experiment_info.get("runs"))]

        self.objective = get_objective(
            "RMSVE",
            self.true_values,
            self.on_policy_dist,
            np.ones(self.num_obs),
        )

    def init(self):
        feature_representation = get_representation(
            name=self.agent_info.get("representations"), **self.agent_info)
        self.state_features = np.array([
            feature_representation[self.obs[i]] for i in range(self.num_obs)
        ]).reshape(self.num_obs, -1)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)

    def run(self):
        self.init()
        self.learn()
        self.save(self.output_dir / f"{self.id}", self.msve_error)

    def learn(self):
        estimated_state_values = self.message("get approx value")
        self.msve_error[0] = self.objective.value(estimated_state_values)

        for episode in range(1, self.num_episodes + 1):
            self._learn(episode)
            if self.log_episodes:
                self.episodes[0].append(
                    self.rl_glue.rl_env_message("get episode"))

    def _learn(self, episode):
        self.rl_glue.rl_episode(self.max_episode_steps)

        if episode % self.episode_eval_freq == 0:
            estimated_state_values = self.message("get approx value")
            self.msve_error[episode // self.episode_eval_freq] = self.objective.value(
                estimated_state_values)

    def save(self, path, data):
        np.save(path, data)

    def message(self, message):
        if message == "get approx value":
            current_theta = self.rl_glue.rl_agent_message("get weight vector")
            if self.agent_info.get("representations") == "TC":
                current_approx_v = np.sum(current_theta[self.state_features],
                                          axis=1)
            else:
                current_approx_v = np.dot(self.state_features, current_theta)
            return current_approx_v
        raise Exception("Unexpected message given.")
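
Finally, a hedged end-to-end sketch of running Exp: the configuration values are illustrative (the environment id in particular is an assumption), and output_dir must already contain the true_values.npy and states.npy files that __init__ loads, for instance as produced by a pipeline like simulate_on_policy above.
# Illustrative configuration; keys mirror what Exp.__init__ reads, values are placeholders.
agent_info = {
    "algorithm": "TDTileCoding",
    "representations": "TC",
    "max_x": "0.6,0.07",
    "min_x": "-1.2,-0.07",
    "tiles_per_dim": "4,4",
    "tilings": 5,
    "discount_rate": 0.99,
    "trace_decay": 0.0,
    "step_size": 0.0001,
    "seed": 0,
    "interest": "UI",
    "policy": "random",       # assumed policy name
}
env_info = {"env": "MountainCar", "seed": 0, "log_episodes": 0}  # env id assumed
experiment_info = {
    "id": 0,
    "n_episodes": 100,
    "episode_eval_freq": 1,
    "max_episode_steps": 1000,
    "output_dir": "~/scratch/on_policy",  # must contain true_values.npy and states.npy
    "runs": 1,
}

experiment = Exp(agent_info, env_info, experiment_info)
experiment.run()  # saves the RMSVE learning curve to <output_dir>/<id>.npy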