コード例 #1
0
ファイル: fig_9_4.py プロジェクト: plopd/plop-msc-thesis
def get_fig(name):
    """Show a 6x6 grid of scatter plots, one per feature, over random 2-D states.

    10000 points are sampled uniformly from the unit square, the
    representation called ``name`` is built with order 5 over the sampled
    range, and each subplot colours the points by one feature column.
    Only the first 36 feature columns are drawn.

    Args:
        name: Representation identifier forwarded to ``get_representation``.
    """
    num_states = 10000
    num_dims = 2
    num_features = 36
    states = np.random.uniform(0, 1, (num_states, num_dims))

    FR = get_representation(
        name=name,
        order=5,
        num_dims=num_dims,
        min_x=states.min(),
        max_x=states.max(),
    )

    # One feature vector per sampled point, stacked row-wise.
    features = np.array([FR[point] for point in states])

    fig = plt.figure(figsize=(25, 25))
    fig.subplots_adjust(hspace=0.4, wspace=0.4)

    # Square grid: the side length is the integer square root of the
    # number of plotted features.
    grid_side = int(np.sqrt(num_features))
    xs, ys = states[:, 0], states[:, 1]
    for column in range(num_features):
        # Matplotlib subplot positions are 1-based.
        ax = fig.add_subplot(grid_side, grid_side, column + 1)
        ax.scatter(xs, ys, c=features[:, column], cmap="bone")
    plt.tight_layout()
    plt.show()
コード例 #2
0
def test_tabular_features(num_states):
    """Check that the "TA" representation of states 0..n-1 stacks to the identity.

    Args:
        num_states: Number of states; supplied by the test parametrisation.
    """
    state_column = np.arange(num_states).reshape(-1, 1)
    TF = get_representation("TA", num_states=num_states)

    rows = [TF[state] for state in state_column]
    tabular_features = np.vstack(rows)

    # Subtracting the identity must leave an all-zero square matrix.
    residual = tabular_features - np.eye(num_states)
    all_zero = np.zeros((num_states, num_states))
    assert np.array_equiv(residual, all_zero)
コード例 #3
0
def test_fourier_features_num_features(num_states, order, out_features):
    """Check the width of the "F" feature matrix for a one-dimensional input.

    Args:
        num_states: Number of states to featurise.
        order: Order passed to the representation.
        out_features: Expected number of feature columns.
    """
    state_column = np.arange(num_states).reshape(-1, 1)

    BF = get_representation("F", order=order, num_dims=1)

    stacked = np.vstack([BF[state] for state in state_column])
    _, num_columns = stacked.shape
    assert num_columns == out_features
コード例 #4
0
ファイル: fig_9_3.py プロジェクト: plopd/plop-msc-thesis
def get_fig():
    """Plot each feature of the order-9 "F" representation over 19 states.

    One subplot is drawn per feature, stacked vertically, with the state
    index on the x axis and the feature value on the y axis. There are
    ``(order + 1) ** num_dims`` = 10 features. The representation is
    built with ``a=-1`` and ``b=1``, and the y ticks are placed at those
    two values.
    """
    num_states = 19
    num_dims = 1
    order = 9
    num_features = (order + 1) ** num_dims
    states = np.arange(num_states).reshape((-1, num_dims))

    FR = get_representation(
        name="F",
        **{
            "order": order,
            "num_dims": num_dims,
            "min_x": 0,
            "max_x": len(states) - 1,
            "a": -1,
            "b": 1,
        },
    )

    features = np.array([FR[state] for state in states])

    fig = plt.figure(figsize=(4, order * 3))
    fig.subplots_adjust(hspace=0.4, wspace=0.4)

    # Tick positions are identical for every subplot, so build them once.
    state_ticks = np.arange(num_states).tolist()
    y_ticks = [-1, 1]

    for column in range(num_features):
        # Matplotlib subplot positions are 1-based.
        ax = fig.add_subplot(num_features, 1, column + 1)
        ax.scatter(states[:, 0], features[:, column], s=5)
        ax.set_xticks(state_ticks)
        ax.set_xticklabels(state_ticks)
        # Set labels through set_yticklabels rather than as a second
        # positional argument to set_yticks: before Matplotlib 3.5 that
        # positional slot was ``minor``, so the labels were misinterpreted.
        ax.set_yticks(y_ticks)
        ax.set_yticklabels(y_ticks)
    plt.show()
コード例 #5
0
def test_step_size_fourier_cosine_features(num_states):
    """Check per-feature step sizes for two "F" representations.

    The first case is one-dimensional with order 3, the second is
    two-dimensional with order 2. In both, the output of
    ``per_feature_step_size_fourier_KOT`` is compared exactly against a
    hand-written vector derived from a base step size of 0.5.

    Args:
        num_states: Number of states; ``max_x`` is set to ``num_states - 1``.
    """
    step_size = 0.5

    # (num_dims, order, expected per-feature step sizes)
    cases = (
        (
            1,
            3,
            [step_size, step_size, step_size / 2, step_size / 3],
        ),
        (
            2,
            2,
            [
                step_size,
                step_size,
                step_size / 2,
                step_size,
                step_size / np.sqrt(2),
                step_size / np.sqrt(5),
                step_size / 2,
                step_size / np.sqrt(5),
                step_size / np.sqrt(8),
            ],
        ),
    )

    for num_dims, order, expected in cases:
        F = get_representation(
            "F",
            min_x=0,
            max_x=num_states - 1,
            a=0,
            b=1,
            num_dims=num_dims,
            order=order,
        )

        new_step_size = per_feature_step_size_fourier_KOT(
            step_size, F.num_features, F.C
        )

        assert np.array_equiv(new_step_size, np.array(expected))
コード例 #6
0
ファイル: experiment.py プロジェクト: plopd/plop-msc-thesis
    def init(self):
        """Precompute the feature matrix for all observations and start RL-Glue.

        The representation named by ``agent_info["representations"]`` is
        evaluated on the first ``self.num_obs`` entries of ``self.obs``.
        The result is stored in ``self.state_features`` with one row per
        observation. An ``RLGlue`` instance is then created and
        initialised with the agent and environment settings.
        """
        representation_name = self.agent_info.get("representations")
        feature_representation = get_representation(
            name=representation_name, **self.agent_info
        )

        per_observation = [
            feature_representation[self.obs[idx]] for idx in range(self.num_obs)
        ]
        # Flatten whatever shape each lookup returns into one row per observation.
        self.state_features = np.array(per_observation).reshape(self.num_obs, -1)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)
コード例 #7
0
    def init(self):
        """Build the per-state representation matrix, optionally save it, start RL-Glue.

        ``self.representations`` gets one row per entry of ``self.states``
        and ``FR.num_features`` columns. When
        ``experiment_info["save_representations"]`` is truthy, the matrix
        is written under ``<output_dir>/representations`` via ``self.save``.
        """
        representation_name = self.agent_info.get("representations")
        FR = get_representation(name=representation_name, **self.agent_info)

        num_states = len(self.states)
        stacked = np.array([FR[self.states[idx]] for idx in range(num_states)])
        self.representations = stacked.reshape(num_states, FR.num_features)

        should_save = self.experiment_info.get("save_representations")
        if should_save:
            target_dir = path_exists(self.output_dir / "representations")
            self.save(target_dir / f"repr_{self.id}", self.representations)

        self.rl_glue = RLGlue(self.env, self.agent)
        self.rl_glue.rl_init(self.agent_info, self.env_info)
コード例 #8
0
def test_dependent_features_all_columns_sum_up_to_in_features(num_states):
    """Check that every column of the un-normalised "D" features sums to the width.

    The representation is built with ``num_states // 2 + 1`` dimensions,
    and each column sum over all states is expected to equal that same
    number.

    Args:
        num_states: Number of states; supplied by the test parametrisation.
    """
    num_dims = num_states // 2 + 1
    state_column = np.arange(num_states).reshape(-1, 1)

    DF = get_representation(
        "D",
        unit_norm=False,
        num_states=num_states,
        num_dims=num_dims,
    )

    dependent_features = np.vstack([DF[state] for state in state_column])
    column_sums = np.sum(dependent_features, axis=0)

    # A vector of length num_dims whose entries all equal num_dims.
    expected = np.full(num_dims, num_dims)
    assert np.array_equiv(column_sums, expected)
コード例 #9
0
def test_random_features_binary_num_ones(num_states):
    """Check that every "RB" feature vector has exactly ``num_ones`` non-zero entries.

    The representation is built with ``num_states // 2 + 1`` features
    and ``num_states // 4 + 1`` ones per state, using a fixed seed of 0.

    Args:
        num_states: Number of states; supplied by the test parametrisation.
    """
    states = np.arange(num_states).reshape(-1, 1)
    num_ones = num_states // 4 + 1
    RF = get_representation(
        "RB",
        **{
            "num_states": num_states,
            "seed": 0,
            "num_features": num_states // 2 + 1,
            "num_ones": num_ones,
        },
    )

    random_features = np.vstack([RF[state] for state in states])
    for row in random_features:
        # Count the non-zero entries rather than asserting on the tuple
        # returned by np.nonzero, which is truthy whatever the data is.
        # Counting non-zeros also holds if the vectors are rescaled.
        assert np.count_nonzero(row) == num_ones
コード例 #10
0
def get_fig():
    """Plot the "P" representation's features over 5 states for orders 1, 2 and 3.

    One row of the figure is drawn per order. Within a row, each of the
    ``(order + 1) ** num_dims`` features is drawn as a scatter plus a
    connecting line against the state index. The y ticks sit at the
    ``a`` and ``b`` bounds passed to the representation.
    """
    num_states = 5
    num_dims = 1
    orders = [1, 2, 3]

    # States and x ticks do not depend on the order, so build them once.
    states = np.arange(num_states).reshape((-1, num_dims))
    state_ticks = np.arange(num_states).tolist()

    _, axes = plt.subplots(
        ncols=1,
        nrows=len(orders),
        figsize=(7, 10),
        sharey="row",
        sharex="row",
    )

    for row, order in enumerate(orders):
        num_features = (order + 1) ** num_dims

        kwargs_representation = {
            "order": order,
            "num_dims": num_dims,
            "min_x": 0,
            "max_x": len(states) - 1,
            "a": -1,
            "b": 1,
        }

        FR = get_representation(name="P", **kwargs_representation, unit_norm=True)
        # Diagnostic output, kept so the console output is unchanged.
        print(FR.C.shape, FR.C)

        features = np.array([FR[state] for state in states])

        ax = axes[row]
        for column in range(num_features):
            ax.scatter(states[:, 0], features[:, column], s=5)
            ax.plot(states[:, 0], features[:, column])

        # Configure the axis once per row, after all plotting, instead of
        # repeating it for every feature.
        y_ticks = [kwargs_representation.get("a"), kwargs_representation.get("b")]
        ax.set_xticks(state_ticks)
        ax.set_xticklabels(state_ticks)
        # Set labels through set_yticklabels rather than as a second
        # positional argument to set_yticks: before Matplotlib 3.5 that
        # positional slot was ``minor``, so the labels were misinterpreted.
        ax.set_yticks(y_ticks)
        ax.set_yticklabels(y_ticks)
    plt.show()
コード例 #11
0
def test_same_random_feature_for_state_in_an_episode():
    """Check that repeated visits to a state yield the same "RB" feature vector.

    The visited sequence is 1, 1, 2, 2, 2, 3, 3. Every pair of positions
    holding the same state must map to equivalent feature rows.
    """
    visited = [1, 1, 2, 2, 2, 3, 3]
    states = np.array(visited).reshape(-1, 1)

    RF = get_representation(
        "RB",
        num_states=7,
        seed=6,
        num_features=7 // 2,
        num_ones=2,
    )

    random_features = np.vstack([RF[state] for state in states])

    # Index pairs in the visited sequence that refer to the same state.
    same_state_pairs = ((0, 1), (2, 3), (2, 4), (3, 4), (5, 6))
    for first, second in same_state_pairs:
        assert np.array_equiv(random_features[first], random_features[second])
コード例 #12
0
ファイル: TD.py プロジェクト: plopd/plop-msc-thesis
    def agent_init(self, agent_info):
        """Initialise hyper-parameters, representation, weights and traces.

        Args:
            agent_info: Mapping of agent settings. The keys read here are
                ``step_size``, ``discount_rate``, ``trace_decay``,
                ``seed``, ``policy`` and ``representations``. The whole
                mapping is also forwarded to ``get_representation``.

        When a step size is given, it is rescaled for two representations:
        "F" uses ``per_feature_step_size_fourier_KOT``, and "RB" divides
        it by the representation's ``num_ones``.
        """
        self.agent_info = agent_info
        self.step_size = agent_info.get("step_size")
        self.discount_rate = agent_info.get("discount_rate")
        self.trace_decay = agent_info.get("trace_decay")
        self.rand_generator = np.random.RandomState(agent_info.get("seed"))
        self.policy = agent_info.get("policy")

        representation_name = agent_info.get("representations")
        self.FR = get_representation(name=representation_name, **agent_info)

        # Weight and eligibility vectors have one entry per feature.
        self.weights = np.zeros(self.FR.num_features)
        self.eligibility = np.zeros(self.FR.num_features)

        # Without a step size there is nothing to rescale.
        if self.step_size is None:
            return

        if representation_name == "F":
            self.step_size = per_feature_step_size_fourier_KOT(
                self.step_size, self.FR.num_features, self.FR.C
            )
        elif representation_name == "RB":
            self.step_size /= self.FR.num_ones
コード例 #13
0
ファイル: chain_test.py プロジェクト: plopd/plop-msc-thesis
def test_same_feature_representation_for_one_trial(representations):
    """Check that the agent's features match an independently built copy.

    For each of 10 seeds, a representation is built directly from
    ``agent_info``. An ETD agent is then run on a 19-state RandomWalk for
    10 episodes, and after every step the agent's own feature lookup for
    the reported state is compared with the precomputed one.

    Args:
        representations: Representation identifier; supplied by the test
            parametrisation.
    """
    agent_info = {
        "num_states": 19,
        "algorithm": "ETD",
        "representations": representations,
        "num_features": 18,
        "num_ones": 10,
        "discount_rate": 0.95,
        "trace_decay": 0.5,
        "step_size": 0.0001,
        "interest": "UI",
        "policy": "random-chain",
    }
    env_info = {"env": "RandomWalk", "num_states": 19}
    num_states = agent_info.get("num_states")
    # The state grid does not depend on the seed, so build it once.
    states = np.arange(num_states).reshape(-1, 1)

    for seed in np.arange(10):
        agent_info["seed"] = seed
        RF = get_representation(agent_info.get("representations"), **agent_info)
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        random_features = np.vstack([RF[state] for state in states])
        rl_glue.rl_init(agent_info, env_info)
        # Zero means "no step cap": the loop below runs until termination.
        max_steps_this_episode = 0
        for _ in range(10):
            is_terminal = False

            rl_glue.rl_start()

            while (not is_terminal) and (
                (max_steps_this_episode == 0)
                or (rl_glue.num_steps < max_steps_this_episode)
            ):
                rl_step_result = rl_glue.rl_step()
                # NOTE(review): indices 3 and 2 are taken to be the terminal
                # flag and the state, as in the original code. Confirm
                # against RLGlue.rl_step.
                is_terminal = rl_step_result[3]
                last_state = rl_step_result[2]
                # The comparison result used to be discarded, so the test
                # could never fail on a mismatch.
                assert np.array_equiv(
                    rl_glue.agent.FR[last_state], random_features[last_state]
                )