def get_fig(name):
    """Show a grid of scatter plots, one per feature, for representation ``name``.

    10,000 two-dimensional states are sampled uniformly from [0, 1) and passed
    through the representation built by ``get_representation``. The first 36
    feature columns are each drawn as a scatter of the sampled states coloured
    by that feature's value.

    Args:
        name: Representation identifier forwarded to ``get_representation``.
    """
    num_states = 10000
    num_dims = 2
    num_features = 36
    # NOTE(review): assumes the representation yields at least 36 features
    # for order=5 in 2 dimensions -- confirm against get_representation.
    grid_side = int(np.sqrt(num_features))

    states = np.random.uniform(0, 1, (num_states, num_dims))
    FR = get_representation(
        name=name,
        order=5,
        num_dims=num_dims,
        min_x=states.min(),
        max_x=states.max(),
    )
    features = np.array([FR[state] for state in states])

    fig = plt.figure(figsize=(25, 25))
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    for feature_idx in range(num_features):
        # Matplotlib subplot positions are 1-based.
        ax = fig.add_subplot(grid_side, grid_side, feature_idx + 1)
        ax.scatter(
            states[:, 0],
            states[:, 1],
            c=features[:, feature_idx],
            cmap="bone",
        )
    plt.tight_layout()
    plt.show()
def test_tabular_features(num_states):
    """Check that the "TA" representation of states 0..n-1 stacks to the identity.

    Args:
        num_states: Number of states; each state ``i`` is looked up as a
            length-1 array ``[i]``.
    """
    states = np.arange(num_states).reshape(-1, 1)
    TF = get_representation("TA", num_states=num_states)

    stacked_rows = [TF[state] for state in states]
    tabular_features = np.vstack(stacked_rows)

    # The difference from the identity matrix must be all zeros.
    residual = tabular_features - np.eye(num_states)
    assert np.array_equiv(residual, np.zeros((num_states, num_states)))
def test_fourier_features_num_features(num_states, order, out_features):
    """Check the feature count produced by a 1-D "F" representation.

    Args:
        num_states: Number of integer states to encode.
        order: Order passed to the representation.
        out_features: Expected number of feature columns.
    """
    states = np.arange(num_states).reshape(-1, 1)
    BF = get_representation("F", order=order, num_dims=1)

    per_state = [BF[state] for state in states]
    features = np.vstack(per_state)

    _, feature_count = features.shape
    assert feature_count == out_features
def get_fig():
    """Plot each feature of an order-9, 1-D "F" representation over 19 states.

    One subplot row is drawn per feature (``(order + 1) ** num_dims`` rows),
    showing the feature value at every integer state, with x ticks at each
    state and y ticks at -1 and 1.
    """
    num_states = 19
    num_dims = 1
    order = 9
    num_features = (order + 1) ** num_dims

    states = np.arange(num_states).reshape((-1, num_dims))
    FR = get_representation(
        name="F",
        order=order,
        num_dims=num_dims,
        min_x=0,
        max_x=len(states) - 1,
        a=-1,
        b=1,
    )
    features = np.array([FR[state] for state in states])

    fig = plt.figure(figsize=(4, order * 3))
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    for feature_idx in range(num_features):
        # Matplotlib subplot positions are 1-based.
        ax = fig.add_subplot(num_features, 1, feature_idx + 1)
        ax.scatter(states[:, 0], features[:, feature_idx], s=5)
        ax.set_xticks(np.arange(num_states).tolist())
        ax.set_xticklabels(np.arange(num_states).tolist())
        ax.set_yticks([-1, 1], [-1, 1])
    plt.show()
def test_step_size_fourier_cosine_features(num_states):
    """Check per-feature step sizes from ``per_feature_step_size_fourier_KOT``.

    Two "F" representations are built (1-D order 3, then 2-D order 2) and the
    scaled step sizes are compared with hand-written expected values derived
    from a base step size of 0.5.

    Args:
        num_states: Number of states; ``num_states - 1`` is used as ``max_x``.
    """
    step_size = 0.5
    shared_kwargs = {"min_x": 0, "max_x": num_states - 1, "a": 0, "b": 1}

    # (num_dims, order, expected per-feature step sizes)
    scenarios = [
        (
            1,
            3,
            [step_size, step_size, step_size / 2, step_size / 3],
        ),
        (
            2,
            2,
            [
                step_size,
                step_size,
                step_size / 2,
                step_size,
                step_size / np.sqrt(2),
                step_size / np.sqrt(5),
                step_size / 2,
                step_size / np.sqrt(5),
                step_size / np.sqrt(8),
            ],
        ),
    ]

    for num_dims, order, expected in scenarios:
        F = get_representation(
            "F", **shared_kwargs, num_dims=num_dims, order=order
        )
        new_step_size = per_feature_step_size_fourier_KOT(
            step_size, F.num_features, F.C
        )
        assert np.array_equiv(new_step_size, np.array(expected))
def init(self):
    """Precompute the feature matrix for all observations and set up RL-Glue.

    Builds the representation named by ``agent_info["representations"]``,
    stores one feature row per observation in ``self.state_features`` (shape
    ``(num_obs, -1)``), then creates and initialises ``self.rl_glue``.
    """
    representation_name = self.agent_info.get("representations")
    feature_representation = get_representation(
        name=representation_name, **self.agent_info
    )

    encoded = [
        feature_representation[self.obs[idx]] for idx in range(self.num_obs)
    ]
    self.state_features = np.array(encoded).reshape(self.num_obs, -1)

    glue = RLGlue(self.env, self.agent)
    self.rl_glue = glue
    glue.rl_init(self.agent_info, self.env_info)
def init(self):
    """Build the state-feature matrix, optionally save it, and set up RL-Glue.

    The representation named by ``agent_info["representations"]`` is evaluated
    on every entry of ``self.states`` and stored in ``self.representations``
    with shape ``(len(self.states), FR.num_features)``. When
    ``experiment_info["save_representations"]`` is truthy the matrix is written
    under ``<output_dir>/representations``. Finally ``self.rl_glue`` is created
    and initialised.
    """
    FR = get_representation(
        name=self.agent_info.get("representations"), **self.agent_info
    )

    state_count = len(self.states)
    encoded_states = [FR[self.states[idx]] for idx in range(state_count)]
    self.representations = np.array(encoded_states).reshape(
        state_count, FR.num_features
    )

    if self.experiment_info.get("save_representations"):
        path = path_exists(self.output_dir / "representations")
        target = path / f"repr_{self.id}"
        self.save(target, self.representations)

    glue = RLGlue(self.env, self.agent)
    self.rl_glue = glue
    glue.rl_init(self.agent_info, self.env_info)
def test_dependent_features_all_columns_sum_up_to_in_features(num_states):
    """Check that every column of the un-normalised "D" features has the same sum.

    The representation is built with ``num_dims = num_states // 2 + 1`` and
    ``unit_norm=False``; each feature column, summed over all states, is
    expected to equal ``num_states // 2 + 1``.

    Args:
        num_states: Number of integer states to encode.
    """
    num_dims = num_states // 2 + 1
    states = np.arange(num_states).reshape(-1, 1)
    DF = get_representation(
        "D", unit_norm=False, num_states=num_states, num_dims=num_dims
    )

    dependent_features = np.vstack([DF[state] for state in states])
    column_sums = np.sum(dependent_features, axis=0)

    # Same values as ``np.ones(k) * num_states // 2 + 1``, which evaluates as
    # ``((ones * num_states) // 2) + 1`` by operator precedence.
    expected_sums = np.full(num_dims, num_states // 2 + 1)
    assert np.array_equiv(column_sums, expected_sums)
def test_random_features_binary_num_ones(num_states):
    """Check that every "RB" feature vector has exactly ``num_ones`` non-zeros.

    The representation is built with seed 0, ``num_states // 2 + 1`` features
    and ``num_ones = num_states // 4 + 1``; the feature row of each state is
    then inspected.

    Args:
        num_states: Number of integer states to encode.
    """
    states = np.arange(num_states).reshape(-1, 1)
    num_ones = num_states // 4 + 1
    RF = get_representation(
        "RB",
        num_states=num_states,
        seed=0,
        num_features=num_states // 2 + 1,
        num_ones=num_ones,
    )
    random_features = np.vstack([RF[state] for state in states])

    # The previous check, ``np.nonzero(row[0] == num_ones)``, asserted on the
    # tuple returned by ``np.nonzero`` (always truthy), so it could never fail.
    # Count the non-zero entries of each row and compare with num_ones.
    for state_idx, row in enumerate(random_features):
        active = np.count_nonzero(row)
        assert active == num_ones, (
            f"state {state_idx}: expected {num_ones} non-zero features, "
            f"got {active}"
        )
def get_fig():
    """Plot unit-normalised "P" features over 5 states for orders 1, 2 and 3.

    One subplot row is drawn per order. Within a row, every feature column is
    drawn as a scatter plus a connecting line, with x ticks at each state and
    y ticks at the ``a``/``b`` bounds passed to the representation. The
    representation's ``C`` attribute (shape and value) is printed per order.
    """
    num_states = 5
    num_dims = 1
    orders = [1, 2, 3]
    lower, upper = -1, 1

    fig, axes = plt.subplots(
        ncols=1,
        nrows=len(orders),
        figsize=(7, 10),
        sharey="row",
        sharex="row",
    )
    state_ticks = np.arange(num_states).tolist()

    for row, order in enumerate(orders):
        num_features = (order + 1) ** num_dims
        states = np.arange(num_states).reshape((-1, num_dims))
        kwargs_representation = {
            "order": order,
            "num_dims": num_dims,
            "min_x": 0,
            "max_x": len(states) - 1,
            "a": lower,
            "b": upper,
        }
        FR = get_representation(
            name="P", **kwargs_representation, unit_norm=True
        )
        print(FR.C.shape, FR.C)
        features = np.array([FR[state] for state in states])

        axis = axes[row]
        for feature_idx in range(num_features):
            column = features[:, feature_idx]
            axis.scatter(states[:, 0], column, s=5)
            axis.plot(states[:, 0], column)
        axis.set_xticks(state_ticks)
        axis.set_xticklabels(state_ticks)
        axis.set_yticks([lower, upper], [lower, upper])
    plt.show()
def test_same_random_feature_for_state_in_an_episode():
    """Check that repeated visits to a state yield identical "RB" features.

    The state sequence ``[1, 1, 2, 2, 2, 3, 3]`` is encoded with a fixed seed,
    and rows belonging to the same state are compared pairwise.
    """
    visited = [1, 1, 2, 2, 2, 3, 3]
    num_states = 7
    states = np.array(visited).reshape(-1, 1)
    RF = get_representation(
        "RB",
        num_states=num_states,
        seed=6,
        num_features=num_states // 2,
        num_ones=2,
    )
    random_features = np.vstack([RF[states[i]] for i in range(num_states)])

    # Index pairs in the sequence that refer to the same underlying state.
    same_state_pairs = [(0, 1), (2, 3), (2, 4), (3, 4), (5, 6)]
    for first, second in same_state_pairs:
        assert np.array_equiv(random_features[first], random_features[second])
def agent_init(self, agent_info):
    """Initialise the agent's hyper-parameters, representation and weights.

    Reads ``step_size``, ``discount_rate``, ``trace_decay``, ``seed`` and
    ``policy`` from ``agent_info``, builds the feature representation named by
    ``agent_info["representations"]``, and zero-initialises the weight and
    eligibility vectors to ``FR.num_features`` entries.

    When a step size is given it is rescaled for two representations:
    ``"F"`` gets per-feature step sizes from
    ``per_feature_step_size_fourier_KOT``; ``"RB"`` has the step size divided
    by ``FR.num_ones``.

    Args:
        agent_info: Mapping of agent settings; also forwarded as keyword
            arguments to ``get_representation``.
    """
    self.agent_info = agent_info
    self.step_size = agent_info.get("step_size")
    self.discount_rate = agent_info.get("discount_rate")
    self.trace_decay = agent_info.get("trace_decay")
    self.rand_generator = np.random.RandomState(agent_info.get("seed"))
    self.policy = agent_info.get("policy")

    self.FR = get_representation(
        name=agent_info.get("representations"), **agent_info
    )
    feature_count = self.FR.num_features
    self.weights = np.zeros(feature_count)
    self.eligibility = np.zeros(feature_count)

    # Without a configured step size there is nothing to rescale.
    if self.step_size is None:
        return

    representation_name = agent_info.get("representations")
    if representation_name == "F":
        self.step_size = per_feature_step_size_fourier_KOT(
            self.step_size, self.FR.num_features, self.FR.C
        )
    elif representation_name == "RB":
        self.step_size /= self.FR.num_ones
def test_same_feature_representation_for_one_trial(representations):
    """Check the agent's features match an independently built representation.

    For each of 10 seeds, a reference representation is built from
    ``agent_info`` and evaluated on all states. An RL-Glue experiment using the
    same ``agent_info`` is then run for 10 episodes, and after every step the
    agent's feature vector for the reported state is compared with the
    reference row for that state.

    Args:
        representations: Representation name stored under
            ``agent_info["representations"]``.
    """
    agent_info = {
        "num_states": 19,
        "algorithm": "ETD",
        "representations": representations,
        "num_features": 18,
        "num_ones": 10,
        "discount_rate": 0.95,
        "trace_decay": 0.5,
        "step_size": 0.0001,
        "interest": "UI",
        "policy": "random-chain",
    }
    env_info = {"env": "RandomWalk", "num_states": 19}
    num_states = agent_info.get("num_states")

    for seed in np.arange(10):
        agent_info["seed"] = seed
        states = np.arange(num_states).reshape(-1, 1)
        RF = get_representation(agent_info.get("representations"), **agent_info)
        rl_glue = RLGlue(
            get_environment(env_info["env"]), get_agent(agent_info["algorithm"])
        )
        random_features = np.vstack([RF[states[i]] for i in range(num_states)])
        rl_glue.rl_init(agent_info, env_info)

        # 0 means "no step limit": episodes run until the terminal flag is set.
        max_steps_this_episode = 0
        for _ in range(10):
            is_terminal = False
            rl_glue.rl_start()
            while (not is_terminal) and (
                (max_steps_this_episode == 0)
                or (rl_glue.num_steps < max_steps_this_episode)
            ):
                rl_step_result = rl_glue.rl_step()
                is_terminal = rl_step_result[3]
                # NOTE(review): index 2 is used as the state here -- confirm
                # against this project's RLGlue.rl_step return layout.
                last_state = rl_step_result[2]
                # The comparison result was previously discarded, so the test
                # could never fail; assert it.
                assert np.array_equiv(
                    rl_glue.agent.FR[last_state], random_features[last_state]
                )