class Reacher:
    """Black-box objective built on the ``FetchReach-v1`` gym task.

    A candidate ``x`` is a flat vector of ``dims`` values.  It is reshaped
    into a matrix of shape ``policy_shape`` that acts as a linear policy
    (``action = M @ normalised_observation``).  Calling the instance rolls
    the policy out and returns the negated mean episode return.
    """

    def __init__(self):
        # Observation normalisation, applied as (obs - mean) / std.
        self.mean = 0
        self.std = 1
        # Candidates must lie in the box [lb, ub] (checked in __call__).
        self.dims = 52
        self.lb = -1 * np.ones(self.dims)
        self.ub = 1 * np.ones(self.dims)
        # Number of times the objective has been called.
        self.counter = 0
        self.env = FlattenObservation(
            FilterObservation(gym.make('FetchReach-v1'),
                              ['observation', 'desired_goal']))
        self.num_rollouts = 3
        self.render = False
        self.policy_shape = (4, 13)  # 4 * 13 == dims

        # tunable hyper-parameters in LA-MCTS
        self.Cp = 10
        self.leaf_size = 100
        self.kernel_type = "linear"
        self.gamma_type = "auto"
        self.ninits = 30

        print("===========initialization===========")
        print("mean:", self.mean)
        print("std:", self.std)
        print("dims:", self.dims)
        print("policy:", self.policy_shape)

    def __call__(self, x):
        """Evaluate the linear policy encoded by ``x``.

        Args:
            x: 1-D numpy array of length ``self.dims`` whose entries all lie
                within ``[self.lb, self.ub]``.

        Returns:
            The mean total reward over ``self.num_rollouts`` episodes,
            multiplied by -1.

        Raises:
            AssertionError: if ``x`` has the wrong length, is not 1-D, or
                falls outside the bounds.
        """
        # The call is counted even when validation below rejects the input.
        self.counter += 1
        assert len(x) == self.dims
        assert x.ndim == 1
        assert np.all(x <= self.ub) and np.all(x >= self.lb)

        M = x.reshape(self.policy_shape)

        returns = []
        for _ in range(self.num_rollouts):
            obs = self.env.reset()
            done = False
            totalr = 0.
            while not done:
                inputs = (obs - self.mean) / self.std
                action = np.dot(M, inputs)
                obs, r, done, _info = self.env.step(action)
                totalr += r
                if self.render:
                    self.env.render()
            returns.append(totalr)

        return np.mean(returns) * -1
def test_flatten_observation(env_id):
    """Check that FlattenObservation turns a rank-3 observation into a vector.

    The flattened length must equal the product of the three original
    dimensions.
    """
    base_env = gym.make(env_id)
    flat_env = FlattenObservation(base_env)

    raw = base_env.reset()
    flat = flat_env.reset()

    raw_shape = raw.shape
    assert len(raw_shape) == 3
    assert len(flat.shape) == 1

    # Safe to unpack: the rank-3 assertion above has already passed.
    dim0, dim1, dim2 = raw_shape
    assert flat.shape[0] == dim0 * dim1 * dim2
def test_flattened_environment(self, observation_space, ordered_values):
    """
    Verify that a flattened observation lists its values in the expected order.
    """
    base_env = FakeEnvironment(observation_space=observation_space)
    flat_obs = FlattenObservation(base_env).reset()
    # Round-trip the flat vector back through the original space.
    restored = unflatten(base_env.observation_space, flat_obs)
    self._check_observations(
        base_env.observation, flat_obs, restored, ordered_values)
def test_flatten_observation(env_id):
    """Check raw and flattened observations against their expected spaces.

    The raw observation must belong to a Tuple of three Discrete spaces and
    the flattened one to an int64 Box whose length is the sum of their sizes.
    """
    base_env = gym.make(env_id)
    flat_env = FlattenObservation(base_env)
    raw_obs = base_env.reset()
    flat_obs = flat_env.reset()

    sizes = (32, 11, 2)
    expected_raw = spaces.Tuple(tuple(spaces.Discrete(n) for n in sizes))
    expected_flat = spaces.Box(0, 1, [sum(sizes)], dtype=np.int64)

    assert expected_raw.contains(raw_obs)
    assert expected_flat.contains(flat_obs)
def test_flatten_observation(env_id):
    """Check raw and flattened observations against per-environment spaces.

    Args:
        env_id: gym environment id; only ``'Blackjack-v0'`` and
            ``'KellyCoinflip-v0'`` have expected spaces defined here.

    Raises:
        ValueError: if ``env_id`` has no expected spaces defined, instead of
            failing later with an unbound local variable.
    """
    env = gym.make(env_id)
    wrapped_env = FlattenObservation(env)

    obs = env.reset()
    wrapped_obs = wrapped_env.reset()

    if env_id == 'Blackjack-v0':
        space = spaces.Tuple(
            (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2)))
        wrapped_space = spaces.Box(-np.inf, np.inf, [32 + 11 + 2],
                                   dtype=np.float32)
    elif env_id == 'KellyCoinflip-v0':
        space = spaces.Tuple(
            (spaces.Box(0, 250.0, [1], dtype=np.float32),
             spaces.Discrete(300 + 1)))
        wrapped_space = spaces.Box(-np.inf, np.inf, [1 + (300 + 1)],
                                   dtype=np.float32)
    else:
        # Without this branch the asserts below would hit unbound locals.
        raise ValueError(
            "no expected observation spaces defined for env_id %r" % (env_id,))

    assert space.contains(obs)
    assert wrapped_space.contains(wrapped_obs)
def test_nested_dicts_ravel(self, observation_space, flat_shape):
    """Check that a reset observation matches the wrapper's declared shape.

    The environment is filtered to ``env.obs_keys`` and then flattened.
    ``flat_shape`` is accepted but not used by this test.
    """
    base_env = FakeEnvironment(observation_space=observation_space)
    filtered_env = FilterObservation(base_env, base_env.obs_keys)
    flat_env = FlattenObservation(filtered_env)
    first_obs = flat_env.reset()
    assert first_obs.shape == flat_env.observation_space.shape
# Define and parameterize the reference generator for the current reference reference_generator=WienerProcessReferenceGenerator( reference_state='i', sigma_range=(3e-3, 3e-2)), # Defines which variables to plot via the builtin dashboard monitor visualization=MotorDashboard(state_plots=['i', 'omega']), ) # Now, the environment will output states and references separately state, ref = env.reset() # For data processing we sometimes want to flatten the env output, # which means that the env will only output one array that contains states and references consecutively env = FlattenObservation(env) obs = env.reset() # Read the number of possible actions for the given env # this allows us to define a proper learning agent for this task nb_actions = env.action_space.n window_length = 1 # Define an artificial neural network to be used within the agent model = Sequential() # The network's input fits the observation space of the env model.add( Flatten(input_shape=(window_length, ) + env.observation_space.shape)) model.add(Dense(16, activation='relu')) model.add(Dense(16, activation='relu')) model.add(Dense(4, activation='relu'))