def build_low_level_ghavamzadeh(alg, params, mdp):
    # Low-level features
    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 10]
    low = np.array(low, dtype=float)
    high = np.array(high, dtype=float)
    n_tilings = 3

    tilingsL = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles,
                              low=low, high=high)

    featuresL = Features(tilings=tilingsL)

    mdp_info_agentL = MDPInfo(
        observation_space=spaces.Box(low=np.array([0, 0]),
                                     high=np.array([150, 150]),
                                     shape=(2,)),
        action_space=mdp.info.action_space,
        gamma=0.99,
        horizon=10000)

    input_shape = (featuresL.size,)
    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    std = np.array([3e-2])
    pi = DiagonalGaussianPolicy(mu=approximator, std=std)

    agent = alg(pi, mdp_info_agentL, features=featuresL, **params)

    return agent
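# Usage sketch for the builder above. This is illustrative, not the
# experiment's actual configuration: GPOMDP, AdaptiveParameter and the
# learning rate are assumed choices, and any policy-gradient algorithm with
# the signature alg(policy, mdp_info, features=..., **params) would fit.
from mushroom.algorithms.policy_search import GPOMDP
from mushroom.utils.parameters import AdaptiveParameter

mdp = ShipSteering(small=True)
params = dict(learning_rate=AdaptiveParameter(value=1e-2))
agent_low = build_low_level_ghavamzadeh(GPOMDP, params, mdp)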
def __init__(self, name, horizon, gamma):
    self.__name__ = name

    # MDP creation
    self.env = gym.make(self.__name__)
    self.env._max_episode_steps = np.inf  # Hack to ignore gym time limit.

    # MDP properties
    assert not isinstance(self.env.observation_space,
                          gym_spaces.MultiDiscrete)
    assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

    action_space = self._convert_gym_space(self.env.action_space)
    observation_space = self._convert_gym_space(self.env.observation_space)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    if isinstance(action_space, Discrete):
        self._convert_action = self._convert_action_function
    else:
        self._convert_action = self._no_convert

    if isinstance(observation_space, Discrete) and len(
            observation_space.size) > 1:
        self._convert_state = self._convert_state_function
    else:
        self._convert_state = self._no_convert

    super(Gym, self).__init__(mdp_info)
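# Minimal use of the wrapper above, assuming a classic Gym control task; the
# environment id, horizon and discount factor are illustrative.
mdp = Gym(name='Pendulum-v0', horizon=200, gamma=.99)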
def __init__(self, **kwargs):
    # Define environment properties
    high_x = np.array([5.0, 5.0, np.pi])
    low_x = -high_x

    high_u = np.array([1.0, 3.0])
    low_u = -high_u

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)

    gamma = 0.9
    horizon = 400

    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    hz = 10.0
    super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz,
                                          **kwargs)

    # publish the velocity setpoint on the /cmd_vel topic
    self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

    # query the /gazebo/get_model_state service to get the position of the
    # turtlebot
    model_state_service_name = '/gazebo/get_model_state'
    rospy.wait_for_service(model_state_service_name)
    self._model_state_service = rospy.ServiceProxy(model_state_service_name,
                                                   GetModelState)
def build_discretized_agent(alg, params, n, optim, loss, mdp, eps,
                            n_features, use_cuda):
    high = mdp.info.observation_space.high
    low = mdp.info.observation_space.low

    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Discrete(n)
    mdp_info = MDPInfo(observation_space=observation_space,
                       action_space=action_space,
                       gamma=mdp.info.gamma,
                       horizon=mdp.info.horizon)
    pi = Boltzmann(eps)

    approximator_params = dict(network=Network,
                               optimizer=optim,
                               loss=loss,
                               n_features=n_features,
                               input_shape=mdp_info.observation_space.shape,
                               output_shape=mdp_info.action_space.size,
                               n_actions=mdp_info.action_space.n,
                               use_cuda=use_cuda)
    agent = alg(PyTorchApproximator, pi, mdp_info,
                approximator_params=approximator_params, **params)

    return agent
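# Hypothetical call of the builder above, assuming a DQN-style algorithm and
# mushroom's usual optimizer/loss configuration; every concrete value below
# is illustrative.
from torch import optim
import torch.nn.functional as F

from mushroom.algorithms.value import DQN
from mushroom.utils.parameters import Parameter

optimizer = {'class': optim.Adam, 'params': dict(lr=1e-3)}
params = dict(batch_size=32, initial_replay_size=500, max_replay_size=5000,
              target_update_frequency=100)
agent = build_discretized_agent(DQN, params, n=11, optim=optimizer,
                                loss=F.smooth_l1_loss, mdp=mdp,
                                eps=Parameter(value=1.), n_features=80,
                                use_cuda=False)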
def __init__(self, n_steps_action=3, viz_speed=100, small=False):
    self.__name__ = 'ShipSteeringMultiGate'
    self.n_steps_action = n_steps_action
    self.viz_speed = viz_speed

    # MDP parameters
    self.no_of_gates = 4
    self.small = small
    self.field_size = 500 if small else 1000

    low = np.array([0, 0, -np.pi, -np.pi / 12., 0])
    high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.,
                     self.no_of_gates])
    self.omega_max = np.array([np.pi / 12.])
    self._v = 3.
    self._T = 5.
    self._dt = .2

    gate_1s = np.array([75, 175]) if small else np.array([150, 350])
    gate_1e = np.array([125, 175]) if small else np.array([250, 350])
    gate_1 = np.array([gate_1s, gate_1e])

    gate_2s = np.array([150, 300]) if small else np.array([300, 600])
    gate_2e = np.array([200, 300]) if small else np.array([400, 600])
    gate_2 = np.array([gate_2s, gate_2e])

    gate_3s = np.array([250, 350]) if small else np.array([500, 700])
    gate_3e = np.array([300, 350]) if small else np.array([600, 700])
    gate_3 = np.array([gate_3s, gate_3e])

    gate_4s = np.array([150, 425]) if small else np.array([300, 850])
    gate_4e = np.array([200, 425]) if small else np.array([400, 850])
    gate_4 = np.array([gate_4s, gate_4e])

    self._gate_list = gate_1, gate_2, gate_3, gate_4

    # MDP properties
    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
    horizon = 5000
    gamma = .99
    self._out_reward = -10000
    self.correct_order = False

    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(self.field_size, self.field_size,
                          background=(66, 131, 237))

    super(ShipSteeringMultiGate, self).__init__(mdp_info)
def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None,
             dt=.01, width_screen=480, height_screen=480, camera_id=0):
    """
    Constructor.

    Args:
        domain_name (str): name of the environment;
        task_name (str): name of the task of the environment;
        horizon (int): the horizon;
        gamma (float): the discount factor;
        task_kwargs (dict, None): parameters of the task;
        dt (float, .01): duration of a control step;
        width_screen (int, 480): width of the screen;
        height_screen (int, 480): height of the screen;
        camera_id (int, 0): position of the camera to render the
            environment.

    """
    # MDP creation
    if task_kwargs is None:
        task_kwargs = dict()
    task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

    self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

    # MDP properties
    action_space = self._convert_action_space(self.env.action_spec())
    observation_space = self._convert_observation_space(
        self.env.observation_spec())
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    self._viewer = ImageViewer((width_screen, height_screen), dt)
    self._camera_id = camera_id

    super().__init__(mdp_info)
def __init__(self, name, width=84, height=84, ends_at_life=False):
    """
    Constructor.

    Args:
        name (str): id name of the Atari game in Gym;
        width (int, 84): width of the screen;
        height (int, 84): height of the screen;
        ends_at_life (bool, False): whether the episode ends when a life is
            lost or not.

    """
    self.__name__ = name

    # MDP creation
    self.env = gym.make(self.__name__)

    # MDP parameters
    self.img_size = (width, height)
    self._episode_ends_at_life = ends_at_life
    self._max_lives = self.env.env.ale.lives()
    self._lives = self._max_lives

    # MDP properties
    action_space = Discrete(self.env.action_space.n)
    observation_space = Box(low=0., high=255.,
                            shape=(self.img_size[1], self.img_size[0]))
    horizon = np.inf
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(Atari, self).__init__(mdp_info)
def __init__(self, A, B, Q, R, random_init=False, gamma=0.9, horizon=50):
    """
    Constructor.

    Args:
        A (np.ndarray): the state dynamics matrix;
        B (np.ndarray): the action dynamics matrix;
        Q (np.ndarray): reward weight matrix for state;
        R (np.ndarray): reward weight matrix for action;
        random_init (bool, False): start from a random state;
        gamma (float, 0.9): discount factor;
        horizon (int, 50): horizon of the mdp.

    """
    self.A = A
    self.B = B
    self.Q = Q
    self.R = R
    self.random_init = random_init

    # MDP properties
    high_x = np.inf * np.ones(A.shape[0])
    low_x = -high_x

    # The action dimension is the number of columns of B (x' = Ax + Bu).
    high_u = np.inf * np.ones(B.shape[1])
    low_u = -high_u

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(LQR, self).__init__(mdp_info)
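# A self-contained instantiation of the LQR above: a discrete-time double
# integrator (two states, one action) with quadratic costs. The matrices are
# an illustrative example, not taken from any experiment.
A = np.array([[1., 1.],
              [0., 1.]])
B = np.array([[0.],
              [1.]])
Q = np.eye(2)
R = .1 * np.eye(1)
mdp = LQR(A, B, Q, R, gamma=.9, horizon=50)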
def __init__(self, small=True):
    self.__name__ = 'ShipSteering'

    # MDP parameters
    self.field_size = 150 if small else 1000
    low = np.array([0, 0, -np.pi, -np.pi / 12.])
    high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
    self.omega_max = np.array([np.pi / 12.])
    self._v = 3.
    self._T = 5.
    self._dt = .2
    self._gate_s = np.empty(2)
    self._gate_e = np.empty(2)
    self._gate_s[0] = 100 if small else 900
    self._gate_s[1] = 120 if small else 920
    self._gate_e[0] = 120 if small else 920
    self._gate_e[1] = 100 if small else 900

    # MDP properties
    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
    horizon = 5000
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(ShipSteering, self).__init__(mdp_info)
def __init__(self, grid_map_file, height_window=84, width_window=84):
    self.__name__ = 'GridWorldPixelGenerator'

    self.window_size = (width_window, height_window)

    self._symbols = {'.': 0., 'S': 63.75, '*': 127.5, '#': 191.25,
                     'G': 255.}

    self._grid, start, goal = self._generate(grid_map_file)
    self._initial_grid = deepcopy(self._grid)
    height = self._grid.shape[0]
    width = self._grid.shape[1]

    assert height_window % height == 0 and width_window % width == 0

    # MDP properties
    observation_space = spaces.Box(
        low=0., high=255., shape=(self.window_size[1], self.window_size[0]))
    action_space = spaces.Discrete(5)
    horizon = 100
    gamma = .9
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(GridWorldPixelGenerator, self).__init__(mdp_info, height, width,
                                                  start, goal)
def __init__(self, small=True, hard=False):
    """
    Constructor.

    Args:
        small (bool, True): whether to use a small state space or not;
        hard (bool, False): whether to use -100 or -10000 as the reward for
            going outside the field. With the -100 reward the environment
            is considerably harder.

    """
    self.__name__ = 'ShipSteeringStraight'

    # MDP parameters
    self.field_size = 150
    low = np.array([0, 0, -np.pi, -np.pi / 12.])
    high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
    self.omega_max = np.array([np.pi / 12.])
    self._v = 3.
    self._T = 5.
    self._dt = .2
    self.goal_pos = np.array([140, 75])
    self._out_reward = -100 if hard else -10000
    self._success_reward = 100

    # MDP properties
    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
    horizon = 5000
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(ShipSteeringStraight, self).__init__(mdp_info)
def __init__(self, height, width, goal, start=(0, 0)):
    # MDP properties
    observation_space = spaces.Discrete(height * width)
    action_space = spaces.Discrete(4)
    horizon = 100
    gamma = .9
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info, height, width, start, goal)
def __init__(self, start=None, goal=None, goal_threshold=.1, noise_step=.025,
             noise_reward=0, reward_goal=0., thrust=.05, puddle_center=None,
             puddle_width=None, gamma=.99, horizon=5000):
    """
    Constructor.

    Args:
        start (np.array, None): starting position of the agent;
        goal (np.array, None): goal position;
        goal_threshold (float, .1): distance threshold of the agent from the
            goal to consider it reached;
        noise_step (float, .025): noise in actions;
        noise_reward (float, 0): standard deviation of gaussian noise in
            reward;
        reward_goal (float, 0): reward obtained reaching goal state;
        thrust (float, .05): distance walked during each action;
        puddle_center (np.array, None): center of the puddle;
        puddle_width (np.array, None): width of the puddle;
        gamma (float, .99): discount factor;
        horizon (int, 5000): horizon of the problem.

    """
    # MDP parameters
    self._start = np.array([.2, .4]) if start is None else start
    self._goal = np.array([1., 1.]) if goal is None else goal
    self._goal_threshold = goal_threshold
    self._noise_step = noise_step
    self._noise_reward = noise_reward
    self._reward_goal = reward_goal
    self._thrust = thrust
    puddle_center = ([[.3, .6], [.4, .5], [.8, .9]]
                     if puddle_center is None else puddle_center)
    self._puddle_center = [np.array(center) for center in puddle_center]
    puddle_width = ([[.1, .03], [.03, .1], [.03, .1]]
                    if puddle_width is None else puddle_width)
    self._puddle_width = [np.array(width) for width in puddle_width]

    # The four directional actions move by +/- thrust along one axis
    # (i=0: left, i=1: right, i=2: down, i=3: up); the fifth action is left
    # as the zero vector, i.e. a no-op.
    self._actions = [np.zeros(2) for _ in range(5)]
    for i in range(4):
        self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

    # MDP properties
    action_space = Discrete(5)
    observation_space = Box(0., 1., shape=(2,))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._pixels = None
    self._viewer = Viewer(1.0, 1.0)

    super().__init__(mdp_info)
def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
    # MDP properties
    observation_space = spaces.Discrete(height * width)
    action_space = spaces.Discrete(4)
    horizon = np.inf
    gamma = .95
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(GridWorldVanHasselt, self).__init__(mdp_info, height, width,
                                              start, goal)
def build_high_level_agent(alg, params, mdp, epsilon):
    pi = EpsGreedy(epsilon=epsilon)
    mdp_info_high = MDPInfo(observation_space=spaces.Discrete(16),
                            action_space=spaces.Discrete(4),
                            gamma=mdp.info.gamma,
                            horizon=100)

    agent = alg(pi, mdp_info_high, **params)

    return agent
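# Hypothetical call of the builder above, assuming a tabular algorithm such
# as mushroom's QLearning; the epsilon and learning-rate values are
# illustrative.
from mushroom.algorithms.value import QLearning
from mushroom.utils.parameters import Parameter

params = dict(learning_rate=Parameter(value=.1))
agent_high = build_high_level_agent(QLearning, params, mdp,
                                    epsilon=Parameter(value=.1))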
def __init__(self, name, width=84, height=84, ends_at_life=False,
             max_pooling=True, history_length=4, max_no_op_actions=30):
    """
    Constructor.

    Args:
        name (str): id name of the Atari game in Gym;
        width (int, 84): width of the screen;
        height (int, 84): height of the screen;
        ends_at_life (bool, False): whether the episode ends when a life is
            lost or not;
        max_pooling (bool, True): whether to do max-pooling or
            average-pooling of the last two frames when using NoFrameskip;
        history_length (int, 4): number of frames to form a state;
        max_no_op_actions (int, 30): maximum number of no-op actions to
            execute at the beginning of an episode.

    """
    # MDP creation
    if 'NoFrameskip' in name:
        self.env = MaxAndSkip(gym.make(name), history_length, max_pooling)
    else:
        self.env = gym.make(name)

    # MDP parameters
    self.img_size = (width, height)
    self._episode_ends_at_life = ends_at_life
    self._max_lives = self.env.unwrapped.ale.lives()
    self._lives = self._max_lives
    self._force_fire = None
    self._real_reset = True
    self._max_no_op_actions = max_no_op_actions
    self._history_length = history_length
    self._current_no_op = None

    assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP'

    # MDP properties
    action_space = Discrete(self.env.action_space.n)
    observation_space = Box(
        low=0., high=255., shape=(self.img_size[1], self.img_size[0]))
    horizon = np.inf  # the gym time limit is used.
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
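# Typical construction, assuming this constructor belongs to the Atari class
# used elsewhere in this file; the game id is illustrative.
mdp = Atari('BreakoutNoFrameskip-v4', width=84, height=84, ends_at_life=True,
            history_length=4, max_no_op_actions=30)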
def build_high_level_agent(alg, params, mdp):
    epsilon = Parameter(value=0.1)
    pi = EpsGreedy(epsilon=epsilon)
    gamma = 1.0
    mdp_info_agentH = MDPInfo(observation_space=spaces.Discrete(400),
                              action_space=spaces.Discrete(8),
                              gamma=gamma,
                              horizon=10000)

    agent = alg(policy=pi, mdp_info=mdp_info_agentH, **params)

    return agent
def __init__(self, grid_map):
    self.__name__ = 'GridWorldGenerator'

    self._grid, height, width, start, goal = self._generate(grid_map)

    # MDP properties
    observation_space = spaces.Discrete(height * width)
    action_space = spaces.Discrete(4)
    horizon = 100
    gamma = .9
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(GridWorldGenerator, self).__init__(mdp_info, height, width, start,
                                             goal)
def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf,
             random_init=False, episodic=False, gamma=0.9, horizon=50):
    """
    Constructor.

    Args:
        A (np.ndarray): the state dynamics matrix;
        B (np.ndarray): the action dynamics matrix;
        Q (np.ndarray): reward weight matrix for state;
        R (np.ndarray): reward weight matrix for action;
        max_pos (float, np.inf): maximum value of the state;
        max_action (float, np.inf): maximum value of the action;
        random_init (bool, False): start from a random state;
        episodic (bool, False): end the episode when the state goes over
            the threshold;
        gamma (float, 0.9): discount factor;
        horizon (int, 50): horizon of the mdp.

    """
    self.A = A
    self.B = B
    self.Q = Q
    self.R = R
    self._max_pos = max_pos
    self._max_action = max_action
    self._episodic = episodic
    self.random_init = random_init

    # MDP properties
    high_x = self._max_pos * np.ones(A.shape[0])
    low_x = -high_x

    # The action dimension is the number of columns of B (x' = Ax + Bu).
    high_u = self._max_action * np.ones(B.shape[1])
    low_u = -high_u

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50., noise_u=10.,
             horizon=3000, gamma=.95):
    """
    Constructor.

    Args:
        m (float, 2.0): mass of the pendulum;
        M (float, 8.0): mass of the cart;
        l (float, .5): length of the pendulum;
        g (float, 9.8): gravity acceleration constant;
        mu (float, 1e-2): friction constant of the pendulum;
        max_u (float, 50.): maximum allowed input torque;
        noise_u (float, 10.): maximum noise on the action;
        horizon (int, 3000): horizon of the problem;
        gamma (float, .95): discount factor.

    """
    # MDP parameters
    self._m = m
    self._M = M
    self._l = l
    self._g = g
    self._alpha = 1 / (self._m + self._M)
    self._mu = mu
    self._dt = .1
    self._max_u = max_u
    self._noise_u = noise_u
    high = np.array([np.inf, np.inf])

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(3)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(2.5 * l, 2.5 * l)
    self._last_u = None
    self._state = None

    super().__init__(mdp_info)
def build_high_level_agent(alg, params, mdp, mu, sigma):
    features = Features(basis_list=[PolynomialBasis()])
    approximator = Regressor(LinearApproximator,
                             input_shape=(features.size,),
                             output_shape=(2,))
    approximator.set_weights(mu)

    pi1 = DiagonalGaussianPolicy(mu=approximator, std=sigma)

    lim = mdp.info.observation_space.high[0]
    mdp_info_agent = MDPInfo(observation_space=mdp.info.observation_space,
                             action_space=spaces.Box(0, lim, (2,)),
                             gamma=1.0,
                             horizon=100)
    agent = alg(pi1, mdp_info_agent, features=features, **params)

    return agent
def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2,
             max_u=5., horizon=5000, gamma=.99):
    """
    Constructor.

    Args:
        random_start (bool, False): whether to start from a random position
            or from the horizontal one;
        m (float, 1.0): mass of the pendulum;
        l (float, 1.0): length of the pendulum;
        g (float, 9.8): gravity acceleration constant;
        mu (float, 1e-2): friction constant of the pendulum;
        max_u (float, 5.0): maximum allowed input torque;
        horizon (int, 5000): horizon of the problem;
        gamma (float, .99): discount factor.

    """
    # MDP parameters
    self._m = m
    self._l = l
    self._g = g
    self._mu = mu
    self._random = random_start
    self._dt = .01
    self._max_u = max_u
    self._max_omega = 5 / 2 * np.pi
    high = np.array([np.pi, self._max_omega])

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Box(low=np.array([-max_u]),
                              high=np.array([max_u]))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(2.5 * l, 2.5 * l)
    self._last_u = None

    super().__init__(mdp_info)
def build_low_level_agent(alg, params, mdp):
    features = Features(basis_list=[PolynomialBasis(dimensions=[0],
                                                    degrees=[1])])

    pi = DeterministicControlPolicy(weights=np.array([0]))
    mu = np.zeros(pi.weights_size)
    sigma = 1e-3 * np.ones(pi.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    mdp_info_agent2 = MDPInfo(observation_space=spaces.Box(-np.pi, np.pi,
                                                           (1,)),
                              action_space=mdp.info.action_space,
                              gamma=mdp.info.gamma,
                              horizon=100)
    agent = alg(distribution, pi, mdp_info_agent2, features=features,
                **params)

    return agent
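# Hypothetical call of the builder above, assuming a black-box policy-search
# algorithm such as mushroom's PGPE; the learning rate is illustrative.
from mushroom.algorithms.policy_search import PGPE
from mushroom.utils.parameters import AdaptiveParameter

params = dict(learning_rate=AdaptiveParameter(value=1.5e-3))
agent_low = build_low_level_agent(PGPE, params, mdp)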
def build_high_level_agent(alg, params, mdp, mu, std):
    tilings = Tiles.generate(n_tilings=1, n_tiles=[10, 10],
                             low=mdp.info.observation_space.low[:2],
                             high=mdp.info.observation_space.high[:2])
    features = Features(tilings=tilings)

    input_shape = (features.size,)

    mu_approximator = Regressor(LinearApproximator,
                                input_shape=input_shape,
                                output_shape=(1,))
    std_approximator = Regressor(LinearApproximator,
                                 input_shape=input_shape,
                                 output_shape=(1,))

    w_mu = mu * np.ones(mu_approximator.weights_size)
    mu_approximator.set_weights(w_mu)

    # Set the std weights on the std approximator (not the mu one, which
    # would overwrite w_mu and leave the std regressor uninitialized).
    w_std = std * np.ones(std_approximator.weights_size)
    std_approximator.set_weights(w_std)

    pi = StateLogStdGaussianPolicy(mu=mu_approximator,
                                   log_std=std_approximator)

    obs_low = np.array([mdp.info.observation_space.low[0],
                        mdp.info.observation_space.low[1]])
    obs_high = np.array([mdp.info.observation_space.high[0],
                         mdp.info.observation_space.high[1]])
    mdp_info_agent1 = MDPInfo(
        observation_space=spaces.Box(obs_low, obs_high, shape=(2,)),
        action_space=spaces.Box(mdp.info.observation_space.low[2],
                                mdp.info.observation_space.high[2],
                                shape=(1,)),
        gamma=1,
        horizon=10)
    agent = alg(policy=pi, mdp_info=mdp_info_agent1, features=features,
                **params)

    return agent
def __init__(self, random_start=False, goal_distance=1.0):
    """
    Constructor.

    Args:
        random_start (bool, False): whether to start from a random position
            or from the horizontal one;
        goal_distance (float, 1.0): distance of the goal from the starting
            position.

    """
    # MDP parameters
    gamma = 0.99

    self.Mr = 0.3 * 2
    self.Mp = 2.55
    self.Ip = 2.6e-2
    self.Ir = 4.54e-4 * 2
    self.l = 13.8e-2
    self.r = 5.5e-2
    self.dt = 1e-2
    self.g = 9.81
    self.max_u = 5

    self._random = random_start
    self._goal_distance = goal_distance

    high = np.array([2 * self._goal_distance, np.pi, 15, 75])

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Box(low=np.array([-self.max_u]),
                              high=np.array([self.max_u]))
    horizon = 1500
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    env_width = 4 * goal_distance
    env_height = 2.5 * 2 * self.l
    width = 800
    height = int(width * env_height / env_width)

    self._viewer = Viewer(env_width, env_height, width, height)

    super(SegwayLinearMotion, self).__init__(mdp_info)
def __init__(self, name, width=84, height=84, ends_at_life=False,
             max_pooling=True):
    """
    Constructor.

    Args:
        name (str): id name of the Atari game in Gym;
        width (int, 84): width of the screen;
        height (int, 84): height of the screen;
        ends_at_life (bool, False): whether the episode ends when a life is
            lost or not;
        max_pooling (bool, True): whether to do max-pooling or
            average-pooling of the last two frames when using NoFrameskip.

    """
    # MDP creation
    if 'NoFrameskip' in name:
        skip = 3 if 'SpaceInvaders' in name else 4
        self.env = MaxAndSkip(gym.make(name), skip, max_pooling)
    else:
        self.env = gym.make(name)

    # MDP parameters
    self.img_size = (width, height)
    self._episode_ends_at_life = ends_at_life
    self._max_lives = self.env.unwrapped.ale.lives()
    self._lives = self._max_lives
    self._force_fire = None
    self._real_reset = True

    # MDP properties
    action_space = Discrete(self.env.action_space.n)
    observation_space = Box(
        low=0., high=255., shape=(self.img_size[1], self.img_size[0]))
    horizon = np.inf
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(Atari, self).__init__(mdp_info)
def __init__(self, small=True, n_steps_action=3):
    """
    Constructor.

    Args:
        small (bool, True): whether to use a small state space or not;
        n_steps_action (int, 3): number of integration intervals for each
            step of the mdp.

    """
    # MDP parameters
    self.field_size = 150 if small else 1000
    low = np.array([0, 0, -np.pi, -np.pi / 12.])
    high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
    self.omega_max = np.array([np.pi / 12.])
    self._v = 3.
    self._T = 5.
    self._dt = .2
    self._gate_s = np.empty(2)
    self._gate_e = np.empty(2)
    self._gate_s[0] = 100 if small else 350
    self._gate_s[1] = 120 if small else 400
    self._gate_e[0] = 120 if small else 450
    self._gate_e[1] = 100 if small else 400
    self._out_reward = -100
    self._success_reward = 0
    self._small = small
    self._state = None
    self.n_steps_action = n_steps_action

    # MDP properties
    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
    horizon = 5000
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(self.field_size, self.field_size,
                          background=(66, 131, 237))

    super(ShipSteering, self).__init__(mdp_info)
def __init__(self):
    self.__name__ = 'CarOnHill'

    # MDP parameters
    self.max_pos = 1.
    self.max_velocity = 3.
    high = np.array([self.max_pos, self.max_velocity])
    self._g = 9.81
    self._m = 1
    self._dt = .1
    self._discrete_actions = [-4., 4.]

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(2)
    horizon = 100
    gamma = .95
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super(CarOnHill, self).__init__(mdp_info)
def __init__(self, horizon=100, gamma=.95):
    """
    Constructor.

    Args:
        horizon (int, 100): horizon of the problem;
        gamma (float, .95): discount factor.

    """
    # MDP parameters
    self.max_pos = 1.
    self.max_velocity = 3.
    high = np.array([self.max_pos, self.max_velocity])
    self._g = 9.81
    self._m = 1.
    self._dt = .1
    self._discrete_actions = [-4., 4.]

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(2)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
def build_agent_low(alg, params, std, mdp):
    approximator = Regressor(LinearApproximator,
                             input_shape=(3,),
                             output_shape=(1,))

    n_weights = approximator.weights_size
    mu = np.zeros(n_weights)
    sigma = std * np.ones(n_weights)

    pi = DeterministicControlPolicy(approximator)
    dist = GaussianDiagonalDistribution(mu, sigma)

    # Agent Low
    mdp_info = MDPInfo(
        observation_space=spaces.Box(
            low=mdp.info.observation_space.low[1:],  # FIXME FALSE
            high=mdp.info.observation_space.high[1:],  # FIXME FALSE
        ),
        action_space=mdp.info.action_space,
        gamma=mdp.info.gamma,
        horizon=mdp.info.horizon)

    return alg(dist, pi, mdp_info, **params)