def build_low_level_ghavamzadeh(alg, params, mdp):
    # FeaturesL
    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 10]
    low = np.array(low, dtype=float)
    high = np.array(high, dtype=float)
    n_tilings = 3

    tilingsL = Tiles.generate(n_tilings=n_tilings,
                              n_tiles=n_tiles,
                              low=low,
                              high=high)

    featuresL = Features(tilings=tilingsL)

    mdp_info_agentL = MDPInfo(observation_space=spaces.Box(
        low=np.array([0, 0]), high=np.array([150, 150]), shape=(2, )),
                              action_space=mdp.info.action_space,
                              gamma=0.99,
                              horizon=10000)

    input_shape = (featuresL.size, )
    approximator = Regressor(LinearApproximator,
                             input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    std = np.array([3e-2])
    pi = DiagonalGaussianPolicy(mu=approximator, std=std)

    agent = alg(pi, mdp_info_agentL, features=featuresL, **params)

    return agent
Example #2
    def __init__(self, name, horizon, gamma):
        self.__name__ = name

        # MDP creation
        self.env = gym.make(self.__name__)

        self.env._max_episode_steps = np.inf  # Hack to ignore gym time limit.

        # MDP properties
        assert not isinstance(self.env.observation_space,
                              gym_spaces.MultiDiscrete)
        assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

        action_space = self._convert_gym_space(self.env.action_space)
        observation_space = self._convert_gym_space(self.env.observation_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = self._convert_action_function
        else:
            self._convert_action = self._no_convert

        if isinstance(observation_space,
                      Discrete) and len(observation_space.size) > 1:
            self._convert_state = self._convert_state_function
        else:
            self._convert_state = self._no_convert

        super(Gym, self).__init__(mdp_info)
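A minimal usage sketch of this Gym wrapper, assuming the `mushroom.environments.Gym` import path and the 'Pendulum-v0' environment id (neither appears in the snippet):

# Minimal usage sketch; the import path and the environment id are assumptions.
from mushroom.environments import Gym

env = Gym(name='Pendulum-v0', horizon=200, gamma=0.99)
state = env.reset()  # reset the wrapped environment and get the initial state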
Example #3
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([5.0, 5.0, np.pi])
        low_x = -high_x

        high_u = np.array([1.0, 3.0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 400

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz, **kwargs)

        # subscribe to /cmd_vel topic to publish the setpoint
        self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # subscribe to /gazebo/model_states to get the position of the turtlebot
        model_state_service_name = '/gazebo/get_model_state'
        rospy.wait_for_service(model_state_service_name)
        self._model_state_service = rospy.ServiceProxy(model_state_service_name, GetModelState)
Example #4
def build_discretized_agent(alg, params, n, optim, loss, mdp, eps, n_features,
                            use_cuda):
    high = mdp.info.observation_space.high
    low = mdp.info.observation_space.low

    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Discrete(n)

    mdp_info = MDPInfo(observation_space=observation_space,
                       action_space=action_space,
                       gamma=mdp.info.gamma,
                       horizon=mdp.info.horizon)

    pi = Boltzmann(eps)

    approximator_params = dict(network=Network,
                               optimizer=optim,
                               loss=loss,
                               n_features=n_features,
                               input_shape=mdp_info.observation_space.shape,
                               output_shape=mdp_info.action_space.size,
                               n_actions=mdp_info.action_space.n,
                               use_cuda=use_cuda)

    agent = alg(PyTorchApproximator,
                pi,
                mdp_info,
                approximator_params=approximator_params,
                **params)

    return agent
Example #5
    def __init__(self, n_steps_action=3, viz_speed=100, small=False):

        self.__name__ = 'ShipSteeringMultiGate'
        self.n_steps_action = n_steps_action
        self.viz_speed = viz_speed

        # MDP parameters
        self.no_of_gates = 4

        self.small = small

        self.field_size = 500 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12., 0])
        high = np.array([
            self.field_size, self.field_size, np.pi, np.pi / 12.,
            self.no_of_gates
        ])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2

        gate_1s = np.array([75, 175]) if small else np.array([150, 350])
        gate_1e = np.array([125, 175]) if small else np.array([250, 350])

        gate_1 = np.array([gate_1s, gate_1e])

        gate_2s = np.array([150, 300]) if small else np.array([300, 600])
        gate_2e = np.array([200, 300]) if small else np.array([400, 600])

        gate_2 = np.array([gate_2s, gate_2e])

        gate_3s = np.array([250, 350]) if small else np.array([500, 700])
        gate_3e = np.array([300, 350]) if small else np.array([600, 700])

        gate_3 = np.array([gate_3s, gate_3e])

        gate_4s = np.array([150, 425]) if small else np.array([300, 850])
        gate_4e = np.array([200, 425]) if small else np.array([400, 850])

        gate_4 = np.array([gate_4s, gate_4e])

        self._gate_list = gate_1, gate_2, gate_3, gate_4

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        self._out_reward = -10000
        self.correct_order = False

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size,
                              self.field_size,
                              background=(66, 131, 237))

        super(ShipSteeringMultiGate, self).__init__(mdp_info)
Example #6
    def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None,
                 dt=.01, width_screen=480, height_screen=480, camera_id=0):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;

        """
        # MDP creation
        if task_kwargs is None:
            task_kwargs = dict()
        task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)
Example #7
    def __init__(self, name, width=84, height=84, ends_at_life=False):
        """
        Constructor.

        Args:
             name (str): id name of the Atari game in Gym;
             width (int, 84): width of the screen;
             height (int, 84): height of the screen;
             ends_at_life (bool, False): whether the episode ends when a life is
                lost or not.

        """
        self.__name__ = name

        # MDP creation
        self.env = gym.make(self.__name__)

        # MDP parameters
        self.img_size = (width, height)
        self._episode_ends_at_life = ends_at_life
        self._max_lives = self.env.env.ale.lives()
        self._lives = self._max_lives

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(self.img_size[1], self.img_size[0]))
        horizon = np.inf
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(Atari, self).__init__(mdp_info)
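A usage sketch for this Atari wrapper; the game id below is an assumption and not taken from the snippet:

# Usage sketch; the game id is an assumption.
env = Atari('BreakoutDeterministic-v4', width=84, height=84, ends_at_life=True)
print(env.info.observation_space.shape)  # -> (84, 84), i.e. (height, width)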
Example #8
    def __init__(self, A, B, Q, R, random_init=False, gamma=0.9, horizon=50):
        """
        Constructor.

            Args:
                A (np.ndarray): the state dynamics matrix;
                B (np.ndarray): the action dynamics matrix;
                Q (np.ndarray): reward weight matrix for state;
                R (np.ndarray): reward weight matrix for action;
                random_init (bool, False): start from a random state;
                gamma (float, 0.9): discount factor;
                horizon (int, 50): horizon of the MDP.

        """
        self.A = A
        self.B = B
        self.Q = Q
        self.R = R
        self.random_init = random_init

        # MDP properties
        high_x = np.inf * np.ones(A.shape[0])
        low_x = -high_x

        high_u = np.inf * np.ones(B.shape[0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(LQR, self).__init__(mdp_info)
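A usage sketch for this LQR environment; the identity dynamics and cost matrices below are illustrative assumptions, not values from the snippet:

import numpy as np

# Two-dimensional LQR instance; A, B, Q, R are chosen only for illustration.
A = np.eye(2)
B = np.eye(2)
Q = np.eye(2)
R = 0.1 * np.eye(2)
mdp = LQR(A, B, Q, R, random_init=True, gamma=0.9, horizon=50)
print(mdp.info.observation_space.shape)  # -> (2,)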
Example #9
    def __init__(self, small=True):
        self.__name__ = 'ShipSteering'

        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self._gate_s = np.empty(2)
        self._gate_e = np.empty(2)
        self._gate_s[0] = 100 if small else 900
        self._gate_s[1] = 120 if small else 920
        self._gate_e[0] = 120 if small else 920
        self._gate_e[1] = 100 if small else 900

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(ShipSteering, self).__init__(mdp_info)
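A usage sketch for this ShipSteering environment; the rudder command passed to step is an arbitrary in-range value (an assumption, within ±omega_max):

import numpy as np

# Usage sketch; the action below is an arbitrary in-range rudder command.
mdp = ShipSteering(small=True)
state = mdp.reset()
next_state, reward, absorbing, _ = mdp.step(np.array([0.01]))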
Example #10
    def __init__(self, grid_map_file, height_window=84, width_window=84):
        self.__name__ = 'GridWorldPixelGenerator'

        self.window_size = (width_window, height_window)

        self._symbols = {
            '.': 0.,
            'S': 63.75,
            '*': 127.5,
            '#': 191.25,
            'G': 255.
        }

        self._grid, start, goal = self._generate(grid_map_file)
        self._initial_grid = deepcopy(self._grid)
        height = self._grid.shape[0]
        width = self._grid.shape[1]

        assert height_window % height == 0 and width_window % width == 0

        # MDP properties
        observation_space = spaces.Box(low=0.,
                                       high=255.,
                                       shape=(self.window_size[1],
                                              self.window_size[0]))
        action_space = spaces.Discrete(5)
        horizon = 100
        gamma = .9
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(GridWorldPixelGenerator, self).__init__(mdp_info, height, width,
                                                      start, goal)
Example #11
    def __init__(self, small=True, hard=False):
        """
        Constructor.

        Args:
             small (bool, True): whether to use a small state space or not.
             hard (bool, False): whether to use -100 as reward for going
                                 outside or -10000. With -100 reward the
                                 environment is considerably harder.

        """
        self.__name__ = 'ShipSteeringStraight'

        # MDP parameters
        self.field_size = 150
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self.goal_pos = np.array([140, 75])
        self._out_reward = -100 if hard else -10000
        self._success_reward = 100

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(ShipSteeringStraight, self).__init__(mdp_info)
Example #12
    def __init__(self, height, width, goal, start=(0, 0)):
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = 100
        gamma = .9
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info, height, width, start, goal)
Example #13
    def __init__(self,
                 start=None,
                 goal=None,
                 goal_threshold=.1,
                 noise_step=.025,
                 noise_reward=0,
                 reward_goal=0.,
                 thrust=.05,
                 puddle_center=None,
                 puddle_width=None,
                 gamma=.99,
                 horizon=5000):
        """
        Constructor.

        Args:
            start (np.array, None): starting position of the agent;
            goal (np.array, None): goal position;
            goal_threshold (float, .1): distance threshold of the agent from the
                goal to consider it reached;
            noise_step (float, .025): noise in actions;
            noise_reward (float, 0): standard deviation of the Gaussian noise
                on the reward;
            reward_goal (float, 0): reward obtained reaching the goal state;
            thrust (float, .05): distance walked during each action;
            puddle_center (np.array, None): center of the puddle;
            puddle_width (np.array, None): width of the puddle;
            gamma (float, .99): discount factor;
            horizon (int, 5000): horizon of the problem.

        """
        # MDP parameters
        self._start = np.array([.2, .4]) if start is None else start
        self._goal = np.array([1., 1.]) if goal is None else goal
        self._goal_threshold = goal_threshold
        self._noise_step = noise_step
        self._noise_reward = noise_reward
        self._reward_goal = reward_goal
        self._thrust = thrust
        puddle_center = [[.3, .6], [.4, .5], [.8, .9]
                         ] if puddle_center is None else puddle_center
        self._puddle_center = [np.array(center) for center in puddle_center]
        puddle_width = [[.1, .03], [.03, .1], [.03, .1]
                        ] if puddle_width is None else puddle_width
        self._puddle_width = [np.array(width) for width in puddle_width]

        # Four directional moves (±thrust along x, then ±thrust along y) plus
        # a fifth no-op action that leaves the agent in place.
        self._actions = [np.zeros(2) for _ in range(5)]
        for i in range(4):
            self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

        # MDP properties
        action_space = Discrete(5)
        observation_space = Box(0., 1., shape=(2, ))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._pixels = None
        self._viewer = Viewer(1.0, 1.0)

        super().__init__(mdp_info)
Example #14
    def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = np.inf
        gamma = .95
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(GridWorldVanHasselt, self).__init__(mdp_info, height, width,
                                                  start, goal)
Example #15
def build_high_level_agent(alg, params, mdp, epsilon):
    pi = EpsGreedy(epsilon=epsilon)
    mdp_info_high = MDPInfo(observation_space=spaces.Discrete(16),
                            action_space=spaces.Discrete(4),
                            gamma=mdp.info.gamma,
                            horizon=100)

    agent = alg(pi, mdp_info_high, **params)

    return agent
Example #16
File: atari.py, Project: yushu-liu/mushroom
    def __init__(self,
                 name,
                 width=84,
                 height=84,
                 ends_at_life=False,
                 max_pooling=True,
                 history_length=4,
                 max_no_op_actions=30):
        """
        Constructor.

        Args:
            name (str): id name of the Atari game in Gym;
            width (int, 84): width of the screen;
            height (int, 84): height of the screen;
            ends_at_life (bool, False): whether the episode ends when a life is
               lost or not;
            max_pooling (bool, True): whether to do max-pooling or
                average-pooling of the last two frames when using NoFrameskip;
            history_length (int, 4): number of frames to form a state;
            max_no_op_actions (int, 30): maximum number of no-op action to
                execute at the beginning of an episode.

        """
        # MDP creation
        if 'NoFrameskip' in name:
            self.env = MaxAndSkip(gym.make(name), history_length, max_pooling)
        else:
            self.env = gym.make(name)

        # MDP parameters
        self.img_size = (width, height)
        self._episode_ends_at_life = ends_at_life
        self._max_lives = self.env.unwrapped.ale.lives()
        self._lives = self._max_lives
        self._force_fire = None
        self._real_reset = True
        self._max_no_op_actions = max_no_op_actions
        self._history_length = history_length
        self._current_no_op = None

        assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP'

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(self.img_size[1], self.img_size[0]))
        horizon = np.inf  # the gym time limit is used.
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Example #17
def build_high_level_agent(alg, params, mdp):
    epsilon = Parameter(value=0.1)
    pi = EpsGreedy(epsilon=epsilon)
    gamma = 1.0
    mdp_info_agentH = MDPInfo(observation_space=spaces.Discrete(400),
                              action_space=spaces.Discrete(8),
                              gamma=gamma,
                              horizon=10000)

    agent = alg(policy=pi, mdp_info=mdp_info_agentH, **params)

    return agent
Example #18
    def __init__(self, grid_map):
        self.__name__ = 'GridWorldGenerator'

        self._grid, height, width, start, goal = self._generate(grid_map)

        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = 100
        gamma = .9
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(GridWorldGenerator, self).__init__(mdp_info, height, width,
                                                 start, goal)
Example #19
File: lqr.py, Project: Capri2014/mushroom
    def __init__(self,
                 A,
                 B,
                 Q,
                 R,
                 max_pos=np.inf,
                 max_action=np.inf,
                 random_init=False,
                 episodic=False,
                 gamma=0.9,
                 horizon=50):
        """
        Constructor.

            Args:
                A (np.ndarray): the state dynamics matrix;
                B (np.ndarray): the action dynamics matrix;
                Q (np.ndarray): reward weight matrix for state;
                R (np.ndarray): reward weight matrix for action;
                max_pos (float, np.inf): maximum value of the state;
                max_action (float, np.inf): maximum value of the action;
                random_init (bool, False): start from a random state;
                episodic (bool, False): end the episode when the state goes over
                the threshold;
                gamma (float, 0.9): discount factor;
                horizon (int, 50): horizon of the MDP.

        """
        self.A = A
        self.B = B
        self.Q = Q
        self.R = R
        self._max_pos = max_pos
        self._max_action = max_action
        self._episodic = episodic
        self.random_init = random_init

        # MDP properties
        high_x = self._max_pos * np.ones(A.shape[0])
        low_x = -high_x

        high_u = self._max_action * np.ones(B.shape[0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Example #20
    def __init__(self,
                 m=2.,
                 M=8.,
                 l=.5,
                 g=9.8,
                 mu=1e-2,
                 max_u=50.,
                 noise_u=10.,
                 horizon=3000,
                 gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._alpha = 1 / (self._m + self._M)
        self._mu = mu
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u
        high = np.array([np.inf, np.inf])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)
Example #21
def build_high_level_agent(alg, params, mdp, mu, sigma):
    features = Features(basis_list=[PolynomialBasis()])
    approximator = Regressor(LinearApproximator,
                             input_shape=(features.size, ),
                             output_shape=(2, ))
    approximator.set_weights(mu)

    pi1 = DiagonalGaussianPolicy(mu=approximator, std=sigma)

    lim = mdp.info.observation_space.high[0]
    mdp_info_agent = MDPInfo(observation_space=mdp.info.observation_space,
                             action_space=spaces.Box(0, lim, (2, )),
                             gamma=1.0,
                             horizon=100)
    agent = alg(pi1, mdp_info_agent, features=features, **params)

    return agent
Example #22
    def __init__(self,
                 random_start=False,
                 m=1.,
                 l=1.,
                 g=9.8,
                 mu=1e-2,
                 max_u=5.,
                 horizon=5000,
                 gamma=.99):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one;
            m (float, 1.0): mass of the pendulum;
            l (float, 1.0): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 5.0): maximum allowed input torque;
            horizon (int, 5000): horizon of the problem;
            gamma (float, .99): discount factor.

        """
        # MDP parameters
        self._m = m
        self._l = l
        self._g = g
        self._mu = mu
        self._random = random_start
        self._dt = .01
        self._max_u = max_u
        self._max_omega = 5 / 2 * np.pi
        high = np.array([np.pi, self._max_omega])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-max_u]),
                                  high=np.array([max_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None

        super().__init__(mdp_info)
Example #23
def build_low_level_agent(alg, params, mdp):
    features = Features(
        basis_list=[PolynomialBasis(dimensions=[0], degrees=[1])])

    pi = DeterministicControlPolicy(weights=np.array([0]))
    mu = np.zeros(pi.weights_size)
    sigma = 1e-3 * np.ones(pi.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    mdp_info_agent2 = MDPInfo(observation_space=spaces.Box(
        -np.pi, np.pi, (1, )),
                              action_space=mdp.info.action_space,
                              gamma=mdp.info.gamma,
                              horizon=100)
    agent = alg(distribution, pi, mdp_info_agent2, features=features, **params)

    return agent
Example #24
def build_high_level_agent(alg, params, mdp, mu, std):
    tilings = Tiles.generate(n_tilings=1,
                             n_tiles=[10, 10],
                             low=mdp.info.observation_space.low[:2],
                             high=mdp.info.observation_space.high[:2])
    features = Features(tilings=tilings)

    input_shape = (features.size, )

    mu_approximator = Regressor(LinearApproximator,
                                input_shape=input_shape,
                                output_shape=(1, ))
    std_approximator = Regressor(LinearApproximator,
                                 input_shape=input_shape,
                                 output_shape=(1, ))

    w_mu = mu * np.ones(mu_approximator.weights_size)
    mu_approximator.set_weights(w_mu)

    w_std = std * np.ones(std_approximator.weights_size)
    std_approximator.set_weights(w_std)

    pi = StateLogStdGaussianPolicy(mu=mu_approximator,
                                   log_std=std_approximator)

    obs_low = np.array(
        [mdp.info.observation_space.low[0], mdp.info.observation_space.low[1]])
    obs_high = np.array([
        mdp.info.observation_space.high[0], mdp.info.observation_space.high[1]
    ])
    mdp_info_agent1 = MDPInfo(observation_space=spaces.Box(obs_low,
                                                           obs_high,
                                                           shape=(2, )),
                              action_space=spaces.Box(
                                  mdp.info.observation_space.low[2],
                                  mdp.info.observation_space.high[2],
                                  shape=(1, )),
                              gamma=1,
                              horizon=10)
    agent = alg(policy=pi,
                mdp_info=mdp_info_agent1,
                features=features,
                **params)

    return agent
Example #25
    def __init__(self, random_start=False, goal_distance=1.0):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one;
            goal_distance (float, 1.0): distance of the goal from the starting
                position.

        """
        # MDP parameters

        gamma = 0.99

        self.Mr = 0.3 * 2
        self.Mp = 2.55
        self.Ip = 2.6e-2
        self.Ir = 4.54e-4 * 2
        self.l = 13.8e-2
        self.r = 5.5e-2
        self.dt = 1e-2
        self.g = 9.81
        self.max_u = 5

        self._random = random_start
        self._goal_distance = goal_distance

        high = np.array([2 * self._goal_distance, np.pi, 15, 75])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-self.max_u]),
                                  high=np.array([self.max_u]))
        horizon = 1500
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        env_width = 4 * goal_distance
        env_height = 2.5 * 2 * self.l
        width = 800
        height = int(width * env_height / env_width)

        self._viewer = Viewer(env_width, env_height, width, height)

        super(SegwayLinearMotion, self).__init__(mdp_info)
Example #26
    def __init__(self,
                 name,
                 width=84,
                 height=84,
                 ends_at_life=False,
                 max_pooling=True):
        """
        Constructor.

        Args:
            name (str): id name of the Atari game in Gym;
            width (int, 84): width of the screen;
            height (int, 84): height of the screen;
            ends_at_life (bool, False): whether the episode ends when a life is
               lost or not;
            max_pooling (bool, True): whether to do max-pooling or
                average-pooling of the last two frames when using NoFrameskip.

        """
        # MDP creation
        if 'NoFrameskip' in name:
            skip = 3 if 'SpaceInvaders' in name else 4
            self.env = MaxAndSkip(gym.make(name), skip, max_pooling)
        else:
            self.env = gym.make(name)

        # MDP parameters
        self.img_size = (width, height)
        self._episode_ends_at_life = ends_at_life
        self._max_lives = self.env.unwrapped.ale.lives()
        self._lives = self._max_lives
        self._force_fire = None
        self._real_reset = True

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(self.img_size[1], self.img_size[0]))
        horizon = np.inf
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(Atari, self).__init__(mdp_info)
Example #27
    def __init__(self, small=True, n_steps_action=3):
        """
        Constructor.

        Args:
             small (bool, True): whether to use a small state space or not.
             n_steps_action (int, 3): number of integration intervals for each
                                      step of the mdp.

        """
        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self._gate_s = np.empty(2)
        self._gate_e = np.empty(2)
        self._gate_s[0] = 100 if small else 350
        self._gate_s[1] = 120 if small else 400
        self._gate_e[0] = 120 if small else 450
        self._gate_e[1] = 100 if small else 400
        self._out_reward = -100
        self._success_reward = 0
        self._small = small
        self._state = None
        self.n_steps_action = n_steps_action

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size,
                              self.field_size,
                              background=(66, 131, 237))

        super(ShipSteering, self).__init__(mdp_info)
Example #28
    def __init__(self):
        self.__name__ = 'CarOnHill'

        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = 9.81
        self._m = 1
        self._dt = .1
        self._discrete_actions = [-4., 4.]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        horizon = 100
        gamma = .95
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super(CarOnHill, self).__init__(mdp_info)
Example #29
    def __init__(self, horizon=100, gamma=.95):
        """
        Constructor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = 9.81
        self._m = 1.
        self._dt = .1
        self._discrete_actions = [-4., 4.]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Example #30
def build_agent_low(alg, params, std, mdp):
    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             output_shape=(1, ))
    n_weights = approximator.weights_size
    mu = np.zeros(n_weights)
    sigma = std * np.ones(n_weights)
    pi = DeterministicControlPolicy(approximator)
    dist = GaussianDiagonalDistribution(mu, sigma)

    # Agent Low
    mdp_info = MDPInfo(
        observation_space=spaces.Box(
            low=mdp.info.observation_space.low[1:],  # FIXME FALSE
            high=mdp.info.observation_space.high[1:],  # FIXME FALSE
        ),
        action_space=mdp.info.action_space,
        gamma=mdp.info.gamma,
        horizon=mdp.info.horizon)

    return alg(dist, pi, mdp_info, **params)