Ejemplo n.º 1
0
    def __init__(self, name, horizon, gamma):
        """
        Constructor.

        Args:
             name (str): gym id of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor.

        """
        # MDP creation
        if '- ' + name in pybullet_envs.getList():
            import pybullet
            pybullet.connect(pybullet.DIRECT)

        self.env = gym.make(name)

        self.env._max_episode_steps = np.inf  # Hack to ignore gym time limit.

        # MDP properties
        assert not isinstance(self.env.observation_space,
                              gym_spaces.MultiDiscrete)
        assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

        action_space = self._convert_gym_space(self.env.action_space)
        observation_space = self._convert_gym_space(self.env.observation_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        super().__init__(mdp_info)
Ejemplo n.º 2
0
    def __init__(self, horizon=100, gamma=.95):
        """
        Constructor.

        Args:
            horizon (int, 100): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = 9.81
        self._m = 1.
        self._dt = .1
        self._discrete_actions = [-4., 4.]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(1, 1)

        super().__init__(mdp_info)
Ejemplo n.º 3
0
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([5.0, 5.0, np.pi])
        low_x = -high_x

        high_u = np.array([1.0, 3.0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 400

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz, **kwargs)

        # subscribe to /cmd_vel topic to publish the setpoint
        self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # subscribe to /gazebo/model_states to get the position of the turtlebot
        model_state_service_name = '/gazebo/get_model_state'
        rospy.wait_for_service(model_state_service_name)
        self._model_state_service = rospy.ServiceProxy(model_state_service_name, GetModelState)
    def __init__(self, gamma=0.995, horizon=1500, debug_gui=False, kine_type='analytical'):
        self.simulation = AirHockeyBase(gamma, horizon, debug_gui)
        self.kin_type = kine_type

        # [X, Y, Pitch, Yaw, Psi]
        low = np.array([0.6, -0.5]) #, np.deg2rad(-180), np.deg2rad(-10), 0])
        high = np.array([1.1, 0.5]) #, np.deg2rad(180), np.deg2rad(75), 0])
        action_space = Box(low, high)

        observation_space = self.simulation._mdp_info.observation_space
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)

        self.table_surface_height = 0.162
        self.iiwa_global_config = [1, -1, 1]
        self.fixed_parameters = [0., np.deg2rad(-35), 1.7]
        # self.fixed_parameters = [0., np.deg2rad(-35), 0]

        self.get_initial_state()

        if self.kin_type == 'analytical':
            self.kinematics = Kinematics()
        elif self.kin_type == 'pykdl' and KDLInterface is not None:
            self.kinematics = KDLInterface(self.simulation.robot_path, 7, 'front_base', 'F_link_ee',
                                           np.array(IIWA_JOINT_MIN_LIMITS), np.array(IIWA_JOINT_MAX_LIMITS),
                                           np.array([0., 0., -9.81]))
        else:
            raise NotImplementedError
Ejemplo n.º 5
0
    def __init__(self,
                 start=None,
                 goal=None,
                 goal_threshold=.1,
                 noise_step=.025,
                 noise_reward=0,
                 reward_goal=0.,
                 thrust=.05,
                 puddle_center=None,
                 puddle_width=None,
                 gamma=.99,
                 horizon=5000):
        """
        Constructor.

        Args:
            start (np.array, None): starting position of the agent;
            goal (np.array, None): goal position;
            goal_threshold (float, .1): distance threshold of the agent from the
                goal to consider it reached;
            noise_step (float, .025): noise in actions;
            noise_reward (float, 0): standard deviation of gaussian noise in reward;
            reward_goal (float, 0): reward obtained reaching goal state;
            thrust (float, .05): distance walked during each action;
            puddle_center (np.array, None): center of the puddle;
            puddle_width (np.array, None): width of the puddle;
            gamma (float, .99): discount factor.
            horizon (int, 5000): horizon of the problem;

        """
        # MDP parameters
        self._start = np.array([.2, .4]) if start is None else start
        self._goal = np.array([1., 1.]) if goal is None else goal
        self._goal_threshold = goal_threshold
        self._noise_step = noise_step
        self._noise_reward = noise_reward
        self._reward_goal = reward_goal
        self._thrust = thrust
        puddle_center = [[.3, .6], [.4, .5], [.8, .9]
                         ] if puddle_center is None else puddle_center
        self._puddle_center = [np.array(center) for center in puddle_center]
        puddle_width = [[.1, .03], [.03, .1], [.03, .1]
                        ] if puddle_width is None else puddle_width
        self._puddle_width = [np.array(width) for width in puddle_width]

        self._actions = [np.zeros(2) for _ in range(5)]
        for i in range(4):
            self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

        # MDP properties
        action_space = Discrete(5)
        observation_space = Box(0., 1., shape=(2, ))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._pixels = None
        self._viewer = Viewer(1.0, 1.0)

        super().__init__(mdp_info)
Ejemplo n.º 6
0
    def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = np.inf
        gamma = .95
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info, height, width, start, goal)
Ejemplo n.º 7
0
    def __init__(self, name, horizon, gamma, wrappers=None, wrappers_args=None,
                 **env_args):
        """
        Constructor.

        Args:
             name (str): gym id of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             wrappers (list): list of wrappers to apply over the environment. It
                is possible to pass arguments to the wrappers by providing
                a tuple with two elements: the gym wrapper class and a
                dictionary containing the parameters needed by the wrapper
                constructor;
            wrappers_args (list): list of list of arguments for each wrapper;
            ** env_args: other gym environment parameters.

        """

        # MDP creation
        self._not_pybullet = True
        self._first = True
        if pybullet_found and '- ' + name in pybullet_envs.getList():
            import pybullet
            pybullet.connect(pybullet.DIRECT)
            self._not_pybullet = False

        self.env = gym.make(name, **env_args)

        if wrappers is not None:
            if wrappers_args is None:
                wrappers_args = [dict()] * len(wrappers)
            for wrapper, args in zip(wrappers, wrappers_args):
                if isinstance(wrapper, tuple):
                    self.env = wrapper[0](self.env, *args, **wrapper[1])
                else:
                    self.env = wrapper(self.env, *args, **env_args)

        self.env._max_episode_steps = np.inf  # Hack to ignore gym time limit.

        # MDP properties
        assert not isinstance(self.env.observation_space,
                              gym_spaces.MultiDiscrete)
        assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

        action_space = self._convert_gym_space(self.env.action_space)
        observation_space = self._convert_gym_space(self.env.observation_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        super().__init__(mdp_info)
Ejemplo n.º 8
0
    def __init__(self,
                 name,
                 width=84,
                 height=84,
                 ends_at_life=False,
                 max_pooling=True,
                 history_length=4,
                 max_no_op_actions=30):
        """
        Constructor.

        Args:
            name (str): id name of the Atari game in Gym;
            width (int, 84): width of the screen;
            height (int, 84): height of the screen;
            ends_at_life (bool, False): whether the episode ends when a life is
               lost or not;
            max_pooling (bool, True): whether to do max-pooling or
                average-pooling of the last two frames when using NoFrameskip;
            history_length (int, 4): number of frames to form a state;
            max_no_op_actions (int, 30): maximum number of no-op action to
                execute at the beginning of an episode.

        """
        # MPD creation
        if 'NoFrameskip' in name:
            self.env = MaxAndSkip(gym.make(name), history_length, max_pooling)
        else:
            self.env = gym.make(name)

        # MDP parameters
        self._img_size = (width, height)
        self._episode_ends_at_life = ends_at_life
        self._max_lives = self.env.unwrapped.ale.lives()
        self._lives = self._max_lives
        self._force_fire = None
        self._real_reset = True
        self._max_no_op_actions = max_no_op_actions
        self._history_length = history_length
        self._current_no_op = None

        assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP'

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(history_length, self._img_size[1],
                                       self._img_size[0]))
        horizon = np.inf  # the gym time limit is used.
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Ejemplo n.º 9
0
    def __init__(self,
                 A,
                 B,
                 Q,
                 R,
                 max_pos=np.inf,
                 max_action=np.inf,
                 random_init=False,
                 episodic=False,
                 gamma=0.9,
                 horizon=50,
                 initial_state=None):
        """
        Constructor.

            Args:
                A (np.ndarray): the state dynamics matrix;
                B (np.ndarray): the action dynamics matrix;
                Q (np.ndarray): reward weight matrix for state;
                R (np.ndarray): reward weight matrix for action;
                max_pos (float, np.inf): maximum value of the state;
                max_action (float, np.inf): maximum value of the action;
                random_init (bool, False): start from a random state;
                episodic (bool, False): end the episode when the state goes over
                the threshold;
                gamma (float, 0.9): discount factor;
                horizon (int, 50): horizon of the mdp.

        """
        self.A = A
        self.B = B
        self.Q = Q
        self.R = R
        self._max_pos = max_pos
        self._max_action = max_action
        self._episodic = episodic
        self.random_init = random_init

        self._initial_state = initial_state

        # MDP properties
        high_x = self._max_pos * np.ones(A.shape[0])
        low_x = -high_x

        high_u = self._max_action * np.ones(B.shape[1])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Ejemplo n.º 10
0
    def __init__(self,
                 m=2.,
                 M=8.,
                 l=.5,
                 g=9.8,
                 mu=1e-2,
                 max_u=50.,
                 noise_u=10.,
                 horizon=3000,
                 gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._alpha = 1 / (self._m + self._M)
        self._mu = mu
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u
        high = np.array([np.inf, np.inf])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)
Ejemplo n.º 11
0
    def __init__(self, items, gamma, horizon, trans_model_abs_path, item_dist=None):
        # MDP parameters

        # 1) discrete actions: list of item names or representing integers
        # 2) actions on n-dimensional space: list of a pair of min and max values per action
        self.items = items
        self.action_dim = len(self.items)
        if item_dist is None:
            if len(self.items.shape) == 1:
                if 'none' in self.items:
                    self.item_dist = np.zeros(self.action_dim)
                    self.item_dist[1:] = 1/(self.action_dim-1)
                else:
                    self.item_dist = 1/(self.action_dim)
            else:
                self.item_dist = None
        else:
            self.item_dist = item_dist
        self.gamma = gamma    ## discount factor
        self.horizon = horizon    ## time limit to long
        self.trans_model = ModelMaker(FlexibleTorchModel, model_dir_path=trans_model_abs_path)
        self.trans_model_params = self.trans_model.model.state_dict()
        tmp = list(self.trans_model_params.keys())
        key = list(filter(lambda x: '0.weight' in x, tmp))[0]
        self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim
        if 'none' in self.items:
            self.state_dim += 1

        MM_VAL = 100
        self.min_point = np.ones(self.state_dim) * -MM_VAL
        self.max_point = np.ones(self.state_dim) * MM_VAL
        
        if len(self.items.shape) == 1:
            self._discrete_actions = list(range(self.action_dim))
        else:
            self._discrete_actions = None

        # MDP properties
        observation_space = spaces.Box(low=self.min_point, high=self.max_point)
        if len(self.items.shape) == 1:
            action_space = spaces.Discrete(self.action_dim)
        else:
            action_space = spaces.Box(low=self.items[0][0], high=self.items[0][1])
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Ejemplo n.º 12
0
    def __init__(self, game, horizon, gamma, obs_shape=None, **env_args):
        self._close_at_stop = True

        self.env = retro.make(game=game, **env_args)

        self.env._max_episode_steps = np.inf

        observation_space = None
        if (obs_shape == None):
            observation_space = self._convert_gym_space(
                self.env.observation_space)
        else:
            observation_space = Discrete(obs_shape)

        action_space = self._convert_gym_space(self.env.action_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Ejemplo n.º 13
0
    def __init__(self, small=True, n_steps_action=3):
        """
        Constructor.

        Args:
             small (bool, True): whether to use a small state space or not.
             n_steps_action (int, 3): number of integration intervals for each
                                      step of the mdp.

        """
        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self._gate_s = np.empty(2)
        self._gate_e = np.empty(2)
        self._gate_s[0] = 100 if small else 350
        self._gate_s[1] = 120 if small else 400
        self._gate_e[0] = 120 if small else 450
        self._gate_e[1] = 100 if small else 400
        self._out_reward = -100
        self._success_reward = 0
        self._small = small
        self._state = None
        self.n_steps_action = n_steps_action

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size,
                              self.field_size,
                              background=(66, 131, 237))

        super().__init__(mdp_info)
Ejemplo n.º 14
0
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon,
                 gamma,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;

        """
        # MDP creation
        if task_kwargs is None:
            task_kwargs = dict()
        task_kwargs[
            'time_limit'] = np.inf  # Hack to ignore dm_control time limit.

        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)
Ejemplo n.º 15
0
    def __init__(self, name, horizon, gamma, **kwargs):
        """
        Constructor.

        Args:
             name (str): gym id of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor.

        """
        # MDP creation
        self._close_at_stop = True
        if '- ' + name in pybullet_envs.getList():
            import pybullet
            pybullet.connect(pybullet.DIRECT)
            self._close_at_stop = False

        self.env = gym.make(name)

        # set to 100Hz for pendulum
        if name == 'Pendulum-ID-v1' or name == 'Cartpole-ID-v1':
            self.env._max_episode_steps = 1000
            self.env.unwrapped._dt = 0.01
            self.env.unwrapped._sigma = 1e-4

        self.env._max_episode_steps = np.inf  # Hack to ignore gym time limit.

        # MDP properties
        assert not isinstance(self.env.observation_space,
                              gym_spaces.MultiDiscrete)
        assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

        action_space = self._convert_gym_space(self.env.action_space)
        observation_space = self._convert_gym_space(self.env.observation_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        super().__init__(mdp_info)
Ejemplo n.º 16
0
    def __init__(self, m, g, a, horizon=100, gamma=.95):
        """
        Constructor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = g
        self._m = m
        self._dt = .1
        self._discrete_actions = [-a, a]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Ejemplo n.º 17
0
    def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2,
                 max_u=5., horizon=5000, gamma=.99):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one;
            m (float, 1.0): mass of the pendulum;
            l (float, 1.0): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 5.0): maximum allowed input torque;
            horizon (int, 5000): horizon of the problem;
            gamma (int, .99): discount factor.

        """
        # MDP parameters
        self._m = m
        self._l = l
        self._g = g
        self._mu = mu
        self._random = random_start
        self._dt = .01
        self._max_u = max_u
        self._max_omega = 5 / 2 * np.pi
        high = np.array([np.pi, self._max_omega])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-max_u]),
                                  high=np.array([max_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None

        super().__init__(mdp_info)
Ejemplo n.º 18
0
    def __init__(self, random_start=False):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one.

        """
        # MDP parameters
        gamma = 0.97

        self._Mr = 0.3 * 2
        self._Mp = 2.55
        self._Ip = 2.6e-2
        self._Ir = 4.54e-4 * 2
        self._l = 13.8e-2
        self._r = 5.5e-2
        self._dt = 1e-2
        self._g = 9.81
        self._max_u = 5

        self._random = random_start

        high = np.array([-np.pi / 2, 15, 75])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._max_u]))
        horizon = 300
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(5 * self._l, 5 * self._l)
        self._last_x = 0

        super().__init__(mdp_info)
Ejemplo n.º 19
0
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([11.088889122009277, 11.088889122009277, np.pi])
        low_x = np.array([0, 0, -np.pi])

        high_u = np.array([2.0, 2.0])
        low_u = np.array([0., -2.0])

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 100

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        self._target = np.array([2.0, 2.0, 0.0])
        self._current_pose = np.zeros(3)

        super().__init__('turtlesim_env', mdp_info, hz, **kwargs)

        # subscribe to /turtle1/cmd_vel topic to publish the setpoint
        self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1)

        # subscribe to /turtle1/pose to get the turtle pose
        self._pose = rospy.Subscriber('/turtle1/pose', Pose,
                                      self._pose_callback)

        # subscribe to the teleporting service to reset environment
        self._reset_service_name = '/turtle1/teleport_absolute'
        rospy.wait_for_service(self._reset_service_name)
        self._reset_service = rospy.ServiceProxy(self._reset_service_name,
                                                 TeleportAbsolute)
Ejemplo n.º 20
0
    def __init__(self, file_name, actuation_spec, observation_spec, gamma,
                 horizon, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None,
                 collision_groups=None):
        """
        Constructor.

        Args:
            file_name (string): The path to the XML file with which the
                environment should be created;
            actuation_spec (list): A list specifying the names of the joints
                which should be controllable by the agent. Can be left empty
                when all actuators should be used;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            n_substeps (int): The number of substeps to use by the MuJoCo
                simulator. An action given by the agent will be applied for
                n_substeps before the agent receives the next observation and
                can act accordingly;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Similar to n_substeps but allows the user
                to modify, control and access intermediate states.
            additional_data_spec (list): A list containing the data fields of
                interest, which should be read from or written to during
                simulation. The entries are given as the following tuples:
                (key, name, type) key is a string for later referencing in the
                "read_data" and "write_data" methods. The name is the name of
                the object in the XML specification and the type is the
                ObservationType;
            collision_groups (list): A list containing groups of geoms for
                which collisions should be checked during simulation via
                ``check_collision``. The entries are given as:
                ``(key, geom_names)``, where key is a string for later
                referencing in the "check_collision" method, and geom_names is
                a list of geom names in the XML specification.
        """
        # Create the simulation
        self.sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(file_name),
                                   nsubsteps=n_substeps)

        self.n_intermediate_steps = n_intermediate_steps
        self.viewer = None
        self._state = None

        # Read the actuation spec and build the mapping between actions and ids
        # as well as their limits
        if len(actuation_spec) == 0:
            self.action_indices = [i for i in range(0, len(self.sim.model._actuator_name2id))]
        else:
            self.action_indices = []
            for name in actuation_spec:
                self.action_indices.append(self.sim.model._actuator_name2id[name])

        low = []
        high = []
        for index in self.action_indices:
            if self.sim.model.actuator_ctrllimited[index]:
                low.append(self.sim.model.actuator_ctrlrange[index][0])
                high.append(self.sim.model.actuator_ctrlrange[index][1])
            else:
                low.append(-np.inf)
                high.append(np.inf)
        action_space = Box(np.array(low), np.array(high))

        # Read the observation spec to build a mapping at every step. It is
        # ensured that the values appear in the order they are specified.
        if len(observation_spec) == 0:
            raise AttributeError("No Environment observations were specified. "
                                 "Add at least one observation to the observation_spec.")
        else:
            self.observation_map = observation_spec

        # We can only specify limits for the joint positions, all other
        # information can be potentially unbounded
        low = []
        high = []
        for name, ot in self.observation_map:
            obs_count = len(self._observation_map(name, ot))
            if obs_count == 1 and ot == ObservationType.JOINT_POS:
                joint_range = self.sim.model.jnt_range[self.sim.model._joint_name2id[name]]
                if not(joint_range[0] == joint_range[1] == 0.0):
                    low.append(joint_range[0])
                    high.append(joint_range[1])
                else:
                    low.extend(-np.inf)
                    high.extend(np.inf)
            else:
                low.extend([-np.inf] * obs_count)
                high.extend([np.inf] * obs_count)
        observation_space = Box(np.array(low), np.array(high))

        # Pre-process the additional data to allow easier writing and reading
        # to and from arrays in MuJoCo
        self.additional_data = {}
        for key, name, ot in additional_data_spec:
            self.additional_data[key] = (name, ot)

        # Pre-process the collision groups for "fast" detection of contacts
        self.collision_groups = {}
        if self.collision_groups is not None:
            for name, geom_names in collision_groups:
                self.collision_groups[name] = {self.sim.model._geom_name2id[geom_name]
                                               for geom_name in geom_names}

        # Finally, we create the MDP information and call the constructor of
        # the parent class
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)
Ejemplo n.º 21
0
    def __init__(self, database, area, observation, delta_degree=0.1, speed=5., gamma=1, start_hour=8, end_hour=22, days_loaded=1, train_days=None, add_time=False, allow_wait=False, project_dir=None):
        # self.area = area
        self.next_event = None
        self.time = None
        self.position = None
        self.done = False
        self.speed = speed
        self.departures = None
        self.spot_states = None
        self.violation_times = None
        self.potential_violation_times = None
        self.allowed_durations = None
        self.delayed_arrival = None
        self.observation = observation
        self.render_images = None
        self.position_plots = None
        self.start_hour = start_hour
        self.end_hour = end_hour
        self.add_time = add_time
        self.days_loaded = days_loaded
        self.days_consumed = np.inf
        self.day = None
        self.end_day = None

        self.train_days = train_days if train_days is not None else list(range(1, 356))

        conn = sqlite3.connect('file:%s?mode=ro' % database, uri=True)
        cursor = conn.cursor()
        if area is None:
            self.areas = []
        elif isinstance(area, list) or isinstance(area, tuple):
            self.areas = area
        else:
            self.areas = [area]
        if len(self.areas) == 0:
            where_areas = " 1 = 1 "  # if no area specified, use all!
        else:
            where_areas = " or ".join(map(lambda a: " Area=\"" + a + "\" ", self.areas))

        cursor.execute("SELECT min(lat) as south, max(lat) as north, min(lon) as west, max(lon) as east "
                       "from devices where " + where_areas)

        row = cursor.fetchone()

        sqlx = 'select DeviceID, ArrivalTime, DepartureTime, duration*60 ' \
               'from events join durations on durations.sign=events.sign ' \
               'where ' + where_areas + ' and "Vehicle Present"="True" order by ArrivalTime'
        self.all_events = pd.read_sql(sqlx, conn, coerce_float=True, params=None)
        self.events = None
        self.event_idx = None

        dx = max(0.01, row[1] - row[0]) * delta_degree
        dy = max(0.01, row[3] - row[2]) * delta_degree
        self.north = row[1] + dx
        self.south = row[0] - dx
        self.west = row[2] - dy
        self.east = row[3] + dy

        if project_dir is None:
            project_dir = os.getcwd()
        data_dir = os.path.join(project_dir, "data")
        area_hash = hashlib.md5(json.dumps(sorted(area)).encode("utf8")).hexdigest()

        graph_file = 'graph_' + area_hash + '.gml'
        try:
            self.graph = ox.load_graphml(graph_file, folder=data_dir)
        except FileNotFoundError:
            self.graph = ox.graph_from_bbox(
                south=self.south, north=self.north, west=self.west, east=self.east, network_type='walk')
            # ox.plot_graph(self.graph)
            ox.save_graphml(self.graph, filename=graph_file, folder=data_dir)
        print("graph nodes: ", len(self.graph.nodes), ", edges: ", len(self.graph.edges))
        self.actions = tuple(self.graph.edges)
        if allow_wait:
            self.actions = self.actions + ('wait', )
        self.edge_indices = {edge: i for i, edge in enumerate(self.actions)}
        self.edge_lengths = nx.get_edge_attributes(self.graph, 'length')

        self.edge_resources = dict()
        self.resource_edges = dict()
        self.devices = dict()
        self.device_ordering = []
        self.spot_plots = dict()

        devices_file = os.path.join(data_dir, "devices_" + area_hash + ".pckl")
        try:
            with open(devices_file, "rb") as f:
                self.devices, self.device_ordering, self.edge_resources, self.resource_edges = pickle.load(f)
        except:
            print("cannot load devices file", devices_file)
            fig, ax = ox.plot_graph(self.graph, show=False, close=False)
            res = cursor.execute("SELECT lat, lon, DeviceID " +
                                 "from devices " +
                                 "where " + where_areas)
            for lat, lon, device_id, *vio_times in tqdm(res.fetchall()):
                # g = ox.truncate_graph_bbox(self.graph, lat+dy, lat-dy, lon+dx, lon-dx, True, True)
                g = ox.truncate_graph_dist(self.graph, ox.get_nearest_node(self.graph, (lat, lon)), 500, retain_all=True)
                nearest = ox.get_nearest_edge(g, (lat, lon))
                edge = (nearest[1], nearest[2])
                self.device_ordering.append(device_id)
                self.devices[device_id] = (lat, lon, edge)
                if edge in self.edge_resources:
                    edge_resources = self.edge_resources[edge]
                else:
                    edge_resources = []
                    self.edge_resources[edge] = edge_resources
                edge_resources.append(device_id)
                self.resource_edges[device_id] = edge
                ax.plot(*nearest[0].xy, color='blue')
                ax.scatter(lon, lat, s=2, c="red")

            with open(devices_file, "wb") as f:
                pickle.dump((self.devices, self.device_ordering, self.edge_resources, self.resource_edges), f)
            plt.show()

        print(len(self.devices), "parking devices")

        mdp_info = MDPInfo(Discrete(2), Discrete(len(self.actions)), gamma, np.inf)
        super().__init__(mdp_info)

        cursor.close()
        conn.close()
Ejemplo n.º 22
0
    def __init__(self,
                 files,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 timestep=1 / 240,
                 n_intermediate_steps=1,
                 debug_gui=False,
                 **viewer_params):
        """
        Constructor.

        Args:
            files (list): Paths to the URDF files to load;
            actuation_spec (list): A list of tuples specifying the names of the
                joints which should be controllable by the agent and tehir control mode.
                 Can be left empty when all actuators should be used in position control;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            timestep (float, 0.00416666666): The timestep used by the PyBullet
                simulator;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Allows the user to modify, control and
                access intermediate states;
            **viewer_params: other parameters to be passed to the viewer.
                See PyBulletViewer documentation for the available options.

        """

        # Store simulation parameters
        self._timestep = timestep
        self._n_intermediate_steps = n_intermediate_steps

        # Create the simulation and viewer
        if debug_gui:
            self._client = BulletClient(connection_mode=pybullet.GUI)
        else:
            self._client = BulletClient(connection_mode=pybullet.DIRECT)
        self._client.setTimeStep(self._timestep)
        self._client.setGravity(0, 0, -9.81)
        self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

        self._viewer = PyBulletViewer(
            self._client, self._timestep * self._n_intermediate_steps,
            **viewer_params)
        self._state = None

        # Load model and create access maps
        self._model_map = dict()
        for file_name, kwargs in files.items():
            model_id = self._client.loadURDF(file_name, **kwargs)
            model_name = self._client.getBodyInfo(model_id)[1].decode('UTF-8')
            self._model_map[model_name] = model_id
        self._model_map.update(self._custom_load_models())

        self._joint_map = dict()
        self._link_map = dict()
        for model_id in self._model_map.values():
            for joint_id in range(self._client.getNumJoints(model_id)):
                joint_data = self._client.getJointInfo(model_id, joint_id)
                if joint_data[2] != pybullet.JOINT_FIXED:
                    joint_name = joint_data[1].decode('UTF-8')
                    self._joint_map[joint_name] = (model_id, joint_id)
                link_name = joint_data[12].decode('UTF-8')
                self._link_map[link_name] = (model_id, joint_id)

        # Read the actuation spec and build the mapping between actions and ids
        # as well as their limits
        assert (len(actuation_spec) > 0)
        self._action_data = list()
        for name, mode in actuation_spec:
            if name in self._joint_map:
                data = self._joint_map[name] + (mode, )
                self._action_data.append(data)

        low, high = self._compute_action_limits()
        action_space = Box(np.array(low), np.array(high))

        # Read the observation spec to build a mapping at every step. It is
        # ensured that the values appear in the order they are specified.
        if len(observation_spec) == 0:
            raise AttributeError(
                "No Environment observations were specified. "
                "Add at least one observation to the observation_spec.")

        self._observation_map = observation_spec
        self._observation_indices_map = dict()

        # We can only specify limits for the joint positions, all other
        # information can be potentially unbounded
        low, high = self._compute_observation_limits()
        observation_space = Box(low, high)

        # Finally, we create the MDP information and call the constructor of
        # the parent class
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)

        # Save initial state of the MDP
        self._initial_state = self._client.saveState()
Ejemplo n.º 23
0
    def __init__(self, physicsClientId=1, render=True):
        print('=====init======')
        self.showSimulation = False
        self.ll, self.ul, self.jr, self.rs = [], [], [], []
        self._states = [0, 1, 2, 3]
        self.current_t = 0
        self.state = 1
        self._state = np.array([0])
        self.waittimer = 10
        self.boxId = None
        self.robotId = None
        self.tableId = None
        self.planeId = None
        self.rob_start_pos = [-0.5, 0, 1.]
        self.rob_start_orn = p.getQuaternionFromEuler([m.pi, 0, 0])
        self.obj_start_pos = [-0.5, 0, 0.73175]
        self.obj_top = [-0.5, 0, 0.8384935]
        self.obj_start_orn = p.getQuaternionFromEuler([0, 0, m.pi / 2])
        self.end_eff_idx = 6
        self.robot_left_finger_idx = 7
        self.robot_right_finger_idx = 8
        self._use_IK = 0
        self._control_eu_or_quat = 0
        self.joint_action_space = 7
        self._joint_name_to_ids = {}
        self._renders = render
        self.urdfroot = currentdir
        self._physics_client_id = physicsClientId
        self._p = p
        self.first_ep = True
        self.grasp_attemp_pos = []
        #print("urdrfoot:",self.urdfroot)
        #self.seed()

        # TODO
        gamma = 1.
        horizon = 300
        high = np.array([-0.25, 0.1, 0.91])
        low = np.array([-0.75, -0.1, 0.91])
        self._max_u = 0.37
        self._min_u = 0.30
        #observation_space = np.array([0, 0, 0])
        #action_space = self.sample_action()

        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._min_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)

        if self._renders:
            self._physics_client_id = p.connect(p.SHARED_MEMORY)
            if self.showSimulation:
                if self._physics_client_id < 0:
                    self._physics_client_id = p.connect(p.GUI)
                else:
                    p.connect(p.DIRECT)
            else:
                p.connect(p.DIRECT)

        # TODO
        # self.planeId = p.loadURDF(pybullet_data.getDataPath() + "/plane.urdf")
        # #
        # # # Place table
        # self.tableId = p.loadURDF(pybullet_data.getDataPath() + '/table/table.urdf')
        # #
        # # # Place robot
        # #self.robotId = p.loadURDF(self.urdfroot + "/robot/panda_visual_arm_grasper.urdf",
        #                           #self.rob_start_pos, self.rob_start_orn)
        # self.robotId = p.loadURDF(currentdir + "/environment-master/panda_robot_grasper/panda_visual_arm_grasper.udrf",
        #                            self.rob_start_pos, self.rob_start_orn)
        # #
        # # # Place ycb object
        # self.boxId = p.loadURDF(self.urdfroot + '/003_cracker_box/model_normalized.urdf', self.obj_start_pos, self.obj_start_orn)

        #p.resetDebugVisualizerCamera(1.5, 245, -56, [-0.5, 0, 0.61])

        # Add user debug parameters, comment back for debug slider use
        """self.control_prismatic_joint1 = p.addUserDebugParameter('control_prismatic_joint1', -.5, .5, 0)  # 0