Example #1
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([5.0, 5.0, np.pi])
        low_x = -high_x

        high_u = np.array([1.0, 3.0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 400

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz, **kwargs)

        # create a publisher on the /cmd_vel topic to publish the velocity setpoint
        self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # connect to the /gazebo/get_model_state service to query the turtlebot pose
        model_state_service_name = '/gazebo/get_model_state'
        rospy.wait_for_service(model_state_service_name)
        self._model_state_service = rospy.ServiceProxy(model_state_service_name, GetModelState)
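# A minimal sketch (not taken from the class above) of how an action sampled from
# the Box action space could be turned into a /cmd_vel setpoint. Mapping action[0]
# to linear velocity and action[1] to yaw rate, and the helper name publish_action,
# are assumptions rather than code from the original environment.
from geometry_msgs.msg import Twist

def publish_action(pub, action):
    # Convert a 2D action [linear, angular] into a Twist message and publish it.
    msg = Twist()
    msg.linear.x = action[0]   # forward velocity, bounded by high_u[0]
    msg.angular.z = action[1]  # yaw rate, bounded by high_u[1]
    pub.publish(msg)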
Example #2
    def __init__(self,
                 A,
                 B,
                 Q,
                 R,
                 max_pos=np.inf,
                 max_action=np.inf,
                 random_init=False,
                 episodic=False,
                 gamma=0.9,
                 horizon=50,
                 initial_state=None):
        """
        Constructor.

        Args:
            A (np.ndarray): the state dynamics matrix;
            B (np.ndarray): the action dynamics matrix;
            Q (np.ndarray): reward weight matrix for state;
            R (np.ndarray): reward weight matrix for action;
            max_pos (float, np.inf): maximum value of the state;
            max_action (float, np.inf): maximum value of the action;
            random_init (bool, False): start from a random state;
            episodic (bool, False): end the episode when the state goes over
                the threshold;
            gamma (float, 0.9): discount factor;
            horizon (int, 50): horizon of the MDP;
            initial_state (np.ndarray, None): initial state of the system, used
                when ``random_init`` is False.

        """
        self.A = A
        self.B = B
        self.Q = Q
        self.R = R
        self._max_pos = max_pos
        self._max_action = max_action
        self._episodic = episodic
        self.random_init = random_init

        self._initial_state = initial_state

        # MDP properties
        high_x = self._max_pos * np.ones(A.shape[0])
        low_x = -high_x

        high_u = self._max_action * np.ones(B.shape[1])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
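# A minimal usage sketch of the constructor above, assuming the enclosing class is
# named LQR (the class name is not visible in the snippet). It builds a
# two-dimensional linear system with quadratic state and action costs.
import numpy as np

A = np.eye(2)        # state dynamics matrix
B = np.eye(2)        # action dynamics matrix
Q = np.eye(2)        # state cost weights
R = 0.1 * np.eye(2)  # action cost weights

# Hypothetical class name; the remaining arguments follow the signature above.
env = LQR(A, B, Q, R, max_pos=10., max_action=1., gamma=0.9, horizon=50)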
Example #3
    def __init__(self, horizon=100, gamma=.95):
        """
        Constructor.

        Args:
            horizon (int, 100): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = 9.81
        self._m = 1.
        self._dt = .1
        self._discrete_actions = [-4., 4.]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(1, 1)

        super().__init__(mdp_info)
Example #4
    def __init__(self, items, gamma, horizon, trans_model_abs_path, item_dist=None):
        # MDP parameters

        # 1) discrete actions: a list of item names or their integer representations
        # 2) continuous actions on an n-dimensional space: a list of (min, max) pairs, one per action dimension
        self.items = items
        self.action_dim = len(self.items)
        if item_dist is None:
            if len(self.items.shape) == 1:
                if 'none' in self.items:
                    self.item_dist = np.zeros(self.action_dim)
                    self.item_dist[1:] = 1/(self.action_dim-1)
                else:
                    self.item_dist = np.ones(self.action_dim) / self.action_dim
            else:
                self.item_dist = None
        else:
            self.item_dist = item_dist
        self.gamma = gamma    # discount factor
        self.horizon = horizon    # time limit (horizon) of an episode
        self.trans_model = ModelMaker(FlexibleTorchModel, model_dir_path=trans_model_abs_path)
        self.trans_model_params = self.trans_model.model.state_dict()
        tmp = list(self.trans_model_params.keys())
        key = list(filter(lambda x: '0.weight' in x, tmp))[0]
        self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim
        if 'none' in self.items:
            self.state_dim += 1

        MM_VAL = 100
        self.min_point = np.ones(self.state_dim) * -MM_VAL
        self.max_point = np.ones(self.state_dim) * MM_VAL
        
        if len(self.items.shape) == 1:
            self._discrete_actions = list(range(self.action_dim))
        else:
            self._discrete_actions = None

        # MDP properties
        observation_space = spaces.Box(low=self.min_point, high=self.max_point)
        if len(self.items.shape) == 1:
            action_space = spaces.Discrete(self.action_dim)
        else:
            action_space = spaces.Box(low=self.items[0][0], high=self.items[0][1])
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
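# To illustrate the item_dist branch in the constructor above: when items is
# one-dimensional and contains 'none', the 'none' entry gets probability zero and
# the remaining items share the mass uniformly. The stand-alone sketch below
# reproduces that computation; the example item names are made up.
import numpy as np

items = np.array(['none', 'item_a', 'item_b', 'item_c'])
action_dim = len(items)

item_dist = np.zeros(action_dim)
item_dist[1:] = 1 / (action_dim - 1)

print(item_dist)  # [0.         0.33333333 0.33333333 0.33333333]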
Example #5
    def __init__(self, small=True, n_steps_action=3):
        """
        Constructor.

        Args:
             small (bool, True): whether to use a small state space or not.
             n_steps_action (int, 3): number of integration intervals for each
                                      step of the mdp.

        """
        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self._gate_s = np.empty(2)
        self._gate_e = np.empty(2)
        self._gate_s[0] = 100 if small else 350
        self._gate_s[1] = 120 if small else 400
        self._gate_e[0] = 120 if small else 450
        self._gate_e[1] = 100 if small else 400
        self._out_reward = -100
        self._success_reward = 0
        self._small = small
        self._state = None
        self.n_steps_action = n_steps_action

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size,
                              self.field_size,
                              background=(66, 131, 237))

        super().__init__(mdp_info)
Example #6
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([11.088889122009277, 11.088889122009277, np.pi])
        low_x = np.array([0, 0, -np.pi])

        high_u = np.array([2.0, 2.0])
        low_u = np.array([0., -2.0])

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 100

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        self._target = np.array([5.0, 5.0])
        self._current_pose = np.zeros(3)

        super().__init__('turtlesim_env', mdp_info, hz, **kwargs)

        # create a publisher on the /turtle1/cmd_vel topic to publish the velocity setpoint
        self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1)

        # subscribe to /turtle1/pose to get the turtle pose
        self._pose = rospy.Subscriber('/turtle1/pose', Pose,
                                      self._pose_callback)

        # connect to the teleporting service to reset environment
        self._reset_service_name = '/turtle1/teleport_absolute'
        rospy.wait_for_service(self._reset_service_name)
        self._reset_service = rospy.ServiceProxy(self._reset_service_name,
                                                 TeleportAbsolute)

        # connect to the clear service to cleanup the traces
        self._clear_service_name = '/clear'
        rospy.wait_for_service(self._clear_service_name)
        self._clear_service = rospy.ServiceProxy(self._clear_service_name,
                                                 Empty)
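    # A minimal sketch (not from the original class) of how the two service proxies
    # above could be used to reset the environment: teleport the turtle to a fixed
    # start pose and wipe the pen traces. The start pose, the method name and the
    # returned value are assumptions.
    def reset(self, state=None):
        # turtlesim's TeleportAbsolute request takes x, y and theta, in that order.
        self._reset_service(1.0, 1.0, 0.0)
        # std_srvs/Empty request: clears the drawing surface.
        self._clear_service()
        return self._current_pose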
Example #7
    def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2,
                 max_u=5., horizon=5000, gamma=.99):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one;
            m (float, 1.0): mass of the pendulum;
            l (float, 1.0): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 5.0): maximum allowed input torque;
            horizon (int, 5000): horizon of the problem;
            gamma (float, .99): discount factor.

        """
        # MDP parameters
        self._m = m
        self._l = l
        self._g = g
        self._mu = mu
        self._random = random_start
        self._dt = .01
        self._max_u = max_u
        self._max_omega = 5 / 2 * np.pi
        high = np.array([np.pi, self._max_omega])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-max_u]),
                                  high=np.array([max_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None

        super().__init__(mdp_info)
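# The parameters m, l, g and mu above describe a torque-controlled pendulum. As an
# illustration of how such constants are typically used (an assumption, not code
# taken from the class), the angular acceleration combines friction, gravity and
# the input torque:
import numpy as np

def pendulum_dynamics(theta, omega, u, m=1., l=1., g=9.8, mu=1e-2):
    # d(theta)/dt = omega
    # d(omega)/dt = (-mu * omega + m * g * l * sin(theta) + u) / (m * l**2)
    d_theta = omega
    d_omega = (-mu * omega + m * g * l * np.sin(theta) + u) / (m * l ** 2)
    return d_theta, d_omega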
Example #8
    def __init__(self, random_start=False):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one.

        """
        # MDP parameters
        gamma = 0.97

        self._Mr = 0.3 * 2
        self._Mp = 2.55
        self._Ip = 2.6e-2
        self._Ir = 4.54e-4 * 2
        self._l = 13.8e-2
        self._r = 5.5e-2
        self._dt = 1e-2
        self._g = 9.81
        self._max_u = 5

        self._random = random_start

        high = np.array([np.pi / 2, 15, 75])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._max_u]))
        horizon = 300
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(5 * self._l, 5 * self._l)
        self._last_x = 0

        super().__init__(mdp_info)
Example #9
    def __init__(self,
                 m=2.,
                 M=8.,
                 l=.5,
                 g=9.8,
                 mu=1e-2,
                 max_u=50.,
                 noise_u=10.,
                 horizon=3000,
                 gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._alpha = 1 / (self._m + self._M)
        self._mu = mu
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u
        high = np.array([np.inf, np.inf])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)
Example #10
    def __init__(self, m, g, a, horizon=100, gamma=.95):
        """
        Constructor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = g
        self._m = m
        self._dt = .1
        self._discrete_actions = [-a, a]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
Example #11
    def __init__(self, gamma=0.99, horizon=2000, n_intermediate_steps=10,
                 use_muscles=True, goal_reward=None, goal_reward_params=None,
                 obs_avg_window=1, act_avg_window=1):
        """
        Constructor.

        Args:
            gamma (float, 0.99): discount factor for the environment;
            horizon (int, 2000): horizon for the environment;
            n_intermediate_steps (int, 10): number of steps to apply the same
                action to the environment and wait for the next observation;
            use_muscles (bool): whether the external muscle simulation should
                be used for actions; if not, torques are applied directly to
                the joints;
            goal_reward (string, None): type of goal reward used for training.
                Available options:
                    'trajectory'  - Use the trajectory in
                                    assets/GaitTrajectory.npz as reference;
                    'vel_profile' - Velocity goal for the center of mass of the
                                    model to follow. The goal is given by a
                                    VelocityProfile instance (or subclass),
                                    which should be included in the
                                    ``goal_reward_params``;
                    'max_vel'     - Tries to achieve the maximum possible
                                    velocity;
                    None          - Follows no goal (just tries to survive);
            goal_reward_params (dict, None): parameters needed to create the
                goal reward;
            obs_avg_window (int, 1): size of window used to average
                observations;
            act_avg_window (int, 1): size of window used to average actions.

        """
        self.use_muscles = use_muscles
        self.goal_reward = goal_reward
        self.act_avg_window = act_avg_window
        self.obs_avg_window = obs_avg_window

        model_path = Path(__file__).resolve().parent.parent / "data" / "humanoid_gait" / "human7segment.xml"

        action_spec = ["right_hip_frontal", "right_hip_sagittal",
                       "right_knee", "right_ankle", "left_hip_frontal",
                       "left_hip_sagittal", "left_knee", "left_ankle",
                       ]

        observation_spec = [("root", ObservationType.JOINT_POS),
                            ("right_hip_frontal", ObservationType.JOINT_POS),
                            ("right_hip_sagittal", ObservationType.JOINT_POS),
                            ("right_knee", ObservationType.JOINT_POS),
                            ("right_ankle", ObservationType.JOINT_POS),
                            ("left_hip_frontal", ObservationType.JOINT_POS),
                            ("left_hip_sagittal", ObservationType.JOINT_POS),
                            ("left_knee", ObservationType.JOINT_POS),
                            ("left_ankle", ObservationType.JOINT_POS),

                            ("root", ObservationType.JOINT_VEL),
                            ("right_hip_frontal", ObservationType.JOINT_VEL),
                            ("right_hip_sagittal", ObservationType.JOINT_VEL),
                            ("right_knee", ObservationType.JOINT_VEL),
                            ("right_ankle", ObservationType.JOINT_VEL),
                            ("left_hip_frontal", ObservationType.JOINT_VEL),
                            ("left_hip_sagittal", ObservationType.JOINT_VEL),
                            ("left_knee", ObservationType.JOINT_VEL),
                            ("left_ankle", ObservationType.JOINT_VEL),
                            ]

        additional_data_spec = []

        collision_groups = [("floor", ["floor"]),
                            ("left_foot", ["left_foot"]),
                            ("right_foot", ["right_foot"])
                            ]

        super().__init__(model_path.as_posix(), action_spec, observation_spec, gamma=gamma,
                         horizon=horizon, n_substeps=1,
                         n_intermediate_steps=n_intermediate_steps,
                         additional_data_spec=additional_data_spec,
                         collision_groups=collision_groups)

        if use_muscles:
            self.external_actuator = MuscleSimulation(self.sim)
            self.info.action_space = spaces.Box(
                *self.external_actuator.get_action_space())
        else:
            self.external_actuator = NoExternalSimulation()

        low, high = self.info.action_space.low.copy(),\
                    self.info.action_space.high.copy()
        self.norm_act_mean = (high + low) / 2.0
        self.norm_act_delta = (high - low) / 2.0
        self.info.action_space.low[:] = -1.0
        self.info.action_space.high[:] = 1.0

        if goal_reward_params is None:
            goal_reward_params = dict()

        if goal_reward == "trajectory":
            control_dt = self.sim.model.opt.timestep * self.n_intermediate_steps
            self.goal_reward = CompleteTrajectoryReward(self.sim, control_dt,
                                                        **goal_reward_params)
        elif goal_reward == "vel_profile":
            self.goal_reward = VelocityProfileReward(self.sim, **goal_reward_params)
        elif goal_reward == "max_vel":
            self.goal_reward = MaxVelocityReward(self.sim, **goal_reward_params)
        elif goal_reward is None:
            self.goal_reward = NoGoalReward()
        else:
            raise NotImplementedError("The specified goal reward has not been "
                                      "implemented: ", goal_reward)

        if isinstance(self.goal_reward, HumanoidTrajectory):
            self.reward_weights = dict(live_reward=0.10, goal_reward=0.40,
                                       traj_vel_reward=0.50,
                                       move_cost=0.10, fall_cost=0.00)
        else:
            self.reward_weights = dict(live_reward=0.10, goal_reward=0.90,
                                       traj_vel_reward=0.00,
                                       move_cost=0.10, fall_cost=0.00)

        self.info.observation_space = spaces.Box(*self._get_observation_space())

        self.mean_grf = RunningAveragedWindow(shape=(6,),
                                              window_size=n_intermediate_steps)
        self.mean_vel = RunningExpWeightedAverage(shape=(3,), alpha=0.005)
        self.mean_obs = RunningAveragedWindow(
            shape=self.info.observation_space.shape,
            window_size=obs_avg_window
        )
        self.mean_act = RunningAveragedWindow(
            shape=self.info.action_space.shape, window_size=act_avg_window)
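# Since the action space is rescaled to [-1, 1] above while norm_act_mean and
# norm_act_delta keep the original bounds, a normalized action presumably has to be
# mapped back to the actuator range before being applied. A sketch of that inverse
# mapping, under the assumption that it happens in some action-preprocessing hook:
import numpy as np

def denormalize_action(action, norm_act_mean, norm_act_delta):
    # u = mean + delta * a maps [-1, 1] back to [low, high]; clip to stay in bounds.
    u = norm_act_mean + norm_act_delta * np.asarray(action)
    return np.clip(u, norm_act_mean - norm_act_delta,
                   norm_act_mean + norm_act_delta)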
Example #12
    def __init__(self, physicsClientId=1, render=True):
        print('=====init======')
        self.showSimulation = False
        self.ll, self.ul, self.jr, self.rs = [], [], [], []
        self._states = [0, 1, 2, 3]
        self.current_t = 0
        self.state = 1
        self._state = np.array([0])
        self.waittimer = 10
        self.boxId = None
        self.robotId = None
        self.tableId = None
        self.planeId = None
        self.rob_start_pos = [-0.5, 0, 1.]
        self.rob_start_orn = p.getQuaternionFromEuler([m.pi, 0, 0])
        self.obj_start_pos = [-0.5, 0, 0.73175]
        self.obj_top = [-0.5, 0, 0.8384935]
        self.obj_start_orn = p.getQuaternionFromEuler([0, 0, m.pi / 2])
        self.end_eff_idx = 6
        self.robot_left_finger_idx = 7
        self.robot_right_finger_idx = 8
        self._use_IK = 0
        self._control_eu_or_quat = 0
        self.joint_action_space = 7
        self._joint_name_to_ids = {}
        self._renders = render
        self.urdfroot = currentdir
        self._physics_client_id = physicsClientId
        self._p = p
        self.first_ep = True
        self.grasp_attemp_pos = []
        #print("urdrfoot:",self.urdfroot)
        #self.seed()

        # TODO
        gamma = 1.
        horizon = 300
        high = np.array([-0.25, 0.1, 0.91])
        low = np.array([-0.75, -0.1, 0.91])
        self._max_u = 0.37
        self._min_u = 0.30
        #observation_space = np.array([0, 0, 0])
        #action_space = self.sample_action()

        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._min_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)

        if self._renders:
            self._physics_client_id = p.connect(p.SHARED_MEMORY)
            if self.showSimulation:
                if self._physics_client_id < 0:
                    self._physics_client_id = p.connect(p.GUI)
                else:
                    p.connect(p.DIRECT)
            else:
                p.connect(p.DIRECT)

        # TODO
        # self.planeId = p.loadURDF(pybullet_data.getDataPath() + "/plane.urdf")
        # #
        # # # Place table
        # self.tableId = p.loadURDF(pybullet_data.getDataPath() + '/table/table.urdf')
        # #
        # # # Place robot
        # #self.robotId = p.loadURDF(self.urdfroot + "/robot/panda_visual_arm_grasper.urdf",
        #                           #self.rob_start_pos, self.rob_start_orn)
        # self.robotId = p.loadURDF(currentdir + "/environment-master/panda_robot_grasper/panda_visual_arm_grasper.udrf",
        #                            self.rob_start_pos, self.rob_start_orn)
        # #
        # # # Place ycb object
        # self.boxId = p.loadURDF(self.urdfroot + '/003_cracker_box/model_normalized.urdf', self.obj_start_pos, self.obj_start_orn)

        #p.resetDebugVisualizerCamera(1.5, 245, -56, [-0.5, 0, 0.61])

        # User debug parameters; uncomment the block below to use the debug sliders
        """self.control_prismatic_joint1 = p.addUserDebugParameter('control_prismatic_joint1', -.5, .5, 0)  # 0