def __init__(self, **kwargs): # Define environment properties high_x = np.array([5.0, 5.0, np.pi]) low_x = -high_x high_u = np.array([1.0, 3.0]) low_u = -high_u observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) gamma = 0.9 horizon = 400 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) hz = 10.0 super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz, **kwargs) # subscribe to /cmd_vel topic to publish the setpoint self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1) # subscribe to /gazebo/model_states to get the position of the turtlebot model_state_service_name = '/gazebo/get_model_state' rospy.wait_for_service(model_state_service_name) self._model_state_service = rospy.ServiceProxy(model_state_service_name, GetModelState)
def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, random_init=False, episodic=False, gamma=0.9, horizon=50, initial_state=None): """ Constructor. Args: A (np.ndarray): the state dynamics matrix; B (np.ndarray): the action dynamics matrix; Q (np.ndarray): reward weight matrix for state; R (np.ndarray): reward weight matrix for action; max_pos (float, np.inf): maximum value of the state; max_action (float, np.inf): maximum value of the action; random_init (bool, False): start from a random state; episodic (bool, False): end the episode when the state goes over the threshold; gamma (float, 0.9): discount factor; horizon (int, 50): horizon of the mdp. """ self.A = A self.B = B self.Q = Q self.R = R self._max_pos = max_pos self._max_action = max_action self._episodic = episodic self.random_init = random_init self._initial_state = initial_state # MDP properties high_x = self._max_pos * np.ones(A.shape[0]) low_x = -high_x high_u = self._max_action * np.ones(B.shape[1]) low_u = -high_u observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, horizon=100, gamma=.95): """ Constructor. Args: horizon (int, 100): horizon of the problem; gamma (float, .95): discount factor. """ # MDP parameters self.max_pos = 1. self.max_velocity = 3. high = np.array([self.max_pos, self.max_velocity]) self._g = 9.81 self._m = 1. self._dt = .1 self._discrete_actions = [-4., 4.] # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(2) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(1, 1) super().__init__(mdp_info)
def __init__(self, items, gamma, horizon, trans_model_abs_path, item_dist=None): # MDP parameters # 1) discrete actions: list of item names or representing integers # 2) actions on n-dimensional space: list of a pair of min and max values per action self.items = items self.action_dim = len(self.items) if item_dist is None: if len(self.items.shape) == 1: if 'none' in self.items: self.item_dist = np.zeros(self.action_dim) self.item_dist[1:] = 1/(self.action_dim-1) else: self.item_dist = 1/(self.action_dim) else: self.item_dist = None else: self.item_dist = item_dist self.gamma = gamma ## discount factor self.horizon = horizon ## time limit to long self.trans_model = ModelMaker(FlexibleTorchModel, model_dir_path=trans_model_abs_path) self.trans_model_params = self.trans_model.model.state_dict() tmp = list(self.trans_model_params.keys()) key = list(filter(lambda x: '0.weight' in x, tmp))[0] self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim if 'none' in self.items: self.state_dim += 1 MM_VAL = 100 self.min_point = np.ones(self.state_dim) * -MM_VAL self.max_point = np.ones(self.state_dim) * MM_VAL if len(self.items.shape) == 1: self._discrete_actions = list(range(self.action_dim)) else: self._discrete_actions = None # MDP properties observation_space = spaces.Box(low=self.min_point, high=self.max_point) if len(self.items.shape) == 1: action_space = spaces.Discrete(self.action_dim) else: action_space = spaces.Box(low=self.items[0][0], high=self.items[0][1]) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, small=True, n_steps_action=3): """ Constructor. Args: small (bool, True): whether to use a small state space or not. n_steps_action (int, 3): number of integration intervals for each step of the mdp. """ # MDP parameters self.field_size = 150 if small else 1000 low = np.array([0, 0, -np.pi, -np.pi / 12.]) high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.]) self.omega_max = np.array([np.pi / 12.]) self._v = 3. self._T = 5. self._dt = .2 self._gate_s = np.empty(2) self._gate_e = np.empty(2) self._gate_s[0] = 100 if small else 350 self._gate_s[1] = 120 if small else 400 self._gate_e[0] = 120 if small else 450 self._gate_e[1] = 100 if small else 400 self._out_reward = -100 self._success_reward = 0 self._small = small self._state = None self.n_steps_action = n_steps_action # MDP properties observation_space = spaces.Box(low=low, high=high) action_space = spaces.Box(low=-self.omega_max, high=self.omega_max) horizon = 5000 gamma = .99 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(self.field_size, self.field_size, background=(66, 131, 237)) super().__init__(mdp_info)
def __init__(self, **kwargs): # Define environment properties high_x = np.array([11.088889122009277, 11.088889122009277, np.pi]) low_x = np.array([0, 0, -np.pi]) high_u = np.array([2.0, 2.0]) low_u = np.array([0., -2.0]) observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) gamma = 0.9 horizon = 100 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) hz = 10.0 self._target = np.array([5.0, 5.0]) self._current_pose = np.zeros(3) super().__init__('turtlesim_env', mdp_info, hz, **kwargs) # subscribe to /turtle1/cmd_vel topic to publish the setpoint self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1) # subscribe to /turtle1/pose to get the turtle pose self._pose = rospy.Subscriber('/turtle1/pose', Pose, self._pose_callback) # connect to the teleporting service to reset environment self._reset_service_name = '/turtle1/teleport_absolute' rospy.wait_for_service(self._reset_service_name) self._reset_service = rospy.ServiceProxy(self._reset_service_name, TeleportAbsolute) # connect to the clear service to cleanup the traces self._clear_service_name = '/clear' rospy.wait_for_service(self._clear_service_name) self._clear_service = rospy.ServiceProxy(self._clear_service_name, Empty)
def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2, max_u=5., horizon=5000, gamma=.99): """ Constructor. Args: random_start (bool, False): whether to start from a random position or from the horizontal one; m (float, 1.0): mass of the pendulum; l (float, 1.0): length of the pendulum; g (float, 9.8): gravity acceleration constant; mu (float, 1e-2): friction constant of the pendulum; max_u (float, 5.0): maximum allowed input torque; horizon (int, 5000): horizon of the problem; gamma (int, .99): discount factor. """ # MDP parameters self._m = m self._l = l self._g = g self._mu = mu self._random = random_start self._dt = .01 self._max_u = max_u self._max_omega = 5 / 2 * np.pi high = np.array([np.pi, self._max_omega]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-max_u]), high=np.array([max_u])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) self._last_u = None super().__init__(mdp_info)
def __init__(self, random_start=False): """ Constructor. Args: random_start (bool, False): whether to start from a random position or from the horizontal one. """ # MDP parameters gamma = 0.97 self._Mr = 0.3 * 2 self._Mp = 2.55 self._Ip = 2.6e-2 self._Ir = 4.54e-4 * 2 self._l = 13.8e-2 self._r = 5.5e-2 self._dt = 1e-2 self._g = 9.81 self._max_u = 5 self._random = random_start high = np.array([-np.pi / 2, 15, 75]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-self._max_u]), high=np.array([self._max_u])) horizon = 300 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(5 * self._l, 5 * self._l) self._last_x = 0 super().__init__(mdp_info)
def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50., noise_u=10., horizon=3000, gamma=.95): """ Constructor. Args: m (float, 2.0): mass of the pendulum; M (float, 8.0): mass of the cart; l (float, .5): length of the pendulum; g (float, 9.8): gravity acceleration constant; max_u (float, 50.): maximum allowed input torque; noise_u (float, 10.): maximum noise on the action; horizon (int, 3000): horizon of the problem; gamma (float, .95): discount factor. """ # MDP parameters self._m = m self._M = M self._l = l self._g = g self._alpha = 1 / (self._m + self._M) self._mu = mu self._dt = .1 self._max_u = max_u self._noise_u = noise_u high = np.array([np.inf, np.inf]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(3) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) self._last_u = None self._state = None super().__init__(mdp_info)
def __init__(self, m, g, a, horizon=100, gamma=.95): """ Constructor. """ # MDP parameters self.max_pos = 1. self.max_velocity = 3. high = np.array([self.max_pos, self.max_velocity]) self._g = g self._m = m self._dt = .1 self._discrete_actions = [-a, a] # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(2) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, gamma=0.99, horizon=2000, n_intermediate_steps=10, use_muscles=True, goal_reward=None, goal_reward_params=None, obs_avg_window=1, act_avg_window=1): """ Constructor. Args: gamma (float, 0.99): discount factor for the environment; horizon (int, 2000): horizon for the environment; n_intermediate_steps (int, 10): number of steps to apply the same action to the environment and wait for the next observation; use_muscles (bool): if external muscle simulation should be used for actions. If not apply torques directly to the joints; goal_reward (string, None): type of trajectory used for training Options available: 'trajectory' - Use trajectory in assets/GaitTrajectory.npz as reference; 'vel_profile' - Velocity goal for the center of mass of the model to follow. The goal is given by a VelocityProfile instance (or subclass). And should be included in the ``goal_reward_params``; 'max_vel' - Tries to achieve the maximum possible velocity; None - Follows no goal(just tries to survive); goal_reward_params (dict, None): params needed for creation goal reward; obs_avg_window (int, 1): size of window used to average observations; act_avg_window (int, 1): size of window used to average actions. """ self.use_muscles = use_muscles self.goal_reward = goal_reward self.act_avg_window = act_avg_window self.obs_avg_window = obs_avg_window model_path = Path(__file__).resolve().parent.parent / "data" / "humanoid_gait" / "human7segment.xml" action_spec = ["right_hip_frontal", "right_hip_sagittal", "right_knee", "right_ankle", "left_hip_frontal", "left_hip_sagittal", "left_knee", "left_ankle", ] observation_spec = [("root", ObservationType.JOINT_POS), ("right_hip_frontal", ObservationType.JOINT_POS), ("right_hip_sagittal", ObservationType.JOINT_POS), ("right_knee", ObservationType.JOINT_POS), ("right_ankle", ObservationType.JOINT_POS), ("left_hip_frontal", ObservationType.JOINT_POS), ("left_hip_sagittal", ObservationType.JOINT_POS), ("left_knee", ObservationType.JOINT_POS), ("left_ankle", ObservationType.JOINT_POS), ("root", ObservationType.JOINT_VEL), ("right_hip_frontal", ObservationType.JOINT_VEL), ("right_hip_sagittal", ObservationType.JOINT_VEL), ("right_knee", ObservationType.JOINT_VEL), ("right_ankle", ObservationType.JOINT_VEL), ("left_hip_frontal", ObservationType.JOINT_VEL), ("left_hip_sagittal", ObservationType.JOINT_VEL), ("left_knee", ObservationType.JOINT_VEL), ("left_ankle", ObservationType.JOINT_VEL), ] additional_data_spec = [] collision_groups = [("floor", ["floor"]), ("left_foot", ["left_foot"]), ("right_foot", ["right_foot"]) ] super().__init__(model_path.as_posix(), action_spec, observation_spec, gamma=gamma, horizon=horizon, n_substeps=1, n_intermediate_steps=n_intermediate_steps, additional_data_spec=additional_data_spec, collision_groups=collision_groups) if use_muscles: self.external_actuator = MuscleSimulation(self.sim) self.info.action_space = spaces.Box( *self.external_actuator.get_action_space()) else: self.external_actuator = NoExternalSimulation() low, high = self.info.action_space.low.copy(),\ self.info.action_space.high.copy() self.norm_act_mean = (high + low) / 2.0 self.norm_act_delta = (high - low) / 2.0 self.info.action_space.low[:] = -1.0 self.info.action_space.high[:] = 1.0 if goal_reward_params is None: goal_reward_params = dict() if goal_reward == "trajectory": control_dt = self.sim.model.opt.timestep * self.n_intermediate_steps self.goal_reward = CompleteTrajectoryReward(self.sim, control_dt, **goal_reward_params) elif goal_reward == "vel_profile": self.goal_reward = VelocityProfileReward(self.sim, **goal_reward_params) elif goal_reward == "max_vel": self.goal_reward = MaxVelocityReward(self.sim, **goal_reward_params) elif goal_reward is None: self.goal_reward = NoGoalReward() else: raise NotImplementedError("The specified goal reward has not been" "implemented: ", goal_reward) if isinstance(self.goal_reward, HumanoidTrajectory): self.reward_weights = dict(live_reward=0.10, goal_reward=0.40, traj_vel_reward=0.50, move_cost=0.10, fall_cost=0.00) else: self.reward_weights = dict(live_reward=0.10, goal_reward=0.90, traj_vel_reward=0.00, move_cost=0.10, fall_cost=0.00) self.info.observation_space = spaces.Box(*self._get_observation_space()) self.mean_grf = RunningAveragedWindow(shape=(6,), window_size=n_intermediate_steps) self.mean_vel = RunningExpWeightedAverage(shape=(3,), alpha=0.005) self.mean_obs = RunningAveragedWindow( shape=self.info.observation_space.shape, window_size=obs_avg_window ) self.mean_act = RunningAveragedWindow( shape=self.info.action_space.shape, window_size=act_avg_window)
def __init__(self, physicsClientId=1, render=True): print('=====init======') self.showSimulation = False self.ll, self.ul, self.jr, self.rs = [], [], [], [] self._states = [0, 1, 2, 3] self.current_t = 0 self.state = 1 self._state = np.array([0]) self.waittimer = 10 self.boxId = None self.robotId = None self.tableId = None self.planeId = None self.rob_start_pos = [-0.5, 0, 1.] self.rob_start_orn = p.getQuaternionFromEuler([m.pi, 0, 0]) self.obj_start_pos = [-0.5, 0, 0.73175] self.obj_top = [-0.5, 0, 0.8384935] self.obj_start_orn = p.getQuaternionFromEuler([0, 0, m.pi / 2]) self.end_eff_idx = 6 self.robot_left_finger_idx = 7 self.robot_right_finger_idx = 8 self._use_IK = 0 self._control_eu_or_quat = 0 self.joint_action_space = 7 self._joint_name_to_ids = {} self._renders = render self.urdfroot = currentdir self._physics_client_id = physicsClientId self._p = p self.first_ep = True self.grasp_attemp_pos = [] #print("urdrfoot:",self.urdfroot) #self.seed() # TODO gamma = 1. horizon = 300 high = np.array([-0.25, 0.1, 0.91]) low = np.array([-0.75, -0.1, 0.91]) self._max_u = 0.37 self._min_u = 0.30 #observation_space = np.array([0, 0, 0]) #action_space = self.sample_action() observation_space = spaces.Box(low=low, high=high) action_space = spaces.Box(low=np.array([-self._max_u]), high=np.array([self._min_u])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) if self._renders: self._physics_client_id = p.connect(p.SHARED_MEMORY) if self.showSimulation: if self._physics_client_id < 0: self._physics_client_id = p.connect(p.GUI) else: p.connect(p.DIRECT) else: p.connect(p.DIRECT) # TODO # self.planeId = p.loadURDF(pybullet_data.getDataPath() + "/plane.urdf") # # # # # Place table # self.tableId = p.loadURDF(pybullet_data.getDataPath() + '/table/table.urdf') # # # # # Place robot # #self.robotId = p.loadURDF(self.urdfroot + "/robot/panda_visual_arm_grasper.urdf", # #self.rob_start_pos, self.rob_start_orn) # self.robotId = p.loadURDF(currentdir + "/environment-master/panda_robot_grasper/panda_visual_arm_grasper.udrf", # self.rob_start_pos, self.rob_start_orn) # # # # # Place ycb object # self.boxId = p.loadURDF(self.urdfroot + '/003_cracker_box/model_normalized.urdf', self.obj_start_pos, self.obj_start_orn) #p.resetDebugVisualizerCamera(1.5, 245, -56, [-0.5, 0, 0.61]) # Add user debug parameters, comment back for debug slider use """self.control_prismatic_joint1 = p.addUserDebugParameter('control_prismatic_joint1', -.5, .5, 0) # 0