def __init__(self, x_threshold=5, theta_threshold=0.314, noise=0.001): self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "cartpole.world")) self._agent = self._world.get_agent() logger.info("joint names: %s" % self._agent.get_joint_names()) self._x_threshold = x_threshold self._theta_threshold = theta_threshold high = np.array([ self._x_threshold * 2, np.finfo(np.float32).max, self._theta_threshold * 2, np.finfo(np.float32).max ]) self.noise = noise self.action_space = spaces.Box(-1, 1, shape=(1, ), dtype='float32') self.observation_space = spaces.Box(-high, high, dtype=np.float32) self._world.info()
def __init__(self, with_language=True, port=None): if port is None: port = 0 gazebo.initialize(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world")) self._agent = self._world.get_agent() assert self._agent is not None logger.info("joint names: %s" % self._agent.get_joint_names()) self._joint_names = self._agent.get_joint_names() self._teacher = teacher.Teacher(False) task_group = teacher.TaskGroup() task_group.add_task(GoalTask()) self._teacher.add_task_group(task_group) self._with_language = with_language # get observation dimension image = self._agent.get_camera_observation("camera") image = np.array(image, copy=False) if with_language: self._observation_space = gym.spaces.Dict( image=gym.spaces.Box(low=0, high=1, shape=image.shape, dtype=np.uint8), sentence=DiscreteSequence(256, 20)) self._action_space = gym.spaces.Dict( control=gym.spaces.Box(low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32), sentence=DiscreteSequence(256, 20)) else: self._observation_space = gym.spaces.Box(low=0, high=1, shape=image.shape, dtype=np.uint8) self._action_space = gym.spaces.Box(low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32)
def __init__(self, goal_name='beer', max_steps=100, reward_shaping=True, motion_loss=0.0000, use_internal_states_only=True, port=None): """ Args: goal_name (string): name of the object to lift off ground max_steps (int): episode will end when the agent exceeds the number of steps. reward_shaping (boolean): whether it adds distance based reward shaping. motion_loss (float): if not zero, it will add -motion_loss * || V_{all_joints} ||^2 in the reward when episode ends. use_internal_states_only (boolean): whether to only use internal states (joint positions and velocities) and goal positions, and not use camera sensors. port: Gazebo port, need to specify when run multiple environment in parallel """ if port is None: port = 0 gazebo.initialize(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "pr2.world")) self._agent = self._world.get_agent() #logger.info("joint names: %s" % self._agent.get_joint_names()) self._all_joints = self._agent.get_joint_names() # to avoid different parallel simulation has the same randomness random.seed(port) #self._world.info() # passive joints are joints that could move but have no actuators, are only indirectly # controled by the motion of active joints. # Though in the simulation, we could "control" through API, we chose to be more realistic. # from https://github.com/PR2/pr2_mechanism/blob/melodic-devel/pr2_mechanism_model/pr2.urdf passive_joints = set(["r_gripper_l_finger_joint", "r_gripper_r_finger_joint", "r_gripper_r_finger_tip_joint", "r_gripper_l_finger_tip_joint", "r_gripper_r_parallel_root_joint", "r_gripper_l_parallel_root_joint"]) # PR2 right arm has 7 DOF, gripper has 1 DoF, as specified on p18/p26 on PR2 manual # https://www.clearpathrobotics.com/wp-content/uploads/2014/08/pr2_manual_r321.pdf # we exclude rest of the joints unused_joints = set([ "r_gripper_motor_slider_joint", "r_gripper_motor_screw_joint", "r_gripper_r_screw_screw_joint", "r_gripper_l_screw_screw_joint", "r_gripper_r_parallel_tip_joint", "r_gripper_l_parallel_tip_joint"]) unused_joints = passive_joints.union(unused_joints) self._r_arm_joints = list( filter(lambda s: s.find('pr2::r_') != -1 and s.split("::")[-1] not in unused_joints, self._all_joints)) logger.info("joints in the right arm to control: %s" % self._r_arm_joints) joint_states = list(map(lambda s: self._agent.get_joint_state(s), self._r_arm_joints)) self._r_arm_joints_limits = list(map(lambda s: s.get_effort_limits()[0], joint_states)); logger.info('\n'.join(map(lambda s: str(s[0]) + ":" + str(s[1]), zip( self._r_arm_joints, self._r_arm_joints_limits)))) self._goal_name = goal_name self._goal = self._world.get_agent(self._goal_name) self._max_steps = max_steps self._steps_in_this_episode = 0 self._reward_shaping = reward_shaping self._resized_image_size = (128, 128) #(84, 84) self._use_internal_states_only = use_internal_states_only self._cum_reward = 0.0 self._motion_loss = motion_loss # a hack to work around gripper not open initially, might not need now. self._gripper_reward_dir = 1 self._gripper_upper_limit = 0.07 self._gripper_lower_limit = 0.01 # whether to move head cameras and gripper when episode starts. self._adjust_position_at_start = False obs = self._get_observation() self._prev_dist = self._get_finger_tip_distance() self._prev_gripper_pos = self._get_gripper_pos() if self._use_internal_states_only: self.observation_space = gym.spaces.Box( low=-np.inf, high=np.inf, shape=obs.shape, dtype=np.float32) else: self.observation_space = gym.spaces.Box( low=0, high=256.0, shape=obs.shape, dtype=np.float32) self.action_space = gym.spaces.Box( low=-1, high=1, shape=[len(self._r_arm_joints)], dtype=np.float32)
def __init__(self, with_language=False, image_with_internal_states=False, port=None, resized_image_size=None, data_format='channels_last'): """Create SimpleNavigation environment. Args: with_language (bool): whether to generate language for observation image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with the image. port (int): TCP/IP port for the simulation server resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) data_format (str): one of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ super(SimpleNavigation, self).__init__(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world")) self._agent = self._world.get_agent() self._rendering_cam_pose = "4 -4 3 0 0.4 2.3" assert self._agent is not None logging.debug("joint names: %s" % self._agent.get_joint_names()) self._all_joints = self._agent.get_joint_names() self._joint_names = list( filter(lambda s: s.find('wheel') != -1, self._all_joints)) self._teacher = teacher.Teacher(task_groups_exclusive=False) task_group = teacher.TaskGroup() task_group.add_task(GoalTask()) self._teacher.add_task_group(task_group) self._seq_length = 20 self._sentence_space = DiscreteSequence(self._teacher.vocab_size, self._seq_length) self._with_language = with_language self._image_with_internal_states = image_with_internal_states self._resized_image_size = resized_image_size assert data_format in ('channels_first', 'channels_last') self._data_format = data_format time.sleep(0.1) # Allow Gazebo threads to be fully ready self.reset() # Get observation dimension obs_sample = self._get_observation('hello') if self._with_language or self._image_with_internal_states: self._observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) else: self._observation_space = gym.spaces.Box( low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) control_space = gym.spaces.Box( low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32) if with_language: self._action_space = gym.spaces.Dict( control=control_space, sentence=self._sentence_space) else: self._action_space = control_space
def __init__(self, use_pid=False, obs_stack=True, sub_steps=50, port=None): """ Args: use_pid (bool): use pid or direct force to control port: Gazebo port, need to specify when run multiple environment in parallel obs_stack (bool): Use staked multi step observation if True sub_steps (int): take how many simulator substeps during one gym step """ super(ICubWalk, self).__init__(port=port) self._sub_steps = sub_steps self._obs_stack = obs_stack self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "icub.world")) self._agent = self._world.get_agent('icub') logging.debug(self._world.info()) self._agent_joints = [ 'icub::iCub::l_leg::l_hip_pitch', 'icub::iCub::l_leg::l_hip_roll', 'icub::iCub::l_leg::l_hip_yaw', 'icub::iCub::l_leg::l_knee', 'icub::iCub::l_leg::l_ankle_pitch', 'icub::iCub::l_leg::l_ankle_roll', 'icub::iCub::r_leg::r_hip_pitch', 'icub::iCub::r_leg::r_hip_roll', 'icub::iCub::r_leg::r_hip_yaw', 'icub::iCub::r_leg::r_knee', 'icub::iCub::r_leg::r_ankle_pitch', 'icub::iCub::r_leg::r_ankle_roll', 'icub::iCub::torso_yaw', 'icub::iCub::torso_roll', 'icub::iCub::torso_pitch', 'icub::iCub::l_shoulder_pitch', 'icub::iCub::l_shoulder_roll', 'icub::iCub::l_shoulder_yaw', 'icub::iCub::l_elbow', 'icub::iCub::neck_pitch', 'icub::iCub::neck_roll', 'icub::iCub::r_shoulder_pitch', 'icub::iCub::r_shoulder_roll', 'icub::iCub::r_shoulder_yaw', 'icub::iCub::r_elbow', ] joint_states = list( map(lambda s: self._agent.get_joint_state(s), self._agent_joints)) self._joints_limits = list( map(lambda s: s.get_effort_limits()[0], joint_states)) if use_pid: for joint_index in range(len(self._agent_joints)): self._agent.set_pid_controller( self._agent_joints[joint_index], 'velocity', p=0.02, d=0.00001, max_force=self._joints_limits[joint_index]) self._agent_control_range = 3.0 * np.pi else: self._agent_control_range = np.array(self._joints_limits) logging.info("joints to control: %s" % self._agent_joints) self.action_space = gym.spaces.Box( low=-1.0, high=1.0, shape=[len(self._agent_joints)], dtype=np.float32) self.reset() obs = self._get_observation() if self._obs_stack: obs = np.concatenate((obs, obs), axis=0) self.observation_space = gym.spaces.Box( low=-100, high=100, shape=obs.shape, dtype=np.float32)
# Copyright (c) 2019 Horizon Robotics. All Rights Reserved. import random import social_bot.pygazebo as bot import numpy as np import matplotlib.pyplot as plt import time import os enable_camera = True show_image = enable_camera & False bot.initialize() if enable_camera: world = bot.new_world_from_file( "../python/social_bot/worlds/turtlebot_camera.world") else: world = bot.new_world_from_file( "../python/social_bot/worlds/turtlebot.world") world.info() agent = world.get_agent() print(agent.get_joint_names()) steps = 0 t0 = time.time() interval = 100 fig = None for i in range(10000000): steps += 1 agent.take_action({
def __init__(self, with_language=False, use_image_obs=False, agent_type='pioneer2dx_noplugin', goal_name='table', max_steps=160, port=None): """ Args: with_language (bool): the observation will be a dict with an extra sentence use_image_obs (bool): use image, or use internal states as observation poses in internal states observation are in world coordinate agent_type (string): select the agent robot, supporting pr2_differential, pioneer2dx_noplugin, turtlebot, and irobot create for now port: Gazebo port, need to specify when run multiple environment in parallel """ if port is None: port = 0 gazebo.initialize(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "grocery_ground.world")) self._object_types = [ 'coke_can', 'cube_20k', 'car_wheel', 'plastic_cup', 'beer', 'hammer' ] self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5))) self._pos_list.remove((0, 0)) self._world.info() self._world.insertModelFile('model://' + agent_type) self._world.step(20) time.sleep(0.1) # Avoid 'px!=0' error self._random_insert_objects() self._world.model_list_info() self._world.info() # Specify joints and sensors for the robots control_joints = { 'pr2_differential': [ 'pr2_differential::fl_caster_r_wheel_joint', 'pr2_differential::fr_caster_l_wheel_joint' ], 'pioneer2dx_noplugin': [ 'pioneer2dx_noplugin::left_wheel_hinge', 'pioneer2dx_noplugin::right_wheel_hinge' ], 'turtlebot': [ 'turtlebot::create::left_wheel', 'turtlebot::create::right_wheel' ], 'create': ['create::left_wheel', 'create::right_wheel'], } control_force = { 'pr2_differential': 20, 'pioneer2dx_noplugin': 0.5, 'turtlebot': 0.5, 'create': 0.5, } camera_sensor = { 'pr2_differential': 'default::pr2_differential::head_tilt_link::head_mount_prosilica_link_sensor', 'pioneer2dx_noplugin': ' ', 'turtlebot': 'default::turtlebot::kinect::link::camera', 'create': ' ', } self._agent = self._world.get_agent(agent_type) self._agent_joints = control_joints[agent_type] self._agent_control_force = control_force[agent_type] self._agent_camera = camera_sensor[agent_type] self._goal_name = goal_name self._goal = self._world.get_model(goal_name) logger.info("joints to control: %s" % self._agent_joints) self._teacher = teacher.Teacher(False) task_group = teacher.TaskGroup() self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps, goal_name=self._goal_name, success_distance_thresh=0.5, fail_distance_thresh=3.0, reward_shaping=True, random_range=10.0) task_group.add_task(self._teacher_task) self._teacher.add_task_group(task_group) self._with_language = with_language self._use_image_obs = use_image_obs obs = self.reset() if self._use_image_obs: obs_data_space = gym.spaces.Box(low=0, high=255, shape=obs.shape, dtype=np.uint8) else: obs_data_space = gym.spaces.Box(low=-50, high=50, shape=obs.shape, dtype=np.float32) control_space = gym.spaces.Box(low=-self._agent_control_force, high=self._agent_control_force, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.observation_space = gym.spaces.Dict(data=obs_data_space, sentence=DiscreteSequence( 128, 24)) self._action_space = gym.spaces.Dict(control=control_space, sentence=DiscreteSequence( 128, 24)) else: self.observation_space = obs_data_space self.action_space = control_space