def __init__(self,
             world_file=None,
             world_string=None,
             world_config=None,
             port=None,
             quiet=False):
    """
    Args:
        world_file (str|None): world file path
        world_string (str|None): world xml string content
        world_config (list[str]): list of str config `key=value`,
            see `_xpath_modify_xml` for details
        port (int): Gazebo port
        quiet (bool): set quiet output
    """
    if port is None:
        port = 0
    self._port = port
    # This avoids different parallel simulations sharing the same randomness.
    random.seed(port)
    self._rendering_process = None
    self._rendering_camera = None
    # The default camera pose for rendering rgb_array; can be overridden.
    self._rendering_cam_pose = "10 -10 6 0 0.4 2.4"
    gazebo.initialize(port=port, quiet=quiet)
    if world_file:
        world_file_abs_path = os.path.join(social_bot.get_world_dir(),
                                           world_file)
        world_string = gazebo.world_sdf(world_file_abs_path)
    if world_config:
        world_string = _xpath_modify_xml(world_string, world_config)
    self._world = gazebo.new_world_from_string(world_string)
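# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to an earlier variant of the Gazebo base
# environment class, called `GazeboEnvBase` here; the class name, import path,
# world file and port are assumptions for the example only.
def _example_base_env_usage():
    from social_bot.envs.gazebo_base import GazeboEnvBase  # assumed path
    # Override an XML value in the loaded world via a `key=value` config,
    # as accepted by the `world_config` argument.
    env = GazeboEnvBase(
        world_file="pioneer2dx_camera.world",
        world_config=["//physics//max_step_size=0.002"],
        port=11345)
    return env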
def __init__(self, x_threshold=5, theta_threshold=0.314, noise=0.001):
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "cartpole.world"))
    self._agent = self._world.get_agent()
    logger.info("joint names: %s" % self._agent.get_joint_names())
    self._x_threshold = x_threshold
    self._theta_threshold = theta_threshold
    high = np.array([
        self._x_threshold * 2,
        np.finfo(np.float32).max, self._theta_threshold * 2,
        np.finfo(np.float32).max
    ])
    self.noise = noise
    self.action_space = spaces.Box(-1, 1, shape=(1, ), dtype='float32')
    self.observation_space = spaces.Box(-high, high, dtype=np.float32)
    self._world.info()
def __init__(self, with_language=True, port=None):
    if port is None:
        port = 0
    gazebo.initialize(port=port)
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world"))
    self._agent = self._world.get_agent()
    assert self._agent is not None
    logger.info("joint names: %s" % self._agent.get_joint_names())
    self._joint_names = self._agent.get_joint_names()
    self._teacher = teacher.Teacher(False)
    task_group = teacher.TaskGroup()
    task_group.add_task(GoalTask())
    self._teacher.add_task_group(task_group)
    self._with_language = with_language

    # Get the observation dimension.
    image = self._agent.get_camera_observation("camera")
    image = np.array(image, copy=False)
    if with_language:
        self._observation_space = gym.spaces.Dict(
            image=gym.spaces.Box(
                low=0, high=1, shape=image.shape, dtype=np.uint8),
            sentence=DiscreteSequence(256, 20))
        self._action_space = gym.spaces.Dict(
            control=gym.spaces.Box(
                low=-0.2,
                high=0.2,
                shape=[len(self._joint_names)],
                dtype=np.float32),
            sentence=DiscreteSequence(256, 20))
    else:
        self._observation_space = gym.spaces.Box(
            low=0, high=1, shape=image.shape, dtype=np.uint8)
        self._action_space = gym.spaces.Box(
            low=-0.2,
            high=0.2,
            shape=[len(self._joint_names)],
            dtype=np.float32)
def __init__(self,
             world_file=None,
             world_string=None,
             world_config=None,
             sim_time_precision=0.001,
             port=None,
             quiet=False):
    """
    Args:
        world_file (str|None): world file path
        world_string (str|None): world xml string content
        world_config (list[str]): list of str config `key=value`,
            see `_modify_world_xml` for details
        sim_time_precision (float): the time precision of the simulator
        port (int): Gazebo port
        quiet (bool): set quiet output
    """
    os.environ["GAZEBO_MODEL_DATABASE_URI"] = ""
    if port is None:
        port = 0
    self._port = port
    # This avoids different parallel simulations having the same randomness.
    # When calling from alf, alf.environments.utils.create_environment calls
    # env.seed() afterwards, which is the real seed being used, not this one.
    random.seed(port)
    self._rendering_process = None
    self._rendering_camera = None
    # The default camera pose for rendering rgb_array; can be overridden.
    self._rendering_cam_pose = "10 -10 6 0 0.4 2.4"
    gazebo.initialize(port=port, quiet=quiet)
    if world_file:
        world_file_abs_path = os.path.join(social_bot.get_world_dir(),
                                           world_file)
        world_string = gazebo.world_sdf(world_file_abs_path)
    if world_config:
        world_string = _modify_world_xml(world_string, world_config)
    self._world = gazebo.new_world_from_string(world_string)
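# Usage sketch (illustrative, not part of the original source). It shows how a
# subclass could pass `key=value` world overrides to this constructor, the
# same pattern the PlayGround environment below uses. The base class name
# `GazeboEnvBase`, its import path, and all values are assumptions.
def _example_sim_time_override():
    from social_bot.envs.gazebo_base import GazeboEnvBase  # assumed path

    class _ExampleSimTimeEnv(GazeboEnvBase):
        def __init__(self, port=None):
            # Set the simulation step to 1 ms and leave the update rate
            # unlimited; both values are illustrative.
            sim_time_cfg = [
                "//physics//max_step_size=0.001",
                "//physics//real_time_update_rate=0",
            ]
            super().__init__(
                world_file="empty.world",
                world_config=sim_time_cfg,
                sim_time_precision=0.001,
                port=port)

    return _ExampleSimTimeEnv(port=11346)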
def __init__(self,
             with_language=False,
             use_image_observation=False,
             image_with_internal_states=False,
             task_name='goal',
             agent_type='pioneer2dx_noplugin',
             world_time_precision=None,
             step_time=0.1,
             port=None,
             action_cost=0.0,
             resized_image_size=(64, 64),
             image_data_format='channels_last',
             vocab_sequence_length=20):
    """
    Args:
        with_language (bool): The observation will be a dict with an extra
            sentence
        use_image_observation (bool): Use image, or use low-dimensional
            states as observation. Poses in the states observation are in
            world coordinates.
        image_with_internal_states (bool): If true, the agent's own internal
            states, i.e., joint positions and velocities, will be available
            together with the image. Only effective if use_image_observation
            is true.
        task_name (string): the teacher task. There are 2 tasks for now:
            a simple goal task: 'goal'
            a simple ball-kicking task: 'kickball'
        agent_type (string): Select the agent robot, supporting
            pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and
            icub_with_hands for now. Note that 'agent_type' should be the
            same string as the model's name.
        world_time_precision (float|None): if not None, the time precision
            of the simulator, i.e., the max_step_size defined in the agent
            cfg file, will be overridden, e.g., '0.002' for a 2 ms sim step.
        step_time (float): the period of one step of the environment.
            step_time / world_time_precision is how many simulator substeps
            are taken during one environment step. For some complex agents,
            e.g., iCub, using a step_time of 0.05 is better.
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
        action_cost (float): Add an extra action cost to the reward, which
            helps to train an energy/force-efficient policy or reduce
            unnecessary movements.
        resized_image_size (None|tuple): If None, use the original image
            size from the camera. Otherwise, the original image will be
            resized to (width, height).
        image_data_format (str): One of `channels_last` or `channels_first`.
            The ordering of the dimensions in the images. `channels_last`
            corresponds to images with shape `(height, width, channels)`
            while `channels_first` corresponds to images with shape
            `(channels, height, width)`.
        vocab_sequence_length (int): the length of the encoded sentence
            sequence
    """
    agent_cfgs = json.load(
        open(
            os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
            'r'))
    agent_cfg = agent_cfgs[agent_type]
    wf_path = os.path.join(social_bot.get_world_dir(),
                           "grocery_ground.world")
    with open(wf_path, 'r+') as world_file:
        world_string = self._insert_agent_to_world_file(
            world_file, agent_type)
    if world_time_precision is None:
        world_time_precision = agent_cfg['max_sim_step_time']
    sub_steps = int(round(step_time / world_time_precision))
    sim_time_cfg = [
        "//physics//max_step_size=" + str(world_time_precision)
    ]
    super(GroceryGround, self).__init__(
        world_string=world_string, world_config=sim_time_cfg, port=port)
    self._teacher = teacher.Teacher(task_groups_exclusive=False)
    if task_name is None or task_name == 'goal':
        main_task = GroceryGroundGoalTask()
    elif task_name == 'kickball':
        main_task = GroceryGroundKickBallTask(step_time=step_time)
    else:
        logging.debug("unsupported task name: " + task_name)
    main_task_group = TaskGroup()
    main_task_group.add_task(main_task)
    self._teacher.add_task_group(main_task_group)
    if agent_type.find('icub') != -1:
        icub_aux_task_group = TaskGroup()
        icub_standing_task = ICubAuxiliaryTask(step_time=step_time)
        icub_aux_task_group.add_task(icub_standing_task)
        self._teacher.add_task_group(icub_aux_task_group)
    self._teacher._build_vocab_from_tasks()
    self._seq_length = vocab_sequence_length
    if self._teacher.vocab_size:
        # Use MultiDiscrete instead of DiscreteSequence so that gym's
        # _spec_from_gym_space won't complain.
        self._sentence_space = gym.spaces.MultiDiscrete(
            [self._teacher.vocab_size] * self._seq_length)
    self._sub_steps = sub_steps
    self._world.step(20)
    self._agent = self._world.get_agent()
    for task_group in self._teacher.get_task_groups():
        for task in task_group.get_tasks():
            task.setup(self._world, agent_type)
    logging.debug(self._world.info())
    self._agent_joints = agent_cfg['control_joints']
    joint_states = list(
        map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
    self._joints_limits = list(
        map(lambda s: s.get_effort_limits()[0], joint_states))
    if agent_cfg['use_pid']:
        for joint_index in range(len(self._agent_joints)):
            self._agent.set_pid_controller(
                self._agent_joints[joint_index],
                'velocity',
                p=0.02,
                d=0.00001,
                max_force=self._joints_limits[joint_index])
        self._agent_control_range = agent_cfg['pid_control_limit']
    else:
        self._agent_control_range = np.array(self._joints_limits)
    self._agent_camera = agent_cfg['camera_sensor']
    logging.debug("joints to control: %s" % self._agent_joints)
    self._action_cost = action_cost
    self._with_language = with_language
    self._use_image_obs = use_image_observation
    self._image_with_internal_states = (
        self._use_image_obs and image_with_internal_states)
    assert image_data_format in ('channels_first', 'channels_last')
    self._data_format = image_data_format
    self._resized_image_size = resized_image_size
    self._substep_time = world_time_precision
    self._control_space = gym.spaces.Box(
        low=-1.0,
        high=1.0,
        shape=[len(self._agent_joints)],
        dtype=np.float32)
    if self._with_language:
        self.action_space = gym.spaces.Dict(
            control=self._control_space, sentence=self._sentence_space)
    else:
        self.action_space = self._control_space
    self.reset()
    obs_sample = self._get_observation_with_sentence("hello")
    if self._with_language or self._image_with_internal_states:
        self.observation_space = self._construct_dict_space(
            obs_sample, self._teacher.vocab_size)
    elif self._use_image_obs:
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)
    else:
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=obs_sample.shape,
            dtype=np.float32)
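# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to a gym-style class named `GroceryGround` in
# social_bot.envs; the import path and argument values are assumptions.
def _example_grocery_ground_usage():
    from social_bot.envs import GroceryGround  # assumed import path
    env = GroceryGround(
        task_name='kickball',
        agent_type='pioneer2dx_noplugin',
        use_image_observation=False,
        step_time=0.1)
    obs = env.reset()
    for _ in range(100):
        obs, reward, done, _ = env.step(env.action_space.sample())
        if done:
            obs = env.reset()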
def __init__(self,
             goal_name='beer',
             max_steps=100,
             reward_shaping=True,
             motion_loss=0.0000,
             use_internal_states_only=True,
             port=None):
    """
    Args:
        goal_name (string): name of the object to lift off the ground
        max_steps (int): episode will end when the agent exceeds this number
            of steps.
        reward_shaping (bool): whether to add distance-based reward shaping.
        motion_loss (float): if not zero, -motion_loss * || V_{all_joints} ||^2
            is added to the reward when the episode ends.
        use_internal_states_only (bool): whether to only use internal states
            (joint positions and velocities) and goal positions, and not use
            camera sensors.
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
    """
    if port is None:
        port = 0
    gazebo.initialize(port=port)
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "pr2.world"))
    self._agent = self._world.get_agent()
    #logger.info("joint names: %s" % self._agent.get_joint_names())
    self._all_joints = self._agent.get_joint_names()

    # This avoids different parallel simulations having the same randomness.
    random.seed(port)
    #self._world.info()

    # Passive joints are joints that can move but have no actuators; they are
    # only indirectly controlled by the motion of the active joints.
    # Though in simulation we could "control" them through the API, we choose
    # to be more realistic.
    # From https://github.com/PR2/pr2_mechanism/blob/melodic-devel/pr2_mechanism_model/pr2.urdf
    passive_joints = set([
        "r_gripper_l_finger_joint", "r_gripper_r_finger_joint",
        "r_gripper_r_finger_tip_joint", "r_gripper_l_finger_tip_joint",
        "r_gripper_r_parallel_root_joint", "r_gripper_l_parallel_root_joint"
    ])

    # The PR2 right arm has 7 DoF and the gripper has 1 DoF, as specified on
    # p18/p26 of the PR2 manual:
    # https://www.clearpathrobotics.com/wp-content/uploads/2014/08/pr2_manual_r321.pdf
    # We exclude the rest of the joints.
    unused_joints = set([
        "r_gripper_motor_slider_joint", "r_gripper_motor_screw_joint",
        "r_gripper_r_screw_screw_joint", "r_gripper_l_screw_screw_joint",
        "r_gripper_r_parallel_tip_joint", "r_gripper_l_parallel_tip_joint"
    ])
    unused_joints = passive_joints.union(unused_joints)

    self._r_arm_joints = list(
        filter(
            lambda s: s.find('pr2::r_') != -1 and
            s.split("::")[-1] not in unused_joints, self._all_joints))
    logger.info("joints in the right arm to control: %s" %
                self._r_arm_joints)
    joint_states = list(
        map(lambda s: self._agent.get_joint_state(s), self._r_arm_joints))
    self._r_arm_joints_limits = list(
        map(lambda s: s.get_effort_limits()[0], joint_states))
    logger.info('\n'.join(
        map(lambda s: str(s[0]) + ":" + str(s[1]),
            zip(self._r_arm_joints, self._r_arm_joints_limits))))

    self._goal_name = goal_name
    self._goal = self._world.get_agent(self._goal_name)
    self._max_steps = max_steps
    self._steps_in_this_episode = 0
    self._reward_shaping = reward_shaping
    self._resized_image_size = (128, 128)  #(84, 84)
    self._use_internal_states_only = use_internal_states_only
    self._cum_reward = 0.0
    self._motion_loss = motion_loss

    # A hack to work around the gripper not opening initially; might not be
    # needed now.
    self._gripper_reward_dir = 1
    self._gripper_upper_limit = 0.07
    self._gripper_lower_limit = 0.01

    # Whether to move the head camera and gripper when the episode starts.
    self._adjust_position_at_start = False
    obs = self._get_observation()
    self._prev_dist = self._get_finger_tip_distance()
    self._prev_gripper_pos = self._get_gripper_pos()

    if self._use_internal_states_only:
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.shape, dtype=np.float32)
    else:
        self.observation_space = gym.spaces.Box(
            low=0, high=256.0, shape=obs.shape, dtype=np.float32)
    self.action_space = gym.spaces.Box(
        low=-1, high=1, shape=[len(self._r_arm_joints)], dtype=np.float32)
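# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to a gym-style class named `Pr2Gripper`; the
# import path and values are assumptions. Actions are normalized joint
# commands in [-1, 1] for the controllable right-arm joints.
def _example_pr2_gripper_usage():
    import numpy as np
    from social_bot.envs import Pr2Gripper  # assumed import path
    env = Pr2Gripper(goal_name='beer', max_steps=100, reward_shaping=True)
    obs = env.reset()
    done = False
    while not done:
        # A zero action keeps the arm still; a trained policy would produce
        # the command instead.
        action = np.zeros(env.action_space.shape, dtype=np.float32)
        obs, reward, done, _ = env.step(action)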
def __init__(self,
             with_language=False,
             image_with_internal_states=False,
             port=None,
             resized_image_size=None,
             data_format='channels_last'):
    """Create a SimpleNavigation environment.

    Args:
        with_language (bool): whether to generate language for observation
        image_with_internal_states (bool): If true, the agent's own internal
            states, i.e., joint positions and velocities, will be available
            together with the image.
        port (int): TCP/IP port for the simulation server
        resized_image_size (None|tuple): If None, use the original image
            size from the camera. Otherwise, the original image will be
            resized to (width, height).
        data_format (str): one of `channels_last` or `channels_first`.
            The ordering of the dimensions in the images. `channels_last`
            corresponds to images with shape `(height, width, channels)`
            while `channels_first` corresponds to images with shape
            `(channels, height, width)`.
    """
    super(SimpleNavigation, self).__init__(port=port)
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world"))
    self._agent = self._world.get_agent()
    self._rendering_cam_pose = "4 -4 3 0 0.4 2.3"
    assert self._agent is not None
    logging.debug("joint names: %s" % self._agent.get_joint_names())
    self._all_joints = self._agent.get_joint_names()
    self._joint_names = list(
        filter(lambda s: s.find('wheel') != -1, self._all_joints))
    self._teacher = teacher.Teacher(task_groups_exclusive=False)
    task_group = teacher.TaskGroup()
    task_group.add_task(GoalTask())
    self._teacher.add_task_group(task_group)
    self._seq_length = 20
    self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                            self._seq_length)
    self._with_language = with_language
    self._image_with_internal_states = image_with_internal_states
    self._resized_image_size = resized_image_size
    assert data_format in ('channels_first', 'channels_last')
    self._data_format = data_format
    time.sleep(0.1)  # Allow Gazebo threads to be fully ready
    self.reset()

    # Get the observation dimension.
    obs_sample = self._get_observation('hello')
    if self._with_language or self._image_with_internal_states:
        self._observation_space = self._construct_dict_space(
            obs_sample, self._teacher.vocab_size)
    else:
        self._observation_space = gym.spaces.Box(
            low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)
    control_space = gym.spaces.Box(
        low=-0.2,
        high=0.2,
        shape=[len(self._joint_names)],
        dtype=np.float32)
    if with_language:
        self._action_space = gym.spaces.Dict(
            control=control_space, sentence=self._sentence_space)
    else:
        self._action_space = control_space
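# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to a class named `SimpleNavigation` in
# social_bot.envs; the import path and values are assumptions.
def _example_simple_navigation_usage():
    from social_bot.envs import SimpleNavigation  # assumed import path
    # With with_language=True the observation and action spaces become dicts
    # that carry a 'sentence' entry next to the image / wheel controls.
    env = SimpleNavigation(
        with_language=False,
        resized_image_size=(84, 84),
        data_format='channels_last')
    obs = env.reset()  # camera image resized to (84, 84)
    return obs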
def __init__(self, use_pid=False, obs_stack=True, sub_steps=50, port=None):
    """
    Args:
        use_pid (bool): use pid or direct force to control
        obs_stack (bool): use stacked multi-step observation if True
        sub_steps (int): how many simulator substeps to take during one gym
            step
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
    """
    super(ICubWalk, self).__init__(port=port)
    self._sub_steps = sub_steps
    self._obs_stack = obs_stack
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "icub.world"))
    self._agent = self._world.get_agent('icub')
    logging.debug(self._world.info())
    self._agent_joints = [
        'icub::iCub::l_leg::l_hip_pitch',
        'icub::iCub::l_leg::l_hip_roll',
        'icub::iCub::l_leg::l_hip_yaw',
        'icub::iCub::l_leg::l_knee',
        'icub::iCub::l_leg::l_ankle_pitch',
        'icub::iCub::l_leg::l_ankle_roll',
        'icub::iCub::r_leg::r_hip_pitch',
        'icub::iCub::r_leg::r_hip_roll',
        'icub::iCub::r_leg::r_hip_yaw',
        'icub::iCub::r_leg::r_knee',
        'icub::iCub::r_leg::r_ankle_pitch',
        'icub::iCub::r_leg::r_ankle_roll',
        'icub::iCub::torso_yaw',
        'icub::iCub::torso_roll',
        'icub::iCub::torso_pitch',
        'icub::iCub::l_shoulder_pitch',
        'icub::iCub::l_shoulder_roll',
        'icub::iCub::l_shoulder_yaw',
        'icub::iCub::l_elbow',
        'icub::iCub::neck_pitch',
        'icub::iCub::neck_roll',
        'icub::iCub::r_shoulder_pitch',
        'icub::iCub::r_shoulder_roll',
        'icub::iCub::r_shoulder_yaw',
        'icub::iCub::r_elbow',
    ]
    joint_states = list(
        map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
    self._joints_limits = list(
        map(lambda s: s.get_effort_limits()[0], joint_states))
    if use_pid:
        for joint_index in range(len(self._agent_joints)):
            self._agent.set_pid_controller(
                self._agent_joints[joint_index],
                'velocity',
                p=0.02,
                d=0.00001,
                max_force=self._joints_limits[joint_index])
        self._agent_control_range = 3.0 * np.pi
    else:
        self._agent_control_range = np.array(self._joints_limits)
    logging.info("joints to control: %s" % self._agent_joints)
    self.action_space = gym.spaces.Box(
        low=-1.0,
        high=1.0,
        shape=[len(self._agent_joints)],
        dtype=np.float32)
    self.reset()
    obs = self._get_observation()
    if self._obs_stack:
        obs = np.concatenate((obs, obs), axis=0)
    self.observation_space = gym.spaces.Box(
        low=-100, high=100, shape=obs.shape, dtype=np.float32)
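# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to a class named `ICubWalk` in social_bot.envs;
# names and values are assumptions. With obs_stack=True the observation is the
# concatenation of two consecutive per-step observations.
def _example_icub_walk_usage():
    from social_bot.envs import ICubWalk  # assumed import path
    env = ICubWalk(use_pid=True, obs_stack=True, sub_steps=50)
    obs = env.reset()
    for _ in range(200):
        obs, reward, done, _ = env.step(env.action_space.sample())
        if done:
            obs = env.reset()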
def __init__(self,
             agent_type='pioneer2dx_noplugin',
             world_name="play_ground.world",
             tasks=[GoalTask],
             with_language=False,
             with_agent_language=False,
             use_image_observation=False,
             image_with_internal_states=False,
             max_steps=200,
             world_time_precision=None,
             step_time=0.1,
             real_time_update_rate=0,
             port=None,
             action_cost=0.0,
             resized_image_size=(64, 64),
             vocab_sequence_length=20,
             action_wrapper=None):
    """
    Args:
        agent_type (string): Select the agent robot, supporting
            pr2_noplugin, pioneer2dx_noplugin, turtlebot, youbot_noplugin
            and icub_with_hands for now. Note that 'agent_type' should be
            exactly the same string as the model's name at the beginning of
            the model's sdf file.
        world_name (string): Select the world file, e.g., empty.world,
            play_ground.world, grocery_ground.world
        tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask
        with_language (bool): The observation will be a dict with an extra
            sentence
        with_agent_language (bool): Include the agent's sentence in the
            action space.
        use_image_observation (bool): Use image, or use low-dimensional
            states as observation. Poses in the states observation are in
            world coordinates.
        image_with_internal_states (bool): If true, the agent's own internal
            states, i.e., joint positions and velocities, will be available
            together with the image. Only effective if use_image_observation
            is true.
        max_steps (int): the episode will end after this many steps; passed
            to the tasks.
        world_time_precision (float|None): this parameter depends on the
            agent. If not None, the default time precision of the simulator,
            i.e., the max_step_size defined in the agent cfg file, will be
            overridden. Note that pr2 and iCub require a
            max_step_size <= 0.001, otherwise a successful policy cannot be
            trained.
        step_time (float): the period of one step() call of the environment
            in simulation. step_time is rounded to multiples of
            world_time_precision, and step_time / world_time_precision is
            how many simulator substeps are taken during one environment
            step. For tasks that need a higher control frequency (such as
            tasks that require walking on two legs), a smaller step_time
            like 0.05 is better. Experiments show that iCub cannot learn how
            to walk with a 0.1 step_time.
        real_time_update_rate (int): max update rate per second. There is no
            limit if this is set to 0, the default in the world file. If 1:1
            real time is preferred (like when playing or recording video),
            it should be set to 1.0 / world_time_precision.
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
        action_cost (float): Add an extra action cost to the reward, which
            helps to train an energy/force-efficient policy or reduce
            unnecessary movements.
        resized_image_size (None|tuple): If None, use the original image
            size from the camera. Otherwise, the original image will be
            resized to (width, height).
        vocab_sequence_length (int): the length of the encoded sequence
        action_wrapper (None|class): Sometimes the primitive joints are not
            wanted, e.g., they have redundant dimensions or offsets. If not
            None, this is used to transform the agent actions. See
            ActionWrapper in gazebo_agent.py for an example.
    """
    self._action_cost = action_cost
    self._with_language = with_language
    self._seq_length = vocab_sequence_length
    self._with_agent_language = with_language and with_agent_language
    self._use_image_obs = use_image_observation
    self._image_with_internal_states = (
        self._use_image_obs and image_with_internal_states)

    # Load agent config and world file
    with open(
            os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
            'r') as cfg_file:
        agent_cfgs = json.load(cfg_file)
    agent_cfg = agent_cfgs[agent_type]
    wd_path = os.path.join(social_bot.get_world_dir(), world_name)
    with open(wd_path, 'r+') as world_file:
        world_string = self._insert_agent_to_world_file(
            world_file, agent_type)
    if world_time_precision is None:
        world_time_precision = agent_cfg['max_sim_step_time']
    self._sub_steps = int(round(step_time / world_time_precision))
    self._step_time = world_time_precision * self._sub_steps
    sim_time_cfg = [
        "//physics//max_step_size=" + str(world_time_precision),
        "//physics//real_time_update_rate=" + str(real_time_update_rate)
    ]
    super().__init__(
        world_string=world_string, world_config=sim_time_cfg, port=port)
    logging.debug(self._world.info())

    # Setup agent
    self._agent = GazeboAgent(
        world=self._world,
        agent_type=agent_type,
        config=agent_cfg,
        with_language=with_language,
        with_agent_language=with_agent_language,
        vocab_sequence_length=self._seq_length,
        use_image_observation=use_image_observation,
        resized_image_size=resized_image_size,
        image_with_internal_states=image_with_internal_states,
        action_wrapper=action_wrapper)

    # Setup teacher and tasks
    self._teacher = teacher.Teacher(task_groups_exclusive=False)
    self._has_goal_task = False
    for task in tasks:
        if task == GoalTask:
            self._has_goal_task = True
        task_group = TaskGroup()
        task_group.add_task(task(env=self, max_steps=max_steps))
        self._teacher.add_task_group(task_group)
    self._teacher._build_vocab_from_tasks()

    # Setup action space and observation space
    if self._teacher.vocab_size:
        sentence_space = gym.spaces.MultiDiscrete(
            [self._teacher.vocab_size] * self._seq_length)
        self._agent.set_sentence_space(sentence_space)
    self._control_space = self._agent.get_control_space()
    self.action_space = self._agent.get_action_space()
    self.reset()
    self.observation_space = self._agent.get_observation_space(self._teacher)
def __init__(self,
             with_language=False,
             use_image_observation=False,
             image_with_internal_states=False,
             random_goal=False,
             agent_type='pioneer2dx_noplugin',
             max_steps=200,
             port=None,
             resized_image_size=(64, 64),
             data_format='channels_last'):
    """
    Args:
        with_language (bool): The observation will be a dict with an extra
            sentence
        use_image_observation (bool): Use image, or use low-dimensional
            states as observation. Poses in the states observation are in
            world coordinates.
        image_with_internal_states (bool): If true, the agent's own internal
            states, i.e., joint positions and velocities, will be available
            together with the image. Only effective if use_image_observation
            is true.
        random_goal (bool): If true, the teacher will randomly select a goal
            from the object list each episode.
        agent_type (string): Select the agent robot, supporting
            pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and
            icub_with_hands for now. Note that 'agent_type' should be the
            same string as the model's name.
        max_steps (int): episode will end when the agent exceeds this number
            of steps.
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
        resized_image_size (None|tuple): If None, use the original image
            size from the camera. Otherwise, the original image will be
            resized to (width, height).
        data_format (str): One of `channels_last` or `channels_first`.
            The ordering of the dimensions in the images. `channels_last`
            corresponds to images with shape `(height, width, channels)`
            while `channels_first` corresponds to images with shape
            `(channels, height, width)`.
    """
    super(GroceryGround, self).__init__(port=port)
    self._teacher = teacher.Teacher(task_groups_exclusive=False)
    task_group = teacher.TaskGroup()
    self._teacher_task = GroceryGroundGoalTask(
        max_steps=max_steps,
        success_distance_thresh=0.5,
        fail_distance_thresh=3.0,
        reward_shaping=True,
        random_goal=random_goal,
        random_range=10.0)
    task_group.add_task(self._teacher_task)
    self._teacher.add_task_group(task_group)
    self._teacher.build_vocab_from_tasks()
    self._seq_length = 20
    self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                            self._seq_length)
    self._object_list = self._teacher_task.get_object_list()
    self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
    self._pos_list.remove((0, 0))
    wf_path = os.path.join(social_bot.get_world_dir(),
                           "grocery_ground.world")
    with open(wf_path, 'r+') as world_file:
        world_string = self._insert_agent_to_world_file(
            world_file, agent_type)
    self._world = gazebo.new_world_from_string(world_string)
    self._world.step(20)
    self._insert_objects(self._object_list)
    self._agent = self._world.get_agent()
    logging.debug(self._world.info())
    agent_cfgs = json.load(
        open(
            os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
            'r'))
    agent_cfg = agent_cfgs[agent_type]
    self._agent_joints = agent_cfg['control_joints']
    joint_states = list(
        map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
    self._joints_limits = list(
        map(lambda s: s.get_effort_limits()[0], joint_states))
    if agent_cfg['use_pid']:
        for joint_index in range(len(self._agent_joints)):
            self._agent.set_pid_controller(
                self._agent_joints[joint_index],
                'velocity',
                p=0.02,
                d=0.00001,
                max_force=self._joints_limits[joint_index])
        self._agent_control_range = agent_cfg['pid_control_limit']
    else:
        self._agent_control_range = np.array(self._joints_limits)
    self._agent_camera = agent_cfg['camera_sensor']
    logging.debug("joints to control: %s" % self._agent_joints)
    self._with_language = with_language
    self._use_image_obs = use_image_observation
    self._image_with_internal_states = (
        self._use_image_obs and image_with_internal_states)
    assert data_format in ('channels_first', 'channels_last')
    self._data_format = data_format
    self._resized_image_size = resized_image_size
    self._random_goal = random_goal
    self.reset()
    obs_sample = self._get_observation("hello")
    if self._with_language or self._image_with_internal_states:
        self.observation_space = self._construct_dict_space(
            obs_sample, self._teacher.vocab_size)
    elif self._use_image_obs:
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)
    else:
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=obs_sample.shape,
            dtype=np.float32)
    self._control_space = gym.spaces.Box(
        low=-1.0,
        high=1.0,
        shape=[len(self._agent_joints)],
        dtype=np.float32)
    if self._with_language:
        self.action_space = gym.spaces.Dict(
            control=self._control_space, sentence=self._sentence_space)
    else:
        self.action_space = self._control_space
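# Usage sketch (illustrative, not part of the original source). It assumes the
# constructor above belongs to an earlier `GroceryGround` variant that picks a
# random goal per episode when random_goal=True; names and values are
# assumptions.
def _example_grocery_ground_random_goal():
    from social_bot.envs import GroceryGround  # assumed import path
    env = GroceryGround(
        random_goal=True,
        use_image_observation=True,
        resized_image_size=(64, 64),
        data_format='channels_last')
    obs = env.reset()  # image observation resized to (64, 64)
    obs, reward, done, _ = env.step(env.action_space.sample())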
def __init__(self,
             with_language=False,
             use_image_obs=False,
             agent_type='pioneer2dx_noplugin',
             goal_name='table',
             max_steps=160,
             port=None):
    """
    Args:
        with_language (bool): the observation will be a dict with an extra
            sentence
        use_image_obs (bool): use image, or use internal states as
            observation. Poses in the internal states observation are in
            world coordinates.
        agent_type (string): select the agent robot, supporting
            pr2_differential, pioneer2dx_noplugin, turtlebot, and irobot
            create for now
        goal_name (string): name of the goal model in the world
        max_steps (int): episode will end when the agent exceeds this number
            of steps.
        port: Gazebo port, needs to be specified when running multiple
            environments in parallel
    """
    if port is None:
        port = 0
    gazebo.initialize(port=port)
    self._world = gazebo.new_world_from_file(
        os.path.join(social_bot.get_world_dir(), "grocery_ground.world"))
    self._object_types = [
        'coke_can', 'cube_20k', 'car_wheel', 'plastic_cup', 'beer', 'hammer'
    ]
    self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
    self._pos_list.remove((0, 0))
    self._world.info()
    self._world.insertModelFile('model://' + agent_type)
    self._world.step(20)
    time.sleep(0.1)  # Avoid 'px!=0' error
    self._random_insert_objects()
    self._world.model_list_info()
    self._world.info()

    # Specify the joints and sensors for the robots.
    control_joints = {
        'pr2_differential': [
            'pr2_differential::fl_caster_r_wheel_joint',
            'pr2_differential::fr_caster_l_wheel_joint'
        ],
        'pioneer2dx_noplugin': [
            'pioneer2dx_noplugin::left_wheel_hinge',
            'pioneer2dx_noplugin::right_wheel_hinge'
        ],
        'turtlebot': [
            'turtlebot::create::left_wheel',
            'turtlebot::create::right_wheel'
        ],
        'create': ['create::left_wheel', 'create::right_wheel'],
    }
    control_force = {
        'pr2_differential': 20,
        'pioneer2dx_noplugin': 0.5,
        'turtlebot': 0.5,
        'create': 0.5,
    }
    camera_sensor = {
        'pr2_differential':
            'default::pr2_differential::head_tilt_link::head_mount_prosilica_link_sensor',
        'pioneer2dx_noplugin': ' ',
        'turtlebot': 'default::turtlebot::kinect::link::camera',
        'create': ' ',
    }
    self._agent = self._world.get_agent(agent_type)
    self._agent_joints = control_joints[agent_type]
    self._agent_control_force = control_force[agent_type]
    self._agent_camera = camera_sensor[agent_type]
    self._goal_name = goal_name
    self._goal = self._world.get_model(goal_name)
    logger.info("joints to control: %s" % self._agent_joints)
    self._teacher = teacher.Teacher(False)
    task_group = teacher.TaskGroup()
    self._teacher_task = GroceryGroundGoalTask(
        max_steps=max_steps,
        goal_name=self._goal_name,
        success_distance_thresh=0.5,
        fail_distance_thresh=3.0,
        reward_shaping=True,
        random_range=10.0)
    task_group.add_task(self._teacher_task)
    self._teacher.add_task_group(task_group)
    self._with_language = with_language
    self._use_image_obs = use_image_obs
    obs = self.reset()
    if self._use_image_obs:
        obs_data_space = gym.spaces.Box(
            low=0, high=255, shape=obs.shape, dtype=np.uint8)
    else:
        obs_data_space = gym.spaces.Box(
            low=-50, high=50, shape=obs.shape, dtype=np.float32)
    control_space = gym.spaces.Box(
        low=-self._agent_control_force,
        high=self._agent_control_force,
        shape=[len(self._agent_joints)],
        dtype=np.float32)
    if self._with_language:
        self.observation_space = gym.spaces.Dict(
            data=obs_data_space, sentence=DiscreteSequence(128, 24))
        self._action_space = gym.spaces.Dict(
            control=control_space, sentence=DiscreteSequence(128, 24))
    else:
        self.observation_space = obs_data_space
        self.action_space = control_space
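# Usage sketch (illustrative, not part of the original source). The docstring
# above notes that a distinct Gazebo port is needed when running several
# simulations in parallel (typically one per worker process); this sketch
# shows that pattern. The class name `GroceryGround`, its import path, and
# the port number are assumptions.
def _example_explicit_port_usage():
    from social_bot.envs import GroceryGround  # assumed import path
    # Give this simulation its own Gazebo port so it does not collide with
    # other instances running in parallel.
    env = GroceryGround(
        agent_type='turtlebot', goal_name='table', max_steps=160, port=11345)
    obs = env.reset()
    obs, reward, done, _ = env.step(env.action_space.sample())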