def __init__(self, env, max_steps, target=None, agent_init_pos=(0, 0), agent_pos_random_range=0, reward_weight=1.0): """ Args: env (gym.Env): an instance of Environment max_steps (int): episode will end in so many steps reward_weight (float): the weight of the reward, should be tuned accroding to reward range of other tasks target (string): this is the target icub should face towards, since you may want the agent interact with something agent_init_pos (tuple): the expected initial position of the agent pos_random_range (float): random range of the initial position """ super().__init__(env=env, max_steps=max_steps, reward_weight=reward_weight) self.task_vocab = ['icub'] self._target_name = target self._pre_agent_pos = np.array([0, 0, 0], dtype=np.float32) self._agent_init_pos = agent_init_pos self._random_range = agent_pos_random_range if self._target_name: self._target = self._world.get_model(self._target_name) with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) self._joints = agent_cfgs[self._agent.type]['control_joints']
def setup(self, world, agent_name): """ Setting things up during the initialization """ super().setup(world, agent_name) agent_cfgs = json.load( open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r')) self._joints = agent_cfgs[self._agent_name]['control_joints']
def setup(self, world, agent_name): """ Setting things up during the initialization """ super().setup(world, agent_name) if self._target_name: self._target = world.get_agent(self._target_name) with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) self._joints = agent_cfgs[self._agent_name]['control_joints']
def test_play_ground(self): with_language = True with_agent_language = False agents = [ 'pioneer2dx_noplugin', 'pr2_noplugin', 'icub', 'icub_with_hands', 'youbot_noplugin' ] with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) tasks = [GoalTask, KickingBallTask, Reaching3D, PickAndPlace, Stack] for task in tasks: for agent_type in task.compatible_agents: for with_language in [True, False]: for use_image_obs in [True, False]: for image_with_internal_states in [True, False]: agent_cfg = agent_cfgs[agent_type] test_tasks = [task] if agent_type.find('icub') != -1: test_tasks.append(ICubAuxiliaryTask) if agent_cfg[ 'camera_sensor'] == '' and use_image_obs: continue logging.info("Testing Case: Agent " + agent_type + ", Task " + str(test_tasks) + ", UseImage: " + str(use_image_obs)) env = PlayGround( with_language=with_language, with_agent_language=with_agent_language, use_image_observation=use_image_obs, image_with_internal_states= image_with_internal_states, agent_type=agent_type, tasks=test_tasks) step_cnt = 0 last_done_time = time.time() while step_cnt < 100 and (time.time() - last_done_time) < 5: actions = env._control_space.sample() if with_agent_language: actions = dict(control=actions, sentence="hello") env.step(actions) step_cnt += 1 step_per_sec = step_cnt / (time.time() - last_done_time) logging.info("Test Passed, FPS: " + str(step_per_sec)) env.close() gazebo.close()
def test_grocery(self): with_language = True agents = [ 'pioneer2dx_noplugin', 'pr2_noplugin', 'icub', 'icub_with_hands', 'youbot_noplugin' ] tasks = ['goal', 'kickball'] with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) for agent_type in agents: for task_name in tasks: for use_image_obs in [True, False]: agent_cfg = agent_cfgs[agent_type] if agent_cfg['camera_sensor'] == '' and use_image_obs: continue logging.info("Testing Case: Agent " + agent_type + ", Task " + task_name + ", UseImage: " + str(use_image_obs)) env = GroceryGround(with_language=with_language, use_image_observation=use_image_obs, image_with_internal_states=True, agent_type=agent_type, task_name=task_name) step_cnt = 0 last_done_time = time.time() while step_cnt < 500 and (time.time() - last_done_time) < 10: actions = env._control_space.sample() if with_language: actions = dict(control=actions, sentence="hello") env.step(actions) step_cnt += 1 step_per_sec = step_cnt / (time.time() - last_done_time) logging.info("Test Passed, FPS: " + str(step_per_sec)) env.close()
def __init__(self, with_language=False, use_image_observation=False, image_with_internal_states=False, task_name='goal', agent_type='pioneer2dx_noplugin', world_time_precision=None, step_time=0.1, port=None, action_cost=0.0, resized_image_size=(64, 64), image_data_format='channels_last', vocab_sequence_length=20): """ Args: with_language (bool): The observation will be a dict with an extra sentence use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true task_name (string): the teacher task, now there are 2 tasks, a simple goal task: 'goal' a simple kicking ball task: 'kickball' agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now note that 'agent_type' should be the same str as the model's name world_time_precision (float|None): if not none, the time precision of simulator, i.e., the max_step_size defined in the agent cfg file, will be override. e.g., '0.002' for a 2ms sim step step_time (float): the peroid of one step of the environment. step_time / world_time_precision is how many simulator substeps during one environment step. for some complex agent, i.e., icub, using step_time of 0.05 is better port: Gazebo port, need to specify when run multiple environment in parallel action_cost (float): Add an extra action cost to reward, which helps to train an energy/forces efficency policy or reduce unnecessary movements resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) image_data_format (str): One of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ agent_cfgs = json.load( open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r')) agent_cfg = agent_cfgs[agent_type] wf_path = os.path.join(social_bot.get_world_dir(), "grocery_ground.world") with open(wf_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) if world_time_precision is None: world_time_precision = agent_cfg['max_sim_step_time'] sub_steps = int(round(step_time / world_time_precision)) sim_time_cfg = [ "//physics//max_step_size=" + str(world_time_precision) ] super(GroceryGround, self).__init__(world_string=world_string, world_config=sim_time_cfg, port=port) self._teacher = teacher.Teacher(task_groups_exclusive=False) if task_name is None or task_name == 'goal': main_task = GroceryGroundGoalTask() elif task_name == 'kickball': main_task = GroceryGroundKickBallTask(step_time=step_time) else: logging.debug("upsupported task name: " + task_name) main_task_group = TaskGroup() main_task_group.add_task(main_task) self._teacher.add_task_group(main_task_group) if agent_type.find('icub') != -1: icub_aux_task_group = TaskGroup() icub_standing_task = ICubAuxiliaryTask(step_time=step_time) icub_aux_task_group.add_task(icub_standing_task) self._teacher.add_task_group(icub_aux_task_group) self._teacher._build_vocab_from_tasks() self._seq_length = vocab_sequence_length if self._teacher.vocab_size: # using MultiDiscrete instead of DiscreteSequence so gym # _spec_from_gym_space won't complain. self._sentence_space = gym.spaces.MultiDiscrete( [self._teacher.vocab_size] * self._seq_length) self._sub_steps = sub_steps self._world.step(20) self._agent = self._world.get_agent() for task_group in self._teacher.get_task_groups(): for task in task_group.get_tasks(): task.setup(self._world, agent_type) logging.debug(self._world.info()) self._agent_joints = agent_cfg['control_joints'] joint_states = list( map(lambda s: self._agent.get_joint_state(s), self._agent_joints)) self._joints_limits = list( map(lambda s: s.get_effort_limits()[0], joint_states)) if agent_cfg['use_pid']: for joint_index in range(len(self._agent_joints)): self._agent.set_pid_controller( self._agent_joints[joint_index], 'velocity', p=0.02, d=0.00001, max_force=self._joints_limits[joint_index]) self._agent_control_range = agent_cfg['pid_control_limit'] else: self._agent_control_range = np.array(self._joints_limits) self._agent_camera = agent_cfg['camera_sensor'] logging.debug("joints to control: %s" % self._agent_joints) self._action_cost = action_cost self._with_language = with_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states assert image_data_format in ('channels_first', 'channels_last') self._data_format = image_data_format self._resized_image_size = resized_image_size self._substep_time = world_time_precision self._control_space = gym.spaces.Box(low=-1.0, high=1.0, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.action_space = gym.spaces.Dict(control=self._control_space, sentence=self._sentence_space) else: self.action_space = self._control_space self.reset() obs_sample = self._get_observation_with_sentence("hello") if self._with_language or self._image_with_internal_states: self.observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) elif self._use_image_obs: self.observation_space = gym.spaces.Box(low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) else: self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=obs_sample.shape, dtype=np.float32)
def __init__(self, agent_type='pioneer2dx_noplugin', world_name="play_ground.world", tasks=[GoalTask], with_language=False, with_agent_language=False, use_image_observation=False, image_with_internal_states=False, max_steps=200, world_time_precision=None, step_time=0.1, real_time_update_rate=0, port=None, action_cost=0.0, resized_image_size=(64, 64), vocab_sequence_length=20, action_wrapper=None): """ Args: agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, youbot_noplugin and icub_with_hands for now note that 'agent_type' should be exactly the same string as the model's name at the beginning of model's sdf file world_name (string): Select the world file, e.g., empty.world, play_ground.world, grocery_ground.world tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate max_steps (int): episode will end in so many steps, will be passed to the tasks image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true world_time_precision (float|None): this parameter depends on the agent. if not none, the default time precision of simulator, i.e., the max_step_size defined in the agent cfg file, will be override. Note that pr2 and iCub requires a max_step_size <= 0.001, otherwise cannot train a successful policy. step_time (float): the peroid of one step() function of the environment in simulation. step_time is rounded to multiples of world_time_precision step_time / world_time_precision is how many simulator substeps during one environment step. for the tasks need higher control frequency (such as the tasks need walking by 2 legs), using a smaller step_time like 0.05 is better. experiments show that iCub can not learn how to walk in a 0.1 step_time real_time_update_rate (int): max update rate per second. There is no limit if this is set to 0 as default in the world file. If 1:1 real time is prefered (like playing or recording video), it should be set to 1.0/world_time_precision. port: Gazebo port, need to specify when run multiple environment in parallel action_cost (float): Add an extra action cost to reward, which helps to train an energy/forces efficency policy or reduce unnecessary movements resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) vocab_sequence_length (int): the length of encoded sequence action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has redundant dimensions or offset. If not None, this is used to transform the agent actions. See ActionWrapper of gazebo_agent.py for example. """ self._action_cost = action_cost self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states # Load agent and world file with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) agent_cfg = agent_cfgs[agent_type] wd_path = os.path.join(social_bot.get_world_dir(), world_name) with open(wd_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) if world_time_precision is None: world_time_precision = agent_cfg['max_sim_step_time'] self._sub_steps = int(round(step_time / world_time_precision)) self._step_time = world_time_precision * self._sub_steps sim_time_cfg = [ "//physics//max_step_size=" + str(world_time_precision), "//physics//real_time_update_rate=" + str(real_time_update_rate) ] super().__init__(world_string=world_string, world_config=sim_time_cfg, port=port) logging.debug(self._world.info()) # Setup agent self._agent = GazeboAgent( world=self._world, agent_type=agent_type, config=agent_cfg, with_language=with_language, with_agent_language=with_agent_language, vocab_sequence_length=self._seq_length, use_image_observation=use_image_observation, resized_image_size=resized_image_size, image_with_internal_states=image_with_internal_states, action_wrapper=action_wrapper) # Setup teacher and tasks self._teacher = teacher.Teacher(task_groups_exclusive=False) self._has_goal_task = False for task in tasks: if task == GoalTask: self._has_goal_task = True task_group = TaskGroup() task_group.add_task(task(env=self, max_steps=max_steps)) self._teacher.add_task_group(task_group) self._teacher._build_vocab_from_tasks() # Setup action space and observation space if self._teacher.vocab_size: sentence_space = gym.spaces.MultiDiscrete( [self._teacher.vocab_size] * self._seq_length) self._agent.set_sentence_space(sentence_space) self._control_space = self._agent.get_control_space() self.action_space = self._agent.get_action_space() self.reset() self.observation_space = self._agent.get_observation_space( self._teacher)
def __init__(self, with_language=False, use_image_observation=False, image_with_internal_states=False, random_goal=False, agent_type='pioneer2dx_noplugin', max_steps=200, port=None, resized_image_size=(64, 64), data_format='channels_last'): """ Args: with_language (bool): The observation will be a dict with an extra sentence use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true random_goal (bool): If ture, teacher will randomly select goal from the object list each episode agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now note that 'agent_type' should be the same str as the model's name port: Gazebo port, need to specify when run multiple environment in parallel resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) data_format (str): One of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ super(GroceryGround, self).__init__(port=port) self._teacher = teacher.Teacher(task_groups_exclusive=False) task_group = teacher.TaskGroup() self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps, success_distance_thresh=0.5, fail_distance_thresh=3.0, reward_shaping=True, random_goal=random_goal, random_range=10.0) task_group.add_task(self._teacher_task) self._teacher.add_task_group(task_group) self._teacher.build_vocab_from_tasks() self._seq_length = 20 self._sentence_space = DiscreteSequence(self._teacher.vocab_size, self._seq_length) self._object_list = self._teacher_task.get_object_list() self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5))) self._pos_list.remove((0, 0)) wf_path = os.path.join(social_bot.get_world_dir(), "grocery_ground.world") with open(wf_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) self._world = gazebo.new_world_from_string(world_string) self._world.step(20) self._insert_objects(self._object_list) self._agent = self._world.get_agent() logging.debug(self._world.info()) agent_cfgs = json.load( open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r')) agent_cfg = agent_cfgs[agent_type] self._agent_joints = agent_cfg['control_joints'] joint_states = list( map(lambda s: self._agent.get_joint_state(s), self._agent_joints)) self._joints_limits = list( map(lambda s: s.get_effort_limits()[0], joint_states)) if agent_cfg['use_pid']: for joint_index in range(len(self._agent_joints)): self._agent.set_pid_controller( self._agent_joints[joint_index], 'velocity', p=0.02, d=0.00001, max_force=self._joints_limits[joint_index]) self._agent_control_range = agent_cfg['pid_control_limit'] else: self._agent_control_range = np.array(self._joints_limits) self._agent_camera = agent_cfg['camera_sensor'] logging.debug("joints to control: %s" % self._agent_joints) self._with_language = with_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states assert data_format in ('channels_first', 'channels_last') self._data_format = data_format self._resized_image_size = resized_image_size self._random_goal = random_goal self.reset() obs_sample = self._get_observation("hello") if self._with_language or self._image_with_internal_states: self.observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) elif self._use_image_obs: self.observation_space = gym.spaces.Box(low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) else: self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=obs_sample.shape, dtype=np.float32) self._control_space = gym.spaces.Box(low=-1.0, high=1.0, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.action_space = gym.spaces.Dict(control=self._control_space, sentence=self._sentence_space) else: self.action_space = self._control_space