def __init__(self,
             max_steps=500,
             goal_name="ball",
             success_distance_thresh=0.5,
             fail_distance_thresh=3,
             random_range=10.0,
             random_goal=False,
             reward_weight=1.0):
    """
    Args:
        max_steps (int): the episode ends if the goal is not reached
            within this many steps
        goal_name (string): name of the goal object in the world
        success_distance_thresh (float): the goal counts as reached once
            it is within this distance of the agent
        fail_distance_thresh (float): moving farther than this away from
            the goal is a failure and is given reward -1
        random_range (float): range for the goal's random placement
        random_goal (bool): if True, the teacher randomly selects the goal
            from the object list each episode
        reward_weight (float): the weight of the reward
    """
    assert goal_name is not None, "Goal name needs to be set, not None."
    GoalTask.__init__(
        self,
        max_steps=max_steps,
        goal_name=goal_name,
        success_distance_thresh=success_distance_thresh,
        fail_distance_thresh=fail_distance_thresh,
        random_range=random_range)
    GroceryGroundTaskBase.__init__(self)
    self._random_goal = random_goal
    # Objects already placed by the world file.
    self._objects_in_world = [
        'placing_table', 'plastic_cup_on_table', 'coke_can_on_table',
        'hammer_on_table', 'cafe_table', 'ball'
    ]
    # Objects that may be inserted at runtime (candidate random goals).
    self._objects_to_insert = [
        'coke_can', 'table', 'bookshelf', 'car_wheel', 'plastic_cup',
        'beer', 'hammer'
    ]
    logging.info("goal_name %s, random_goal %d, fail_distance_thresh %f",
                 self._goal_name, self._random_goal, fail_distance_thresh)
    # Integer grid of candidate positions, excluding the origin where the
    # agent starts. Same elements/order as itertools.product minus (0, 0).
    self._pos_list = [(x, y) for x in range(-5, 5) for y in range(-5, 5)
                      if (x, y) != (0, 0)]
    self.reward_weight = reward_weight
    self.task_vocab = (self.task_vocab + self._objects_in_world +
                       self._objects_to_insert)
def __init__(self,
             max_steps=500,
             goal_name="goal",
             success_distance_thresh=0.5,
             fail_distance_thresh=0.5,
             random_range=5.0,
             target_speed=2.0,
             step_time=0.1,
             reward_weight=1.0):
    """
    Args:
        max_steps (int): episode will end if not reaching goal in so many
            steps
        goal_name (string): name of the goal in the world
        success_distance_thresh (float): the goal is reached if it's
            within this distance to the agent
        fail_distance_thresh (float): if the agent moves away from the
            goal more than this distance, it's considered a failure and
            is given reward -1
        random_range (float): the goal's random position range
        target_speed (float): the target speed running to the ball. The
            agent will receive no more higher reward when its speed is
            higher than target_speed.
        step_time (float): used to calculate speed of the agent
        reward_weight (float): the weight of the reward
    """
    GoalTask.__init__(
        self,
        max_steps=max_steps,
        goal_name=goal_name,
        fail_distance_thresh=fail_distance_thresh,
        random_range=random_range)
    GroceryGroundTaskBase.__init__(self)
    # Honor the constructor argument instead of re-hardcoding 'goal';
    # the default value keeps the previous behavior.
    self._goal_name = goal_name
    # BUG FIX: a stray trailing comma previously made this a 1-tuple
    # (success_distance_thresh,), which would break every numeric
    # comparison against the threshold.
    self._success_distance_thresh = success_distance_thresh
    # Objects already placed by the world file.
    self._objects_in_world = [
        'placing_table', 'plastic_cup_on_table', 'coke_can_on_table',
        'hammer_on_table', 'cafe_table', 'ball'
    ]
    self._step_time = step_time
    self._target_speed = target_speed
    self.reward_weight = reward_weight
    self.task_vocab = self.task_vocab + self._objects_in_world
def __init__(self, with_language=True, port=None):
    """Set up the Gazebo world, teacher task, and gym spaces."""
    if port is None:
        port = 0
    gazebo.initialize(port=port)
    world_file = os.path.join(social_bot.get_world_dir(),
                              "pioneer2dx_camera.world")
    self._world = gazebo.new_world_from_file(world_file)
    self._agent = self._world.get_agent()
    assert self._agent is not None
    logger.info("joint names: %s" % self._agent.get_joint_names())
    self._joint_names = self._agent.get_joint_names()
    self._teacher = teacher.Teacher(False)
    task_group = teacher.TaskGroup()
    task_group.add_task(GoalTask())
    self._teacher.add_task_group(task_group)
    self._with_language = with_language
    # Probe the camera once to learn the observation shape.
    image = np.array(
        self._agent.get_camera_observation("camera"), copy=False)
    image_space = gym.spaces.Box(
        low=0, high=1, shape=image.shape, dtype=np.uint8)
    control_space = gym.spaces.Box(
        low=-0.2,
        high=0.2,
        shape=[len(self._joint_names)],
        dtype=np.float32)
    if with_language:
        # With language, both observation and action carry a sentence.
        self._observation_space = gym.spaces.Dict(
            image=image_space, sentence=DiscreteSequence(256, 20))
        self._action_space = gym.spaces.Dict(
            control=control_space, sentence=DiscreteSequence(256, 20))
    else:
        self._observation_space = image_space
        self._action_space = control_space
def run(self, agent, world):
    """Run one episode: shuffle object positions, optionally pick a random
    goal, then delegate to the base GoalTask logic."""
    self._random_move_objects()
    if self._random_goal:
        # random.choice(seq) draws _randbelow(len(seq)) exactly like
        # randrange(len(seq)), so the RNG stream is unchanged.
        self.set_goal_name(random.choice(self._objects_to_insert))
    yield from GoalTask.run(self, agent, world)
def __init__(self,
             with_language=False,
             image_with_internal_states=False,
             port=None,
             resized_image_size=None,
             data_format='channels_last'):
    """Create SimpleNavigation environment.

    Args:
        with_language (bool): whether to generate language for observation
        image_with_internal_states (bool): If true, the agent's self
            internal states i.e., joint position and velocities would be
            available together with the image.
        port (int): TCP/IP port for the simulation server
        resized_image_size (None|tuple): If None, use the original image
            size from the camera. Otherwise, the original image will be
            resized to (width, height)
        data_format (str): one of `channels_last` or `channels_first`.
            The ordering of the dimensions in the images.
            `channels_last` corresponds to images with shape
            `(height, width, channels)` while `channels_first` corresponds
            to images with shape `(channels, height, width)`.
    """
    super(SimpleNavigation, self).__init__(port=port)
    world_file = os.path.join(social_bot.get_world_dir(),
                              "pioneer2dx_camera.world")
    self._world = gazebo.new_world_from_file(world_file)
    self._agent = self._world.get_agent()
    self._rendering_cam_pose = "4 -4 3 0 0.4 2.3"
    assert self._agent is not None
    logging.debug("joint names: %s" % self._agent.get_joint_names())
    self._all_joints = self._agent.get_joint_names()
    # Only the wheel joints are actuated by the policy.
    self._joint_names = [s for s in self._all_joints if 'wheel' in s]
    self._teacher = teacher.Teacher(task_groups_exclusive=False)
    task_group = teacher.TaskGroup()
    task_group.add_task(GoalTask())
    self._teacher.add_task_group(task_group)
    self._seq_length = 20
    self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                            self._seq_length)
    self._with_language = with_language
    self._image_with_internal_states = image_with_internal_states
    self._resized_image_size = resized_image_size
    assert data_format in ('channels_first', 'channels_last')
    self._data_format = data_format
    time.sleep(0.1)  # Allow Gazebo threads to be fully ready
    self.reset()
    # Probe a single observation to size the gym spaces.
    obs_sample = self._get_observation('hello')
    if self._with_language or self._image_with_internal_states:
        self._observation_space = self._construct_dict_space(
            obs_sample, self._teacher.vocab_size)
    else:
        self._observation_space = gym.spaces.Box(
            low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)
    control_space = gym.spaces.Box(
        low=-0.2,
        high=0.2,
        shape=[len(self._joint_names)],
        dtype=np.float32)
    if with_language:
        self._action_space = gym.spaces.Dict(
            control=control_space, sentence=self._sentence_space)
    else:
        self._action_space = control_space