예제 #1
0
    def __init__(self,
                 with_language=False,
                 image_with_internal_states=False,
                 port=None,
                 resized_image_size=None):
        """Create SimpleNavigation environment.

        Args:
            with_language (bool): whether to generate language for observation
            image_with_internal_states (bool): If true, the agent's self internal
                states i.e., joint position and velocities would be available
                together with the image.
            port (int): TCP/IP port for the simulation server
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
        """
        super(SimpleNavigation,
              self).__init__(world_file='pioneer2dx_camera.world', port=port)

        self._with_language = with_language
        self._image_with_internal_states = image_with_internal_states
        self.set_rendering_cam_pose('4 -4 3 0 0.4 2.3')
        self._seq_length = 20

        # Setup agent
        self._agent = GazeboAgent(
            world=self._world,
            agent_type='pioneer2dx_noplugin',
            with_language=with_language,
            vocab_sequence_length=self._seq_length,
            use_image_observation=True,
            resized_image_size=resized_image_size,
            image_with_internal_states=image_with_internal_states)

        # Setup teacher and tasks
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        task = GoalTask(env=self,
                        max_steps=120,
                        goal_name="goal",
                        fail_distance_thresh=0.5,
                        distraction_list=[],
                        random_range=2.0)
        task_group.add_task(task)
        self._teacher.add_task_group(task_group)
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        # Setup action space and observation space
        self.reset()
        self._agent.set_sentence_space(self._sentence_space)
        self._control_space = self._agent.get_control_space()
        self._action_space = self._agent.get_action_space()
        self._observation_space = self._agent.get_observation_space(
            self._teacher)
예제 #2
0
    def __init__(self, with_language=True, port=None):
        if port is None:
            port = 0
        gazebo.initialize(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(),
                         "pioneer2dx_camera.world"))
        self._agent = self._world.get_agent()
        assert self._agent is not None
        logger.info("joint names: %s" % self._agent.get_joint_names())
        self._joint_names = self._agent.get_joint_names()
        self._teacher = teacher.Teacher(False)
        task_group = teacher.TaskGroup()
        task_group.add_task(GoalTask())
        self._teacher.add_task_group(task_group)
        self._with_language = with_language

        # get observation dimension
        image = self._agent.get_camera_observation("camera")
        image = np.array(image, copy=False)
        if with_language:
            self._observation_space = gym.spaces.Dict(
                image=gym.spaces.Box(low=0,
                                     high=1,
                                     shape=image.shape,
                                     dtype=np.uint8),
                sentence=DiscreteSequence(256, 20))

            self._action_space = gym.spaces.Dict(
                control=gym.spaces.Box(low=-0.2,
                                       high=0.2,
                                       shape=[len(self._joint_names)],
                                       dtype=np.float32),
                sentence=DiscreteSequence(256, 20))
        else:
            self._observation_space = gym.spaces.Box(low=0,
                                                     high=1,
                                                     shape=image.shape,
                                                     dtype=np.uint8)
            self._action_space = gym.spaces.Box(low=-0.2,
                                                high=0.2,
                                                shape=[len(self._joint_names)],
                                                dtype=np.float32)
예제 #3
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 task_name='goal',
                 agent_type='pioneer2dx_noplugin',
                 world_time_precision=None,
                 step_time=0.1,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 image_data_format='channels_last',
                 vocab_sequence_length=20):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            task_name (string): the teacher task, now there are 2 tasks,
                a simple goal task: 'goal'
                a simple kicking ball task: 'kickball'
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            world_time_precision (float|None): if not none, the time precision of
                simulator, i.e., the max_step_size defined in the agent cfg file, will be
                override. e.g., '0.002' for a 2ms sim step
            step_time (float): the peroid of one step of the environment.
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for some complex agent, i.e., icub, using step_time of 0.05 is better
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            image_data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """

        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        sub_steps = int(round(step_time / world_time_precision))
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision)
        ]

        super(GroceryGround, self).__init__(world_string=world_string,
                                            world_config=sim_time_cfg,
                                            port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        if task_name is None or task_name == 'goal':
            main_task = GroceryGroundGoalTask()
        elif task_name == 'kickball':
            main_task = GroceryGroundKickBallTask(step_time=step_time)
        else:
            logging.debug("upsupported task name: " + task_name)

        main_task_group = TaskGroup()
        main_task_group.add_task(main_task)
        self._teacher.add_task_group(main_task_group)
        if agent_type.find('icub') != -1:
            icub_aux_task_group = TaskGroup()
            icub_standing_task = ICubAuxiliaryTask(step_time=step_time)
            icub_aux_task_group.add_task(icub_standing_task)
            self._teacher.add_task_group(icub_aux_task_group)
        self._teacher._build_vocab_from_tasks()
        self._seq_length = vocab_sequence_length
        if self._teacher.vocab_size:
            # using MultiDiscrete instead of DiscreteSequence so gym
            # _spec_from_gym_space won't complain.
            self._sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._sub_steps = sub_steps

        self._world.step(20)
        self._agent = self._world.get_agent()
        for task_group in self._teacher.get_task_groups():
            for task in task_group.get_tasks():
                task.setup(self._world, agent_type)

        logging.debug(self._world.info())
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._action_cost = action_cost
        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert image_data_format in ('channels_first', 'channels_last')
        self._data_format = image_data_format
        self._resized_image_size = resized_image_size
        self._substep_time = world_time_precision

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space

        self.reset()
        obs_sample = self._get_observation_with_sentence("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)
    def __init__(self,
                 with_language=False,
                 image_with_internal_states=False,
                 port=None,
                 resized_image_size=None,
                 data_format='channels_last'):
        """Create SimpleNavigation environment.

        Args:
            with_language (bool): whether to generate language for observation
            image_with_internal_states (bool): If true, the agent's self internal
                states i.e., joint position and velocities would be available
                together with the image.
            port (int): TCP/IP port for the simulation server
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            data_format (str):  one of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """
        super(SimpleNavigation, self).__init__(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(),
                         "pioneer2dx_camera.world"))
        self._agent = self._world.get_agent()
        self._rendering_cam_pose = "4 -4 3 0 0.4 2.3"
        assert self._agent is not None
        logging.debug("joint names: %s" % self._agent.get_joint_names())
        self._all_joints = self._agent.get_joint_names()
        self._joint_names = list(
            filter(lambda s: s.find('wheel') != -1, self._all_joints))
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        task_group.add_task(GoalTask())
        self._teacher.add_task_group(task_group)
        self._seq_length = 20
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        self._with_language = with_language
        self._image_with_internal_states = image_with_internal_states
        self._resized_image_size = resized_image_size
        assert data_format in ('channels_first', 'channels_last')
        self._data_format = data_format

        time.sleep(0.1)  # Allow Gazebo threads to be fully ready
        self.reset()

        # Get observation dimension
        obs_sample = self._get_observation('hello')
        if self._with_language or self._image_with_internal_states:
            self._observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        else:
            self._observation_space = gym.spaces.Box(
                low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)

        control_space = gym.spaces.Box(
            low=-0.2,
            high=0.2,
            shape=[len(self._joint_names)],
            dtype=np.float32)
        if with_language:
            self._action_space = gym.spaces.Dict(
                control=control_space, sentence=self._sentence_space)
        else:
            self._action_space = control_space
예제 #5
0
    def __init__(self,
                 agent_type='pioneer2dx_noplugin',
                 world_name="play_ground.world",
                 tasks=[GoalTask],
                 with_language=False,
                 with_agent_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 max_steps=200,
                 world_time_precision=None,
                 step_time=0.1,
                 real_time_update_rate=0,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 vocab_sequence_length=20,
                 action_wrapper=None):
        """
        Args:
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, youbot_noplugin and icub_with_hands for now
                note that 'agent_type' should be exactly the same string as the model's
                name at the beginning of model's sdf file
            world_name (string): Select the world file, e.g., empty.world, play_ground.world,
                grocery_ground.world
            tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask
            with_language (bool): The observation will be a dict with an extra sentence
            with_agent_language (bool): Include agent sentence in action space.
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            max_steps (int): episode will end in so many steps, will be passed to the tasks
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            world_time_precision (float|None): this parameter depends on the agent.
                if not none, the default time precision of simulator, i.e., the max_step_size
                defined in the agent cfg file, will be override. Note that pr2 and iCub
                requires a max_step_size <= 0.001, otherwise cannot train a successful policy.
            step_time (float): the peroid of one step() function of the environment in simulation.
                step_time is rounded to multiples of world_time_precision
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for the tasks need higher control frequency (such as the
                tasks need walking by 2 legs), using a smaller step_time like 0.05 is better.
                experiments show that iCub can not learn how to walk in a 0.1 step_time
            real_time_update_rate (int): max update rate per second. There is no limit if
                this is set to 0 as default in the world file. If 1:1 real time is prefered
                (like playing or recording video), it should be set to 1.0/world_time_precision.
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            vocab_sequence_length (int): the length of encoded sequence
            action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has
                redundant dimensions or offset. If not None, this is used to transform the agent
                actions. See ActionWrapper of gazebo_agent.py for example.
        """

        self._action_cost = action_cost
        self._with_language = with_language
        self._seq_length = vocab_sequence_length
        self._with_agent_language = with_language and with_agent_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states

        # Load agent and world file
        with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                  'r') as cfg_file:
            agent_cfgs = json.load(cfg_file)
        agent_cfg = agent_cfgs[agent_type]
        wd_path = os.path.join(social_bot.get_world_dir(), world_name)
        with open(wd_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        self._sub_steps = int(round(step_time / world_time_precision))
        self._step_time = world_time_precision * self._sub_steps
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision),
            "//physics//real_time_update_rate=" + str(real_time_update_rate)
        ]
        super().__init__(world_string=world_string,
                         world_config=sim_time_cfg,
                         port=port)
        logging.debug(self._world.info())

        # Setup agent
        self._agent = GazeboAgent(
            world=self._world,
            agent_type=agent_type,
            config=agent_cfg,
            with_language=with_language,
            with_agent_language=with_agent_language,
            vocab_sequence_length=self._seq_length,
            use_image_observation=use_image_observation,
            resized_image_size=resized_image_size,
            image_with_internal_states=image_with_internal_states,
            action_wrapper=action_wrapper)

        # Setup teacher and tasks
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        self._has_goal_task = False
        for task in tasks:
            if task == GoalTask:
                self._has_goal_task = True
            task_group = TaskGroup()
            task_group.add_task(task(env=self, max_steps=max_steps))
            self._teacher.add_task_group(task_group)
        self._teacher._build_vocab_from_tasks()

        # Setup action space and observation space
        if self._teacher.vocab_size:
            sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._agent.set_sentence_space(sentence_space)
        self._control_space = self._agent.get_control_space()
        self.action_space = self._agent.get_action_space()
        self.reset()
        self.observation_space = self._agent.get_observation_space(
            self._teacher)
예제 #6
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 random_goal=False,
                 agent_type='pioneer2dx_noplugin',
                 max_steps=200,
                 port=None,
                 resized_image_size=(64, 64),
                 data_format='channels_last'):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as 
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            random_goal (bool): If ture, teacher will randomly select goal from the 
                object list each episode
            agent_type (string): Select the agent robot, supporting pr2_noplugin, 
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            port: Gazebo port, need to specify when run multiple environment in parallel
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """
        super(GroceryGround, self).__init__(port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps,
                                                   success_distance_thresh=0.5,
                                                   fail_distance_thresh=3.0,
                                                   reward_shaping=True,
                                                   random_goal=random_goal,
                                                   random_range=10.0)
        task_group.add_task(self._teacher_task)
        self._teacher.add_task_group(task_group)
        self._teacher.build_vocab_from_tasks()
        self._seq_length = 20
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        self._object_list = self._teacher_task.get_object_list()
        self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
        self._pos_list.remove((0, 0))

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        self._world = gazebo.new_world_from_string(world_string)
        self._world.step(20)
        self._insert_objects(self._object_list)
        self._agent = self._world.get_agent()
        logging.debug(self._world.info())
        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert data_format in ('channels_first', 'channels_last')
        self._data_format = data_format
        self._resized_image_size = resized_image_size
        self._random_goal = random_goal

        self.reset()
        obs_sample = self._get_observation("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space
예제 #7
0
    def __init__(self,
                 with_language=False,
                 use_image_obs=False,
                 agent_type='pioneer2dx_noplugin',
                 goal_name='table',
                 max_steps=160,
                 port=None):
        """
        Args:
            with_language (bool): the observation will be a dict with an extra sentence
            use_image_obs (bool): use image, or use internal states as observation
                poses in internal states observation are in world coordinate
            agent_type (string): select the agent robot, supporting pr2_differential, 
                pioneer2dx_noplugin, turtlebot, and irobot create for now
            port: Gazebo port, need to specify when run multiple environment in parallel
        """
        if port is None:
            port = 0
        gazebo.initialize(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(), "grocery_ground.world"))
        self._object_types = [
            'coke_can', 'cube_20k', 'car_wheel', 'plastic_cup', 'beer',
            'hammer'
        ]
        self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
        self._pos_list.remove((0, 0))
        self._world.info()
        self._world.insertModelFile('model://' + agent_type)
        self._world.step(20)
        time.sleep(0.1)  # Avoid 'px!=0' error
        self._random_insert_objects()
        self._world.model_list_info()
        self._world.info()

        # Specify joints and sensors for the robots
        control_joints = {
            'pr2_differential': [
                'pr2_differential::fl_caster_r_wheel_joint',
                'pr2_differential::fr_caster_l_wheel_joint'
            ],
            'pioneer2dx_noplugin': [
                'pioneer2dx_noplugin::left_wheel_hinge',
                'pioneer2dx_noplugin::right_wheel_hinge'
            ],
            'turtlebot': [
                'turtlebot::create::left_wheel',
                'turtlebot::create::right_wheel'
            ],
            'create': ['create::left_wheel', 'create::right_wheel'],
        }
        control_force = {
            'pr2_differential': 20,
            'pioneer2dx_noplugin': 0.5,
            'turtlebot': 0.5,
            'create': 0.5,
        }
        camera_sensor = {
            'pr2_differential':
            'default::pr2_differential::head_tilt_link::head_mount_prosilica_link_sensor',
            'pioneer2dx_noplugin': ' ',
            'turtlebot': 'default::turtlebot::kinect::link::camera',
            'create': ' ',
        }

        self._agent = self._world.get_agent(agent_type)
        self._agent_joints = control_joints[agent_type]
        self._agent_control_force = control_force[agent_type]
        self._agent_camera = camera_sensor[agent_type]
        self._goal_name = goal_name
        self._goal = self._world.get_model(goal_name)

        logger.info("joints to control: %s" % self._agent_joints)

        self._teacher = teacher.Teacher(False)
        task_group = teacher.TaskGroup()
        self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps,
                                                   goal_name=self._goal_name,
                                                   success_distance_thresh=0.5,
                                                   fail_distance_thresh=3.0,
                                                   reward_shaping=True,
                                                   random_range=10.0)
        task_group.add_task(self._teacher_task)
        self._teacher.add_task_group(task_group)

        self._with_language = with_language
        self._use_image_obs = use_image_obs

        obs = self.reset()
        if self._use_image_obs:
            obs_data_space = gym.spaces.Box(low=0,
                                            high=255,
                                            shape=obs.shape,
                                            dtype=np.uint8)
        else:
            obs_data_space = gym.spaces.Box(low=-50,
                                            high=50,
                                            shape=obs.shape,
                                            dtype=np.float32)

        control_space = gym.spaces.Box(low=-self._agent_control_force,
                                       high=self._agent_control_force,
                                       shape=[len(self._agent_joints)],
                                       dtype=np.float32)

        if self._with_language:
            self.observation_space = gym.spaces.Dict(data=obs_data_space,
                                                     sentence=DiscreteSequence(
                                                         128, 24))
            self._action_space = gym.spaces.Dict(control=control_space,
                                                 sentence=DiscreteSequence(
                                                     128, 24))
        else:
            self.observation_space = obs_data_space
            self.action_space = control_space