Example #1
    def reset(self, initial_motor_angles=None, reset_duration_test=1.0):
        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 0)
        if self._env_step_counter > 0:
            self.logging.save_episode(self._episode_proto)
        self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
        minitaur_logging.preallocate_episode_proto(self._episode_proto,
                                                   self._num_steps_to_log)
        if self._hard_reset:
            self._pybullet_client.resetSimulation()
            self._pybullet_client.setPhysicsEngineParameter(
                numSolverIterations=int(self._num_bullet_solver_iterations))
            self._pybullet_client.setTimeStep(self._time_step)
            self._ground_id = self._pybullet_client.loadURDF("%s/plane.urdf" %
                                                             self._urdf_root)
            if self._reflection:
                self._pybullet_client.changeVisualShape(
                    self._ground_id, -1, rgbaColor=[1, 1, 1, 0.8])
                self._pybullet_client.configureDebugVisualizer(
                    self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,
                    self._ground_id)
            self._pybullet_client.setGravity(0, 0, -10)
            acc_motor = self._accurate_motor_model_enabled
            motor_protect = self._motor_overheat_protection

            # Rebuild the minitaur only on a hard reset; acc_motor and
            # motor_protect are only defined in this branch. On a soft reset,
            # self.minitaur already exists from the previous hard reset.
            self.minitaur = Minitaur(
                pybullet_client=self._pybullet_client,
                action_repeat=self._action_repeat,
                urdf_root=self._urdf_root,
                time_step=self._time_step,
                self_collision_enabled=self._self_collision_enabled,
                motor_velocity_limit=self._motor_velocity_limit,
                pd_control_enabled=self._pd_control_enabled,
                accurate_motor_model_enabled=acc_motor,
                remove_default_joint_damping=self._remove_default_joint_damping,
                motor_kp=self._motor_kp,
                motor_kd=self._motor_kd,
                control_latency=self._control_latency,
                pd_latency=self._pd_latency,
                observation_noise_stdev=self._observation_noise_stdev,
                torque_control_enabled=self._torque_control_enabled,
                motor_overheat_protection=motor_protect,
                on_rack=self._on_rack)
        print('-----Enter Minitaur.Reset2-----')
        self.minitaur.Reset(reload_urdf=False,
                            default_motor_angles=initial_motor_angles,
                            reset_time=reset_duration_test)
        print('-----Exit Minitaur.Reset2------')
        for env_randomizer in self._env_randomizers:
            env_randomizer.randomize_env(self)

        self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
        self._env_step_counter = 0
        self._last_base_position = [0, 0, 0]
        self._objectives = []
        self._pybullet_client.resetDebugVisualizerCamera(
            self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 1)
        return self._get_true_observation()
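A minimal usage sketch for the reset() above. It assumes a constructed MinitaurGymEnv as in Example #3 and the minitaur's 8 motors; the pose values are illustrative:

    import math

    env = MinitaurGymEnv(render=False)  # constructed as in Example #3
    # Ramp all 8 motors to a half-pi pose over one second; reset() returns
    # self._get_true_observation().
    init_pose = [math.pi / 2] * 8
    observation = env.reset(initial_motor_angles=init_pose,
                            reset_duration_test=1.0)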
Example #2
  def restore_episode(self, log_path):
    """Restore the episodic proto from the log path.

    Args:
      log_path: The full path of the log file.
    Returns:
      The minitaur episode proto.
    """
    with tf.gfile.Open(log_path, 'rb') as f:
      content = f.read()
      episode_proto = minitaur_logging_pb2.MinitaurEpisode()
      episode_proto.ParseFromString(content)
      return episode_proto
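A short usage sketch. The log path is illustrative, and state_action is assumed to be the repeated per-step field of MinitaurEpisode in minitaur_logging.proto:

    logger = minitaur_logging.MinitaurLogging(log_path='/tmp/minitaur_logs')
    episode = logger.restore_episode('/tmp/minitaur_logs/episode_0.log')
    # Each state_action entry holds one logged step (assumed field name).
    print('logged steps:', len(episode.state_action))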
Example #3
    def __init__(self,
                 urdf_root=pybullet_data.getDataPath(),
                 urdf_version=None,
                 distance_weight=1.0,
                 energy_weight=0.005,
                 shake_weight=0.0,
                 drift_weight=0.0,
                 distance_limit=float("inf"),
                 observation_noise_stdev=SENSOR_NOISE_STDDEV,
                 self_collision_enabled=True,
                 motor_velocity_limit=np.inf,
                 pd_control_enabled=False,
                 leg_model_enabled=True,
                 accurate_motor_model_enabled=False,
                 remove_default_joint_damping=False,
                 motor_kp=1.0,
                 motor_kd=0.02,
                 control_latency=0.0,
                 pd_latency=0.0,
                 torque_control_enabled=False,
                 motor_overheat_protection=False,
                 hard_reset=True,
                 on_rack=False,
                 render=False,
                 num_steps_to_log=1000,
                 action_repeat=1,
                 control_time_step=None,
                 env_randomizer=None,
                 forward_reward_cap=float("inf"),
                 reflection=True,
                 log_path=None):
        """Initialize the minitaur gym environment.

    Args:
      urdf_root: The path to the urdf data folder.
      urdf_version: [DEFAULT_URDF_VERSION, DERPY_V0_URDF_VERSION,
        RAINBOW_DASH_V0_URDF_VERSION] are allowable
        versions. If None, DEFAULT_URDF_VERSION is used. DERPY_V0_URDF_VERSION
        is the result of first pass system identification for derpy.
        We will have a different URDF and related Minitaur class each time we
        perform system identification. While the majority of the code of the
        class remains the same, some code changes (e.g. the constraint location
        might change). __init__() will choose the right Minitaur class from
        different minitaur modules based on
        urdf_version.
      distance_weight: The weight of the distance term in the reward.
      energy_weight: The weight of the energy term in the reward.
      shake_weight: The weight of the vertical shakiness term in the reward.
      drift_weight: The weight of the sideways drift term in the reward.
      distance_limit: The maximum distance to terminate the episode.
      observation_noise_stdev: The standard deviation of observation noise.
      self_collision_enabled: Whether to enable self collision in the sim.
      motor_velocity_limit: The velocity limit of each motor.
      pd_control_enabled: Whether to use PD controller for each motor.
      leg_model_enabled: Whether to use a leg motor to reparameterize the action
        space.
      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
      remove_default_joint_damping: Whether to remove the default joint damping.
      motor_kp: proportional gain for the accurate motor model.
      motor_kd: derivative gain for the accurate motor model.
      control_latency: The latency in the controller between when an
        observation is made and when that reading is reported back to the
        neural network.
      pd_latency: latency of the PD controller loop. PD calculates PWM based on
        the motor angle and velocity. The latency measures the time between when
        the motor angle and velocity are observed on the microcontroller and
        when the true state happens on the motor. It is typically 0.001-0.002 s.
      torque_control_enabled: Whether to use torque control. If set to False,
        pose control will be used.
      motor_overheat_protection: Whether to shut down a motor that has exerted
        a large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time
        (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for more
        details.
      hard_reset: Whether to wipe the simulation and load everything when reset
        is called. If set to False, reset just places the minitaur back at the
        start position and sets its pose to the initial configuration.
      on_rack: Whether to place the minitaur on a rack. This is only used to
        debug the walking gait. In this mode, the minitaur's base is hung in
        midair so that its walking gait is easier to visualize.
      render: Whether to render the simulation.
      num_steps_to_log: The max number of control steps in one episode that will
        be logged. If the number of steps exceeds num_steps_to_log, the
        environment will keep running, but only the first num_steps_to_log steps
        will be recorded in the log.
      action_repeat: The number of simulation steps before actions are applied.
      control_time_step: The time step between two successive control signals.
      env_randomizer: An instance (or a list) of EnvRandomizer(s). An
        EnvRandomizer may randomize the physical properties of the minitaur,
        change the terrain during reset(), or add perturbation forces during
        step().
      forward_reward_cap: The maximum value that the forward reward is capped
        at. Disabled (Inf) by default.
      reflection: Whether to enable the planar reflection of the ground plane
        in the debug visualizer (see reset()).
      log_path: The path to write out logs. For the details of logging, refer to
        minitaur_logging.proto.
    Raises:
      ValueError: If the urdf_version is not supported.
    """
        # Set up logging.
        self._log_path = log_path
        self.logging = minitaur_logging.MinitaurLogging(log_path)
        # PD control needs smaller time step for stability.
        if control_time_step is not None:
            self.control_time_step = control_time_step
            self._action_repeat = action_repeat
            self._time_step = control_time_step / action_repeat
        else:
            # Default values for time step and action repeat
            if accurate_motor_model_enabled or pd_control_enabled:
                self._time_step = 0.002
                self._action_repeat = 5
            else:
                self._time_step = 0.01
                self._action_repeat = 1
            self.control_time_step = self._time_step * self._action_repeat
        # TODO(b/73829334): Fix the value of self._num_bullet_solver_iterations.
        self._num_bullet_solver_iterations = int(
            NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
        self._urdf_root = urdf_root
        self._self_collision_enabled = self_collision_enabled
        self._motor_velocity_limit = motor_velocity_limit
        self._observation = []
        self._true_observation = []
        self._objectives = []
        self._objective_weights = [
            distance_weight, energy_weight, drift_weight, shake_weight
        ]
        self._env_step_counter = 0
        self._num_steps_to_log = num_steps_to_log
        self._is_render = render
        self._last_base_position = [0, 0, 0]
        self._distance_weight = distance_weight
        self._energy_weight = energy_weight
        self._drift_weight = drift_weight
        self._shake_weight = shake_weight
        self._distance_limit = distance_limit
        self._observation_noise_stdev = observation_noise_stdev
        self._action_bound = 1
        self._pd_control_enabled = pd_control_enabled
        self._leg_model_enabled = leg_model_enabled
        self._accurate_motor_model_enabled = accurate_motor_model_enabled
        self._remove_default_joint_damping = remove_default_joint_damping
        self._motor_kp = motor_kp
        self._motor_kd = motor_kd
        self._torque_control_enabled = torque_control_enabled
        self._motor_overheat_protection = motor_overheat_protection
        self._on_rack = on_rack
        self._cam_dist = 1.0
        self._cam_yaw = 0
        self._cam_pitch = -30
        self._forward_reward_cap = forward_reward_cap
        self._hard_reset = True
        self._last_frame_time = 0.0
        self._control_latency = control_latency
        self._pd_latency = pd_latency
        self._urdf_version = urdf_version
        self._ground_id = None
        self._reflection = reflection
        self._env_randomizers = convert_to_list(
            env_randomizer) if env_randomizer else []
        self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
        if self._is_render:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.GUI)
        else:
            self._pybullet_client = bullet_client.BulletClient()
        if self._urdf_version is None:
            self._urdf_version = DEFAULT_URDF_VERSION
        self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
        self.seed()
        self.reset()
        observation_high = (self._get_observation_upper_bound() +
                            OBSERVATION_EPS)
        observation_low = (self._get_observation_lower_bound() -
                           OBSERVATION_EPS)
        action_dim = NUM_MOTORS
        action_high = np.array([self._action_bound] * action_dim)
        self.action_space = spaces.Box(-action_high, action_high)
        self.observation_space = spaces.Box(observation_low, observation_high)
        self.viewer = None
        self._hard_reset = hard_reset  # This assignment needs to happen after reset().
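A construction sketch tying the defaults together. With PD control enabled and no explicit control_time_step, the branch above selects time_step = 0.002 and action_repeat = 5:

    env = MinitaurGymEnv(pd_control_enabled=True, render=False)
    print(env.control_time_step)   # 0.002 * 5 = 0.01 s between control signals
    print(env.action_space.shape)  # (NUM_MOTORS,), i.e. (8,) for the minitaur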
Example #4
 def __init__(self,
              urdf_root=pybullet_data.getDataPath(),
              urdf_version=None,
              distance_weight=1.0,
              energy_weight=0.005,
              shake_weight=0.0,
              drift_weight=0.0,
              distance_limit=float("inf"),
              observation_noise_stdev=SENSOR_NOISE_STDDEV,
              self_collision_enabled=True,
              motor_velocity_limit=np.inf,
              pd_control_enabled=False,
              leg_model_enabled=True,
              accurate_motor_model_enabled=False,
              remove_default_joint_damping=False,
              motor_kp=1.0,
              motor_kd=0.02,
              control_latency=0.0,
              pd_latency=0.0,
              torque_control_enabled=False,
              motor_overheat_protection=False,
              hard_reset=True,
              on_rack=False,
              render=False,
              num_steps_to_log=1000,
              action_repeat=1,
              control_time_step=None,
              env_randomizer=None,
              forward_reward_cap=float("inf"),
              reflection=True,
              log_path=None):
     print(
         '------------------------------INITIALIZE ENVIRONMENT------------------------------'
     )
     self._log_path = log_path
     self.logging = minitaur_logging.MinitaurLogging(log_path)
     if control_time_step is not None:
         self.control_time_step = control_time_step  # 0.006
         self._action_repeat = action_repeat  # 6
         self._time_step = control_time_step / action_repeat  # 0.001
     else:
         if accurate_motor_model_enabled or pd_control_enabled:
             self._time_step = 0.002
             self._action_repeat = 5
         else:
             self._time_step = 0.01
             self._action_repeat = 1
         self.control_time_step = self._time_step * self._action_repeat
     self._num_bullet_solver_iterations = int(
         NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
     self._urdf_root = urdf_root
     self._self_collision_enabled = self_collision_enabled
     self._motor_velocity_limit = motor_velocity_limit
     self._observation = []
     self._true_observation = []
     self._objectives = []
     self._objective_weights = [
         distance_weight, energy_weight, drift_weight, shake_weight
     ]
     self._env_step_counter = 0
     self._num_steps_to_log = num_steps_to_log
     self._is_render = render
     self._last_base_position = [0, 0, 0]
     self._distance_weight = distance_weight
     self._energy_weight = energy_weight
     self._drift_weight = drift_weight
     self._shake_weight = shake_weight
     self._distance_limit = distance_limit
     self._observation_noise_stdev = observation_noise_stdev
     self._action_bound = 1
     self._pd_control_enabled = pd_control_enabled
     self._leg_model_enabled = leg_model_enabled
     self._accurate_motor_model_enabled = accurate_motor_model_enabled
     self._remove_default_joint_damping = remove_default_joint_damping
     self._motor_kp = motor_kp
     self._motor_kd = motor_kd
     self._torque_control_enabled = torque_control_enabled
     self._motor_overheat_protection = motor_overheat_protection
     self._on_rack = on_rack
     self._cam_dist = 1.0
     self._cam_yaw = 0
     self._cam_pitch = -30
     self._forward_reward_cap = forward_reward_cap
     self._hard_reset = True
     self._last_frame_time = 0.0
     self._control_latency = control_latency
     self._pd_latency = pd_latency
     self._urdf_version = urdf_version
     self._ground_id = None
     self._reflection = reflection
     self._env_randomizers = convert_to_list(
         env_randomizer) if env_randomizer else []
     self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
     print('----------ENTER BulletClient----------')
     if self._is_render:
         self._pybullet_client = BC.BulletClient(
             connection_mode=pybullet.GUI)
         print('----------EXIT BulletClient----------')
     else:
         self._pybullet_client = BC.BulletClient()
         print('----------EXIT BulletClient----------')
     self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
     self.seed()
     print('----------ENTER MinitaurGymEnv.reset1-----------')
     self.reset()
     print('----------EXIT MinitaurGymEnv.reset1------------')
     print(
         '------------------------------ENVIRONMENT INITIALIZED------------------------------'
     )
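A quick check of the arithmetic traced by the inline comments above (the 0.006 and 6 come from those comments, not from the defaults):

    control_time_step = 0.006
    action_repeat = 6
    time_step = control_time_step / action_repeat
    assert abs(time_step - 0.001) < 1e-12  # matches the '# 0.001' comment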
Example #5
    def __init__(self,
                 gym_config,
                 clock: Union[Callable[..., float], Text] = 'SIM_CLOCK',
                 robot_class: Any = None,
                 scene: scene_base.SceneBase = None,
                 sensors: Sequence[sensor.Sensor] = None,
                 task: Any = None,
                 env_randomizers: Any = None):
        """Initializes the locomotion gym environment.

    Args:
      gym_config: An instance of LocomotionGymConfig.
      clock: The clock source to be used for the gym env. The clock should
        return a timestamp in seconds. Setting clock == "SIM_CLOCK" will enable
        the built-in simulation clock. For real robot experiments, we can use
        time.time or other wall-clock sources.
      robot_class: A class of a robot. We provide a class rather than an
        instance due to hard_reset functionality. Parameters are expected to be
        configured with gin.
      scene: An object for managing the robot's surroundings.
      sensors: A list of environmental sensors for observation.
      task: A callable function/class to calculate the reward and termination
        condition. Takes the gym env as the argument when calling.
      env_randomizers: A list of EnvRandomizer(s). An EnvRandomizer may
        randomize the physical properties of the minitaur, change the terrain
        during reset(), or add perturbation forces during step().

    Raises:
      ValueError: If the num_action_repeat is less than 1.

    """
        self._pybullet_client = None
        self._metric_logger = metric_logger.MetricLogger()
        # TODO(sehoonha) split observation and full-state sensors (b/129858214)

        # Makes sure that close() is always called to flush out the logs to the
        # disk.
        atexit.register(self.close)
        self.seed()
        self._gym_config = gym_config
        if robot_class is None:
            raise ValueError('robot_class cannot be None.')
        self._robot_class = robot_class
        if issubclass(self._robot_class, robot_base.RobotBase):
            self._use_new_robot_class = True
        else:
            self._use_new_robot_class = False
        self._robot = None

        self._scene = scene or scene_base.SceneBase()

        # TODO(sehoonha) change the data structure to dictionary
        self._env_sensors = list(sensors) if sensors is not None else list()

        # TODO(b/152161457): Make logging a standalone module.
        self._log_path = gym_config.log_path
        self._logging = minitaur_logging.MinitaurLogging(self._log_path)
        self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
        self._data_dir = gym_config.data_dir

        self._task = task

        self._env_randomizers = env_randomizers if env_randomizers else []

        # Simulation related parameters.
        self._num_action_repeat = gym_config.simulation_parameters.num_action_repeat
        self._on_rack = gym_config.simulation_parameters.robot_on_rack
        if self._num_action_repeat < 1:
            raise ValueError('number of action repeats should be at least 1.')
        # TODO(b/73829334): Fix the value of self._num_bullet_solver_iterations.
        self._num_bullet_solver_iterations = int(
            _NUM_SIMULATION_ITERATION_STEPS / self._num_action_repeat)

        self._sim_time_step = gym_config.simulation_parameters.sim_time_step_s
        # The sim step counter is an internal variable that counts the number of
        # times pybullet stepSimulation() has been called since the last reset.
        self._sim_step_counter = 0

        self._env_time_step = self._num_action_repeat * self._sim_time_step
        # The env step counter accounts for how many times env.step has been
        # called since reset.
        self._env_step_counter = 0

        if clock == SIM_CLOCK:
            self._clock = self._get_sim_time
        else:
            self._clock = clock

        # Creates the bullet client.
        self._is_render = gym_config.simulation_parameters.enable_rendering
        # The wall-clock time at which the last frame is rendered.
        self._last_frame_time = 0.0

        if gym_config.simulation_parameters.enable_rendering:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.GUI)
            self._pybullet_client.configureDebugVisualizer(
                pybullet.COV_ENABLE_GUI,
                gym_config.simulation_parameters.enable_rendering_gui)
        else:
            self._pybullet_client = bullet_client.BulletClient()
        if gym_config.simulation_parameters.egl_rendering:
            self._pybullet_client.loadPlugin('eglRendererPlugin')

        self._pybullet_client.setAdditionalSearchPath(
            pybullet_data.getDataPath())

        # If enabled, save the performance profile to profiling_path
        # Use Google Chrome about://tracing to open the file
        if gym_config.profiling_path is not None:
            self._profiling_slot = self._pybullet_client.startStateLogging(
                self._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS,
                gym_config.profiling_path)
            self._profiling_counter = 10
        else:
            self._profiling_slot = -1

        # Set the default render options. TODO(b/152161124): Make rendering a
        # standalone module.
        self._camera_target = gym_config.simulation_parameters.camera_target
        self._camera_dist = gym_config.simulation_parameters.camera_distance
        self._camera_yaw = gym_config.simulation_parameters.camera_yaw
        self._camera_pitch = gym_config.simulation_parameters.camera_pitch
        self._render_width = gym_config.simulation_parameters.render_width
        self._render_height = gym_config.simulation_parameters.render_height

        # Loads the environment and robot. The action space will be created as well.
        self._hard_reset = True
        self._observation_dict = {}
        self.reset()
        self._hard_reset = gym_config.simulation_parameters.enable_hard_reset

        # Construct the observation space from the list of sensors.
        self.observation_space = (
            space_utils.convert_sensors_to_gym_space_dictionary([
                sensor for sensor in self.all_sensors() if sensor.get_name()
                not in self._gym_config.ignored_sensor_list
            ]))
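A hedged construction sketch. The LocomotionGymConfig and SimulationParameters names mirror the attribute accesses above (the config must expose simulation_parameters, log_path, data_dir, and profiling_path, all of which __init__ reads); the robot class and field values are hypothetical placeholders:

    sim_params = locomotion_gym_config.SimulationParameters(  # assumed config class
        sim_time_step_s=0.001,
        num_action_repeat=10,
        enable_rendering=False)
    config = locomotion_gym_config.LocomotionGymConfig(  # assumed config class
        simulation_parameters=sim_params)
    env = LocomotionGymEnv(
        gym_config=config,
        robot_class=my_robot.MyRobot,  # hypothetical robot_base.RobotBase subclass
        task=None)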
Example #6
    def reset(
        self,
        initial_motor_angles=None,
        reset_duration=1.0,
        reset_visualization_camera=True,
    ):
        """Resets the robot's position in the world or rebuild the sim world.

    The simulation world will be rebuilt if self._hard_reset is True.

    Args:
      initial_motor_angles: A list of Floats. The desired joint angles after
        reset. If None, the robot will use its built-in value.
      reset_duration: Float. The time (in seconds) needed to rotate all motors
        to the desired initial values.
      reset_visualization_camera: Whether to reset debug visualization camera on
        reset.

    Returns:
      A numpy array containing the initial observation after reset.
    """

        self._env_step_counter = 0
        self._sim_step_counter = 0
        self._last_reset_time = self._clock()
        self._metric_logger.reset_episode()

        # Clear the simulation world and rebuild the robot interface.
        if self._hard_reset:
            self._load()

        # Resets the scene
        self._scene.reset()

        # Resets the robot with the provided init parameters.
        if self._use_new_robot_class:
            self._robot.reset()
        else:
            self._robot.Reset(reload_urdf=False,
                              default_motor_angles=initial_motor_angles,
                              reset_time=reset_duration)

        # Flush the logs to disk and reinitialize the logging system.
        if self._log_path is not None:
            self._logging.save_episode(self._episode_proto)
            self._episode_proto = minitaur_logging_pb2.MinitaurEpisode()
            minitaur_logging.preallocate_episode_proto(self._episode_proto,
                                                       _LOG_BUFFER_LENGTH,
                                                       self._robot)

        # TODO(b/152161124): Move this part to the renderer module.
        if reset_visualization_camera:
            self._pybullet_client.resetDebugVisualizerCamera(
                self._camera_dist, self._camera_yaw, self._camera_pitch,
                [0, 0, 0])

        # Create an example last action based on the type of action space.
        self._last_action = space_utils.create_constant_action(
            self.action_space)

        for s in self.all_sensors():
            s.on_reset(self)

        if self._task and hasattr(self._task, 'reset'):
            self._task.reset(self)

        # Loop over all env randomizers.
        for env_randomizer in self._env_randomizers:
            env_randomizer.randomize_env(self)

        for obj in self._dynamic_objects():
            obj.reset()

        # Initialize the robot base position.
        if self._use_new_robot_class:
            self._last_base_position = self._robot.base_position
        else:
            self._last_base_position = self._robot.GetBasePosition()

        # Resets the observations again, since randomizers might change the env.
        for s in self.all_sensors():
            s.on_reset(self)

        self._last_reset_time = self._clock()
        return self._get_observation()
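A usage sketch for this reset(). Here env is a constructed LocomotionGymEnv as in Example #5; the motor-angle list length depends on the robot and is illustrative:

    # Keep the debug camera where it is and ramp the motors to zero over 2 s.
    observation = env.reset(initial_motor_angles=[0.0] * 8,
                            reset_duration=2.0,
                            reset_visualization_camera=False)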