class AntBulletEnv(WalkerBaseBulletEnv):

  def __init__(self, render=False):
    self.robot = Ant()
    WalkerBaseBulletEnv.__init__(self, self.robot, render)
    print("ByBullet Ant: reward = progress + 0.01 + (torque_cost * -0.01) + (nJointLimit * -0.1)")

  def step(self, a):
    if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
      self.robot.apply_action(a)
      self.scene.global_step()

    oldpos0 = self.robot.body_xyz[0]  # XY position from the previous state,
    oldpos1 = self.robot.body_xyz[1]  # used below for the movement-direction reward
    state = self.robot.calc_state()  # also calculates self.joints_at_limit

    self._alive = float(
        self.robot.alive_bonus(
            state[0] + self.robot.initial_z,
            self.robot.body_rpy[1]))  # state[0] is body height above ground, body_rpy[1] is pitch
    done = self._isDone()
    if not np.isfinite(state).all():
      print("~INF~", state)
      done = True

    # potential_old = self.potential
    # self.potential = self.robot.calc_potential()
    # progress = float(self.potential - potential_old)

    xy_mov_angle = np.arctan2(self.robot.body_xyz[1] - oldpos1, self.robot.body_xyz[0] - oldpos0)
    self_mov_angle = xy_mov_angle - self.robot.yaw  # movement direction relative to the body's heading
    # despite its name, step_length is a speed: the XY displacement over one step, divided by dt
    step_length = np.sqrt((self.robot.body_xyz[0] - oldpos0)**2 + (self.robot.body_xyz[1] - oldpos1)**2) / self.robot.scene.dt
    # for plain forward walking use np.cos(self_mov_angle); the -(np.pi/4) and
    # +(np.pi/4) offsets below instead reward walking 45 degrees to the left or right
    progress = 0.0
    if self.robot.behavior1 == 5.0 and self.robot.behavior2 == 0.0:
      progress = step_length * np.cos(self_mov_angle-(np.pi/4))
    else:
      progress = step_length * np.cos(self_mov_angle+(np.pi/4))
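    # With counterclockwise-positive angles, cos(self_mov_angle - pi/4) peaks when
    # the robot moves 45 degrees to the left of its yaw, and cos(self_mov_angle + pi/4)
    # peaks 45 degrees to the right; behavior1/behavior2 are flags on the robot
    # (assumed to be set elsewhere) that select between the two target headings.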

    feet_collision_cost = 0.0
    for i, f in enumerate(
        self.robot.feet
    ):  # TODO: Maybe calculating feet contacts could be done within the robot code
      contact_ids = set((x[2], x[4]) for x in f.contact_list())
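      # in PyBullet's getContactPoints tuples, index 2 is the other body's
      # unique id and index 4 is its link index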
      #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
      if (self.ground_ids & contact_ids):
        #see Issue 63: https://github.com/openai/roboschool/issues/63
        #feet_collision_cost += self.foot_collision_cost
        self.robot.feet_contact[i] = 1.0
      else:
        self.robot.feet_contact[i] = 0.0

    stall_cost = -0.01 * float(np.square(a).mean())
    joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit)
    #jspeedrew  = 1.0  * float(np.abs(self.robot.joint_speeds).mean()) 
 
    self.HUD(state, a, done)

    return state, progress + 0.01 + stall_cost + joints_at_limit_cost, bool(done), {"progress": progress}
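
A minimal usage sketch for the class above (assuming the standard Gym-style API of pybullet_envs; env.action_space.sample() just draws a random action):

env = AntBulletEnv(render=False)
obs = env.reset()
for _ in range(1000):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()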
class AntBulletEnv(WalkerBaseBulletEnv):
    def __init__(self, render=False):
        self.robot = Ant()
        WalkerBaseBulletEnv.__init__(self, self.robot, render)
        print(
            "ByBullet Ant-v5: reward = progress + 0.01 + (torque_cost * -0.01) + (nJointLimit * -0.1)"
        )

    def step(self, a):
        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
            self.robot.apply_action(a)
            self.scene.global_step()

        state = self.robot.calc_state()  # also calculates self.joints_at_limit

        self._alive = float(
            self.robot.alive_bonus(state[0] + self.robot.initial_z,
                                   self.robot.body_rpy[1])
        )  # state[0] is body height above ground, body_rpy[1] is pitch
        done = self._isDone()
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)
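        # calc_potential() in pybullet_envs is roughly -walk_target_dist / scene.dt,
        # so this difference approximates the robot's speed toward the walk target
        # over the last simulation step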

        feet_collision_cost = 0.0
        for i, f in enumerate(
                self.robot.feet
        ):  # TODO: Maybe calculating feet contacts could be done within the robot code
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
            if (self.ground_ids & contact_ids):
                #see Issue 63: https://github.com/openai/roboschool/issues/63
                #feet_collision_cost += self.foot_collision_cost
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0

        stall_cost = -0.01 * float(np.square(a).mean())
        joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit)
        #jspeedrew  = 1.0  * float(np.abs(self.robot.joint_speeds).mean())

        self.HUD(state, a, done)

        return state, progress + 0.01 + stall_cost + joints_at_limit_cost, bool(done), {
            "progress": progress
        }
Example #3
class CrippledAntBulletEnv(WalkerBaseBulletEnv):
    def __init__(self, render=False):
        self.robot = Ant()
        self.crippled_leg = None
        WalkerBaseBulletEnv.__init__(self, self.robot, render)

    def reset(self):
        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)

        r = MJCFBaseBulletEnv.reset(self)
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0)

        self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(
            self._p, self.stadium_scene.ground_plane_mjcf)
        self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex],
                                self.parts[f].bodyPartIndex)
                               for f in self.foot_ground_object_names])
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()
            #print("saving state self.stateId:",self.stateId)

        cripple_prob = [0.4, 0.15, 0.15, 0.15, 0.15]  # None: 0.40; legs 0-3: 0.15 each
        self.crippled_leg = np.random.choice([None, 0, 1, 2, 3],
                                             p=cripple_prob)

        return r

    def step(self, a):
        if self.crippled_leg is not None:
            a[self.crippled_leg * 3:(self.crippled_leg + 1) * 3] = 0
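            # caution: this slice assumes 3 actuators per leg; the default
            # pybullet_envs Ant has an 8-dim action space (2 joints per leg),
            # so verify the indexing against the actual robot before relying on it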
            # TODO: color the crippled leg red in the visualizer

        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()

        self._alive = float(
            self.robot.alive_bonus(state[0] + self.robot.initial_z,
                                   self.robot.body_rpy[1]))
        done = self._isDone()
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(self.robot.feet):
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            if (self.ground_ids & contact_ids):
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0

        electricity_cost = self.electricity_cost * \
            float(np.abs(a * self.robot.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * \
            float(np.square(a).mean())

        joints_at_limit_cost_T = float(self.joints_at_limit_cost *
                                       self.robot.joints_at_limit)
        joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit)
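        # Note: electricity_cost, feet_collision_cost, joints_at_limit_cost_T and
        # self._alive are computed above but left out of self.rewards below; the
        # commented-out entry shows the full original reward terms.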

        self.rewards = [
            0.01, joints_at_limit_cost, progress
            #self._alive, progress, electricity_cost, joints_at_limit_cost, feet_collision_cost
        ]

        self.HUD(state, a, done)
        self.reward += sum(self.rewards)
        return state, sum(self.rewards), bool(done), {}
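
A short usage sketch for CrippledAntBulletEnv (hypothetical driver loop, assuming the same Gym-style API as above), showing that the crippled leg is resampled on every reset:

env = CrippledAntBulletEnv()
for episode in range(3):
    obs = env.reset()
    print("episode", episode, "crippled leg:", env.crippled_leg)
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
env.close()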