Ejemplo n.º 1
0
class HalfCheetahBulletEnv(WalkerBaseBulletEnv):
    def __init__(self, render=False):
        self.robot = HalfCheetah()
        WalkerBaseBulletEnv.__init__(self, self.robot, render)
        print(
            "PyBullet Halfcheetah: reward = progress + (njoint_at_limit * -0.1), terminate also when z < 0.3"
        )

    def _isDone(self):
        return False

    def step(self, a):
        if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
            self.robot.apply_action(a)
            self.scene.global_step()

        state = self.robot.calc_state()  # also calculates self.joints_at_limit

        self._alive = float(
            self.robot.alive_bonus(state[0] + self.robot.initial_z,
                                   self.robot.body_rpy[1])
        )  # state[0] is body height above ground, body_rpy[1] is pitch
        done = self._isDone()
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(
                self.robot.feet
        ):  # TODO: Maybe calculating feet contacts could be done within the robot code
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
            if (self.ground_ids & contact_ids):
                #see Issue 63: https://github.com/openai/roboschool/issues/63
                #feet_collision_cost += self.foot_collision_cost
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0

        joints_at_limit_cost = float(self.joints_at_limit_cost *
                                     self.robot.joints_at_limit)

        self.HUD(state, a, done)

        if (self._alive < 0):
            return state, progress + joints_at_limit_cost, True, {
                "progress": progress
            }
        else:
            return state, progress + joints_at_limit_cost, False, {
                "progress": progress
            }
Ejemplo n.º 2
0
class CrippledHalfCheetahBulletEnv(WalkerBaseBulletEnv):
    def __init__(self, render=False):
        self.robot = HalfCheetah()
        WalkerBaseBulletEnv.__init__(self, self.robot, render)

        cripple_prob = [0.5, 1 / 12, 1 / 12, 1 / 12, 1 / 12, 1 / 12,
                        1 / 12]  # None .40, 0-4 0.15 each
        self.crippled_joint = np.random.choice([None, 0, 1, 2, 3, 4, 5],
                                               p=cripple_prob)
        self.gates = [
            np.where(np.random.uniform(0, 1, size=(48, )) >= 0.2, 1.0, 0.0)
            for _ in range(7)
        ]

    def reset(self):
        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)

        r = MJCFBaseBulletEnv.reset(self)
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0)

        self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(
            self._p, self.stadium_scene.ground_plane_mjcf)
        self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex],
                                self.parts[f].bodyPartIndex)
                               for f in self.foot_ground_object_names])
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()

        cripple_prob = [0.5, 1 / 12, 1 / 12, 1 / 12, 1 / 12, 1 / 12,
                        1 / 12]  # None .40, 0-4 0.15 each
        self.crippled_joint = np.random.choice([None, 0, 1, 2, 3, 4, 5],
                                               p=cripple_prob)
        self.gate = self.gates[self.crippled_joint +
                               1 if self.crippled_joint is not None else 0]

        return r

    def _isDone(self):
        return self._alive < 0

    electricity_cost = -2.0
    stall_torque_cost = -0.1
    foot_collision_cost = -1.0
    joints_at_limit_cost = -0.1
    foot_ground_object_names = set(["floor"])

    def step(self, a):
        if self.crippled_joint is not None:
            a[self.crippled_joint] = 0

        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()

        self._alive = float(
            self.robot.alive_bonus(state[0] + self.robot.initial_z,
                                   self.robot.body_rpy[1]))
        done = self._isDone()
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(self.robot.feet):
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            if (self.ground_ids & contact_ids):
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0

        electricity_cost = self.electricity_cost * \
            float(np.abs(a * self.robot.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * \
            float(np.square(a).mean())

        joints_at_limit_cost_T = float(self.joints_at_limit_cost *
                                       self.robot.joints_at_limit)
        joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit)

        self.rewards = [joints_at_limit_cost, progress]

        self.HUD(state, a, done)
        self.reward += sum(self.rewards)
        return state, sum(self.rewards), bool(done), {}