class AntBulletEnv(WalkerBaseBulletEnv): def __init__(self, render=False): self.robot = Ant() WalkerBaseBulletEnv.__init__(self, self.robot, render) print("ByBullet Ant: reward = progress + 0.01 + (torque_cost * -0.01) + (nJointLimit * -0.1)") def step(self, a): if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions self.robot.apply_action(a) self.scene.global_step() oldpos0 = self.robot.body_xyz[0] oldpos1 = self.robot.body_xyz[1] state = self.robot.calc_state() # also calculates self.joints_at_limit self._alive = float( self.robot.alive_bonus( state[0] + self.robot.initial_z, self.robot.body_rpy[1])) # state[0] is body height above ground, body_rpy[1] is pitch done = self._isDone() if not np.isfinite(state).all(): print("~INF~", state) done = True # potential_old = self.potential # self.potential = self.robot.calc_potential() # progress = float(self.potential - potential_old) xy_mov_angle = np.arctan2(self.robot.body_xyz[1] - oldpos1, self.robot.body_xyz[0] - oldpos0) self_mov_angle = xy_mov_angle - self.robot.yaw step_length = np.sqrt((self.robot.body_xyz[0] - oldpos0)**2 + (self.robot.body_xyz[1] - oldpos1)**2) / self.robot.scene.dt # this line remove for walking forward, use self_mov_angle-(np.pi/4) and self_mov_angle+(np.pi/4) # to reward for walking 45 degree left or right progress = 0.0 if self.robot.behavior1 == 5.0 and self.robot.behavior2 == 0.0: progress = step_length * np.cos(self_mov_angle-(np.pi/4)) else: progress = step_length * np.cos(self_mov_angle+(np.pi/4)) feet_collision_cost = 0.0 for i, f in enumerate( self.robot.feet ): # TODO: Maybe calculating feet contacts could be done within the robot code contact_ids = set((x[2], x[4]) for x in f.contact_list()) #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) ) if (self.ground_ids & contact_ids): #see Issue 63: https://github.com/openai/roboschool/issues/63 #feet_collision_cost += self.foot_collision_cost self.robot.feet_contact[i] = 1.0 else: self.robot.feet_contact[i] = 0.0 stall_cost = -0.01 * float(np.square(a).mean()) joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit) #jspeedrew = 1.0 * float(np.abs(self.robot.joint_speeds).mean()) self.HUD(state, a, done) return state, progress + 0.01 + stall_cost + joints_at_limit_cost, bool(done), {"progress" : progress}
class AntBulletEnv(WalkerBaseBulletEnv): def __init__(self, render=False): self.robot = Ant() WalkerBaseBulletEnv.__init__(self, self.robot, render) print( "ByBullet Ant-v5: reward = progress + 0.01 + (torque_cost * -0.01) + (nJointLimit * -0.1)" ) def step(self, a): if not self.scene.multiplayer: # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # also calculates self.joints_at_limit self._alive = float( self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[1]) ) # state[0] is body height above ground, body_rpy[1] is pitch done = self._isDone() if not np.isfinite(state).all(): print("~INF~", state) done = True potential_old = self.potential self.potential = self.robot.calc_potential() progress = float(self.potential - potential_old) feet_collision_cost = 0.0 for i, f in enumerate( self.robot.feet ): # TODO: Maybe calculating feet contacts could be done within the robot code contact_ids = set((x[2], x[4]) for x in f.contact_list()) #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) ) if (self.ground_ids & contact_ids): #see Issue 63: https://github.com/openai/roboschool/issues/63 #feet_collision_cost += self.foot_collision_cost self.robot.feet_contact[i] = 1.0 else: self.robot.feet_contact[i] = 0.0 stall_cost = -0.01 * float(np.square(a).mean()) joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit) #jspeedrew = 1.0 * float(np.abs(self.robot.joint_speeds).mean()) self.HUD(state, a, done) return state, progress + 0.01 + stall_cost + joints_at_limit_cost, bool( done), { "progress": progress }
class CrippledAntBulletEnv(WalkerBaseBulletEnv): def __init__(self, render=False): self.robot = Ant() self.crippled_leg = None WalkerBaseBulletEnv.__init__(self, self.robot, render) def reset(self): if (self.stateId >= 0): self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self) self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0) self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene( self._p, self.stadium_scene.ground_plane_mjcf) self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex], self.parts[f].bodyPartIndex) for f in self.foot_ground_object_names]) self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1) if (self.stateId < 0): self.stateId = self._p.saveState() #print("saving state self.stateId:",self.stateId) cripple_prob = [0.4, 0.15, 0.15, 0.15, 0.15] # None .40, 0-4 0.15 each self.crippled_leg = np.random.choice([None, 0, 1, 2, 3], p=cripple_prob) return r def step(self, a): if self.crippled_leg is not None: a[self.crippled_leg * 3:(self.crippled_leg + 1) * 3] = 0 # color leg red self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() self._alive = float( self.robot.alive_bonus(state[0] + self.robot.initial_z, self.robot.body_rpy[1])) done = self._isDone() if not np.isfinite(state).all(): print("~INF~", state) done = True potential_old = self.potential self.potential = self.robot.calc_potential() progress = float(self.potential - potential_old) feet_collision_cost = 0.0 for i, f in enumerate(self.robot.feet): contact_ids = set((x[2], x[4]) for x in f.contact_list()) if (self.ground_ids & contact_ids): self.robot.feet_contact[i] = 1.0 else: self.robot.feet_contact[i] = 0.0 electricity_cost = self.electricity_cost * \ float(np.abs(a * self.robot.joint_speeds).mean()) electricity_cost += self.stall_torque_cost * \ float(np.square(a).mean()) joints_at_limit_cost_T = float(self.joints_at_limit_cost * self.robot.joints_at_limit) joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit) self.rewards = [ 0.01, joints_at_limit_cost, progress #self._alive, progress, electricity_cost, joints_at_limit_cost, feet_collision_cost ] self.HUD(state, a, done) self.reward += sum(self.rewards) return state, sum(self.rewards), bool(done), {}