class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv): def __init__(self): self.robot = InvertedDoublePendulum() MujocoXmlBaseBulletEnv.__init__(self, self.robot) def create_single_player_scene(self): return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) def _step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 - 2)**2 # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 vel_penalty = 0 alive_bonus = 10 done = self.robot.pos_y + 0.3 <= 1 self.rewards = [ float(alive_bonus), float(-dist_penalty), float(-vel_penalty) ] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def _reset(self): if (self.stateId>=0): self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv._reset(self) if (self.stateId<0): self.stateId = self._p.saveState() return r def _step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2 # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 vel_penalty = 0 alive_bonus = 10 done = self.robot.pos_y + 0.3 <= 1 self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0,1.2,1.2, 0,0,0.5)
def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 initial_boundary_path = ROOT + "/initial_space/" + self.__class__.__name__ + "-v0/boundary.npy" boundary = np.load(initial_boundary_path) self.initial_space = spaces.Box(low=boundary[0], high=boundary[1])
def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 self.torqueForce = 200 # In double pendulum the initial torqueForce is 200 self.gravity = 9.8 self.pole1Mass = 5 self.pole2Mass = 5 self.OGtorqueForce = 200 # In double pendulum the initial torqueForce is 200 self.OGgravity = 9.8 self.OGpole1Mass = 5 self.OGpole2Mass = 5
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 initial_boundary_path = ROOT + "/initial_space/" + self.__class__.__name__ + "-v0/boundary.npy" boundary = np.load(initial_boundary_path) self.initial_space = spaces.Box(low=boundary[0], high=boundary[1]) def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def reset(self, **kwargs): if (self.stateId >= 0): self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self, **kwargs) if (self.stateId < 0): self.stateId = self._p.saveState() return r def step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 - 2)**2 # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 vel_penalty = 0 alive_bonus = 10 done = self.robot.pos_y + 0.3 <= 1 self.rewards = [ float(alive_bonus), float(-dist_penalty), float(-vel_penalty) ] self.HUD(state, a, done) return state, sum(self.rewards) / 10000, done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1
def __init__(self): self.robot = InvertedDoublePendulum() MujocoXmlBaseBulletEnv.__init__(self, self.robot)
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedDoublePendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 self.torqueForce = 200 # In double pendulum the initial torqueForce is 200 self.gravity = 9.8 self.pole1Mass = 5 self.pole2Mass = 5 self.OGtorqueForce = 200 # In double pendulum the initial torqueForce is 200 self.OGgravity = 9.8 self.OGpole1Mass = 5 self.OGpole2Mass = 5 def get_features(self): return self.torqueForce, self.gravity, self.pole1Mass, self.pole2Mass def reset_features(self): self.torqueForce = self.OGtorqueForce self.gravity = self.OGgravity self.pole1Mass = self.OGpole1Mass self.pole2Mass = self.OGpole2Mass def set_features(self, features=[200, 9.8, 5, 5]): self.reset() torqueForce = features[0] gravity = features[1] pole1Mass = features[2] pole2Mass = features[3] self.torqueForce = torqueForce self.gravity = gravity self.pole1Mass = pole1Mass self.pole2Mass = pole2Mass self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass) bodyIndex = self.robot.parts["pole2"].bodyIndex bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex] partIndex = self.robot.parts["pole2"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass) def randomize(self, level=0): self.reset() low_window = 0.95 high_window = 1.05 lower_window = 0.9 higher_window = 1.15 np.random.seed() if (level == 0): return if (level == 1): self.torqueForce = self.np_random.uniform( low=self.OGtorqueForce * low_window, high=self.OGtorqueForce * high_window) self.gravity = self.np_random.uniform( low=self.OGgravity * low_window, high=self.OGgravity * high_window) self.pole1Mass = self.np_random.uniform( low=self.OGpole1Mass * low_window, high=self.OGpole1Mass * high_window) self.pole2Mass = self.np_random.uniform( low=self.OGpole2Mass * low_window, high=self.OGpole2Mass * high_window) elif (level == 2): if (self.np_random.uniform(low=0, high=1.0) > 0.5): self.torqueForce = self.np_random.uniform( low=self.OGtorqueForce * lower_window, high=self.OGtorqueForce * low_window) else: self.torqueForce = self.np_random.uniform( low=self.OGtorqueForce * high_window, high=self.OGtorqueForce * higher_window) if (self.np_random.uniform(low=0, high=1.0) > 0.5): self.gravity = self.np_random.uniform( low=self.OGgravity * lower_window, high=self.OGgravity * low_window) else: self.gravity = self.np_random.uniform( low=self.OGgravity * high_window, high=self.OGgravity * higher_window) if (self.np_random.uniform(low=0, high=1.0) > 0.5): self.pole1Mass = self.np_random.uniform( low=self.OGpole1Mass * lower_window, high=self.OGpole1Mass * low_window) else: self.pole1Mass = self.np_random.uniform( low=self.OGpole1Mass * high_window, high=self.OGpole1Mass * higher_window) if (self.np_random.uniform(low=0, high=1.0) > 0.5): self.pole2Mass = self.np_random.uniform( low=self.OGpole2Mass * lower_window, high=self.OGpole2Mass * low_window) else: self.pole2Mass = self.np_random.uniform( low=self.OGpole2Mass * high_window, high=self.OGpole2Mass * higher_window) else: print( "ERROR: Invalid randomization mode selected, only 0, 1, or 2 are available" ) #print("randomizing, torque: ", self.torqueForce , " gravity ", self.gravity) self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass) bodyIndex = self.robot.parts["pole2"].bodyIndex bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex] partIndex = self.robot.parts["pole2"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass) def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def reset(self): if (self.stateId >= 0): self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self) if (self.stateId < 0): self.stateId = self._p.saveState() self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass) bodyIndex = self.robot.parts["pole2"].bodyIndex bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex] partIndex = self.robot.parts["pole2"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass) return r def apply_action(self, a): assert (np.isfinite(a).all()) self.robot.slider.set_motor_torque(self.torqueForce * float(np.clip(a[0], -1, +1))) def step(self, a): #print("GLOBAL: torqueForce: ", self.torqueForce, " gravity: ", self.gravity, " pole1Mass: ", self.pole1Mass, " pole2Mass: ", self.pole2Mass) #self.robot.apply_action(a) self.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9 # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3 dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 - 2)**2 # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040 #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 vel_penalty = 0 alive_bonus = 10 done = self.robot.pos_y + 0.3 <= 1 self.rewards = [ float(alive_bonus), float(-dist_penalty), float(-vel_penalty) ] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)