Beispiel #1
0
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv):
	def __init__(self):
		self.robot = InvertedDoublePendulum()
		MJCFBaseBulletEnv.__init__(self, self.robot)
		self.stateId = -1
	def create_single_player_scene(self, bullet_client):
		return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

	def _reset(self):
		if (self.stateId>=0):
			self._p.restoreState(self.stateId)
		r = MJCFBaseBulletEnv._reset(self)
		if (self.stateId<0):
			self.stateId = self._p.saveState()
		return r
	
	def _step(self, a):
		self.robot.apply_action(a)
		self.scene.global_step()
		state = self.robot.calc_state()  # sets self.pos_x self.pos_y
		# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
		# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
		dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2
		# v1, v2 = self.model.data.qvel[1:3]   TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
		#vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
		vel_penalty = 0
		alive_bonus = 10
		done = self.robot.pos_y + 0.3 <= 1
		self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
		self.HUD(state, a, done)
		return state, sum(self.rewards), done, {}

	def camera_adjust(self):
		self.camera.move_and_look_at(0,1.2,1.2, 0,0,0.5)
Beispiel #2
0
class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedDoublePendulum()
        MujocoXmlBaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self):
        return SingleRobotEmptyScene(gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def _step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
        # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
        dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 -
                                                     2)**2
        # v1, v2 = self.model.data.qvel[1:3]   TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
        #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        vel_penalty = 0
        alive_bonus = 10
        done = self.robot.pos_y + 0.3 <= 1
        self.rewards = [
            float(alive_bonus),
            float(-dist_penalty),
            float(-vel_penalty)
        ]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
Beispiel #3
0
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedDoublePendulum()
        MJCFBaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1

        initial_boundary_path = ROOT + "/initial_space/" + self.__class__.__name__ + "-v0/boundary.npy"
        boundary = np.load(initial_boundary_path)
        self.initial_space = spaces.Box(low=boundary[0], high=boundary[1])

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client,
                                     gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def reset(self, **kwargs):
        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)
        r = MJCFBaseBulletEnv.reset(self, **kwargs)

        if (self.stateId < 0):
            self.stateId = self._p.saveState()
        return r

    def step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
        # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
        dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 -
                                                     2)**2
        # v1, v2 = self.model.data.qvel[1:3]   TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
        #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        vel_penalty = 0
        alive_bonus = 10
        done = self.robot.pos_y + 0.3 <= 1
        self.rewards = [
            float(alive_bonus),
            float(-dist_penalty),
            float(-vel_penalty)
        ]
        self.HUD(state, a, done)

        return state, sum(self.rewards) / 10000, done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
Beispiel #4
0
class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedDoublePendulum()
        MJCFBaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1

        self.torqueForce = 200  # In double pendulum the initial torqueForce is 200
        self.gravity = 9.8
        self.pole1Mass = 5
        self.pole2Mass = 5

        self.OGtorqueForce = 200  # In double pendulum the initial torqueForce is 200
        self.OGgravity = 9.8
        self.OGpole1Mass = 5
        self.OGpole2Mass = 5

    def get_features(self):
        return self.torqueForce, self.gravity, self.pole1Mass, self.pole2Mass

    def reset_features(self):
        self.torqueForce = self.OGtorqueForce
        self.gravity = self.OGgravity
        self.pole1Mass = self.OGpole1Mass
        self.pole2Mass = self.OGpole2Mass

    def set_features(self, features=[200, 9.8, 5, 5]):
        self.reset()

        torqueForce = features[0]
        gravity = features[1]
        pole1Mass = features[2]
        pole2Mass = features[3]

        self.torqueForce = torqueForce
        self.gravity = gravity
        self.pole1Mass = pole1Mass
        self.pole2Mass = pole2Mass

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex
        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass)

        bodyIndex = self.robot.parts["pole2"].bodyIndex
        bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole2"].bodyPartIndex
        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass)

    def randomize(self, level=0):
        self.reset()

        low_window = 0.95
        high_window = 1.05
        lower_window = 0.9
        higher_window = 1.15

        np.random.seed()

        if (level == 0):
            return
        if (level == 1):
            self.torqueForce = self.np_random.uniform(
                low=self.OGtorqueForce * low_window,
                high=self.OGtorqueForce * high_window)
            self.gravity = self.np_random.uniform(
                low=self.OGgravity * low_window,
                high=self.OGgravity * high_window)
            self.pole1Mass = self.np_random.uniform(
                low=self.OGpole1Mass * low_window,
                high=self.OGpole1Mass * high_window)
            self.pole2Mass = self.np_random.uniform(
                low=self.OGpole2Mass * low_window,
                high=self.OGpole2Mass * high_window)
        elif (level == 2):
            if (self.np_random.uniform(low=0, high=1.0) > 0.5):
                self.torqueForce = self.np_random.uniform(
                    low=self.OGtorqueForce * lower_window,
                    high=self.OGtorqueForce * low_window)
            else:
                self.torqueForce = self.np_random.uniform(
                    low=self.OGtorqueForce * high_window,
                    high=self.OGtorqueForce * higher_window)

            if (self.np_random.uniform(low=0, high=1.0) > 0.5):
                self.gravity = self.np_random.uniform(
                    low=self.OGgravity * lower_window,
                    high=self.OGgravity * low_window)
            else:
                self.gravity = self.np_random.uniform(
                    low=self.OGgravity * high_window,
                    high=self.OGgravity * higher_window)

            if (self.np_random.uniform(low=0, high=1.0) > 0.5):
                self.pole1Mass = self.np_random.uniform(
                    low=self.OGpole1Mass * lower_window,
                    high=self.OGpole1Mass * low_window)
            else:
                self.pole1Mass = self.np_random.uniform(
                    low=self.OGpole1Mass * high_window,
                    high=self.OGpole1Mass * higher_window)

            if (self.np_random.uniform(low=0, high=1.0) > 0.5):
                self.pole2Mass = self.np_random.uniform(
                    low=self.OGpole2Mass * lower_window,
                    high=self.OGpole2Mass * low_window)
            else:
                self.pole2Mass = self.np_random.uniform(
                    low=self.OGpole2Mass * high_window,
                    high=self.OGpole2Mass * higher_window)
        else:
            print(
                "ERROR: Invalid randomization mode selected, only 0, 1, or 2 are available"
            )
        #print("randomizing, torque: ", self.torqueForce  , " gravity ", self.gravity)

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex

        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass)

        bodyIndex = self.robot.parts["pole2"].bodyIndex
        bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole2"].bodyPartIndex

        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass)

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client,
                                     gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def reset(self):

        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)
        r = MJCFBaseBulletEnv.reset(self)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex
        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole1Mass)

        bodyIndex = self.robot.parts["pole2"].bodyIndex
        bodyUniqueId = self.robot.parts["pole2"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole2"].bodyPartIndex
        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.pole2Mass)

        return r

    def apply_action(self, a):
        assert (np.isfinite(a).all())
        self.robot.slider.set_motor_torque(self.torqueForce *
                                           float(np.clip(a[0], -1, +1)))

    def step(self, a):
        #print("GLOBAL: torqueForce: ", self.torqueForce, " gravity: ", self.gravity, " pole1Mass: ", self.pole1Mass, " pole2Mass: ", self.pole2Mass)
        #self.robot.apply_action(a)
        self.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        # upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
        # using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
        dist_penalty = 0.01 * self.robot.pos_x**2 + (self.robot.pos_y + 0.3 -
                                                     2)**2
        # v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
        #vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        vel_penalty = 0
        alive_bonus = 10
        done = self.robot.pos_y + 0.3 <= 1
        self.rewards = [
            float(alive_bonus),
            float(-dist_penalty),
            float(-vel_penalty)
        ]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)