Beispiel #1
0
class Lander(gym.Env):
    """Box2D rocket-landing environment with a configurable start state.

    A dynamic ``lander`` body with two motorised legs is spawned above a
    static ``ship`` body located at ``self.origin``.  An attempt ends with a
    reward of -100 when ``game_over`` is set or the lander leaves the allowed
    region, and with a reward of +500 when the lander body falls asleep.
    ``step`` resets the environment automatically once an attempt is over.

    The observation is an 8-element array: normalised x/y position, scaled
    x/y velocity, scaled angle, scaled angular velocity and one
    ground-contact flag per leg.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': FPS
    }

    # When True the action space is a 2-float Box, otherwise Discrete(4).
    continuous = False

    def __init__(self, initial_generator):
        """Build the world and run a first ``reset``.

        Args:
            initial_generator: Callable taking the current difficulty and
                returning a mapping with the keys ``'pos_x'``, ``'pos_y'``,
                ``'vel'``, ``'dir'``, ``'vdir'`` and ``'ship_width'`` (see
                ``update_initials``).  A falsy value keeps the dummy initial
                values assigned below.
        """
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.ship = None
        self.lander = None
        self.legs = []
        self.particles = []
        self.water = Water([0, 1], force_const_volume=True)

        self.prev_reward = None

        self.origin = (W / 2, H / 20)

        # dummy initial variables, will be set in update_initials
        self.initial_pos_x = self.origin[0]
        self.initial_pos_y = self.origin[1] + H / 2
        self.initial_vel = 0
        self.initial_dir = 0
        self.initial_vdir = 0
        self.ship_width = 100
        # non constant hyperparameters
        self.initial_generator = initial_generator
        self.difficulty = 0.1

        # Number of step() calls made in the current attempt.
        self.steps = 0

        high = np.array(
            [np.inf] * 8)  # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-high, high)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2, ))
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self.reset()

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove every body created by ``reset`` from the Box2D world.

        Does nothing when no ship exists yet (i.e. before the first reset).
        """
        if not self.ship:
            return
        self.world.contactListener = None
        self._clean_particles(True)
        self.world.DestroyBody(self.ship)
        self.ship = None
        self.world.DestroyBody(self.lander)
        self.lander = None
        for leg in self.legs:
            self.world.DestroyBody(leg)
        # Do not keep references to bodies that no longer exist in the world.
        self.legs = []

    def reset(self):
        """Start a new attempt and return its first observation.

        Draws new initial conditions, rebuilds ship, lander and legs, then
        performs one no-op ``step`` to obtain the observation.  The step
        counter is zeroed afterwards so that the priming step is never
        counted in ``attempt_duration``.

        NOTE: ``step`` itself calls ``reset`` when an attempt ends, so an
        initial state that is immediately terminal makes this method recurse
        until a non-terminal start state is produced.
        """
        self.update_initials()

        self._destroy()
        self.world.contactListener_keepref = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_keepref
        self.game_over = False
        self.prev_shaping = None

        # ship
        self.ship = self.world.CreateStaticBody(
            position=self.origin,
            shapes=polygonShape(
                vertices=[(x / SCALE, y / SCALE)
                          for x, y in SHIP['vertices'](self.ship_width)]))

        self.ship.color1 = SHIP['color1']
        self.ship.color2 = SHIP['color2']

        # lander
        self.lander = self.world.CreateDynamicBody(
            position=(self.initial_pos_x, self.initial_pos_y),
            angle=self.initial_dir,
            fixtures=fixtureDef(
                shape=polygonShape(vertices=[(x / SCALE, y / SCALE)
                                             for x, y in ROCKET['vertices']]),
                density=5.0,
                friction=0.1,
                categoryBits=0x0010,
                maskBits=0x001,  # collide only with ground
                restitution=0.0)  # 0.99 bouncy
        )
        self.lander.color1 = ROCKET['body_color1']
        self.lander.color2 = ROCKET['body_color2']
        # Initial push along the rocket's own axis plus an initial spin.
        self.lander.ApplyForceToCenter(
            (self.initial_vel * math.sin(self.initial_dir),
             -self.initial_vel * math.cos(self.initial_dir)), True)
        self.lander.ApplyAngularImpulse(self.initial_vdir, True)

        self.legs = []
        for i in [-1, +1]:
            leg = self.world.CreateDynamicBody(
                position=(self.initial_pos_x - i * ROCKET['leg_away'] / SCALE,
                          self.initial_pos_y),
                angle=self.initial_dir + i * 0.05,
                fixtures=fixtureDef(
                    shape=polygonShape(box=(ROCKET['leg_w'] / SCALE,
                                            ROCKET['leg_h'] / SCALE)),
                    density=1.0,
                    restitution=0.0,
                    categoryBits=0x0020,
                    maskBits=0x001))
            leg.ground_contact = False
            leg.color1 = ROCKET['leg_color1']
            leg.color2 = ROCKET['leg_color2']
            rjd = revoluteJointDef(
                bodyA=self.lander,
                bodyB=leg,
                localAnchorA=(0, 0),
                localAnchorB=(i * ROCKET['leg_away'] / SCALE,
                              ROCKET['leg_down'] / SCALE),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=ROCKET['leg_spring_torque'],
                motorSpeed=+0.3 * i  # low enough not to jump back into the sky
            )
            if i == -1:
                rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
                rjd.upperAngle = +0.9
            else:
                rjd.lowerAngle = -0.9
                rjd.upperAngle = -0.9 + 0.5
            leg.joint = self.world.CreateJoint(rjd)
            self.legs.append(leg)

        self.drawlist = [self.lander, self.ship] + self.legs

        # One no-op step yields the first observation of the new attempt.
        observation = self.step(
            np.array([0, 0]) if self.continuous else 0)[0]
        # The priming step above must not count towards the attempt length,
        # regardless of who called reset().
        self.steps = 0
        return observation

    def _create_particle(self, mass, x, y, ttl):
        """Create a decorative exhaust particle at ``(x, y)`` and return it.

        ``mass`` is used as the fixture density and ``ttl`` is stored on the
        body; expired particles are purged on every call.
        """
        p = self.world.CreateDynamicBody(
            position=(x, y),
            angle=0.0,
            fixtures=fixtureDef(
                shape=circleShape(radius=5 / SCALE, pos=(0, 0)),
                density=mass,
                friction=0.1,
                categoryBits=0x0100,
                maskBits=0x001,  # collide only with ground
                restitution=0.3))
        p.ttl = ttl
        self.particles.append(p)
        self._clean_particles(False)
        return p

    def _clean_particles(self, all):
        """Destroy particles from the front of the list.

        With ``all`` true every particle is destroyed; otherwise removal
        stops at the first particle whose ``ttl`` is still non-negative.
        (The parameter name shadows the builtin but is kept for
        compatibility.)
        """
        while self.particles and (all or self.particles[0].ttl < 0):
            self.world.DestroyBody(self.particles.pop(0))

    def step(self, action):
        """Apply ``action``, advance the simulation by one frame.

        Args:
            action: For the discrete action space 0 = no-op, 1 = left engine,
                2 = main engine, 3 = right engine.  For the continuous space
                a pair ``[main, side]`` as described in ``__init__``.

        Returns:
            ``(observation, reward, done, info)``.  ``info`` holds
            ``'attempt_over'``, ``'attempt_succesful'`` and
            ``'attempt_duration'``.  When ``done`` is true the environment
            has already been reset and ``observation`` is the first
            observation of the next attempt.
        """
        assert self.action_space.contains(
            action), "%r (%s) invalid " % (action, type(action))

        # Engines
        tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
        side = (-tip[1], tip[0])
        dispersion = [
            self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)
        ]

        m_power = 0.0
        if (self.continuous and action[0] > 0.0) or (not self.continuous
                                                     and action == 2):
            # Main engine
            if self.continuous:
                m_power = (np.clip(action[0], 0.0, 1.0) +
                           1.0) * 0.5  # 0.5..1.0
                assert m_power >= 0.5 and m_power <= 1.0
            else:
                m_power = 1.0
            ox = tip[0] * (
                4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[
                    1]  # 4 is move a bit downwards, +-2 for randomness
            oy = -tip[1] * (4 / SCALE +
                            2 * dispersion[0]) - side[1] * dispersion[1]
            impulse_pos = (self.lander.position[0] + ox,
                           self.lander.position[1] + oy)
            # Particles are just a decoration; the particle mass is there to
            # make particle speed adequate.
            p = self._create_particle(
                0.7, impulse_pos[0], impulse_pos[1], m_power
            )
            p.ApplyLinearImpulse((ox * ROCKET['main_engine_power'] * m_power,
                                  oy * ROCKET['main_engine_power'] * m_power),
                                 impulse_pos, True)
            # Equal and opposite impulse on the lander.
            self.lander.ApplyLinearImpulse(
                (-ox * ROCKET['main_engine_power'] * m_power,
                 -oy * ROCKET['main_engine_power'] * m_power), impulse_pos,
                True)

        s_power = 0.0
        if (self.continuous
                and np.abs(action[1]) > 0.5) or (not self.continuous
                                                 and action in [1, 3]):
            # Orientation engines
            if self.continuous:
                direction = np.sign(action[1])
                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
                assert s_power >= 0.5 and s_power <= 1.0
            else:
                # action 1 -> -1, action 3 -> +1
                direction = action - 2
                s_power = 1.0
            ox = tip[0] * dispersion[0] + side[0] * (
                3 * dispersion[1] +
                direction * ROCKET['side_engine_away'] / SCALE)
            oy = -tip[1] * dispersion[0] - side[1] * (
                3 * dispersion[1] +
                direction * ROCKET['side_engine_away'] / SCALE)
            impulse_pos = (self.lander.position[0] + ox - tip[0] * 17 / SCALE,
                           self.lander.position[1] + oy +
                           tip[1] * ROCKET['side_engine_height'] / SCALE)
            p = self._create_particle(0.14, impulse_pos[0], impulse_pos[1],
                                      s_power)
            p.ApplyLinearImpulse((ox * ROCKET['side_engine_power'] * s_power,
                                  oy * ROCKET['side_engine_power'] * s_power),
                                 impulse_pos, True)
            self.lander.ApplyLinearImpulse(
                (-ox * ROCKET['side_engine_power'] * s_power,
                 -oy * ROCKET['side_engine_power'] * s_power), impulse_pos,
                True)

        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        pos = self.lander.position
        vel = self.lander.linearVelocity
        state = [
            (pos.x - W / 2) / (W / 2),
            (pos.y - (self.origin[1] + ROCKET['leg_down'] / SCALE)) / (W / 2),
            vel.x * (W / 2) / FPS, vel.y * (H / 2) / FPS,
            3 * self.lander.angle, 100.0 * self.lander.angularVelocity / FPS,
            1.0 if self.legs[0].ground_contact else 0.0,
            1.0 if self.legs[1].ground_contact else 0.0
        ]
        assert len(state) == 8

        reward = 0
        shaping = \
         - 10*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
         - 10*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
         - 30*abs(state[4]) + 1*state[6] + 1*state[7]   # And one point for legs contact, the idea is if you
        # lose contact again after landing, you get negative reward
        if self.prev_shaping is not None:
            # Reward is the change in shaping since the previous step.
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping

        reward -= m_power * 0.3  # less fuel spent is better, about -30 for heuristic landing
        reward -= s_power * 0.03

        done = False
        success = False
        if self.game_over or abs(state[0]) >= 1.5 or state[1] <= -0.05:
            done = True
            reward = -100
        if not self.lander.awake:
            # The body came to rest: counted as a successful landing.
            done = True
            reward = +500
            success = True

        self.steps += 1

        info = {
            'attempt_over': done,
            'attempt_succesful': success,
            'attempt_duration': self.steps
        }

        if done:
            # reset() also zeroes self.steps for the next attempt.
            state = self.reset()

        return np.array(state), reward, done, info

    def render(self, mode='human'):
        """Draw sky, water, particles, lander, ship and legs.

        Particles age (and expire) only here, once per rendered frame.

        Args:
            mode: ``'human'`` or ``'rgb_array'``; the latter makes the viewer
                return the frame as an array.

        Returns:
            Whatever ``viewer.render`` returns for the requested mode.
        """
        from gym.envs.classic_control import rendering

        if self.viewer is None:
            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
            self.viewer.set_bounds(0, W, 0, H)

            def key_press(key, mod):
                if key == 0xff1b:  # Escape
                    self.close()  # close window
                    sys.exit()

            self.unwrapped.viewer.window.on_key_press = key_press

        # environment
        self.env_polys = []
        self.env_colors = []
        self.env_polys.append([(x * W, y * H) for x, y in ENV['sky_vertices']])
        self.env_colors.append(ENV['sky_color'])

        water_x, water_y = self.water.step()

        scale = 0.02
        water_poly = [(W, 0), (0, 0)] + list(
            zip(W * water_x, H *
                (ENV['water_level'] + scale * water_y))) + [(W, 0)]
        self.env_polys.append(water_poly)
        self.env_colors.append(ENV['water_color'])

        for i, p in enumerate(self.env_polys):
            self.viewer.draw_polygon(p, color=self.env_colors[i])

        # particles and lander
        for obj in self.particles:
            obj.ttl -= 0.15
            obj.color1 = (max(0.2, 0.2 + obj.ttl), max(0.2, 0.5 * obj.ttl),
                          max(0.2, 0.5 * obj.ttl))
            obj.color2 = (max(0.2, 0.2 + obj.ttl), max(0.2, 0.5 * obj.ttl),
                          max(0.2, 0.5 * obj.ttl))

        self._clean_particles(False)

        for obj in self.particles + self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                if isinstance(f.shape, circleShape):
                    t = rendering.Transform(translation=trans * f.shape.pos)
                    self.viewer.draw_circle(f.shape.radius,
                                            20,
                                            color=obj.color1).add_attr(t)
                    self.viewer.draw_circle(f.shape.radius,
                                            20,
                                            color=obj.color2,
                                            filled=False,
                                            linewidth=1).add_attr(t)
                else:
                    path = [trans * v for v in f.shape.vertices]
                    self.viewer.draw_polygon(path, color=obj.color1)
                    # Close the outline by repeating the first vertex.
                    path.append(path[0])
                    self.viewer.draw_polyline(path,
                                              color=obj.color2,
                                              linewidth=1)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def update_initials(self):
        """Draw new initial conditions from ``initial_generator``.

        The generator is called with the current ``difficulty``.  Its
        ``'pos_x'`` value is mapped so that -1..+1 spans 0..W, and
        ``'pos_y'`` so that 0..1 spans from the origin height to H; the
        remaining values are stored unchanged.  Without a generator the
        current initial values are kept.
        """
        if not self.initial_generator:
            return

        difficulties = self.initial_generator(self.difficulty)

        self.initial_pos_x = (difficulties['pos_x'] + 1) * W / 2
        self.initial_pos_y = self.origin[1] + difficulties['pos_y'] * (
            H - self.origin[1])
        self.initial_vel = difficulties['vel']
        self.initial_dir = difficulties['dir']
        self.initial_vdir = difficulties['vdir']

        self.ship_width = difficulties['ship_width']
Beispiel #2
0
class Scene:
    """OpenGL scene made of a skybox, a water surface and a duck mesh.

    The duck follows a ``BSpline`` path; on every frame a drop is applied to
    the water at the duck's position.  The projection parameters can be
    changed at runtime through the ``set_*`` methods, each of which reloads
    the projection matrix.
    """

    def __init__(self, fov, ratio, near, far, skybox_img, duck_img):
        """Create the scene objects (no OpenGL calls are made here directly).

        Args:
            fov, ratio, near, far: Arguments later passed to
                ``gluPerspective`` in this order.
            skybox_img: Passed to ``Skybox``.
            duck_img: Passed to ``Mesh`` together with the duck model path.
        """
        self.fov = fov
        self.far = far
        self.near = near
        self.ratio = ratio

        # Timestamp of the previous frame, used to compute the frame delta.
        self.last_time = timer()

        self.water = Water(128)
        self.box = Skybox(skybox_img)
        self.duck = Mesh('data/duck.gpt', duck_img, 'shad/anisotropic')
        self.path = BSpline((-1, 1), (-1, 1))

        self.light = np.array((0, 2, 0))

        self.water.drop_rnd()

    def gfx_init(self):
        """Create the camera, load the projection and init all drawables."""
        self.camera = Camera((0, 5, 0), (1, 1, 0), (1, 0, 0))
        self._update_proj()

        self.water.gfx_init()
        self.box.gfx_init()
        self.duck.gfx_init()

    def draw(self):
        """Advance the animation by the elapsed time and render one frame."""
        self.time = timer()
        dt = self.time - self.last_time

        glMatrixMode(GL_MODELVIEW)
        glLoadIdentity()

        self.camera.look()

        self.box.draw()

        self.path.next(dt)
        # Shift/scale the path value by (v + 1) * n / 2 to get the drop
        # position; the drop force grows with the length of the tangent.
        self.water.drop(*((self.path.value + 1.0) * self.water.n / 2.0),
                        force=np.linalg.norm(self.path.tangent) * 25)
        self.water.step(dt * .5)
        self.water.draw(self.box.texture, self.camera.matrix)

        self.duck.draw(self.path.value, self.path.tangent, self.light)

        self.last_time = self.time

    def set_fov(self, fov):
        """Set the field of view and reload the projection matrix."""
        self.fov = fov
        self._update_proj()

    def set_near(self, near):
        """Set the near clipping distance and reload the projection matrix."""
        self.near = near
        self._update_proj()

    def set_ratio(self, ratio):
        """Set the aspect ratio and reload the projection matrix."""
        self.ratio = ratio
        self._update_proj()

    def set_screen_size(self, w, h):
        """Store the screen size and derive the aspect ratio ``w / h``."""
        self.width = w
        self.height = h
        self.set_ratio(float(w) / float(h))

    def mouse_move(self, df):
        """Rotate the camera by the mouse delta ``df`` scaled by -0.2."""
        self.camera.rot(*(-x * .2 for x in df))

    def key_pressed(self, mv):
        """Move the camera by the movement vector ``mv`` scaled by 0.05."""
        self.camera.move(*(x * .05 for x in mv))

    def _update_proj(self):
        """Reload the projection matrix from fov/ratio/near/far.

        Leaves the matrix mode set to ``GL_MODELVIEW``.
        """
        glMatrixMode(GL_PROJECTION)
        glLoadIdentity()
        gluPerspective(self.fov, self.ratio, self.near, self.far)
        glMatrixMode(GL_MODELVIEW)