def reset(self): self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics state = np.zeros(12) self.dynamics.setState(state) return self.step(np.array([0, 0, 0, 0]))[0]
def reset(self): self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbations state = np.zeros(12) d = self.dynamics state[d.STATE_X] = self.INITIAL_RANDOM_OFFSET * np.random.randn() state[d.STATE_Y] = self.INITIAL_RANDOM_OFFSET * np.random.randn() state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) return self.step(np.array([0, 0, 0, 0]))[0]
def reset(self): self._destroy() self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_Y] = 0 state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) self.dynamics.perturb( np.array([0, self._perturb(), self._perturb(), 0, 0, 0])) return self.step(np.array([0, 0]))[0]
class Distance(gym.Env, EzPickle): FRAMES_PER_SECOND = 50 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FRAMES_PER_SECOND } def __init__(self): EzPickle.__init__(self) self.seed() # Observation is all state values except yaw and its derivative self.observation_space = spaces.Box(-np.inf, np.inf, shape=(10, ), dtype=np.float32) # Action is motor values self.action_space = spaces.Box(-1, +1, (4, ), dtype=np.float32) # Support for rendering self.renderer = None self.pose = None self.reset() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics state = np.zeros(12) self.dynamics.setState(state) return self.step(np.array([0, 0, 0, 0]))[0] def step(self, action): # Abbreviation d = self.dynamics d.setMotors(action) d.update() # Get new state from dynamics posx, velx, posy, vely, posz, velz, phi, velphi, theta, veltheta, psi, _ = d.getState( ) # Set pose in display self.pose = posx, posy, posz, phi, theta, psi # Convert state to usable form state = np.array( [posx, velx, posy, vely, posz, velz, phi, velphi, theta, veltheta]) # Reward is a simple penalty for overall distance and velocity shaping = np.sqrt(posx**2 + posy**2) reward = (shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0 self.prev_shaping = shaping done = False return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym_copter.rendering.threed import ThreeDDistanceRenderer # Create renderer if not done yet if self.renderer is None: self.renderer = ThreeDDistanceRenderer(self, self.LANDING_RADIUS) return self.renderer.render() def close(self): return
def _reset(self): self.state = np.zeros(self.statedims) self.dynamics = DJIPhantomDynamics() self.tick = 0 self.done = False
class Lander2D(gym.Env, EzPickle): # Parameters to adjust INITIAL_RANDOM_FORCE = 30 # perturbation for initial position INITIAL_ALTITUDE = 10 LANDING_RADIUS = 2 PENALTY_FACTOR = 12 # designed so that maximal penalty is around 100 BOUNDS = 10 OUT_OF_BOUNDS_PENALTY = 100 INSIDE_RADIUS_BONUS = 100 FRAMES_PER_SECOND = 50 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FRAMES_PER_SECOND } def __init__(self): EzPickle.__init__(self) self.seed() self.viewer = None self.prev_reward = None # useful range is -1 .. +1, but spikes can be higher self.observation_space = spaces.Box(-np.inf, np.inf, shape=(6, ), dtype=np.float32) # Action is two floats [throttle, roll] self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32) # Support for rendering self.viewer = None self.pose = None self.spinning = False self.reset() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): self._destroy() self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_Y] = 0 state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) self.dynamics.perturb( np.array([0, self._perturb(), self._perturb(), 0, 0, 0])) return self.step(np.array([0, 0]))[0] def step(self, action): # Abbreviation d = self.dynamics status = d.getStatus() # Stop motors after safe landing if status == d.STATUS_LANDED: d.setMotors(np.zeros(4)) self.spinning = False # In air, set motors from action else: m = np.clip(action, 0, 1) # keep motors in interval [0,1] d.setMotors([m[0], m[1], m[1], m[0]]) self.spinning = sum(m) > 0 d.update() # Get new state from dynamics _, _, posy, vely, posz, velz, phi, velphi = d.getState()[:8] # Set lander pose for viewer self.pose = posy, posz, phi # Convert state to usable form state = np.array([posy, vely, posz, velz, phi, velphi]) # A simple penalty for overall distance and velocity shaping = -self.PENALTY_FACTOR * np.sqrt(np.sum(state[0:4]**2)) reward = ((shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0) self.prev_shaping = shaping # Assume we're not done yet done = False # Lose bigly if we go outside window if abs(posy) >= self.BOUNDS: done = True reward -= self.OUT_OF_BOUNDS_PENALTY else: # It's all over once we're on the ground if status == d.STATUS_LANDED: done = True self.spinning = False # Win bigly we land safely between the flags if abs(posy) < self.LANDING_RADIUS: reward += self.INSIDE_RADIUS_BONUS elif status == d.STATUS_CRASHED: # Crashed! done = True self.spinning = False return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym_copter.rendering.twod import TwoDLanderRenderer # Create viewer if not done yet if self.viewer is None: self.viewer = TwoDLanderRenderer(self.LANDING_RADIUS) return self.viewer.render(mode, self.pose, self.spinning) def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def _perturb(self): return np.random.uniform(-self.INITIAL_RANDOM_FORCE, +self.INITIAL_RANDOM_FORCE) def _destroy(self): if self.viewer is not None: self.viewer.close()
class Lander1D(gym.Env, EzPickle): # Parameters to adjust INITIAL_ALTITUDE = 10 LANDING_RADIUS = 2 # for display purposes only PENALTY_FACTOR = 12 # designed so that maximal penalty is around 100 BOUNDS = 10 SAFE_LANDING_BONUS = 100 FRAMES_PER_SECOND = 50 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FRAMES_PER_SECOND } def __init__(self): EzPickle.__init__(self) self.viewer = None self.prev_reward = None # Observation is altitude and its rate of change self.observation_space = spaces.Box(-np.inf, np.inf, shape=(2, ), dtype=np.float32) # Action is one value [throttle] self.action_space = spaces.Box(-1, +1, (1, ), dtype=np.float32) # Support for rendering self.renderer = None self.pose = None self.spinning = False self.reset() def reset(self): self._destroy() self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_Y] = 0 state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) return self.step(np.array([0]))[0] def step(self, action): # Abbreviation d = self.dynamics status = d.getStatus() # Stop motors after safe landing if status == d.STATUS_LANDED: d.setMotors(np.zeros(4)) self.spinning = False # In air, set motors from action else: # Keep action in interval [0,1] t = np.clip(action[0], 0, 1) d.setMotors([t, t, t, t]) self.spinning = t > 0 d.update() # Get new state from dynamics _, _, posy, _, posz, velz, phi, _ = d.getState()[:8] # Set lander pose for renderer self.pose = posy, posz, phi # Convert state to usable form state = np.array([posz, velz]) # Reward is a simple penalty for overall distance and velocity shaping = -self.PENALTY_FACTOR * np.sqrt(np.sum(state[0:4]**2)) reward = (shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0 self.prev_shaping = shaping # Assume we're not done yet done = False # Lose bigly if we go outside window if abs(posy) >= self.BOUNDS: done = True else: # It's all over once we're on the ground if status == d.STATUS_LANDED: done = True reward += self.SAFE_LANDING_BONUS elif status == d.STATUS_CRASHED: # Crashed! done = True return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym_copter.rendering.twod import TwoDLanderRenderer # Create renderer if not done yet if self.renderer is None: self.renderer = TwoDLanderRenderer(self.LANDING_RADIUS) d = self.dynamics return self.renderer.render(mode, self.pose, self.spinning) def close(self): if self.renderer is not None: self.renderer.close() self.renderer = None def _destroy(self): if self.renderer is not None: self.renderer.close()
class Lander3D(gym.Env, EzPickle): # Parameters to adjust INITIAL_ALTITUDE = 5 INITIAL_RANDOM_OFFSET = 2.5 XY_PENALTY_FACTOR = 25 # designed so that maximal penalty is around 100 PITCH_ROLL_PENALTY_FACTOR = 0 # 250 YAW_PENALTY_FACTOR = 50 MOTOR_PENALTY_FACTOR = 0.03 BOUNDS = 10 OUT_OF_BOUNDS_PENALTY = 100 RESTING_DURATION = 1.0 # render a short while after successful landing FRAMES_PER_SECOND = 50 MAX_ANGLE = 45 # big penalty if roll or pitch angles go beyond this EXCESS_ANGLE_PENALTY = 100 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FRAMES_PER_SECOND } def __init__(self): EzPickle.__init__(self) self.seed() self.prev_reward = None # Observation is all state values self.observation_space = (spaces.Box(-np.inf, np.inf, shape=(12, ), dtype=np.float32)) # Action is four floats (one per motor) self.action_space = spaces.Box(-1, +1, (4, ), dtype=np.float32) # Support for rendering self.viewer = None self.pose = None # Pre-convert max-angle degrees to radian self.max_angle = np.radians(self.MAX_ANGLE) self.reset() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbations state = np.zeros(12) d = self.dynamics state[d.STATE_X] = self.INITIAL_RANDOM_OFFSET * np.random.randn() state[d.STATE_Y] = self.INITIAL_RANDOM_OFFSET * np.random.randn() state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) return self.step(np.array([0, 0, 0, 0]))[0] def step(self, action): # Abbreviation d = self.dynamics status = d.getStatus() # Keep motors in interval [0,1] motors = (np.zeros(4) if status == d.STATUS_LANDED else np.clip( action, 0, 1)) d.setMotors(motors) # Update dynamics if airborne if status != d.STATUS_LANDED: d.update() # Get new state from dynamics state = np.array(d.getState()) # Extract pose from state x, y, z, phi, theta, psi = state[0::2] # Set lander pose in display self.pose = x, y, z, phi, theta, psi # Reward is a simple penalty for overall distance and angle and their # first derivatives, plus a bit more for running motors (discourage # endless hover) shaping = -( self.XY_PENALTY_FACTOR * np.sqrt(np.sum(state[0:6]**2)) + self.PITCH_ROLL_PENALTY_FACTOR * np.sqrt(np.sum(state[6:10]**2)) + self.YAW_PENALTY_FACTOR * np.sqrt(np.sum(state[10:12]**2)) + self.MOTOR_PENALTY_FACTOR * np.sum(motors)) reward = ((shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0) self.prev_shaping = shaping # Assume we're not done yet done = False # Lose bigly if we go out of bounds if abs(x) >= self.BOUNDS or abs(y) >= self.BOUNDS: done = True reward = -self.OUT_OF_BOUNDS_PENALTY # Lose bigly for excess roll or pitch if abs(phi) >= self.max_angle or abs(theta) >= self.max_angle: done = True reward = -self.OUT_OF_BOUNDS_PENALTY # It's all over once we're on the ground if status == d.STATUS_LANDED: done = True # Different subclasses add different bonuses for proximity to # center reward += self._get_bonus(x, y) elif status == d.STATUS_CRASHED: # Crashed! done = True return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): ''' Returns None because we run viewer on a separate thread ''' return None def close(self): return def _get_bonus(self, x, y): # Bonus is proximity to center return self.BOUNDS - np.sqrt(x**2 + y**2)
class Lander3DSimple(gym.Env, EzPickle): # Parameters to adjust INITIAL_RANDOM_OFFSET = 2.5 # perturbation factor for initial horizontal position INITIAL_ALTITUDE = 5 LANDING_RADIUS = 2 XY_PENALTY_FACTOR = 25 # designed so that maximal penalty is around 100 PITCH_ROLL_PENALTY_FACTOR = 250 BOUNDS = 10 OUT_OF_BOUNDS_PENALTY = 100 INSIDE_RADIUS_BONUS = 100 RESTING_DURATION = 1.0 # for rendering for a short while after successful landing FRAMES_PER_SECOND = 50 MAX_ANGLE = 45 # big penalty if roll or pitch angles go beyond this EXCESS_ANGLE_PENALTY = 100 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second' : FRAMES_PER_SECOND } def __init__(self): EzPickle.__init__(self) self.seed() self.prev_reward = None # Observation is all state values self.observation_space = spaces.Box(-np.inf, np.inf, shape=(10,), dtype=np.float32) # Action is three floats (throttle, roll, pitch) self.action_space = spaces.Box(-1, +1, (3,), dtype=np.float32) # Support for rendering self.renderer = None self.pose = None # Pre-convert max-angle degrees to radian self.max_angle = np.radians(self.MAX_ANGLE) self.reset() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def reset(self): self.prev_shaping = None # Create cusom dynamics model self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND) # Initialize custom dynamics with random perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_X] = self.INITIAL_RANDOM_OFFSET #* np.random.randn() state[d.STATE_Y] = self.INITIAL_RANDOM_OFFSET #* np.random.randn() state[d.STATE_Z] = -self.INITIAL_ALTITUDE self.dynamics.setState(state) return self.step(np.array([-1, 0, 0]))[0] def step(self, action): # Use mixer to convert demands into motor values t = np.clip(action[0], 0, 1) # truncate throttle below 0 r = action[1] p = action[2] motors = [t-r-p, t+r+p, t+r-p, t-r+p] # Abbreviation d = self.dynamics status = d.getStatus() # Stop motors after safe landing if status == d.STATUS_LANDED: d.setMotors(np.zeros(4)) # In air, set motors from action else: d.setMotors(np.clip(motors, 0, 1)) # keep motors in interval [0,1] d.update() # Get new state from dynamics, removing yaw state = np.array(d.getState())[:10] # Extract pose from state x, y, z, phi, theta = state[0::2] # Set lander pose in display (with zero for yaw) self.pose = x, y, z, phi, theta, 0 # Reward is a simple penalty for overall distance and angle and their first derivatives shaping = -( self.XY_PENALTY_FACTOR * np.sqrt(np.sum(state[0:6]**2)) + self.PITCH_ROLL_PENALTY_FACTOR * np.sqrt(np.sum(state[6:10]**2)) ) reward = (shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0 self.prev_shaping = shaping # Assume we're not done yet done = False # Lose bigly if we go out of bounds if abs(x) >= self.BOUNDS or abs(y) >= self.BOUNDS: done = True reward = -self.OUT_OF_BOUNDS_PENALTY # Lose bigly for excess roll or pitch if abs(phi) >= self.max_angle or abs(theta) >= self.max_angle: done = True reward = -self.OUT_OF_BOUNDS_PENALTY # It's all over once we're on the ground if status == d.STATUS_LANDED: done = True # Win bigly we land safely between the flags if x**2+y**2 < self.LANDING_RADIUS**2: reward += self.INSIDE_RADIUS_BONUS elif status == d.STATUS_CRASHED: # Crashed! done = True return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym_copter.rendering.threed import ThreeDLanderRenderer # Create renderer if not done yet if self.renderer is None: self.renderer = ThreeDLanderRenderer(self, self.LANDING_RADIUS) return self.renderer.render() def close(self): return
class CopterLander2D(gym.Env, EzPickle): FPS = 50 SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well # Criteria for a successful landing LANDING_POS_Y = 4.15 LANDING_VEL_X = 2.0 LANDING_ANGLE = 0.05 # Initial velocity perturbation factor INITIAL_RANDOM_VELOCITY = 0.75 # Vehicle display properties --------------------------------------------------------- LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)] LEG_X = 12 LEG_Y = -7 LEG_W = 3 LEG_H = 20 MOTOR_X = 25 MOTOR_Y = 7 MOTOR_W = 4 MOTOR_H = 5 BLADE_X = 25 BLADE_Y = 8 BLADE_W = 20 BLADE_H = 2 BLADE1L_POLY = [ (BLADE_X, BLADE_Y), (BLADE_X - BLADE_W / 2, BLADE_Y + BLADE_H), (BLADE_X - BLADE_W, BLADE_Y), (BLADE_X - BLADE_W / 2, BLADE_Y + -BLADE_H), ] BLADE1R_POLY = [ (BLADE_X, BLADE_Y), (BLADE_X + BLADE_W / 2, BLADE_Y + BLADE_H), (BLADE_X + BLADE_W, BLADE_Y), (BLADE_X + BLADE_W / 2, BLADE_Y + -BLADE_H), ] BLADE2L_POLY = [ (-BLADE_X, BLADE_Y), (-(BLADE_X + BLADE_W / 2), BLADE_Y + BLADE_H), (-(BLADE_X + BLADE_W), BLADE_Y), (-(BLADE_X + BLADE_W / 2), BLADE_Y + -BLADE_H), ] BLADE2R_POLY = [ (-BLADE_X, BLADE_Y), (-BLADE_X + BLADE_W / 2, BLADE_Y + BLADE_H), (-BLADE_X + BLADE_W, BLADE_Y), (-BLADE_X + BLADE_W / 2, BLADE_Y + -BLADE_H), ] HULL_POLY = [ (-30, 0), (-4, +4), (+4, +4), (+30, 0), (+4, -14), (-4, -14), ] LEG1_POLY = [(-LEG_X, LEG_Y), (-LEG_X + LEG_W, LEG_Y), (-LEG_X + LEG_W, LEG_Y - LEG_H), (-LEG_X, LEG_Y - LEG_H)] LEG2_POLY = [(+LEG_X, LEG_Y), (+LEG_X + LEG_W, LEG_Y), (+LEG_X + LEG_W, LEG_Y - LEG_H), (+LEG_X, LEG_Y - LEG_H)] MOTOR1_POLY = [(+MOTOR_X, MOTOR_Y), (+MOTOR_X + MOTOR_W, MOTOR_Y), (+MOTOR_X + MOTOR_W, MOTOR_Y - MOTOR_H), (+MOTOR_X, MOTOR_Y - MOTOR_H)] MOTOR2_POLY = [(-MOTOR_X, MOTOR_Y), (-MOTOR_X + MOTOR_W, MOTOR_Y), (-MOTOR_X + MOTOR_W, MOTOR_Y - MOTOR_H), (-MOTOR_X, MOTOR_Y - MOTOR_H)] VIEWPORT_W = 600 VIEWPORT_H = 400 TERRAIN_CHUNKS = 11 SKY_COLOR = 0.5, 0.8, 1.0 GROUND_COLOR = 0.5, 0.7, 0.3 FLAG_COLOR = 0.8, 0.0, 0.0 VEHICLE_COLOR = 1.0, 1.0, 1.0 MOTOR_COLOR = 0.5, 0.5, 0.5 PROP_COLOR = 0.0, 0.0, 0.0 OUTLINE_COLOR = 0.0, 0.0, 0.0 # ------------------------------------------------------------------------------------- metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FPS } def __init__(self): EzPickle.__init__(self) self.seed() self.viewer = None self.world = Box2D.b2World() self.ground = None self.lander = None self.prev_reward = None # useful range is -1 .. +1, but spikes can be higher self.observation_space = spaces.Box(-np.inf, np.inf, shape=(6, ), dtype=np.float32) # Action is two floats [main engine, left-right engines]. # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power. # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32) self.reset() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _destroy(self): if not self.ground: return self.world.contactListener = None self.world.DestroyBody(self.ground) self.ground = None self.world.DestroyBody(self.lander) self.lander = None def reset(self): self._destroy() self.prev_shaping = None self.startpos = None W = self.VIEWPORT_W / self.SCALE H = self.VIEWPORT_H / self.SCALE # terrain height = self.np_random.uniform(0, H / 2, size=(self.TERRAIN_CHUNKS + 1, )) chunk_x = [ W / (self.TERRAIN_CHUNKS - 1) * i for i in range(self.TERRAIN_CHUNKS) ] self.helipad_x1 = chunk_x[self.TERRAIN_CHUNKS // 2 - 1] self.helipad_x2 = chunk_x[self.TERRAIN_CHUNKS // 2 + 1] self.helipad_y = H / 4 height[self.TERRAIN_CHUNKS // 2 - 2] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 - 1] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 0] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 1] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 2] = self.helipad_y smooth_y = [ 0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(self.TERRAIN_CHUNKS) ] self.ground = self.world.CreateStaticBody(shapes=edgeShape( vertices=[(0, 0), (W, 0)])) self.sky_polys = [] for i in range(self.TERRAIN_CHUNKS - 1): p1 = (chunk_x[i], smooth_y[i]) p2 = (chunk_x[i + 1], smooth_y[i + 1]) self.ground.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)]) self.startpos = self.VIEWPORT_W / self.SCALE / 2, self.VIEWPORT_H / self.SCALE self.lander = self.world.CreateDynamicBody( position=self.startpos, angle=0.0, fixtures=[ fixtureDef(shape=polygonShape(vertices=[(x / self.SCALE, y / self.SCALE) for x, y in poly]), density=0.0) for poly in [ self.HULL_POLY, self.LEG1_POLY, self.LEG2_POLY, self.MOTOR1_POLY, self.MOTOR2_POLY, self.BLADE1L_POLY, self.BLADE1R_POLY, self.BLADE2L_POLY, self.BLADE2R_POLY ] ]) # By showing props periodically, we can emulate prop rotation self.props_visible = 0 # Create cusom dynamics model self.dynamics = DJIPhantomDynamics() # Initialize custom dynamics with slight velocity perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_Y] = self.startpos[ 0] # 3D copter Y comes from 2D copter X state[d.STATE_Z] = -self.startpos[ 1] # 3D copter Z comes from 2D copter Y, negated for NED state[d.STATE_Y_DOT] = self.INITIAL_RANDOM_VELOCITY * np.random.randn() state[d.STATE_Z_DOT] = self.INITIAL_RANDOM_VELOCITY * np.random.randn() self.dynamics.setState(state) return self.step(np.array([0, 0]))[0] def step(self, action): # Map throttle demand from [-1,+1] to [0,1] throttle = (action[0] + 1) / 2 # Abberviation d = self.dynamics # Set motors from demands roll = action[1] d.setMotors( np.clip([ throttle - roll, throttle + roll, throttle + roll, throttle - roll ], 0, 1)) # Update dynamics d.update(1. / self.FPS) # Get new state from dynamics x = d.getState() # Parse out state into elements posx = x[d.STATE_Y] posy = -x[d.STATE_Z] velx = x[d.STATE_Y_DOT] vely = -x[d.STATE_Z_DOT] angle = x[d.STATE_PHI] angularVelocity = x[d.STATE_PHI_DOT] # Set lander pose in display self.lander.position = posx, posy self.lander.angle = -angle # Convert state to usable form state = ((posx - self.VIEWPORT_W / self.SCALE / 2) / (self.VIEWPORT_W / self.SCALE / 2), (posy - (self.helipad_y)) / (self.VIEWPORT_H / self.SCALE / 2), velx * (self.VIEWPORT_W / self.SCALE / 2) / self.FPS, vely * (self.VIEWPORT_H / self.SCALE / 2) / self.FPS, angle, 20.0 * angularVelocity / self.FPS) # Shape the reward reward = 0 shaping = 0 shaping -= 100 * np.sqrt( state[0]**2 + state[1]**2) # Lose points for altitude and vertical drop rate' shaping -= 100 * np.sqrt( state[2]**2 + state[3]**2 ) # Lose points for distance from X center and horizontal velocity if self.prev_shaping is not None: reward = shaping - self.prev_shaping self.prev_shaping = shaping # Assume we're not done yet done = False # Lose bigly if we go outside window if abs(state[0]) >= 1.0: done = True reward = -100 ''' print('posy=%3.3f (%3.3f)\tvelx=%+3.3f (%3.3f)\tang=%+3.3f (%3.3f) %d' % (posy, self.LANDING_POS_Y, velx, self.LANDING_VEL_X, self.lander.angle, self.LANDING_ANGLE, self.helipad_x1 < posx < self.helipad_x2)) ''' # It's all over once we're on the ground if self._on_ground(): done = True landed_safely = abs(velx) < self.LANDING_VEL_X and abs( self.lander.angle ) < self.LANDING_ANGLE and self.helipad_x1 < posx < self.helipad_x2 # More reward if we land safely, less otherwise reward = reward + 100 if landed_safely else -50 return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym.envs.classic_control import rendering if self.viewer is None: self.viewer = rendering.Viewer(self.VIEWPORT_W, self.VIEWPORT_H) self.viewer.set_bounds(0, self.VIEWPORT_W / self.SCALE, 0, self.VIEWPORT_H / self.SCALE) # Draw ground as background self.viewer.draw_polygon([(0, 0), (self.VIEWPORT_W, 0), (self.VIEWPORT_W, self.VIEWPORT_H), (0, self.VIEWPORT_H)], color=self.GROUND_COLOR) # Draw sky for p in self.sky_polys: self.viewer.draw_polygon(p, color=self.SKY_COLOR) # Draw flags for x in [self.helipad_x1, self.helipad_x2]: flagy1 = self.helipad_y flagy2 = flagy1 + 50 / self.SCALE self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1)) self.viewer.draw_polygon( [(x, flagy2), (x, flagy2 - 10 / self.SCALE), (x + 25 / self.SCALE, flagy2 - 5 / self.SCALE)], color=self.FLAG_COLOR) # Draw copter self._show_fixture(1, self.VEHICLE_COLOR) self._show_fixture(2, self.VEHICLE_COLOR) self._show_fixture(0, self.VEHICLE_COLOR) self._show_fixture(3, self.MOTOR_COLOR) self._show_fixture(4, self.MOTOR_COLOR) # Simulate spinning props by alternating show/hide if self.props_visible: for k in range(5, 9): self._show_fixture(k, self.PROP_COLOR) self.props_visible = self._on_ground(0.05) or ( (self.props_visible + 1) % 3) # Pause briefly to show vehicle on ground if self._on_ground(): sleep(0.5) return self.viewer.render(return_rgb_array=mode == 'rgb_array') def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def _on_ground(self, tolerance=0): return self.lander.position.y < (self.LANDING_POS_Y + tolerance) def _show_fixture(self, index, color): fixture = self.lander.fixtures[index] trans = fixture.body.transform path = [trans * v for v in fixture.shape.vertices] self.viewer.draw_polygon(path, color=color) path.append(path[0]) self.viewer.draw_polyline(path, color=self.OUTLINE_COLOR, linewidth=1)
def reset(self): self._destroy() self.prev_shaping = None self.startpos = None W = self.VIEWPORT_W / self.SCALE H = self.VIEWPORT_H / self.SCALE # terrain height = self.np_random.uniform(0, H / 2, size=(self.TERRAIN_CHUNKS + 1, )) chunk_x = [ W / (self.TERRAIN_CHUNKS - 1) * i for i in range(self.TERRAIN_CHUNKS) ] self.helipad_x1 = chunk_x[self.TERRAIN_CHUNKS // 2 - 1] self.helipad_x2 = chunk_x[self.TERRAIN_CHUNKS // 2 + 1] self.helipad_y = H / 4 height[self.TERRAIN_CHUNKS // 2 - 2] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 - 1] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 0] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 1] = self.helipad_y height[self.TERRAIN_CHUNKS // 2 + 2] = self.helipad_y smooth_y = [ 0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(self.TERRAIN_CHUNKS) ] self.ground = self.world.CreateStaticBody(shapes=edgeShape( vertices=[(0, 0), (W, 0)])) self.sky_polys = [] for i in range(self.TERRAIN_CHUNKS - 1): p1 = (chunk_x[i], smooth_y[i]) p2 = (chunk_x[i + 1], smooth_y[i + 1]) self.ground.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)]) self.startpos = self.VIEWPORT_W / self.SCALE / 2, self.VIEWPORT_H / self.SCALE self.lander = self.world.CreateDynamicBody( position=self.startpos, angle=0.0, fixtures=[ fixtureDef(shape=polygonShape(vertices=[(x / self.SCALE, y / self.SCALE) for x, y in poly]), density=0.0) for poly in [ self.HULL_POLY, self.LEG1_POLY, self.LEG2_POLY, self.MOTOR1_POLY, self.MOTOR2_POLY, self.BLADE1L_POLY, self.BLADE1R_POLY, self.BLADE2L_POLY, self.BLADE2R_POLY ] ]) # By showing props periodically, we can emulate prop rotation self.props_visible = 0 # Create cusom dynamics model self.dynamics = DJIPhantomDynamics() # Initialize custom dynamics with slight velocity perturbation state = np.zeros(12) d = self.dynamics state[d.STATE_Y] = self.startpos[ 0] # 3D copter Y comes from 2D copter X state[d.STATE_Z] = -self.startpos[ 1] # 3D copter Z comes from 2D copter Y, negated for NED state[d.STATE_Y_DOT] = self.INITIAL_RANDOM_VELOCITY * np.random.randn() state[d.STATE_Z_DOT] = self.INITIAL_RANDOM_VELOCITY * np.random.randn() self.dynamics.setState(state) return self.step(np.array([0, 0]))[0]
class CopterBox2D(gym.Env): START_X = 10 START_Y = 13 FPS = 50 SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well LEG_X = 12 LEG_Y = -7 LEG_W = 3 LEG_H = 20 MOTOR_X = 25 MOTOR_Y = 7 MOTOR_W = 4 MOTOR_H = 5 BLADE_X = 25 BLADE_Y = 8 BLADE_W = 20 BLADE_H = 2 BLADE1L_POLY = [ (BLADE_X, BLADE_Y), (BLADE_X - BLADE_W / 2, BLADE_Y + BLADE_H), (BLADE_X - BLADE_W, BLADE_Y), (BLADE_X - BLADE_W / 2, BLADE_Y + -BLADE_H), ] BLADE1R_POLY = [ (BLADE_X, BLADE_Y), (BLADE_X + BLADE_W / 2, BLADE_Y + BLADE_H), (BLADE_X + BLADE_W, BLADE_Y), (BLADE_X + BLADE_W / 2, BLADE_Y + -BLADE_H), ] BLADE2L_POLY = [ (-BLADE_X, BLADE_Y), (-(BLADE_X + BLADE_W / 2), BLADE_Y + BLADE_H), (-(BLADE_X + BLADE_W), BLADE_Y), (-(BLADE_X + BLADE_W / 2), BLADE_Y + -BLADE_H), ] BLADE2R_POLY = [ (-BLADE_X, BLADE_Y), (-BLADE_X + BLADE_W / 2, BLADE_Y + BLADE_H), (-BLADE_X + BLADE_W, BLADE_Y), (-BLADE_X + BLADE_W / 2, BLADE_Y + -BLADE_H), ] HULL_POLY = [ (-30, 0), (-4, +4), (+4, +4), (+30, 0), (+4, -14), (-4, -14), ] LEG1_POLY = [(-LEG_X, LEG_Y), (-LEG_X + LEG_W, LEG_Y), (-LEG_X + LEG_W, LEG_Y - LEG_H), (-LEG_X, LEG_Y - LEG_H)] LEG2_POLY = [(+LEG_X, LEG_Y), (+LEG_X + LEG_W, LEG_Y), (+LEG_X + LEG_W, LEG_Y - LEG_H), (+LEG_X, LEG_Y - LEG_H)] MOTOR1_POLY = [(+MOTOR_X, MOTOR_Y), (+MOTOR_X + MOTOR_W, MOTOR_Y), (+MOTOR_X + MOTOR_W, MOTOR_Y - MOTOR_H), (+MOTOR_X, MOTOR_Y - MOTOR_H)] MOTOR2_POLY = [(-MOTOR_X, MOTOR_Y), (-MOTOR_X + MOTOR_W, MOTOR_Y), (-MOTOR_X + MOTOR_W, MOTOR_Y - MOTOR_H), (-MOTOR_X, MOTOR_Y - MOTOR_H)] VIEWPORT_W = 600 VIEWPORT_H = 400 SKY_COLOR = 0.5, 0.8, 1.0 GROUND_COLOR = 0.5, 0.7, 0.3 FLAG_COLOR = 0.8, 0.0, 0.0 VEHICLE_COLOR = 1.0, 1.0, 1.0 MOTOR_COLOR = 0.5, 0.5, 0.5 PROP_COLOR = 0.0, 0.0, 0.0 OUTLINE_COLOR = 0.0, 0.0, 0.0 metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': FPS } def __init__(self, observation_size, action_size): self.seed() self.viewer = None self.world = Box2D.b2World() self.ground = None self.lander = None self.prev_reward = None # Useful range is -1 .. +1, but spikes can be higher self.observation_space = spaces.Box(-np.inf, np.inf, shape=(observation_size, ), dtype=np.float32) # [-1,+1] will be rescaled to [0,1] for dynamics input self.action_space = spaces.Box(-1, +1, (action_size, ), dtype=np.float32) self._reset(0, 0) def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _destroy(self): if not self.ground: return self.world.contactListener = None self.world.DestroyBody(self.ground) self.ground = None self.world.DestroyBody(self.lander) self.lander = None def _reset(self, xoff=0, yoff=0): self._destroy() self.world.contactListener_keepref = ContactDetector(self) self.world.contactListener = self.world.contactListener_keepref self.landed = False self.prev_shaping = None self.rendering = False W = self.VIEWPORT_W / self.SCALE H = self.VIEWPORT_H / self.SCALE # Turn off gravity so we can run our own dynamics self.world.gravity = 0, 0 # terrain CHUNKS = 11 height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1, )) chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)] self.helipad_x1 = chunk_x[CHUNKS // 2 - 1] self.helipad_x2 = chunk_x[CHUNKS // 2 + 1] self.helipad_y = H / 4 height[CHUNKS // 2 - 2] = self.helipad_y height[CHUNKS // 2 - 1] = self.helipad_y height[CHUNKS // 2 + 0] = self.helipad_y height[CHUNKS // 2 + 1] = self.helipad_y height[CHUNKS // 2 + 2] = self.helipad_y smooth_y = [ 0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(CHUNKS) ] self.ground = self.world.CreateStaticBody(shapes=edgeShape( vertices=[(0, 0), (W, 0)])) self.sky_polys = [] for i in range(CHUNKS - 1): p1 = (chunk_x[i], smooth_y[i]) p2 = (chunk_x[i + 1], smooth_y[i + 1]) self.ground.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)]) initial_y = self.VIEWPORT_H / self.SCALE self.lander = self.world.CreateDynamicBody( position=(self.VIEWPORT_W / self.SCALE / 2, initial_y), angle=0.0, fixtures=[ fixtureDef(shape=polygonShape(vertices=[(x / self.SCALE, y / self.SCALE) for x, y in poly]), density=1.0) for poly in [ self.HULL_POLY, self.LEG1_POLY, self.LEG2_POLY, self.MOTOR1_POLY, self.MOTOR2_POLY, self.BLADE1L_POLY, self.BLADE1R_POLY, self.BLADE2L_POLY, self.BLADE2R_POLY ] ]) self.dynamics = DJIPhantomDynamics() # Start at top center, plus optional offset state = np.zeros(12) state[ self.dynamics. STATE_Y] = self.START_X + xoff # 3D copter Y comes from 2D copter X state[self.dynamics.STATE_Z] = -( self.START_Y + yoff) # 3D copter Z comes from 2D copter Y self.dynamics.setState(state) # By showing props periodically, we can emulate prop rotation self.show_props = 0 # Support showing vehicle while on ground self.ground_count = 0 return self.step(np.array([0, 0]))[0] def step(self, action): motors = self._action_to_motors(action) # Set motors and compute dynamics self.dynamics.setMotors(motors) self.dynamics.update(1.0 / self.FPS) state = self.dynamics.getState() # Run one tick of Box2D simulator self.world.Step(1.0 / self.FPS, 6 * 30, 2 * 30) # Copy dynamics kinematics out to lander, negating Z for NED => ENU dyn = self.dynamics self.lander.position = state[dyn.STATE_Y], -state[dyn.STATE_Z] self.lander.angle = -state[dyn.STATE_PHI] self.lander.angularVelocity = -state[dyn.STATE_PHI_DOT] self.lander.linearVelocity = (state[dyn.STATE_Y_DOT], -state[dyn.STATE_Z_DOT]) state, reward, done = self._get_state_reward_done() return np.array(state, dtype=np.float32), reward, done, {} def render(self, mode='human'): from gym.envs.classic_control import rendering # Helps with a little extra time at the end self.rendering = True if self.viewer is None: self.viewer = rendering.Viewer(self.VIEWPORT_W, self.VIEWPORT_H) self.viewer.set_bounds(0, self.VIEWPORT_W / self.SCALE, 0, self.VIEWPORT_H / self.SCALE) self.viewer.draw_polygon([(0, 0), (self.VIEWPORT_W, 0), (self.VIEWPORT_W, self.VIEWPORT_H), (0, self.VIEWPORT_H)], color=self.GROUND_COLOR) for p in self.sky_polys: self.viewer.draw_polygon(p, color=self.SKY_COLOR) self._show_fixture(1, self.VEHICLE_COLOR) self._show_fixture(2, self.VEHICLE_COLOR) self._show_fixture(0, self.VEHICLE_COLOR) self._show_fixture(3, self.MOTOR_COLOR) self._show_fixture(4, self.MOTOR_COLOR) # Simulate spinning props by alernating if self.landed or self.show_props: for k in range(5, 9): self._show_fixture(k, self.PROP_COLOR) for x in [self.helipad_x1, self.helipad_x2]: flagy1 = self.helipad_y flagy2 = flagy1 + 50 / self.SCALE self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1)) self.viewer.draw_polygon( [(x, flagy2), (x, flagy2 - 10 / self.SCALE), (x + 25 / self.SCALE, flagy2 - 5 / self.SCALE)], color=self.FLAG_COLOR) self.show_props = (self.show_props + 1) % 3 return self.viewer.render(return_rgb_array=mode == 'rgb_array') def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def _show_fixture(self, index, color): fixture = self.lander.fixtures[index] trans = fixture.body.transform path = [trans * v for v in fixture.shape.vertices] self.viewer.draw_polygon(path, color=color) path.append(path[0]) self.viewer.draw_polyline(path, color=self.OUTLINE_COLOR, linewidth=1)
def _reset(self, xoff=0, yoff=0): self._destroy() self.world.contactListener_keepref = ContactDetector(self) self.world.contactListener = self.world.contactListener_keepref self.landed = False self.prev_shaping = None self.rendering = False W = self.VIEWPORT_W / self.SCALE H = self.VIEWPORT_H / self.SCALE # Turn off gravity so we can run our own dynamics self.world.gravity = 0, 0 # terrain CHUNKS = 11 height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1, )) chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)] self.helipad_x1 = chunk_x[CHUNKS // 2 - 1] self.helipad_x2 = chunk_x[CHUNKS // 2 + 1] self.helipad_y = H / 4 height[CHUNKS // 2 - 2] = self.helipad_y height[CHUNKS // 2 - 1] = self.helipad_y height[CHUNKS // 2 + 0] = self.helipad_y height[CHUNKS // 2 + 1] = self.helipad_y height[CHUNKS // 2 + 2] = self.helipad_y smooth_y = [ 0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(CHUNKS) ] self.ground = self.world.CreateStaticBody(shapes=edgeShape( vertices=[(0, 0), (W, 0)])) self.sky_polys = [] for i in range(CHUNKS - 1): p1 = (chunk_x[i], smooth_y[i]) p2 = (chunk_x[i + 1], smooth_y[i + 1]) self.ground.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)]) initial_y = self.VIEWPORT_H / self.SCALE self.lander = self.world.CreateDynamicBody( position=(self.VIEWPORT_W / self.SCALE / 2, initial_y), angle=0.0, fixtures=[ fixtureDef(shape=polygonShape(vertices=[(x / self.SCALE, y / self.SCALE) for x, y in poly]), density=1.0) for poly in [ self.HULL_POLY, self.LEG1_POLY, self.LEG2_POLY, self.MOTOR1_POLY, self.MOTOR2_POLY, self.BLADE1L_POLY, self.BLADE1R_POLY, self.BLADE2L_POLY, self.BLADE2R_POLY ] ]) self.dynamics = DJIPhantomDynamics() # Start at top center, plus optional offset state = np.zeros(12) state[ self.dynamics. STATE_Y] = self.START_X + xoff # 3D copter Y comes from 2D copter X state[self.dynamics.STATE_Z] = -( self.START_Y + yoff) # 3D copter Z comes from 2D copter Y self.dynamics.setState(state) # By showing props periodically, we can emulate prop rotation self.show_props = 0 # Support showing vehicle while on ground self.ground_count = 0 return self.step(np.array([0, 0]))[0]