def main():
    """Replay a pickled network in a 3D-rendered episode.

    Reads the network and environment name from the file named on the
    command line, builds the Gym environment, and hands the evaluation
    function to the 3D renderer, which runs on the main thread.  When the
    ``movie`` option is set, the rendering is written to ``movie.mp4``.
    """

    # Make a command-line parser
    # (presumably make_parser_3d supplies the --movie, --seed and view-angle
    # options read below; confirm against its definition)
    parser = make_parser_3d()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    args = parser.parse_args()

    viewangles = parse_view_angles(args)

    # Load net and environment name from pickled file.  The file is opened
    # in a context manager so the handle is closed as soon as loading ends.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(args.filename, 'rb') as infile:
        net, env_name = pickle.load(infile)

    # Make environment from name
    env = gym.make(env_name)

    # No movie is written unless requested on the command line
    movie_name = None

    if args.movie:
        print('Running episode ...')
        movie_name = 'movie.mp4'

    # Begin 3D rendering on main thread; the renderer is given the
    # evaluation function and its arguments
    renderer = ThreeDLanderRenderer(env,
                                    eval_with_movie,
                                    (net, args.seed),
                                    viewangles=viewangles,
                                    outfile=movie_name)
    renderer.start()
def render(self, mode='human'):
    """Render the environment through a lazily created 3D renderer.

    The renderer is built on the first call and cached on the instance;
    later calls reuse it.  ``mode`` is accepted for Gym API compatibility
    but is not consulted here.

    Returns whatever the renderer's ``render()`` call returns.
    """
    from gym_copter.rendering.threed import ThreeDLanderRenderer

    renderer = self.renderer

    # First call: build the renderer and remember it for next time
    if renderer is None:
        renderer = ThreeDLanderRenderer(self, self.LANDING_RADIUS)
        self.renderer = renderer

    return renderer.render()
def run(env, radius):
    """Run the heuristic lander in the background while rendering in 3D.

    A renderer is created for ``env`` with the given ``radius``; the
    heuristic episode runs on a daemon thread so that the renderer can
    occupy the main thread.
    """
    import threading
    from gym_copter.rendering.threed import ThreeDLanderRenderer

    renderer = ThreeDLanderRenderer(env, radius)

    # The episode runs on a daemon worker so it cannot keep the process
    # alive once rendering on the main thread has finished
    worker = threading.Thread(
        target=heuristic_lander,
        args=(env, heuristic, renderer),
        daemon=True,
    )
    worker.start()

    # Begin 3D rendering on main thread
    renderer.start()
def main():
    """Evaluate a saved network in a 3D-rendered episode.

    Loads the network parts, environment name and hidden-unit count from
    the file named on the command line, optionally seeds the random number
    generators and wraps the environment for recording, then runs the
    evaluation on a daemon thread while the 3D renderer occupies the main
    thread.
    """

    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    parser.add_argument('--record', default=None,
                        help='If specified, sets the recording dir')
    parser.add_argument('--seed', default=None, type=int,
                        help='Sets Gym, PyTorch and Numpy seeds')
    args = parser.parse_args()

    # Load network, environment name, and number of hidden units from
    # pickled file.  The context manager closes the handle after loading.
    # NOTE: torch.load unpickles its input; only load trusted files.
    with open(args.filename, 'rb') as infile:
        parts, env_name, nhid = torch.load(infile)

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # Support recording a movie
    if args.record:
        env = wrappers.Monitor(env, args.record, force=True)

    # We use a different evaluator function for TD3 vs. other algorithms;
    # the choice is made from the file name
    fun = run_td3 if 'td3' in args.filename else run_other

    # Create a three-D renderer
    renderer = ThreeDLanderRenderer(env)

    # Start the network-evaluation episode on a separate thread
    thread = threading.Thread(target=fun,
                              args=(parts, env, nhid, args.record))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    renderer.start()
def main():
    """Evaluate a saved network in a 3D-rendered episode, optionally to a movie.

    Loads the network parts, environment name and hidden-unit count from
    the file named on the command line, optionally seeds the random number
    generators, runs the evaluation on a separate thread, and renders in 3D
    on the main thread.  The ``--movie`` value is passed to both the
    evaluator and the renderer.
    """

    # Make a command-line parser with --view enabled
    parser = make_parser()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    parser.add_argument('--movie', default=None,
                        help='If specified, sets the output movie file name')
    parser.add_argument('--seed', default=None, type=int,
                        help='Sets Gym, PyTorch and Numpy seeds')
    args, viewangles = parse(parser)

    # Load network, environment name, and number of hidden units from
    # pickled file.  The context manager closes the handle after loading.
    # NOTE: torch.load unpickles its input; only load trusted files.
    with open(args.filename, 'rb') as infile:
        parts, env_name, nhid = torch.load(infile)

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # We use a different evaluator function for TD3 vs. other algorithms;
    # the choice is made from the file name
    fun = run_td3 if 'td3' in args.filename else run_other

    if args.movie is not None:
        print('Running episode ...')

    # Start the network-evaluation episode on a separate thread.
    # NOTE(review): this thread is not marked as a daemon, so the process
    # waits for the episode to finish after rendering stops -- confirm
    # that this is intended.
    thread = threading.Thread(target=fun,
                              args=(parts, env, nhid, args.movie))
    thread.start()

    # Begin 3D rendering on main thread
    renderer = ThreeDLanderRenderer(env,
                                    viewangles=viewangles,
                                    outfile=args.movie)
    renderer.start()
def main():
    """Evaluate a saved network inside the 3D renderer.

    Loads the network parts, environment name and hidden-unit count from
    the file named on the command line, optionally seeds the random number
    generators, and hands the evaluation function to the 3D renderer, which
    runs on the main thread.  When the ``movie`` option is set, the output
    file is ``movie.mp4``.
    """

    # Make a command-line parser
    # (presumably make_parser_3d supplies the --movie, --seed and view-angle
    # options read below; confirm against its definition)
    parser = make_parser_3d()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    args = parser.parse_args()

    viewangles = parse_view_angles(args)

    # Load network, environment name, and number of hidden units from
    # pickled file.  The context manager closes the handle after loading.
    # NOTE: torch.load unpickles its input; only load trusted files.
    with open(args.filename, 'rb') as infile:
        parts, env_name, nhid = torch.load(infile)

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # We use a different evaluator function for TD3 vs. other algorithms;
    # the choice is made from the file name
    fun = run_td3 if 'td3' in args.filename else run_other

    # No movie is written unless requested on the command line
    movie_name = None

    if args.movie:
        print('Running episode ...')
        movie_name = 'movie.mp4'

    # Begin 3D rendering on main thread; the renderer is given the
    # evaluation function and its arguments
    renderer = ThreeDLanderRenderer(env,
                                    fun,
                                    (parts, nhid, movie_name),
                                    viewangles=viewangles,
                                    outfile=movie_name)
    renderer.start()
import gym
from neat_gym import read_file, eval_net
from gym_copter.rendering.threed import ThreeDLanderRenderer
import threading


def _eval_net(net, env):
    """Run one rendered evaluation of ``net`` in ``env`` and print the reward."""
    print('Reward = %+03.f' % eval_net(net, env, render=True))


if __name__ == '__main__':

    # Load network and environment name from pickled file
    net, env_name, _, _ = read_file()

    # Make environment from name
    env = gym.make(env_name)

    # Create a three-D renderer
    renderer = ThreeDLanderRenderer(env)

    # Start the network-evaluation episode on a separate thread.  The
    # evaluator must receive the environment object that the renderer is
    # attached to, not the environment's name string.
    thread = threading.Thread(target=_eval_net, args=(net, env))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    renderer.start()
class Lander3DSimple(gym.Env, EzPickle):
    """Gym environment for landing a copter, using a custom dynamics model.

    The observation is the first ten values of the dynamics state (yaw is
    dropped).  The action is three floats: throttle, roll and pitch.  The
    reward is the step-to-step change in a shaping term that penalizes the
    magnitude of the state, with fixed penalties for leaving the bounds or
    exceeding the maximum roll/pitch angle, and a bonus for landing inside
    the landing radius.
    """

    # Parameters to adjust
    INITIAL_RANDOM_OFFSET = 2.5  # perturbation factor for initial horizontal position
    INITIAL_ALTITUDE = 5
    LANDING_RADIUS = 2
    XY_PENALTY_FACTOR = 25  # designed so that maximal penalty is around 100
    PITCH_ROLL_PENALTY_FACTOR = 250
    BOUNDS = 10
    OUT_OF_BOUNDS_PENALTY = 100
    INSIDE_RADIUS_BONUS = 100
    RESTING_DURATION = 1.0  # for rendering for a short while after successful landing
    FRAMES_PER_SECOND = 50
    MAX_ANGLE = 45  # big penalty if roll or pitch angles go beyond this
    EXCESS_ANGLE_PENALTY = 100

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': FRAMES_PER_SECOND
    }

    def __init__(self):
        """Set up the spaces and rendering state, then reset the episode."""

        EzPickle.__init__(self)
        self.seed()

        self.prev_reward = None

        # Observation is all state values
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(10,),
                                            dtype=np.float32)

        # Action is three floats (throttle, roll, pitch)
        self.action_space = spaces.Box(-1, +1, (3,), dtype=np.float32)

        # Support for rendering
        self.renderer = None
        self.pose = None

        # Pre-convert max-angle degrees to radians
        self.max_angle = np.radians(self.MAX_ANGLE)

        self.reset()

    def seed(self, seed=None):
        """Create the environment's random generator; return ``[seed]``."""

        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        """Start a new episode and return the initial observation.

        A fresh dynamics model is created and placed at the initial offset
        and altitude; the observation comes from one step taken with the
        action ``[-1, 0, 0]``.
        """

        self.prev_shaping = None

        # Create custom dynamics model
        self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND)

        # Initialize custom dynamics.
        # NOTE: the random perturbation (* np.random.randn()) is currently
        # disabled, so the starting position is the same every episode.
        state = np.zeros(12)
        d = self.dynamics
        state[d.STATE_X] = self.INITIAL_RANDOM_OFFSET
        state[d.STATE_Y] = self.INITIAL_RANDOM_OFFSET
        state[d.STATE_Z] = -self.INITIAL_ALTITUDE
        self.dynamics.setState(state)

        return self.step(np.array([-1, 0, 0]))[0]

    def step(self, action):
        """Advance the dynamics by one step.

        ``action`` holds (throttle, roll, pitch).  Returns the tuple
        ``(observation, reward, done, info)``, where ``observation`` is a
        ten-element float32 array and ``info`` is always an empty dict.
        """

        # Use mixer to convert demands into motor values
        t = np.clip(action[0], 0, 1)    # truncate throttle below 0
        r = action[1]
        p = action[2]
        motors = [t-r-p, t+r+p, t+r-p, t-r+p]

        # Abbreviation
        d = self.dynamics

        # Status is sampled before this step's dynamics update
        status = d.getStatus()

        # Stop motors after safe landing
        if status == d.STATUS_LANDED:
            d.setMotors(np.zeros(4))

        # In air, set motors from action
        else:
            d.setMotors(np.clip(motors, 0, 1))    # keep motors in interval [0,1]
            d.update()

        # Get new state from dynamics, removing yaw
        state = np.array(d.getState())[:10]

        # Extract pose from state (every other element, starting at index 0)
        x, y, z, phi, theta = state[0::2]

        # Set lander pose in display (with zero for yaw)
        self.pose = x, y, z, phi, theta, 0

        # Reward is a simple penalty for overall distance and angle and
        # their first derivatives
        shaping = -(
            self.XY_PENALTY_FACTOR * np.sqrt(np.sum(state[0:6]**2)) +
            self.PITCH_ROLL_PENALTY_FACTOR * np.sqrt(np.sum(state[6:10]**2))
        )

        # The first step of an episode has no previous shaping to compare to
        reward = ((shaping - self.prev_shaping)
                  if (self.prev_shaping is not None)
                  else 0)
        self.prev_shaping = shaping

        # Assume we're not done yet
        done = False

        # Lose bigly if we go out of bounds
        if abs(x) >= self.BOUNDS or abs(y) >= self.BOUNDS:
            done = True
            reward = -self.OUT_OF_BOUNDS_PENALTY

        # Lose bigly for excess roll or pitch; this uses the dedicated
        # excess-angle constant rather than the out-of-bounds one
        if abs(phi) >= self.max_angle or abs(theta) >= self.max_angle:
            done = True
            reward = -self.EXCESS_ANGLE_PENALTY

        # It's all over once we're on the ground
        if status == d.STATUS_LANDED:

            done = True

            # Win bigly if we land safely between the flags
            if x**2+y**2 < self.LANDING_RADIUS**2:
                reward += self.INSIDE_RADIUS_BONUS

        elif status == d.STATUS_CRASHED:

            # Crashed!
            done = True

        return np.array(state, dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        """Render through a lazily created 3D renderer.

        ``mode`` is accepted but not consulted here.  Returns whatever the
        renderer's ``render()`` call returns.
        """

        from gym_copter.rendering.threed import ThreeDLanderRenderer

        # Create renderer if not done yet
        if self.renderer is None:
            self.renderer = ThreeDLanderRenderer(self, self.LANDING_RADIUS)

        return self.renderer.render()

    def close(self):
        """Do nothing; present to satisfy the Gym environment interface."""
        return