Code Example #1
File: 3dtest.py  Project: simondlevy/gym-copter
def main():

    # Make a command-line parser
    parser = make_parser_3d()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    args = parser.parse_args()
    viewangles = parse_view_angles(args)

    # Load net and environment name from pickled file
    net, env_name = pickle.load(open(args.filename, 'rb'))

    # Make environment from name
    env = gym.make(env_name)

    movie_name = None

    if args.movie:
        print('Running episode ...')
        movie_name = 'movie.mp4'

    # Begin 3D rendering on main thread
    # render, report = True, True
    renderer = ThreeDLanderRenderer(env,
                                    eval_with_movie, (net, args.seed),
                                    viewangles=viewangles,
                                    outfile=movie_name)
    renderer.start()
Code Example #2
    def render(self, mode='human'):

        from gym_copter.rendering.threed import ThreeDLanderRenderer

        # Create renderer if not done yet
        if self.renderer is None:
            self.renderer = ThreeDLanderRenderer(self, self.LANDING_RADIUS)

        return self.renderer.render()
Code Example #3
def run(env, radius):

    from gym_copter.rendering.threed import ThreeDLanderRenderer
    import threading

    viewer = ThreeDLanderRenderer(env, radius)

    thread = threading.Thread(target=heuristic_lander,
                              args=(env, heuristic, viewer))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    viewer.start()
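
The heuristic_lander function targeted by the thread above is defined elsewhere in the project and is not shown here. A hypothetical sketch of such an episode loop, illustrating only the reset/step pattern the background thread runs while the viewer owns the main thread (the actual gym-copter implementation may differ):

def heuristic_lander(env, heuristic, viewer):

    # Hypothetical sketch, not the project's code: run one episode, mapping
    # each observation to a (throttle, roll, pitch) action with the supplied
    # heuristic.  The viewer argument matches the call in Code Example #3;
    # the real function may use it to stop rendering when the episode ends.
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        action = heuristic(state)
        state, reward, done, _ = env.step(action)
        total_reward += reward

    print('Total reward = %+0.3f' % total_reward)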
Code Example #4
File: 3dtest.py  Project: coletta1/gym-copter
def main():

    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    parser.add_argument('--record',
                        default=None,
                        help='If specified, sets the recording dir')
    parser.add_argument('--seed',
                        default=None,
                        type=int,
                        help='Sets Gym, PyTorch and Numpy seeds')
    args = parser.parse_args()

    # Load network, environment name, and number of hidden units from pickled file
    parts, env_name, nhid = torch.load(open(args.filename, 'rb'))

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # Support recording a movie
    if args.record:
        env = wrappers.Monitor(env, args.record, force=True)

    # We use a different evaluator function for TD3 vs. other algorithms
    fun = run_td3 if 'td3' in args.filename else run_other

    # Create a three-D renderer
    renderer = ThreeDLanderRenderer(env)

    # Start the network-evaluation episode on a separate thread
    thread = threading.Thread(target=fun, args=(parts, env, nhid, args.record))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    renderer.start()
Code Example #5
def main():

    # Make a command-line parser with --view enabled
    parser = make_parser()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    parser.add_argument('--movie',
                        default=None,
                        help='If specified, sets the output movie file name')
    parser.add_argument('--seed',
                        default=None,
                        type=int,
                        help='Sets Gym, PyTorch and Numpy seeds')
    args, viewangles = parse(parser)

    # Load network, environment name, and number of hidden units from pickled
    # file
    parts, env_name, nhid = torch.load(open(args.filename, 'rb'))

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # We use a different evaluator function for TD3 vs. other algorithms
    fun = run_td3 if 'td3' in args.filename else run_other

    if args.movie is not None:
        print('Running episode ...')

    # Start the network-evaluation episode on a separate thread
    thread = threading.Thread(target=fun, args=(parts, env, nhid, args.movie))
    thread.start()

    # Begin 3D rendering on main thread
    renderer = ThreeDLanderRenderer(env,
                                    viewangles=viewangles,
                                    outfile=args.movie)
    renderer.start()
Code Example #6
File: 3dtest.py  Project: simondlevy/gym-copter
def main():

    # Make a command-line parser
    parser = make_parser_3d()
    parser.add_argument('filename', metavar='FILENAME', help='input file')
    args = parser.parse_args()
    viewangles = parse_view_angles(args)

    # Load network, environment name, and number of hidden units from pickled
    # file
    parts, env_name, nhid = torch.load(open(args.filename, 'rb'))

    # Make a gym environment from the name
    env = gym.make(env_name)

    # Set random seed if indicated
    if args.seed is not None:
        env.seed(args.seed)
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # We use a different evaluator function for TD3 vs. other algorithms
    fun = run_td3 if 'td3' in args.filename else run_other

    movie_name = None

    if args.movie:
        print('Running episode ...')
        movie_name = 'movie.mp4'

    # Begin 3D rendering on main thread
    renderer = ThreeDLanderRenderer(env,
                                    fun, (parts, nhid, movie_name),
                                    viewangles=viewangles,
                                    outfile=movie_name)
    renderer.start()
Code Example #7
import gym
from neat_gym import read_file, eval_net
from gym_copter.rendering.threed import ThreeDLanderRenderer
import threading


def _eval_net(net, env):

    print('Reward = %+0.3f' % eval_net(net, env, render=True))


if __name__ == '__main__':

    # Load network and environment name from pickled file
    net, env_name, _, _ = read_file()

    # Make environment from name
    env = gym.make(env_name)

    # Create a three-D renderer
    renderer = ThreeDLanderRenderer(env)

    # Start the network-evaluation episode on a separate thread
    thread = threading.Thread(target=_eval_net, args=(net, env))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    renderer.start()
Code Example #8
class Lander3DSimple(gym.Env, EzPickle):

    # Parameters to adjust  
    INITIAL_RANDOM_OFFSET      = 2.5  # perturbation factor for initial horizontal position
    INITIAL_ALTITUDE           = 5
    LANDING_RADIUS             = 2
    XY_PENALTY_FACTOR          = 25   # designed so that maximal penalty is around 100
    PITCH_ROLL_PENALTY_FACTOR  = 250   
    BOUNDS                     = 10
    OUT_OF_BOUNDS_PENALTY      = 100
    INSIDE_RADIUS_BONUS        = 100
    RESTING_DURATION           = 1.0  # for rendering for a short while after successful landing
    FRAMES_PER_SECOND          = 50
    MAX_ANGLE                  = 45   # big penalty if roll or pitch angles go beyond this
    EXCESS_ANGLE_PENALTY       = 100

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : FRAMES_PER_SECOND
    }

    def __init__(self):

        EzPickle.__init__(self)
        self.seed()

        self.prev_reward = None

        # Observation is all state values
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(10,), dtype=np.float32)

        # Action is three floats (throttle, roll, pitch)
        self.action_space = spaces.Box(-1, +1, (3,), dtype=np.float32)

        # Support for rendering
        self.renderer = None
        self.pose = None

        # Pre-convert max-angle degrees to radian
        self.max_angle = np.radians(self.MAX_ANGLE)

        self.reset()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):

        self.prev_shaping = None

        # Create custom dynamics model
        self.dynamics = DJIPhantomDynamics(self.FRAMES_PER_SECOND)

        # Initialize dynamics with a fixed horizontal offset (the random
        # perturbation factor is currently commented out below)
        state = np.zeros(12)
        d = self.dynamics
        state[d.STATE_X] =  self.INITIAL_RANDOM_OFFSET #* np.random.randn()
        state[d.STATE_Y] =  self.INITIAL_RANDOM_OFFSET #* np.random.randn()
        state[d.STATE_Z] = -self.INITIAL_ALTITUDE
        self.dynamics.setState(state)

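        # Return the initial observation by taking one step with zero throttle
        # (a throttle demand of -1 is clipped to 0 in step()) and neutral
        # roll/pitch demands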
        return self.step(np.array([-1, 0, 0]))[0]

    def step(self, action):

        # Use mixer to convert demands into motor values
        t = np.clip(action[0], 0, 1) # truncate throttle below 0
        r = action[1]
        p = action[2]
        motors = [t-r-p, t+r+p, t+r-p, t-r+p] 
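        # Note: each motor command is the throttle demand offset by the roll
        # and pitch demands; e.g. a positive roll demand raises the second and
        # third motor values and lowers the first and fourth.  Which physical
        # rotor each value drives is determined by the dynamics model.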

        # Abbreviation
        d = self.dynamics
        status = d.getStatus()

        # Stop motors after safe landing
        if status == d.STATUS_LANDED:
            d.setMotors(np.zeros(4))

        # In air, set motors from action
        else:
            d.setMotors(np.clip(motors, 0, 1))    # keep motors in interval [0,1]
            d.update()

        # Get new state from dynamics, removing yaw
        state = np.array(d.getState())[:10]

        # Extract pose from state
        x, y, z, phi, theta = state[0::2]

        # Set lander pose in display (with zero for yaw)
        self.pose = x, y, z, phi, theta, 0

        # Shaping penalizes distance from the origin (and its rate) plus
        # roll/pitch angles (and their rates); the step reward is the change
        # in shaping since the previous step
        shaping = -(
                self.XY_PENALTY_FACTOR * np.sqrt(np.sum(state[0:6]**2)) + 
                self.PITCH_ROLL_PENALTY_FACTOR * np.sqrt(np.sum(state[6:10]**2))
                )
                                                                  
        reward = (shaping - self.prev_shaping) if (self.prev_shaping is not None) else 0
        self.prev_shaping = shaping

        # Assume we're not done yet
        done = False

        # Lose bigly if we go out of bounds
        if abs(x) >= self.BOUNDS or abs(y) >= self.BOUNDS:
            done = True
            reward = -self.OUT_OF_BOUNDS_PENALTY

        # Lose bigly for excess roll or pitch 
        if abs(phi) >= self.max_angle or abs(theta) >= self.max_angle:
            done = True
            reward = -self.EXCESS_ANGLE_PENALTY

        # It's all over once we're on the ground
        if status == d.STATUS_LANDED:

            done = True

            # Win bigly if we land safely between the flags
            if x**2+y**2 < self.LANDING_RADIUS**2: 

                reward += self.INSIDE_RADIUS_BONUS

        elif status == d.STATUS_CRASHED:

            # Crashed!
            done = True

        return np.array(state, dtype=np.float32), reward, done, {}

    def render(self, mode='human'):

        from gym_copter.rendering.threed import ThreeDLanderRenderer

        # Create renderer if not done yet
        if self.renderer is None:
            self.renderer = ThreeDLanderRenderer(self, self.LANDING_RADIUS)

        return self.renderer.render()

    def close(self):

        return
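
For completeness, a minimal sketch (not taken from the project) showing how the Lander3DSimple environment above could be flown with random actions while ThreeDLanderRenderer runs on the main thread, following the threading pattern of Code Examples #3 and #7. Direct construction of Lander3DSimple and the two-argument renderer constructor are assumptions based on those snippets:

import threading

from gym_copter.rendering.threed import ThreeDLanderRenderer


def _random_episode(env):

    # Run one episode with random (throttle, roll, pitch) actions
    env.reset()
    done = False
    total_reward = 0
    while not done:
        _, reward, done, _ = env.step(env.action_space.sample())
        total_reward += reward
    print('Total reward = %+0.3f' % total_reward)


if __name__ == '__main__':

    # Lander3DSimple is the class defined in Code Example #8
    env = Lander3DSimple()
    viewer = ThreeDLanderRenderer(env, env.LANDING_RADIUS)

    # Run the episode on a background thread
    thread = threading.Thread(target=_random_episode, args=(env,))
    thread.daemon = True
    thread.start()

    # Begin 3D rendering on main thread
    viewer.start()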