Example #1
import gym
import numpy as np

# EnvironmentWrapper, data_exploration, data.report and the
# dqn_with_prioritized_experience trainer are project-local helpers
# assumed to be in scope.
def solve():
    env = EnvironmentWrapper(gym.make('LunarLander-v2'))
    n_episodes = []

    for i in range(1):
        returns = dqn_with_prioritized_experience(env, n_episodes=None)
        n_episodes.append(len(returns))

        if i == 0:
            # Run the baseline for as many episodes as the solution
            # took so the two curves are directly comparable.
            baseline_returns = data_exploration(env,
                                                n_episodes=len(returns))
            data.report([(returns, 'b', 'Solution'),
                         (baseline_returns, 'r', 'Baseline')],
                        title='Solution',
                        file='lunarlander_solve_dqn.png')

    n_episodes = np.array(n_episodes)
    print('LunarLander solved!')
    print('  Median: {} episodes'.format(np.median(n_episodes)))
    print('  Mean:   {} episodes'.format(np.mean(n_episodes)))
    print('  Std:    {} episodes'.format(np.std(n_episodes)))
    print('  Min:    {} episodes'.format(np.min(n_episodes)))
    print('  Max:    {} episodes'.format(np.max(n_episodes)))
    # Trials reaching 2000+ episodes are counted as diverged.
    print('  % diverged: {}'.format(
        100.0 * len(n_episodes[n_episodes >= 2000]) / len(n_episodes)))
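
Both this example and the next lean on a project-local data_exploration helper that collects a random-walk baseline. Its real implementation is not shown here; as a rough, hypothetical sketch (the name and signature are taken from the calls above, the body is an assumption), it might look like this:

def data_exploration(env, n_episodes):
    # Hypothetical sketch: act uniformly at random and record the
    # undiscounted return of each episode (assumed behaviour; uses
    # the classic 4-tuple gym step API matching these env versions).
    returns = []
    for _ in range(n_episodes):
        env.reset()
        done, episode_return = False, 0.0
        while not done:
            action = env.action_space.sample()
            _, reward, done, _ = env.step(action)
            episode_return += reward
        returns.append(episode_return)
    return returns
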
Example #2
import gym
import numpy as np

# EnvironmentWrapper, data_exploration, data.report and the
# dqn_with_fixed_targets trainer are project-local helpers assumed
# to be in scope.
def solve():
    env = EnvironmentWrapper(gym.make('CartPole-v1'))
    n_episodes = []

    for i in range(10):
        returns = dqn_with_fixed_targets(env, n_episodes=None)
        n_episodes.append(len(returns))

        if i == 0:
            # Run the baseline for as many episodes as the solution
            # took so the two curves are directly comparable.
            baseline_returns = data_exploration(env,
                                                n_episodes=len(returns))
            data.report([(returns, 'b', 'Solution'),
                         (baseline_returns, 'r', 'Baseline')],
                        title='Solution',
                        file='cartpole_solve_fixedq_dqn.png')

    n_episodes = np.array(n_episodes)
    print('CartPole solved!')
    print('  Median: {} episodes'.format(np.median(n_episodes)))
    print('  Mean:   {} episodes'.format(np.mean(n_episodes)))
    print('  Std:    {} episodes'.format(np.std(n_episodes)))
    print('  Min:    {} episodes'.format(np.min(n_episodes)))
    print('  Max:    {} episodes'.format(np.max(n_episodes)))
    # Trials reaching 2000+ episodes are counted as diverged.
    print('  % diverged: {}'.format(
        100.0 * len(n_episodes[n_episodes >= 2000]) / len(n_episodes)))
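
All of the plotting above goes through data.report, which takes a list of (returns, colour, label) triples plus a title and an output filename. A minimal matplotlib-based sketch of what such a helper could do (an assumed API, not the project's actual code; the multi-trial driver in Example #4 passes a list of runs per entry, which this version averages):

import numpy as np
import matplotlib.pyplot as plt

def report(series, title, file):
    # Hypothetical sketch of data.report (assumed API): plot each
    # (returns, colour, label) triple and save the figure to disk.
    plt.figure()
    for returns, colour, label in series:
        arr = np.asarray(returns, dtype=float)
        if arr.ndim == 2:
            arr = arr.mean(axis=0)  # average across trials
        plt.plot(arr, colour, label=label)
    plt.title(title)
    plt.xlabel('Episode')
    plt.ylabel('Return')
    plt.legend()
    plt.savefig(file)
    plt.close()
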
Example #3
import gym

# EnvironmentWrapper, data_exploration, data.report and the dqn_*
# trainers are project-local helpers assumed to be in scope.
def run_single_trials():
    env = EnvironmentWrapper(gym.make('CartPole-v1'))
    n_episodes = 500

    baseline_returns = data_exploration(env, n_episodes)
    data.report([(baseline_returns, 'b', 'Baseline')],
                title='Random Walk',
                file='cartpole_single_random_walk.png')

    basic_dqn_returns = basic_dqn(env, n_episodes)
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Vanilla DQN',
                file='cartpole_single_basic_dqn.png')

    dqn_w_exp_returns = dqn_with_experience(env, n_episodes)
    data.report([(dqn_w_exp_returns, 'b', 'DQN w/ ER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Experience Replay',
                file='cartpole_single_er_dqn.png')

    dqn_w_fixed_targets_returns = dqn_with_fixed_targets(env, n_episodes)
    data.report([(dqn_w_fixed_targets_returns, 'b', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Fixed-Q Targets',
                file='cartpole_single_fixedq_dqn.png')

    # Plot all the variations
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (dqn_w_exp_returns, 'g', 'DQN w/ ER'),
                 (dqn_w_fixed_targets_returns, 'm', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='All DQN Variants',
                file='cartpole_single_all_dqn.png')
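
Every driver wraps the raw gym environment in an EnvironmentWrapper before training. The wrapper's definition lives elsewhere in the project; a minimal pass-through sketch (an assumption — the real class presumably adds preprocessing such as state normalisation) would be:

class EnvironmentWrapper:
    # Hypothetical sketch: delegate everything to the wrapped gym
    # environment; the real wrapper likely does more than this.
    def __init__(self, env):
        self.env = env
        self.action_space = env.action_space
        self.observation_space = env.observation_space

    def reset(self):
        return self.env.reset()

    def step(self, action):
        return self.env.step(action)
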
Example #4
import gym

# EnvironmentWrapper, data_exploration, data.report and the dqn_*
# trainers are project-local helpers assumed to be in scope.
def run_multiple_trials():
    env = EnvironmentWrapper(gym.make('LunarLander-v2'))
    n_episodes = 500
    n_trials = 10

    baseline_returns = []

    for i in range(n_trials):
        baseline_returns.append(data_exploration(env, n_episodes))

    data.report([(baseline_returns, 'b', 'Baseline')],
                title='Random Walk',
                file='lunarlander_multi_random_walk.png')

    basic_dqn_returns = []

    for i in range(n_trials):
        basic_dqn_returns.append(basic_dqn(env, n_episodes))

    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Vanilla DQN',
                file='lunarlander_multi_basic_dqn.png')

    dqn_w_exp_returns = []

    for i in range(n_trials):
        dqn_w_exp_returns.append(dqn_with_experience(env, n_episodes))

    data.report([(dqn_w_exp_returns, 'b', 'DQN w/ ER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Experience Replay',
                file='lunarlander_multi_er_dqn.png')

    dqn_w_fixed_targets_returns = []

    for i in range(n_trials):
        dqn_w_fixed_targets_returns.append(
            dqn_with_fixed_targets(env, n_episodes))

    data.report([(dqn_w_fixed_targets_returns, 'b', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Fixed-Q Targets',
                file='lunarlander_multi_fixedq_dqn.png')

    dqn_w_per_returns = []

    for i in range(n_trials):
        dqn_w_per_returns.append(
            dqn_with_prioritized_experience(env, n_episodes))

    data.report([(dqn_w_per_returns, 'b', 'DQN w/ PER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Prioritized ER',
                file='lunarlander_multi_per_dqn.png')

    # Plot all the variations
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (dqn_w_exp_returns, 'g', 'DQN w/ ER'),
                 (dqn_w_fixed_targets_returns, 'm', 'DQN w/ Fixed-Q'),
                 (dqn_w_per_returns, 'c', 'DQN w/ PER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='All DQN Variants',
                file='lunarlander_multi_all_dqn.png')
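
To run the two comparisons end to end, a simple entry point can call the drivers in sequence (a usage sketch, not part of the original listing):

if __name__ == '__main__':
    run_single_trials()     # CartPole comparison, 500 episodes per method
    run_multiple_trials()   # 10-trial LunarLander comparison (slow)
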