# Imports needed by the functions below. EnvironmentWrapper, data, and the
# dqn_* / data_exploration training helpers are project-local and defined
# elsewhere in this repo.
import gym
import numpy as np


def solve():
    env = EnvironmentWrapper(gym.make('LunarLander-v2'))
    n_episodes = []
    # Single trial only; LunarLander training is slow. Increase the range
    # for meaningful aggregate statistics.
    for i in range(1):
        returns = dqn_with_prioritized_experience(env, n_episodes=None)
        n_episodes.append(len(returns))
        if i == 0:
            # Run the baseline for as many episodes as the solution took.
            baseline_returns = data_exploration(env, n_episodes=len(returns))
            data.report([(returns, 'b', 'Solution'),
                         (baseline_returns, 'r', 'Baseline')],
                        title='Solution', file='lunarlander_solve_dqn.png')
    n_episodes = np.array(n_episodes)
    print('LunarLander solved!')
    print('  Median: {} episodes'.format(np.median(n_episodes)))
    print('  Mean:   {} episodes'.format(np.mean(n_episodes)))
    print('  Std:    {} episodes'.format(np.std(n_episodes)))
    print('  Min:    {} episodes'.format(np.min(n_episodes)))
    print('  Max:    {} episodes'.format(np.max(n_episodes)))
    print('  % diverged: {}'.format(100.0 * np.mean(n_episodes >= 2000)))
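# `EnvironmentWrapper` is defined elsewhere in this repo; as a hedged guess
# at its role, a minimal sketch follows. It delegates to the wrapped gym
# environment and caps episode length so diverged runs still terminate. The
# `max_steps` parameter is an assumption, not part of the original code.
class EnvironmentWrapper:
    """Minimal sketch of the repo's gym wrapper (assumed behavior)."""

    def __init__(self, env, max_steps=1000):
        self.env = env
        self.max_steps = max_steps  # assumed safety cap per episode
        self._steps = 0

    def reset(self):
        self._steps = 0
        return self.env.reset()

    def step(self, action):
        state, reward, done, info = self.env.step(action)
        self._steps += 1
        if self._steps >= self.max_steps:  # force-terminate long episodes
            done = True
        return state, reward, done, info

    def __getattr__(self, name):
        # Fall through to the wrapped env for everything else
        # (action_space, observation_space, render, ...).
        return getattr(self.env, name)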
# CartPole variant of solve(); presumably defined in a separate module from
# the LunarLander version above.
def solve():
    env = EnvironmentWrapper(gym.make('CartPole-v1'))
    n_episodes = []
    for i in range(10):
        returns = dqn_with_fixed_targets(env, n_episodes=None)
        n_episodes.append(len(returns))
        if i == 0:
            # Run the baseline for as many episodes as the solution took.
            baseline_returns = data_exploration(env, n_episodes=len(returns))
            data.report([(returns, 'b', 'Solution'),
                         (baseline_returns, 'r', 'Baseline')],
                        title='Solution',
                        file='cartpole_solve_fixedq_dqn.png')
    n_episodes = np.array(n_episodes)
    print('CartPole solved!')
    print('  Median: {} episodes'.format(np.median(n_episodes)))
    print('  Mean:   {} episodes'.format(np.mean(n_episodes)))
    print('  Std:    {} episodes'.format(np.std(n_episodes)))
    print('  Min:    {} episodes'.format(np.min(n_episodes)))
    print('  Max:    {} episodes'.format(np.max(n_episodes)))
    print('  % diverged: {}'.format(100.0 * np.mean(n_episodes >= 2000)))


''' Sample output:
    ...
'''
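# `data_exploration` is the repo's random-walk baseline. A minimal sketch,
# assuming it simply runs a uniform-random policy and records the
# undiscounted return of each episode:
def data_exploration(env, n_episodes):
    """Sketch: per-episode returns of a uniform-random policy (assumed)."""
    returns = []
    for _ in range(n_episodes):
        env.reset()
        done = False
        episode_return = 0.0
        while not done:
            action = env.action_space.sample()  # uniform-random action
            _, reward, done, _ = env.step(action)
            episode_return += reward
        returns.append(episode_return)
    return returns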
def run_single_trials():
    env = EnvironmentWrapper(gym.make('CartPole-v1'))
    n_episodes = 500

    baseline_returns = data_exploration(env, n_episodes)
    data.report([(baseline_returns, 'b', 'Baseline')],
                title='Random Walk', file='cartpole_single_random_walk.png')

    basic_dqn_returns = basic_dqn(env, n_episodes)
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Vanilla DQN', file='cartpole_single_basic_dqn.png')

    dqn_w_exp_returns = dqn_with_experience(env, n_episodes)
    data.report([(dqn_w_exp_returns, 'b', 'DQN w/ ER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Experience Replay', file='cartpole_single_er_dqn.png')

    dqn_w_fixed_targets_returns = dqn_with_fixed_targets(env, n_episodes)
    data.report([(dqn_w_fixed_targets_returns, 'b', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Fixed-Q Targets',
                file='cartpole_single_fixedq_dqn.png')

    # Plot all the variations
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (dqn_w_exp_returns, 'g', 'DQN w/ ER'),
                 (dqn_w_fixed_targets_returns, 'm', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='All DQN Variants', file='cartpole_single_all_dqn.png')
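# `data.report` lives in the repo's `data` module. A minimal matplotlib
# sketch of how it might look there, assuming each entry of `series` is a
# (returns, color, label) tuple and `returns` is either one trial's
# per-episode returns or a list of equal-length trials:
def report(series, title, file):
    """Sketch of data.report (assumed API; would belong in data.py)."""
    import matplotlib.pyplot as plt

    plt.figure()
    for returns, color, label in series:
        arr = np.asarray(returns, dtype=float)
        if arr.ndim == 2:
            # Multiple trials: plot the per-episode mean across trials.
            arr = arr.mean(axis=0)
        plt.plot(arr, color=color, label=label)
    plt.title(title)
    plt.xlabel('Episode')
    plt.ylabel('Return')
    plt.legend()
    plt.savefig(file)
    plt.close()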
def run_multiple_trials():
    env = EnvironmentWrapper(gym.make('LunarLander-v2'))
    n_episodes = 500
    n_trials = 10

    baseline_returns = []
    for i in range(n_trials):
        baseline_returns.append(data_exploration(env, n_episodes))
    data.report([(baseline_returns, 'b', 'Baseline')],
                title='Random Walk',
                file='lunarlander_multi_random_walk.png')

    basic_dqn_returns = []
    for i in range(n_trials):
        basic_dqn_returns.append(basic_dqn(env, n_episodes))
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Vanilla DQN', file='lunarlander_multi_basic_dqn.png')

    dqn_w_exp_returns = []
    for i in range(n_trials):
        dqn_w_exp_returns.append(dqn_with_experience(env, n_episodes))
    data.report([(dqn_w_exp_returns, 'b', 'DQN w/ ER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Experience Replay', file='lunarlander_multi_er_dqn.png')

    dqn_w_fixed_targets_returns = []
    for i in range(n_trials):
        dqn_w_fixed_targets_returns.append(
            dqn_with_fixed_targets(env, n_episodes))
    data.report([(dqn_w_fixed_targets_returns, 'b', 'DQN w/ Fixed-Q'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Fixed-Q Targets',
                file='lunarlander_multi_fixedq_dqn.png')

    dqn_w_per_returns = []
    for i in range(n_trials):
        dqn_w_per_returns.append(
            dqn_with_prioritized_experience(env, n_episodes))
    data.report([(dqn_w_per_returns, 'b', 'DQN w/ PER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='Prioritized ER', file='lunarlander_multi_per_dqn.png')

    # Plot all the variations
    data.report([(basic_dqn_returns, 'b', 'Basic DQN'),
                 (dqn_w_exp_returns, 'g', 'DQN w/ ER'),
                 (dqn_w_fixed_targets_returns, 'm', 'DQN w/ Fixed-Q'),
                 (dqn_w_per_returns, 'c', 'DQN w/ PER'),
                 (baseline_returns, 'r', 'Baseline')],
                title='All DQN Variants',
                file='lunarlander_multi_all_dqn.png')
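# Hedged guess at the script entry point; the original driver is not shown.
if __name__ == '__main__':
    run_single_trials()
    run_multiple_trials()
    solve()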