# QR-DQN example: Rainbow-style agent with a quantile-regression loss on CartPole-v1.
import gym
import tensorflow.contrib.layers as layers

import dqn.algorithms
import dqn.annealing_schedules
import dqn.experience_replay
# `nn` (the network definitions) and `DQNAgent` are assumed to be imported elsewhere
# in the original script; the exact module paths are not shown in this excerpt.

env = gym.make('CartPole-v1')
num_steps = 200000

# Here we combine the same improvements from Rainbow, but use QR instead of C51.
# Note that we are still using a DistributionalQNetwork, but this network uses n as
# the number of quantiles rather than the number of atoms.
# TODO: Do we want to allow noisy_net=False? Does this make sense or not?
q_func = nn.DistributionalQNetwork([64], env.action_space.n, n=75,
                                   noisy_net=True, dueling=[32])

# Exploration is handled by the noisy net, so epsilon stays at 0.
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)

loss = dqn.algorithms.QuantileRegressionLoss()
update_target = dqn.algorithms.HardUpdate()

# Proportional (prioritized) experience replay with fixed alpha/beta.
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=100)

agent.load('save/qr_dqn')
agent.run(env, num_timesteps=num_steps, render=True)
# Evaluate a trained DQN agent on CarRacing for a fixed number of test episodes.
# (`args`, `state_dim`, `env`, `CNN`, `CNNTargetNetwork`, `run_episode` and `DQNAgent`
#  are defined in earlier parts of the original script that are not included here.)
history_length = args.history_length
num_actions = args.num_actions

Q = CNN(state_dim, num_actions, history_length, hidden=256, lr=1e-3)
Q_target = CNNTargetNetwork(state_dim, num_actions, history_length, hidden=256, lr=1e-3)

agent = DQNAgent(Q, Q_target, num_actions, discount_factor=0.99, batch_size=64, epsilon=0.05)
agent.load("./models_carracing/dqn_agent.ckpt")

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
    episode_rewards.append(stats.episode_reward)

# save results in a dictionary and write them into a .json file
results = dict()
results["episode_rewards"] = episode_rewards
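# The comment above promises to write the results into a .json file, but the original
# excerpt stops after building the dictionary. A minimal completion might look like the
# sketch below; the output path "./results/carracing_results.json" is an assumption
# made for illustration, not a path taken from the original script.
import json
import os

os.makedirs("./results", exist_ok=True)                    # hypothetical output directory
with open("./results/carracing_results.json", "w") as fh:  # hypothetical file name
    json.dump(results, fh)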
# Double DQN with prioritized replay on CartPole-v1, using a custom TensorFlow Q-network.
# (The beginning of the custom Network definition, along with the script's imports, is
#  missing from this excerpt; only the network's final layer argument and return are shown.)
                           activation_fn=None)
    return out


env = gym.make('CartPole-v1')
num_steps = 100000

q_func = Network()

# Epsilon-greedy with epsilon fixed at 0, i.e. greedy evaluation.
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)

loss = dqn.algorithms.TDErrorLoss(double_q=True, optimizer=None)
update_target = dqn.algorithms.HardUpdate()

# Proportional (prioritized) experience replay with fixed alpha/beta.
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=50000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=2000)

agent.load('data/test')
agent.run(env, num_timesteps=num_steps, render=True)
# NAF (Normalized Advantage Functions) agent for continuous control.
# (The imports and the definitions of `env`, `num_steps` and `q_func` precede this
#  excerpt and are not shown.)

# Exploration noise: a Gaussian random process with a linearly annealed stddev;
# start and end are both 0 here, i.e. no exploration noise.
stddev_scheduler = dqn.annealing_schedules.Linear(start=0, end=0, num_steps=num_steps)
action_selection = dqn.algorithms.GaussianRandomProcess(stddev_scheduler)

loss = dqn.algorithms.NAFLoss()  # TODO: add in all options here and in the other examples
update_target = dqn.algorithms.SoftUpdate(tau=0.001)

# Proportional (prioritized) experience replay with fixed alpha/beta.
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=1000000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=1,
                 batch_size=100,
                 discount_factor=0.99,
                 replay_period=1,
                 replays_per_step=5,
                 update_with_replay=True,
                 update_target_network_frequency=1)

agent.load('data/naf')
agent.run(env, num_timesteps=num_steps, render=True)
# Run a pre-trained DQN agent on CartPole-v1 and render each episode.
import gym
import numpy as np

from dqn.agent import DQNAgent
from dqn.agent import EPISODES, EPISODE_LENGTH

environment_name = 'CartPole-v1'
environment = gym.make(environment_name)
environment.max_episode_steps = EPISODE_LENGTH

n_actions = environment.action_space.n
n_state_features = environment.observation_space.shape[0]

# Initialize DQN agent with a fully greedy policy (epsilon = 0)
agent = DQNAgent(n_state_features, n_actions, epsilon=0.0)

# Load pre-trained agent
agent.load(f'./models/{environment_name}.h5')

for episode in range(EPISODES):
    state = environment.reset()
    state = np.reshape(state, [1, n_state_features])

    for t in range(EPISODE_LENGTH):
        # Visualize environment
        environment.render()

        # Predict next action using NN Value Function Approximation
        action = agent.get_action(state)

        # Interact with the environment and observe new state and reward
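        # The original excerpt ends here. A minimal continuation, assuming the classic
        # gym step API (state, reward, done, info) used by the rest of this script,
        # might look like this:
        next_state, reward, done, _ = environment.step(action)
        state = np.reshape(next_state, [1, n_state_features])

        # Stop the episode once the environment signals termination.
        if done:
            break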
# Rainbow agent: C51 distributional network with noisy nets, a dueling head,
# double Q-learning, 3-step returns and prioritized replay.
# (The first part of the q_func construction is missing from this excerpt; the call
#  head below is reconstructed from the QR-DQN example above, and the hidden-layer
#  argument is left as a placeholder.)
q_func = nn.DistributionalQNetwork(
    ...,  # hidden layer sizes omitted in this excerpt
    env.action_space.n, n=51, noisy_net=True, dueling=[32])

# Action selection in Rainbow is done using noisy nets with no epsilon.
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)

loss = dqn.algorithms.CategoricalAlgorithm(double_q=True)
update_target = dqn.algorithms.HardUpdate()

# Proportional (prioritized) experience replay with fixed alpha/beta.
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=200)

agent.load('save_test/rainbow')
agent.run(env, num_timesteps=num_steps, render=True)