Example #1
import gym

import dqn.algorithms
import dqn.annealing_schedules
import dqn.experience_replay
import tensorflow.contrib.layers as layers
# Note: this excerpt also assumes the network module (`nn`) and `DQNAgent`
# are imported from the dqn package elsewhere in the original file.

env = gym.make('CartPole-v1')
num_steps=200000

# Here we combine the same improvements as in Rainbow, but use QR instead of C51.
# Note that we are still using a DistributionalQNetwork, but here n is the number of quantiles rather than the number of atoms.
# TODO: Do we want to allow noisy_net=False? Does this make sense or not?
q_func = nn.DistributionalQNetwork([64], env.action_space.n, n=75, noisy_net=True, dueling=[32])
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)
loss = dqn.algorithms.QuantileRegressionLoss()
update_target = dqn.algorithms.HardUpdate()
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000, alpha_scheduler=alpha_scheduler, beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=100)

agent.load('save/qr_dqn')
agent.run(env, num_timesteps=num_steps, render=True)
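The comments in this example contrast QR-DQN (a fixed number of quantiles) with C51 (a fixed number of atoms). As a rough illustration of what a quantile-regression loss computes, here is a minimal NumPy sketch of the quantile Huber (pinball) loss from the QR-DQN paper; the function name, argument shapes, and kappa default are illustrative assumptions, not the dqn package's API.

import numpy as np

def quantile_huber_loss(pred_quantiles, target_quantiles, kappa=1.0):
    """Pinball (quantile regression) loss with Huber smoothing.

    pred_quantiles:   (n,) predicted quantile values for one state-action pair
    target_quantiles: (n,) target quantile values (e.g. from the target network)
    """
    n = pred_quantiles.shape[0]
    # Midpoints of the n quantile fractions: (2i + 1) / (2n)
    taus = (np.arange(n) + 0.5) / n
    # Pairwise TD errors u_ij = target_j - pred_i
    u = target_quantiles[None, :] - pred_quantiles[:, None]
    # Huber term, quadratic near zero and linear beyond kappa
    huber = np.where(np.abs(u) <= kappa,
                     0.5 * u ** 2,
                     kappa * (np.abs(u) - 0.5 * kappa))
    # Asymmetric quantile weighting |tau_i - 1{u_ij < 0}|
    weight = np.abs(taus[:, None] - (u < 0).astype(np.float64))
    # Sum over predicted quantiles, average over target quantiles
    return np.mean(np.sum(weight * huber / kappa, axis=0))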
Example #2
    history_length = args.history_length
    num_actions = args.num_actions

    Q = CNN(state_dim, num_actions, history_length, hidden=256, lr=1e-3)
    Q_target = CNNTargetNetwork(state_dim,
                                num_actions,
                                history_length,
                                hidden=256,
                                lr=1e-3)
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     discount_factor=0.99,
                     batch_size=64,
                     epsilon=0.05)
    agent.load("./models_carracing/dqn_agent.ckpt")

    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
Example #3
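# (this excerpt picks up at the tail end of the custom Network class used for
#  q_func below; the start of the class definition is truncated)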
                                     activation_fn=None)
        return out


env = gym.make('CartPole-v1')

num_steps = 100000
q_func = Network()
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)
loss = dqn.algorithms.TDErrorLoss(double_q=True, optimizer=None)
update_target = dqn.algorithms.HardUpdate()
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=50000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=2000)

agent.load('data/test')
agent.run(env, num_timesteps=num_steps, render=True)
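This example enables double_q=True in the TDErrorLoss. As a reminder of what that flag typically means, here is a minimal NumPy sketch of a double-Q n-step bootstrap target, where the online network selects the action and the target network evaluates it. The helper and argument names are illustrative assumptions, not the dqn package's internals.

import numpy as np

def double_q_target(rewards, next_q_online, next_q_target, done, gamma=0.99):
    """Double-Q bootstrap target for an n-step transition.

    rewards:       list of the n rewards collected along the transition
    next_q_online: (num_actions,) online-network Q-values at the n-th next state
    next_q_target: (num_actions,) target-network Q-values at the same state
    done:          whether the episode terminated within the n steps
    """
    # Discounted sum of the intermediate rewards
    n_step_return = sum(gamma ** i * r for i, r in enumerate(rewards))
    if done:
        return n_step_return
    # Online network picks the action, target network evaluates it
    best_action = int(np.argmax(next_q_online))
    return n_step_return + gamma ** len(rewards) * next_q_target[best_action]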
Example #4
stddev_scheduler = dqn.annealing_schedules.Linear(start=0,
                                                  end=0,
                                                  num_steps=num_steps)
action_selection = dqn.algorithms.GaussianRandomProcess(stddev_scheduler)
loss = dqn.algorithms.NAFLoss()  # TODO: ADD IN ALL OPTIONS HERE AND IN OTHER ONES
update_target = dqn.algorithms.SoftUpdate(tau=0.001)
alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=1000000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=1,
                 batch_size=100,
                 discount_factor=0.99,
                 replay_period=1,
                 replays_per_step=5,
                 update_with_replay=True,
                 update_target_network_frequency=1)

agent.load('data/naf')
agent.run(env, num_timesteps=num_steps, render=True)
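This snippet configures a NAFLoss with Gaussian exploration noise, i.e. Normalized Advantage Functions for continuous actions. The core idea is that the network parameterizes a quadratic advantage A(s, a) = -1/2 (a - mu(s))^T P(s) (a - mu(s)), so Q(s, a) = V(s) + A(s, a) is maximized at a = mu(s). A minimal NumPy sketch of that term; the function and argument names are illustrative assumptions, not the dqn package's API.

import numpy as np

def naf_q_value(action, mu, L, value):
    """Quadratic-advantage Q-value used by NAF.

    action: (action_dim,) action to evaluate
    mu:     (action_dim,) the network's greedy action for this state
    L:      (action_dim, action_dim) lower-triangular matrix output by the network
    value:  scalar state value V(s)
    """
    P = L @ L.T                       # positive semi-definite precision matrix
    diff = action - mu
    advantage = -0.5 * diff @ P @ diff
    return value + advantage          # Q(s, a) = V(s) + A(s, a)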
Example #5
import gym
import numpy as np

from dqn.agent import DQNAgent
from dqn.agent import EPISODES, EPISODE_LENGTH

environment_name = 'CartPole-v1'
environment = gym.make(environment_name)
environment.max_episode_steps = EPISODE_LENGTH

n_actions = environment.action_space.n
n_state_features = environment.observation_space.shape[0]

# Initialize DQN agent
agent = DQNAgent(n_state_features, n_actions, epsilon=0.0)

# Load pre-trained agent
agent.load(f'./models/{environment_name}.h5')

for episode in range(EPISODES):

    state = environment.reset()
    state = np.reshape(state, [1, n_state_features])

    for t in range(EPISODE_LENGTH):

        # Visualize environment
        environment.render()

        # Predict next action using NN Value Function Approximation
        action = agent.get_action(state)

        # Interact with the environment and observe new state and reward
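The example is cut off right after this comment. A minimal, hedged continuation of the evaluation loop using the classic gym step API; whether the original code also logs rewards or stores transitions at this point is unknown:

        next_state, reward, done, _ = environment.step(action)
        next_state = np.reshape(next_state, [1, n_state_features])
        state = next_state

        if done:
            break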
Example #6
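# (this excerpt picks up mid-way through what is presumably the
#  q_func = nn.DistributionalQNetwork(...) call; its opening arguments, along
#  with env and num_steps, are truncated)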
                                   env.action_space.n,
                                   n=51,
                                   noisy_net=True,
                                   dueling=[32])

# Action selection in Rainbow is done using noisy nets with no epsilon
epsilon_scheduler = dqn.annealing_schedules.Constant(0)
action_selection = dqn.algorithms.EpsilonGreedy(epsilon_scheduler)

loss = dqn.algorithms.CategoricalAlgorithm(double_q=True)
update_target = dqn.algorithms.HardUpdate()

alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=100000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)

agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=200)

agent.load('save_test/rainbow')
agent.run(env, num_timesteps=num_steps, render=True)
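The comment above notes that Rainbow explores through noisy nets rather than epsilon-greedy, which is why the epsilon schedule is Constant(0). As a rough illustration of where that exploration comes from, here is a minimal NumPy sketch of a factorized-Gaussian noisy linear layer in the style of Fortunato et al.; the class and parameter names are illustrative assumptions, not part of the dqn package.

import numpy as np

class NoisyLinearSketch:
    """Noisy linear layer: w = w_mu + w_sigma * outer(f(eps_out), f(eps_in))."""

    def __init__(self, in_dim, out_dim, sigma0=0.5):
        self.w_mu = np.random.uniform(-1, 1, (out_dim, in_dim)) / np.sqrt(in_dim)
        self.b_mu = np.random.uniform(-1, 1, out_dim) / np.sqrt(in_dim)
        self.w_sigma = np.full((out_dim, in_dim), sigma0 / np.sqrt(in_dim))
        self.b_sigma = np.full(out_dim, sigma0 / np.sqrt(in_dim))

    @staticmethod
    def _f(eps):
        return np.sign(eps) * np.sqrt(np.abs(eps))

    def __call__(self, x):
        # Fresh factorized noise is drawn on every forward pass, so acting
        # greedily w.r.t. the noisy Q-values is already exploratory.
        eps_in = self._f(np.random.randn(self.w_mu.shape[1]))
        eps_out = self._f(np.random.randn(self.w_mu.shape[0]))
        w = self.w_mu + self.w_sigma * np.outer(eps_out, eps_in)
        b = self.b_mu + self.b_sigma * eps_out
        return x @ w.T + b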