import os

import gym
import matplotlib.pyplot as plt
import torch
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Current usable device is:", device)

########################################
# Model hyperparameters
input_size = 4  # Size of state
output_size = 2  # Number of discrete actions
batch_size = 128
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
target_update = 10
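
# `DQN` is not defined in this snippet; below is a minimal sketch of a fully
# connected Q-network matching the 4-dimensional CartPole state and the
# 2 discrete actions. The layer widths are an assumption, not taken from
# the original code.
import torch.nn as nn
import torch.nn.functional as F

class DQN(nn.Module):
    """Minimal MLP mapping a state vector to one Q-value per action."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.out = nn.Linear(64, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.out(x)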

# Create the models
policy_net = DQN(input_size, output_size).to(device)
target_net = DQN(input_size, output_size).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
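
# `ReplayMemory` is likewise assumed rather than shown; here is a common
# deque-based sketch. The `Transition` field names are an assumption,
# following the standard PyTorch DQN tutorial.
import random
from collections import deque, namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class ReplayMemory:
    """Fixed-size cyclic buffer of transitions with uniform sampling."""

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)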

# Set up replay memory
memory = ReplayMemory(10000)

# Set up optimizer
optimizer = optim.Adam(policy_net.parameters())
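
# EPS_START, EPS_END, and EPS_DECAY suggest the exponentially decaying
# epsilon-greedy schedule from the standard PyTorch DQN tutorial. The sketch
# below selects actions under that assumption; the global `steps_done`
# counter is hypothetical, not part of the original snippet.
import math

steps_done = 0  # hypothetical counter of environment steps taken so far

def select_action(state):
    """Pick a greedy action with probability 1 - eps, else a random one."""
    global steps_done
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1.0 * steps_done / EPS_DECAY)
    steps_done += 1
    if random.random() > eps_threshold:
        with torch.no_grad():
            # Exploit: action with the highest predicted Q-value
            return policy_net(state).max(1)[1].view(1, 1)
    # Explore: uniformly random action
    return torch.tensor([[random.randrange(output_size)]],
                        device=device, dtype=torch.long)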

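# The optimization step is also not shown in this snippet. This sketch
# follows the standard DQN update from the PyTorch tutorial, using the
# Transition/ReplayMemory sketch above: Huber loss between Q(s, a) and the
# bootstrapped target r + GAMMA * max_a' Q_target(s', a').
def optimize_model():
    """One gradient step on a minibatch sampled from replay memory."""
    if len(memory) < batch_size:
        return
    transitions = memory.sample(batch_size)
    batch = Transition(*zip(*transitions))

    # Mask of transitions whose next state is non-terminal
    non_final_mask = torch.tensor(
        tuple(s is not None for s in batch.next_state),
        device=device, dtype=torch.bool)
    non_final_next_states = torch.cat(
        [s for s in batch.next_state if s is not None])

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Q(s, a) for the actions that were actually taken
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # max_a' Q_target(s', a'); zero for terminal next states
    next_state_values = torch.zeros(batch_size, device=device)
    with torch.no_grad():
        next_state_values[non_final_mask] = \
            target_net(non_final_next_states).max(1)[0]
    expected_state_action_values = reward_batch + GAMMA * next_state_values

    loss = F.smooth_l1_loss(state_action_values,
                            expected_state_action_values.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
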
########################################
# Start training
num_episodes = 500
ckpt_dir = "DDDQN_CartPoleV1_obs_checkpoints/"
save_ckpt_interval = 100
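
# The episode loop itself is cut off in this snippet. Below is a skeleton of
# how the settings above are commonly wired together, using the
# select_action and optimize_model sketches; the 'CartPole-v1' environment
# and the saved-checkpoint layout are assumptions (the layout mirrors the
# load_checkpoint function further below).
env = gym.make('CartPole-v1')

for i_episode in range(num_episodes):
    obs = env.reset()
    state = torch.tensor([obs], dtype=torch.float32, device=device)
    done = False
    while not done:
        action = select_action(state)
        obs, reward, done, _ = env.step(action.item())
        next_state = (None if done else
                      torch.tensor([obs], dtype=torch.float32, device=device))
        memory.push(state, action, next_state,
                    torch.tensor([reward], device=device))
        state = next_state
        optimize_model()

    # Sync the target network every `target_update` episodes
    if i_episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict())

    # Save a checkpoint every `save_ckpt_interval` episodes
    if (i_episode + 1) % save_ckpt_interval == 0:
        os.makedirs(ckpt_dir, exist_ok=True)
        torch.save({"policy_net": policy_net.state_dict(),
                    "target_net": target_net.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "i_episode": i_episode,
                    "learning_rate": 1e-3},  # assumed; Adam's default lr
                   os.path.join(ckpt_dir,
                                "ckpt_eps%d.pt" % (i_episode + 1)))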

########################################
# Example 2: alternative setup with SGD and configurable sizes
# Turn on pyplot's interactive mode
# VERY IMPORTANT: otherwise the training-stats plot would halt execution
plt.ion()

# Create the OpenAI Gym environment; `env_name` and `is_unwrapped` are
# assumed to be defined earlier in the full script
env = gym.make(env_name)
if is_unwrapped:
    env = env.unwrapped

# Get device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Current usable device is:", device)

# Create the models
policy_net = DQN(input_size, output_size).to(device)
target_net = DQN(input_size, output_size).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Set up replay memory; `replaybuffer_size` is assumed to be defined
# alongside the other hyperparameters in the full script
memory = ReplayMemory(replaybuffer_size)

# Set up optimizer - this minimal variant uses plain SGD instead of Adam;
# `learning_rate` is assumed to be defined with the other hyperparameters
# optimizer = optim.Adam(policy_net.parameters())
optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)

###################################################################
# Start training

# Dictionary for extra training information to save to checkpoints
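# The dictionary itself is cut off in this snippet. Judging from
# load_checkpoint below (which returns whatever keys remain after the
# networks and optimizer are popped), it presumably collects per-episode
# statistics. A hypothetical example of its shape; the key names are
# assumptions:
training_info = {
    "episode_durations": [],  # steps survived in each episode
    "episode_rewards": [],    # total reward collected in each episode
}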

###################################################################
# Example 3: restoring a training checkpoint

def load_checkpoint(file_dir,
                    i_episode,
                    input_size,
                    output_size,
                    device='cuda'):
    """Restore the policy/target networks, optimizer, and any extra
    training information saved at episode `i_episode` in `file_dir`."""
    checkpoint = torch.load(os.path.join(file_dir,
                                         "ckpt_eps%d.pt" % i_episode))

    policy_net = DQN(input_size, output_size).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    policy_net.train()

    target_net = DQN(input_size, output_size).to(device)
    target_net.load_state_dict(checkpoint["target_net"])
    target_net.eval()

    learning_rate = checkpoint["learning_rate"]

    # optimizer = optim.Adam(policy_net.parameters())
    optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)
    optimizer.load_state_dict(checkpoint["optimizer"])

    # Drop the restored objects so that only the extra training
    # information remains in the returned dictionary
    checkpoint.pop("policy_net")
    checkpoint.pop("target_net")
    checkpoint.pop("optimizer")
    checkpoint.pop("i_episode")
    checkpoint.pop("learning_rate")

    return policy_net, target_net, optimizer, checkpoint
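
# load_checkpoint implies a matching save routine; the sketch below is
# reconstructed from the keys it reads. The **extra_info signature is an
# assumption for passing through the extra training information.
def save_checkpoint(file_dir, policy_net, target_net, optimizer,
                    i_episode, learning_rate, **extra_info):
    """Save everything load_checkpoint expects, plus extra training info."""
    checkpoint = {
        "policy_net": policy_net.state_dict(),
        "target_net": target_net.state_dict(),
        "optimizer": optimizer.state_dict(),
        "i_episode": i_episode,
        "learning_rate": learning_rate,
    }
    checkpoint.update(extra_info)  # e.g. episode_durations, episode_rewards
    os.makedirs(file_dir, exist_ok=True)
    torch.save(checkpoint,
               os.path.join(file_dir, "ckpt_eps%d.pt" % i_episode))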