import os

import torch
import torch.optim as optim


def load_checkpoint(file_dir,
                    i_episode,
                    input_size,
                    output_size,
                    device='cuda'):
    # map_location ensures the tensors are restored onto the requested
    # device even if the checkpoint was written from a different one.
    checkpoint = torch.load(os.path.join(file_dir,
                                         "ckpt_eps%d.pt" % i_episode),
                            map_location=device)

    # Rebuild the online (policy) network and restore its weights; it goes
    # back into train mode so learning can resume.
    policy_net = DQN(input_size, output_size).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    policy_net.train()

    # Rebuild the target network; it is only ever used for inference.
    target_net = DQN(input_size, output_size).to(device)
    target_net.load_state_dict(checkpoint["target_net"])
    target_net.eval()

    learning_rate = checkpoint["learning_rate"]

    # Recreate the optimizer, then restore its saved state (step counts,
    # momentum buffers, and the stored learning rate).
    # optimizer = optim.Adam(policy_net.parameters())
    optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)
    optimizer.load_state_dict(checkpoint["optimizer"])

    checkpoint.pop("policy_net")
    checkpoint.pop("target_net")
    checkpoint.pop("optimizer")
    checkpoint.pop("i_episode")
    checkpoint.pop("learning_rate")

    return policy_net, target_net, optimizer, checkpoint
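
# A matching save_checkpoint is not shown in this example. The sketch below
# is an assumption reconstructed from the keys load_checkpoint reads
# ("policy_net", "target_net", "optimizer", "i_episode", "learning_rate");
# the extra_info argument is a hypothetical stand-in for the additional
# training information that load_checkpoint returns as the leftover dict.
def save_checkpoint(file_dir, i_episode, policy_net, target_net, optimizer,
                    learning_rate, extra_info=None):
    checkpoint = {
        "policy_net": policy_net.state_dict(),
        "target_net": target_net.state_dict(),
        "optimizer": optimizer.state_dict(),
        "i_episode": i_episode,
        "learning_rate": learning_rate,
    }
    # Merge any extra bookkeeping (e.g. reward and loss histories).
    if extra_info is not None:
        checkpoint.update(extra_info)
    os.makedirs(file_dir, exist_ok=True)
    torch.save(checkpoint, os.path.join(file_dir, "ckpt_eps%d.pt" % i_episode))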
Example #2
# Get device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Current usable device is: ", device)

# Create the models: the target network starts as an exact copy of the
# policy network and stays in eval mode (it is never trained directly).
policy_net = DQN(input_size, output_size).to(device)
target_net = DQN(input_size, output_size).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
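
# ReplayMemory is also not defined in this excerpt. A minimal sketch that
# fits its usage here, following the common deque-plus-namedtuple pattern
# from the standard PyTorch DQN tutorial, is given below; the Transition
# field names are an assumption.
import random
from collections import deque, namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):
    def __init__(self, capacity):
        # The deque silently drops the oldest transition once full.
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)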

# Set up replay memory (replaybuffer_size is assumed to be defined earlier
# in the original script; the re-initialization below uses 10000)
memory = ReplayMemory(replaybuffer_size)

# Set up optimizer - minimal SGD variant (learning_rate is likewise assumed
# to be defined earlier; the Adam alternative is kept commented out)
# optimizer = optim.Adam(policy_net.parameters())
optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)

###################################################################
# Start training

# Dictionary of extra training information to save with each checkpoint;
# these are the leftover keys that load_checkpoint returns as a dict.
training_info = {
    "memory": memory,
    "episode reward": [],
    "training loss": [],
    "episode loss": [],
    "max reward achieved": 0,
    "past 100 episodes mean reward": 0,
    "max TD loss recorded": 0,
    "max episode loss recorded": 0
}
# Epsilon-greedy exploration schedule and target-network refresh period
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 200
target_update = 10
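
# These constants are normally consumed by an action-selection helper that
# is not part of this excerpt. select_action below is a sketch of the usual
# pattern, not the example's own code: epsilon anneals exponentially from
# EPS_START to EPS_END with time constant EPS_DECAY, while target_update is
# the period (in episodes) at which target_net is refreshed from policy_net.
import math
import random

steps_done = 0

def select_action(state, policy_net, output_size, device):
    global steps_done
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if random.random() > eps_threshold:
        with torch.no_grad():
            # Exploit: pick the action with the highest predicted Q-value.
            return policy_net(state).max(1)[1].view(1, 1)
    # Explore: pick a uniformly random action.
    return torch.tensor([[random.randrange(output_size)]],
                        device=device, dtype=torch.long)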

# Create the models (re-initialized here, superseding the minimal SGD
# setup above)
policy_net = DQN(input_size, output_size).to(device)
target_net = DQN(input_size, output_size).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Set up replay memory
memory = ReplayMemory(10000)

# Set up optimizer
optimizer = optim.Adam(policy_net.parameters())
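
# The TD optimization step itself is not part of this excerpt. The sketch
# below shows one plausible shape for it, using a double-DQN target (the
# "DDDQN" checkpoint directory below suggests a double/dueling variant) and
# the Transition tuple sketched earlier; batch_size and gamma are assumed
# hyperparameters, and the returned value matches the TD-loss bookkeeping
# kept in training_info.
batch_size = 128  # assumption
gamma = 0.999     # assumption

def optimize_model():
    if len(memory) < batch_size:
        return None
    transitions = memory.sample(batch_size)
    batch = Transition(*zip(*transitions))

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Q(s, a) from the online network for the actions actually taken.
    q_values = policy_net(state_batch).gather(1, action_batch)

    # Bootstrap only from non-terminal next states (None marks terminal).
    non_final_mask = torch.tensor(
        tuple(s is not None for s in batch.next_state),
        device=device, dtype=torch.bool)
    next_q = torch.zeros(batch_size, device=device)
    if non_final_mask.any():
        non_final_next_states = torch.cat(
            [s for s in batch.next_state if s is not None])
        with torch.no_grad():
            # Double DQN: the online network chooses the next action,
            # the target network evaluates it.
            next_actions = policy_net(non_final_next_states).max(1)[1]
            next_q[non_final_mask] = target_net(non_final_next_states) \
                .gather(1, next_actions.unsqueeze(1)).squeeze(1)
    expected_q = reward_batch + gamma * next_q

    loss = F.smooth_l1_loss(q_values, expected_q.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()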

########################################
# Start training
num_episodes = 500
ckpt_dir = "DDDQN_CartPoleV1_obs_checkpoints/"
save_ckpt_interval = 100

episode_durations = []
episode_loss = []
i_episode = 0

policy_net.train()

while True:
    # Every save_ckpt_interval episodes, check whether any checkpoint exists.