def load_checkpoint(file_dir,
                    i_episode,
                    input_size,
                    output_size,
                    device='cuda'):
    """Restore networks, optimizer and extra training info from a checkpoint.

    The checkpoint is read from ``<file_dir>/ckpt_eps<i_episode>.pt``.

    Args:
        file_dir: Directory containing the checkpoint file.
        i_episode: Episode number used to build the checkpoint file name.
        input_size: First argument passed to the ``DQN`` constructor.
        output_size: Second argument passed to the ``DQN`` constructor.
        device: Device both networks are moved to before their weights are
            loaded.

    Returns:
        Tuple ``(policy_net, target_net, optimizer, checkpoint)``.
        ``policy_net`` is in training mode, ``target_net`` is in eval mode,
        and ``checkpoint`` is the loaded dict with the keys ``"policy_net"``,
        ``"target_net"``, ``"optimizer"``, ``"i_episode"`` and
        ``"learning_rate"`` removed, i.e. whatever else was stored in it.

    Raises:
        KeyError: If one of the keys listed above is missing from the file.
    """
    ckpt_path = os.path.join(file_dir, "ckpt_eps%d.pt" % i_episode)

    # Without map_location, torch.load tries to restore every tensor onto the
    # device it was saved from, which fails for CUDA-saved checkpoints on a
    # CPU-only machine. Remap to CPU only in that situation so behaviour is
    # unchanged whenever CUDA is available.
    map_location = None if torch.cuda.is_available() else torch.device('cpu')

    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint = torch.load(ckpt_path, map_location=map_location)

    policy_net = DQN(input_size, output_size).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    policy_net.train()

    target_net = DQN(input_size, output_size).to(device)
    target_net.load_state_dict(checkpoint["target_net"])
    target_net.eval()

    # The optimizer is built with the stored learning rate first; its full
    # saved state is then loaded on top.
    optimizer = optim.SGD(policy_net.parameters(),
                          lr=checkpoint["learning_rate"])
    optimizer.load_state_dict(checkpoint["optimizer"])

    # Strip the entries consumed above so only the extra information that was
    # stored alongside them is handed back to the caller.
    for consumed_key in ("policy_net", "target_net", "optimizer",
                         "i_episode", "learning_rate"):
        checkpoint.pop(consumed_key)

    return policy_net, target_net, optimizer, checkpoint
# Example #2
0
# Turn on interactive plotting (plt is presumably matplotlib.pyplot)
plt.ion()

# Build the OpenAI gym environment; use the unwrapped one when requested
env = gym.make(env_name)
env = env.unwrapped if is_unwrapped else env

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Current usable device is: ", device)

# Instantiate the policy network first, then the target network, both on
# the selected device; the target starts as an exact copy of the policy
# weights and is kept in eval mode.
policy_net, target_net = (
    DQN(input_size, output_size).to(device) for _ in range(2)
)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Replay memory sized by replaybuffer_size
memory = ReplayMemory(replaybuffer_size)

# Plain SGD over the policy network's parameters
# (alternative: optim.Adam(policy_net.parameters()))
optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)

###################################################################
# Start training

# Dictionary for extra training information to save to checkpoints
training_info = {
    "memory": memory,
    "episode reward": [],