def load_checkpoint(file_dir, i_episode, input_size, output_size, device='cuda'):
    """Restore the networks, optimizer and extra training info from a checkpoint.

    Reads ``ckpt_eps<i_episode>.pt`` from ``file_dir``, rebuilds the policy and
    target ``DQN`` models on ``device`` and recreates the SGD optimizer with
    the learning rate stored in the checkpoint.

    Args:
        file_dir: Directory containing the checkpoint file.
        i_episode: Episode number used to build the checkpoint file name.
        input_size: First argument forwarded to the ``DQN`` constructor.
        output_size: Second argument forwarded to the ``DQN`` constructor.
        device: Device the checkpoint tensors and both networks are placed on.

    Returns:
        A tuple ``(policy_net, target_net, optimizer, checkpoint)``.
        ``policy_net`` is in training mode and ``target_net`` in eval mode.
        ``checkpoint`` is the loaded dict with the ``"policy_net"``,
        ``"target_net"``, ``"optimizer"``, ``"i_episode"`` and
        ``"learning_rate"`` entries removed, i.e. only the remaining
        entries are left.

    Raises:
        KeyError: If the checkpoint lacks one of the entries listed above.
    """
    checkpoint_path = os.path.join(file_dir, "ckpt_eps%d.pt" % i_episode)

    # map_location remaps the stored tensors onto the requested device, so a
    # checkpoint written on a GPU can also be restored on a CPU-only machine.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # The policy network keeps learning, hence training mode.
    policy_net = DQN(input_size, output_size).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    policy_net.train()

    # The target network is switched to inference mode.
    target_net = DQN(input_size, output_size).to(device)
    target_net.load_state_dict(checkpoint["target_net"])
    target_net.eval()

    # NOTE(review): the optimizer class has to be compatible with the state
    # stored under checkpoint["optimizer"] -- presumably SGD; confirm against
    # the code that writes the checkpoint.
    learning_rate = checkpoint["learning_rate"]
    optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)
    optimizer.load_state_dict(checkpoint["optimizer"])

    # Drop the entries that were consumed above so that the returned dict
    # only carries the remaining entries.
    for consumed_key in (
        "policy_net",
        "target_net",
        "optimizer",
        "i_episode",
        "learning_rate",
    ):
        checkpoint.pop(consumed_key)

    return policy_net, target_net, optimizer, checkpoint
plt.ion() # Create OpenAI gym environment env = gym.make(env_name) if is_unwrapped: env = env.unwrapped # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("Current usable device is: ", device) # Create the models policy_net = DQN(input_size, output_size).to(device) target_net = DQN(input_size, output_size).to(device) target_net.load_state_dict(policy_net.state_dict()) target_net.eval() # Set up replay memory memory = ReplayMemory(replaybuffer_size) # Set up optimizer - Minimal # optimizer = optim.Adam(policy_net.parameters()) optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate) ################################################################### # Start training # Dictionary for extra training information to save to checkpoints training_info = { "memory": memory, "episode reward": [],