def load_checkpoint(file_dir, i_epoch, layer_sizes, input_size, device='cuda'):
    """Restore the policy network and its optimizer from a saved checkpoint.

    The checkpoint is read from ``<file_dir>/ckpt_eps<i_epoch>.pt``.

    Args:
        file_dir: Directory that contains the checkpoint file.
        i_epoch: Epoch number used to build the checkpoint file name.
        layer_sizes: Passed straight to the ``PolicyNet`` constructor.
        input_size: Not used by this function; kept in the signature so
            existing callers keep working.
        device: Device the checkpoint is mapped to and the network is moved
            to.

    Returns:
        A ``(policy_net, policynet_optim, checkpoint)`` tuple, where
        ``checkpoint`` is the loaded dictionary with the ``"policy_net"``,
        ``"policynet_optim"``, ``"i_epoch"`` and ``"policy_lr"`` entries
        removed.

    Raises:
        KeyError: If any of the four entries listed above is missing from
            the checkpoint.
    """
    # NOTE: torch.load unpickles the file, so only load trusted checkpoints.
    ckpt_path = os.path.join(file_dir, "ckpt_eps%d.pt" % i_epoch)
    checkpoint = torch.load(ckpt_path, map_location=device)

    # Rebuild the network, restore its weights and switch it to training mode.
    policy_net = PolicyNet(layer_sizes).to(device)
    policy_net.load_state_dict(checkpoint.pop("policy_net"))
    policy_net.train()

    # Recreate the optimizer with the stored learning rate before restoring
    # its saved state.
    stored_lr = checkpoint.pop("policy_lr")
    policynet_optim = optim.Adam(policy_net.parameters(), lr=stored_lr)
    policynet_optim.load_state_dict(checkpoint.pop("policynet_optim"))

    # The epoch counter is discarded; whatever is left in the dictionary is
    # handed back to the caller unchanged.
    checkpoint.pop("i_epoch")

    return policy_net, policynet_optim, checkpoint
# Turn on pyplot's interactive mode # VERY IMPORTANT because otherwise training stats plot will hault plt.ion() # Create OpenAI gym environment env = gym.make(env_name) if is_unwrapped: env = env.unwrapped # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("Current usable device is: ", device) # Create the model policy_net = PolicyNet(layer_sizes).to(device) # Policy network # Set up memory memory = Memory(capacity, GAMMA, LAMBDA, device) # Set up optimizer policynet_optimizer = optim.Adam(policy_net.parameters()) ################################################################### # Start training # Dictionary for extra training information to save to checkpoints training_info = { "epoch mean durations": [], "epoch mean rewards": [], "max reward achieved": 0,