import os
from itertools import count

import torch
import torch.optim as optim


def load_checkpoint(file_dir, i_epoch, layer_sizes, input_size, device='cuda'):
    # Load the checkpoint saved at the end of epoch i_epoch
    checkpoint = torch.load(os.path.join(file_dir, "ckpt_eps%d.pt" % i_epoch),
                            map_location=device)

    # Rebuild the policy network and restore its weights; note that
    # input_size is unused here, since PolicyNet is constructed from
    # layer_sizes alone
    policy_net = PolicyNet(layer_sizes).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    # Leave the network in training mode so that optimization can resume
    policy_net.train()

    # Recreate the optimizer at the saved learning rate, then restore its
    # internal state (Adam's step counts and moment estimates)
    policy_lr = checkpoint["policy_lr"]
    policynet_optim = optim.Adam(policy_net.parameters(), lr=policy_lr)
    policynet_optim.load_state_dict(checkpoint["policynet_optim"])

    # Drop the entries consumed above so that only auxiliary training
    # statistics remain in the returned dict
    for key in ("policy_net", "policynet_optim", "i_epoch", "policy_lr"):
        checkpoint.pop(key)

    return policy_net, policynet_optim, checkpoint
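
# For reference, a minimal sketch of the matching save side. The name
# save_checkpoint and the **stats parameter are assumptions; only the
# dictionary keys are taken from what load_checkpoint above actually reads.
def save_checkpoint(file_dir, i_epoch, policy_net, policynet_optim, policy_lr,
                    **stats):
    checkpoint = {
        "policy_net": policy_net.state_dict(),
        "policynet_optim": policynet_optim.state_dict(),
        "i_epoch": i_epoch,
        "policy_lr": policy_lr,
    }
    # Any extra keyword arguments (e.g. reward histories) are stored alongside
    # and come back as the third return value of load_checkpoint
    checkpoint.update(stats)
    torch.save(checkpoint, os.path.join(file_dir, "ckpt_eps%d.pt" % i_epoch))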

# NOTE: the function header enclosing the sampling loop below is not shown in
# the source; this signature is a hedged reconstruction inferred from the
# names the loop body uses.
def sample_batch(env, policy_net, memory, batch_size, device,
                 render=False, render_each_episode=False,
                 finished_rendering_this_epoch=False):
    episode_rewards = []

    for i_episode in range(batch_size):

        # Keep track of the running reward
        running_reward = 0

        # Initialize the environment and state
        current_state = env.reset()

        # Store the initial state in memory
        memory.set_initial_state(current_state)

        for t in count():
            # Make sure the policy net is in training mode
            policy_net.train()

            # Sample an action and its log-probability given the current state
            # (cast to float32 to match the network's parameter dtype)
            action, log_prob = policy_net(
                torch.tensor([current_state], dtype=torch.float32,
                             device=device))
            log_prob = log_prob.squeeze()

            # Interact with the environment
            next_state, reward, done, _ = env.step(action.item())
            running_reward += reward

            # Render either every episode, or only until one episode has been
            # rendered this epoch
            if render and (render_each_episode or
                           not finished_rendering_this_epoch):
                env.render()