def main():
    """Parse a run-config filename, build a CrossEntropyAgent, then either
    train it (saving weights and plotting the reward curve) or load a
    checkpoint and evaluate it in the configured gym environment.
    """
    parser = argparse.ArgumentParser(
        description="Run Extended Q-Learning with given config")
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        metavar="",
        required=True,
        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    # Load the JSON run configuration from ./configs/<name>.
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    # Fixed seed for reproducible runs.
    np.random.seed(101)
    agent = CrossEntropyAgent(config)

    if config["is_training"]:
        total_reward = session.train(agent, config)
        agent.save()
        helper.plot_scores(total_reward)
    else:
        env = gym.make(config["env_name"])
        agent.load()
        session.evaluate(agent, env, config)
        # env.close() already shuts down the wrapped environment; the
        # previous extra env.env.close() bypassed the wrapper and closed
        # the same underlying env twice.
        env.close()
def main():
    """Parse a run-config filename and drive cross-entropy training or
    evaluation against a Unity environment.

    Training saves the elite network's state dict under the configured
    checkpoint directory; evaluation loads the latest checkpoint into a
    freshly constructed network and runs 50 test episodes.
    """
    parser = argparse.ArgumentParser(description="Run Extended Q-Learning with given config")
    parser.add_argument("-c",
                        "--config",
                        type=str,
                        metavar="",
                        required=True,
                        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    # load config files
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    env = UnityEnvironment(file_name=os.path.join(*config["env_path"]))
    normalizer = OnlineNormalizer(config["network"]["observation_size"])

    # Both branches address the same checkpoint location — compute it once.
    checkpoint_dir = os.path.join(".", *config["checkpoint_dir"], config["env_name"])

    if config["run_training"]:
        elite_net = session.train(env, normalizer, config)
        utils.save_state_dict(checkpoint_dir, elite_net.state_dict())
    else:
        # Instantiate the network class named in the config (looked up on the
        # project's `network` module) and move it to the configured device.
        trained_net = getattr(network, config["network"]["type"])(config["network"]).to(torch.device(config["device"]))
        trained_net.load_state_dict(utils.load_latest_available_state_dict(os.path.join(checkpoint_dir, "*")))
        session.evaluate(trained_net, env, normalizer, config, num_test_runs=50)

    env.close()
# Example #3 (score: 0)
def main():
    """Parse a run-config filename and drive DDPG training or evaluation
    against a Unity environment.

    Training saves the actor and critic state dicts under the configured
    checkpoint directory; evaluation loads the latest actor/critic
    checkpoints back into the agent and runs one test episode.
    """
    parser = argparse.ArgumentParser(
        description="Run Extended Q-Learning with given config")
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        metavar="",
        required=True,
        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    # load config files
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    env = UnityEnvironment(file_name=os.path.join(*config["env_path"]))

    # Ornstein-Uhlenbeck exploration noise and the experience replay buffer
    # are injected into the agent rather than created inside it.
    noise = OrnsteinUhlenbeckNoise(config["n_actions"], config["mu"],
                                   config["theta"], config["sigma"],
                                   config["seed"])
    replay_buffer = ReplayBuffer(config["buffer_size"], config["device"],
                                 config["seed"])

    agent = DDPGAgent(config, noise, replay_buffer)

    # Both branches address the same checkpoint location — compute it once.
    checkpoint_dir = os.path.join(".", *config["checkpoint_dir"],
                                  config["env_name"])

    if config["run_training"]:
        session.train(agent, env, config)
        utils.save_state_dict(os.path.join(checkpoint_dir, "actor"),
                              agent.actor.state_dict())
        utils.save_state_dict(os.path.join(checkpoint_dir, "critic"),
                              agent.critic.state_dict())
    else:
        agent.actor.load_state_dict(
            utils.load_latest_available_state_dict(
                os.path.join(checkpoint_dir, "actor", "*")))
        agent.critic.load_state_dict(
            utils.load_latest_available_state_dict(
                os.path.join(checkpoint_dir, "critic", "*")))
        session.evaluate(agent, env, num_test_runs=1)

    env.close()
# Example #4 (score: 0)
def main():
    """Run TD3 (twin delayed DDPG) training or evaluation for the gym
    environment named in the chosen JSON config under ./configs.
    """
    parser = argparse.ArgumentParser(
        description=
        "Run twin delayed deep deterministic policy gradient with given config"
    )
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        metavar="",
        required=True,
        help="Config file name - file must be available as .json in ./configs")
    args = parser.parse_args()

    # Read the run configuration from ./configs/<name>.
    config_path = os.path.join(".", "configs", args.config)
    with open(config_path, "r") as read_file:
        config = json.load(read_file)

    env = gym.make(config["env_name"])

    # Seed every RNG in play (env, torch, numpy, stdlib) for reproducibility.
    seed = config["seed"]
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Derive the agent's dimensions from the environment's spaces.
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])
    agent = TwinDelayedDDPG(config, state_dim, action_dim, max_action)

    if config["run_training"]:
        session.train(agent, env, config)
        agent.save()
    else:
        # Optionally wrap the env with a monitor so evaluation is recorded.
        if config["create_video"]:
            env = utils.set_up_monitoring(env, config)
        agent.load()
        session.evaluate(agent, env, config["eval_episodes"])
    env.close()
# Example #5 (score: 0)
def main():
    """Parse a run-config filename and launch GAN training on Atari frames.

    Builds the batch handler and the AtariGan from the config, draws one
    fixed batch of 16 latent noise vectors (shape 16x100x1x1, standard
    normal) used to visualise generator progress, and hands everything to
    the training session.
    """
    parser = argparse.ArgumentParser(
        description="Run Extended Q-Learning with given config")
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        metavar="",
        required=True,
        help="Config file name - file must be available as .json in ./configs")

    args = parser.parse_args()

    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    batch_handler = BatchHandler(config)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # torch.randn is the modern equivalent of the legacy
    # FloatTensor(...).normal_(0, 1) idiom: same default CPU generator,
    # same standard-normal draw, then moved to the target device.
    generator_noise_input = torch.randn(16, 100, 1, 1).to(device)
    atari_gan = AtariGan(config)
    session.train(atari_gan, generator_noise_input, batch_handler, config)