Example #1
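The listing begins inside the script's main block, so its imports and argument parser are not shown. A minimal sketch of the assumed preamble follows; the gym_pybullet_drones module paths, the parser details, and the older (pre-2.0) Ray/RLlib agents API are inferred from the names the excerpt uses and may differ by version:

import argparse
import gym
import ray
from ray.tune import register_env
from ray.rllib.agents import ppo  # pre-2.0 RLlib API (PPOTrainer, DEFAULT_CONFIG)
from stable_baselines3 import A2C
from stable_baselines3.a2c import MlpPolicy
from stable_baselines3.common.env_checker import check_env

from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary  # assumed path
from gym_pybullet_drones.utils.Logger import Logger  # assumed path

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Learn to take off with stable-baselines3 A2C or RLlib PPO")
    parser.add_argument("--rllib", action="store_true", help="train with RLlib PPO instead of stable-baselines3 A2C")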
    ARGS = parser.parse_args()

    #### Check the environment's spaces ################################################################
    env = gym.make("takeoff-aviary-v0")
    print("[INFO] Action space:", env.action_space)
    print("[INFO] Observation space:", env.observation_space)
    check_env(env, warn=True, skip_render_check=True)

    #### Train the model ###############################################################################
    if not ARGS.rllib:
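        # Default path: a stable-baselines3 A2C agent with a feedforward (MLP) policy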
        model = A2C(MlpPolicy, env, verbose=1)
        model.learn(total_timesteps=1000)  # e.g. 500000
    else:
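        # RLlib path: restart Ray to clear any previous session, then train PPO with two rollout workers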
        ray.shutdown()
        ray.init(ignore_reinit_error=True)
        register_env("takeoff-aviary-v0", lambda _: TakeoffAviary())
        config = ppo.DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["env"] = "takeoff-aviary-v0"
        agent = ppo.PPOTrainer(config)
        for i in range(10):  # e.g. 100
            results = agent.train()
            print("[INFO] {:d}: episode_reward max {:f} min {:f} mean {:f}".format(i, \
                    results["episode_reward_max"], results["episode_reward_min"], results["episode_reward_mean"]))
        policy = agent.get_policy()
        print(policy.model.base_model.summary())
        ray.shutdown()

    #### Show (and record a video of) the model's performance ##########################################
    env = TakeoffAviary(gui=True, record=False)
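    # Logging frequency: simulation (physics) frequency divided by the number of physics steps aggregated per control step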
    logger = Logger(logging_freq_hz=int(env.SIM_FREQ / env.AGGR_PHY_STEPS),