# Example 1
        # NOTE(review): the matching `if ACT==...` branch precedes this chunk --
        # this is the tail of a conditional that seeds a zero action per drone,
        # with the action-vector length determined by the selected ActionType
        # (4 entries here, 3 for PID).
        action = {i: np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)}
    elif ACT==ActionType.PID:
         # PID actions are 3-dimensional (e.g. a target position) -- TODO confirm
         action = {i: np.array([0, 0, 0]) for i in range(NUM_DRONES)}
    else:
        # Unsupported action type: report and abort the script.
        print("[ERROR] unknown ActionType")
        exit()
    start = time.time()
    # Roll the trained policies out for up to 6 seconds of simulated time.
    for step in range(6*int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS)):
        #### Deploy the policies ###################################
        # Each policy is fed the PEER drone's previous action first --
        # counterintuitive ordering, check params.json.
        policy_out = {}
        policy_out[0] = policy0.compute_single_action(np.hstack([action[1], obs[1], obs[0]]))
        policy_out[1] = policy1.compute_single_action(np.hstack([action[0], obs[0], obs[1]]))
        # compute_single_action returns a tuple; index 0 is the action itself.
        action = {0: policy_out[0][0], 1: policy_out[1][0]}
        obs, reward, done, info = test_env.step(action)
        test_env.render()
        if OBS==ObservationType.KIN:
            # Log kinematic state per drone; quaternion slot is zero-padded and
            # the action is resized to 4 entries to fit the logger's layout.
            for drone in range(NUM_DRONES):
                logger.log(drone=drone,
                           timestamp=step/test_env.SIM_FREQ,
                           state=np.hstack([obs[drone][0:3], np.zeros(4), obs[drone][3:15], np.resize(action[drone], (4))]),
                           control=np.zeros(12)
                           )
        # Keep the rollout synchronized with wall-clock time.
        sync(np.floor(step*test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP)
        # (Optionally reset on done["__all__"] here to halt episodes early.)
    test_env.close()
    logger.save_as_csv("ma") # Optional CSV save
    logger.plot()

    #### Shut down Ray #########################################
    ray.shutdown()
# Example 2
    # Inspect the trained policy's network heads before the rollout.
    print(policy1.model.action_model)
    print(policy1.model.value_model)

    #### Create test environment ########################################################################
    test_env = FlockAviary(num_drones=ARGS.num_drones,
                           gui=True,
                           record=True,
                           obstacles=True)
    obs = test_env.reset()
    # Seed every drone with a 4-entry zero action for the first policy query.
    action = {drone: np.array([0, 0, 0, 0]) for drone in range(ARGS.num_drones)}
    start = time.time()
    # Roll the policies out for 10 seconds of simulated time.
    for step in range(10 * test_env.SIM_FREQ):

        #### Deploy the policies ###########################################################################
        # Each policy sees its own obs, the peer's obs, then the peer's last action.
        policy_out = {
            0: policy0.compute_single_action(
                np.hstack([obs[0], obs[1], action[1]])),
            1: policy1.compute_single_action(
                np.hstack([obs[1], obs[0], action[0]])),
        }
        # compute_single_action returns a tuple; index 0 is the action itself.
        action = {0: policy_out[0][0], 1: policy_out[1][0]}
        obs, reward, done, info = test_env.step(action)
        test_env.render()
        # Keep the rollout synchronized with wall-clock time.
        sync(step, start, test_env.TIMESTEP)
        if done["__all__"]:
            obs = test_env.reset()
    test_env.close()

    #### Shut down Ray #################################################################################
    ray.shutdown()