                num_drones=NUM_DRONES
                )
action = {i: action_space.sample() for i in range(NUM_DRONES)}
policy0 = agent.get_policy("pol0")
policy1 = agent.get_policy("pol1")
START = time.time()
for i in range(0, int(test_env.EPISODE_LEN_SEC * test_env.SIM_FREQ), test_env.AGGR_PHY_STEPS):

    #### Step the simulation ###################################
    act0 = policy0.compute_actions(obs[0].reshape(1, -1))[0][0]
    act1 = policy1.compute_actions(obs[1].reshape(1, -1))[0][0]
    action = {0: act0, 1: act1}
    obs, reward, done, info = test_env.step(action)

    #### Printout ##############################################
    if i % test_env.SIM_FREQ == 0:  # every 1 second
        test_env.render()

    #### Sync the simulation ###################################
    if test_env.GUI:
        sync(i, START, test_env.TIMESTEP)
    if done['__all__']:
        break

#### Shut down Ray #########################################
ray.shutdown()
#### Initialize the logger #################################
logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS),
                num_drones=ARGS.num_drones
                )

#### Run the simulation ####################################
env.TRAINING_PHASE = 12
CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz))
action = {i: np.array([0, 0, 0, 0, 0.1]) for i in range(ARGS.num_drones)}
START = time.time()
obs = env.reset()
return_ = 0
for i in range(0, int(ARGS.duration_sec * env.SIM_FREQ), AGGR_PHY_STEPS):

    #### Step the simulation ###################################
    obs, reward, done, info = env.step(action)
    print(obs[0].round(2))
    drone0 = env._getDroneStateVector(0)
    drone1 = env._getDroneStateVector(1)

    #### Printout ##############################################
    if i % env.SIM_FREQ == 0:  # every 1 second
        # print("Distance from drone 0 to the other drone: {}".format((drone1[0:3] - drone0[0:3]).round(3)))
        # print("Normalized: {}".format((obs[0][5:]).round(3)))
        # print("Distance from drone 0 to the goal: {}".format((env.DEST_POINT - drone0[0:3]).round(3)))
        # print("Normalized: {}\n".format((obs[0][2:5]).round(3)))
        env.render()

    #### Sync the simulation ###################################
    if ARGS.gui:
        sync(i, START, env.TIMESTEP)
    if done['__all__']:
        break
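
#### Log and plot (optional) ###############################
# Note: the Logger above is instantiated but never fed any data in this loop.
# A minimal sketch of how per-drone states could be recorded and plotted,
# assuming the gym-pybullet-drones Logger API (log() takes a drone index,
# a timestamp in seconds, a 20-element state vector, and an optional
# 12-element control vector); this is not part of the original script:
#
#     # inside the stepping loop, once per drone
#     for j in range(ARGS.num_drones):
#         logger.log(drone=j,
#                    timestamp=i / env.SIM_FREQ,
#                    state=env._getDroneStateVector(j),
#                    control=np.zeros(12)
#                    )
#
#     # after the loop exits
#     logger.plot()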