collision_time=ARGS.collision_time, ) #### Obtain the PyBullet Client ID from the environment #### PYB_CLIENT = env.getPyBulletClient() DRONE_IDS = env.getDroneIds() check_env(env, warn=True, skip_render_check=True) #### Compute number of control steps in the simlation ###### PERIOD = ARGS.duration_sec NUM_WP = ARGS.control_freq_hz * PERIOD #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=ARGS.num_drones) #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) START = time.time() #Start the model learning #policy_kwargs = dict(net_arch=[dict(pi=[256, 256, 256, 256], qf=[256, 256, 256, 256])]) #policy_kwargs = dict(net_arch=dict(pi=[256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256], qf=[256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256])) #For TD3, SAC, DDPG policy_kwargs = dict(net_arch=[256, 256, 256, 256]) model = DQN(MlpPolicy, env, verbose=1, tensorboard_log="./dqn_drone_tensorboard2/",
TARGET_POS = np.zeros((NUM_WP, 3)) for i in range(NUM_WP): TARGET_POS[i, :] = R * np.cos( (i / NUM_WP) * (2 * np.pi) + np.pi / 2) + INIT_XYZS[0, 0], R * np.sin( (i / NUM_WP) * (2 * np.pi) + np.pi / 2) - R + INIT_XYZS[0, 1], INIT_XYZS[0, 2] wp_counters = np.array( [int((i * NUM_WP / 6) % NUM_WP) for i in range(NUM_DRONES)]) #### Initialize the logger ######################################################################### if LOG: logger = Logger(logging_freq_hz=int(SIMULATION_FREQ_HZ / AGGR_PHY_STEPS), num_drones=NUM_DRONES) #### Initialize the controllers #################################################################### ctrl = [DSLPIDControl(env) for i in range(NUM_DRONES)] #### Debug environment used to print out the MARL's problem obs, reward and done ################### if NUM_DRONES > 1 and DEBUG_MARL: debug_env = MARLFlockAviary(drone_model=DRONE, num_drones=NUM_DRONES, initial_xyzs=INIT_XYZS, physics=PHYSICS, visibility_radius=10, \ freq=SIMULATION_FREQ_HZ, aggregate_phy_steps=AGGR_PHY_STEPS, gui=False, record=False, obstacles=True) #### Run the simulation ############################################################################ CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / CONTROL_FREQ_HZ)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)} START = time.time() temp_action = {}
#### Initialize the simulation ##################################################################### INIT_XYZS = np.array([[.5, 0, 1], [-.5, 0, .5]]) env = CtrlAviary(drone_model=DRONE, num_drones=NUM_DRONES, initial_xyzs=INIT_XYZS, physics=PHYSICS, \ visibility_radius=10, freq=SIMULATION_FREQ_HZ, gui=GUI, record=RECORD_VIDEO, obstacles=True) #### Initialize the trajectories ################################################################### PERIOD = 10 NUM_WP = CONTROL_FREQ_HZ * PERIOD TARGET_POS = np.zeros((NUM_WP, 2)) for i in range(NUM_WP): TARGET_POS[i, :] = [0.5 * np.cos(2 * np.pi * (i / NUM_WP)), 0] wp_counters = np.array([0, int(NUM_WP / 2)]) #### Initialize the logger ######################################################################### logger = Logger(logging_freq_hz=SIMULATION_FREQ_HZ, num_drones=NUM_DRONES, duration_sec=DURATION_SEC) #### Initialize the controllers #################################################################### ctrl = [DSLPIDControl(env) for i in range(NUM_DRONES)] #### Run the simulation ############################################################################ CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / CONTROL_FREQ_HZ)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)} START = time.time() temp_action = {} for i in range(DURATION_SEC * env.SIM_FREQ): #### Step the simulation ########################################################################### obs, reward, done, info = env.step(action)
initial_rpys=INIT_RPYS, physics=ARGS.physics, neighbourhood_radius=10, freq=ARGS.simulation_freq_hz, aggregate_phy_steps=AGGR_PHY_STEPS, gui=ARGS.gui, record=ARGS.record_video, obstacles=ARGS.obstacles, user_debug_gui=ARGS.user_debug_gui) #### Obtain the PyBullet Client ID from the environment #### PYB_CLIENT = env.getPyBulletClient() #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=ARGS.num_drones) #### Initialize the controllers ############################ if ARGS.drone in [DroneModel.CF2X, DroneModel.CF2P]: ctrl = [ DSLPIDControl(drone_model=ARGS.drone) for i in range(ARGS.num_drones) ] elif ARGS.drone in [DroneModel.HB]: ctrl = [ SimplePIDControl(drone_model=ARGS.drone) for i in range(ARGS.num_drones) ] #### Run the simulation ####################################
"""bool: Whether to save a video under /files/videos. Requires ffmpeg""" if __name__ == "__main__": #### Create the ENVironment ################################ ENV = CtrlAviary(num_drones=3, drone_model=DroneModel.CF2P, initial_xyzs=np.array([[.0, .0, .15], [-.3, .0, .15], [.3, .0, .15]]), gui=GUI, record=RECORD) PYB_CLIENT = ENV.getPyBulletClient() #### Initialize the LOGGER ################################# LOGGER = Logger( logging_freq_hz=ENV.SIM_FREQ, num_drones=3, ) #### Initialize the CONTROLLERS ############################ CTRL_0 = HW2Control(env=ENV, control_type=0) CTRL_1 = HW2Control(env=ENV, control_type=1) CTRL_2 = HW2Control(env=ENV, control_type=2) #### Initialize the ACTION ################################# ACTION = {} OBS = ENV.reset() STATE = OBS["0"]["state"] ACTION["0"] = CTRL_0.compute_control(current_position=STATE[0:3], current_velocity=STATE[10:13], current_rpy=STATE[7:10], target_position=STATE[0:3],
physics=Physics.PYB_GND, # physics=Physics.PYB, # For comparison neighbourhood_radius=10, freq=ARGS.simulation_freq_hz, aggregate_phy_steps=AGGR_PHY_STEPS, gui=ARGS.gui, record=ARGS.record_video, obstacles=ARGS.obstacles, user_debug_gui=ARGS.user_debug_gui) #### Obtain the PyBullet Client ID from the environment #### PYB_CLIENT = env.getPyBulletClient() #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=1) #### Initialize the controller ############################# ctrl = DSLPIDControl(drone_model=DroneModel.CF2X) #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) action = {"0": np.array([0, 0, 0, 0])} START = time.time() for i in range(0, int(ARGS.duration_sec * env.SIM_FREQ), AGGR_PHY_STEPS): #### Make it rain rubber ducks ############################# # if i/env.SIM_FREQ>5 and i%10==0 and i/env.SIM_FREQ<10: p.loadURDF("duck_vhacd.urdf", [0+random.gauss(0, 0.3),-0.5+random.gauss(0, 0.3),3], p.getQuaternionFromEuler([random.randint(0,360),random.randint(0,360),random.randint(0,360)]), physicsClientId=PYB_CLIENT) #### Step the simulation ###################################
#### Initialize a circular trajectory ############################################################## PERIOD = 10 NUM_WP = ARGS.control_freq_hz * PERIOD TARGET_POS = np.zeros((NUM_WP, 3)) for i in range(NUM_WP): TARGET_POS[i, :] = R * np.cos( (i / NUM_WP) * (2 * np.pi) + np.pi / 2) + INIT_XYZS[0, 0], R * np.sin( (i / NUM_WP) * (2 * np.pi) + np.pi / 2) - R + INIT_XYZS[0, 1], INIT_XYZS[0, 2] wp_counters = np.array( [int((i * NUM_WP / 6) % NUM_WP) for i in range(ARGS.num_drones)]) #### Initialize the logger ######################################################################### logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=ARGS.num_drones) #### Initialize the controllers #################################################################### ctrl = [DSLPIDControl(env) for i in range(ARGS.num_drones)] # ctrl = [SimplePIDControl(env) for i in range(ARGS.num_drones)] #### Run the simulation ############################################################################ CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(ARGS.num_drones)} START = time.time() for i in range(0, int(ARGS.duration_sec * env.SIM_FREQ), AGGR_PHY_STEPS): #### Make it rain rubber ducks ##################################################################### # if i/env.SIM_FREQ>5 and i%10==0 and i/env.SIM_FREQ<10: p.loadURDF("duck_vhacd.urdf", [0+random.gauss(0, 0.3),-0.5+random.gauss(0, 0.3),3], p.getQuaternionFromEuler([random.randint(0,360),random.randint(0,360),random.randint(0,360)]), physicsClientId=PYB_CLIENT)
elif ARGS.exp.split("-")[1] == 'meetup': test_env = MeetupAviary( num_drones=NUM_DRONES, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=OBS, act=ACT, gui=True, record=True) else: print("[ERROR] environment not yet implemented") exit() #### Show, record a video, and log the model's performance # obs = test_env.reset() logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS), num_drones=NUM_DRONES) if ACT in [ ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID ]: action = {i: np.array([0]) for i in range(NUM_DRONES)} elif ACT in [ActionType.RPM, ActionType.DYN, ActionType.VEL]: action = {i: np.array([0, 0, 0, 0]) for i in range(NUM_DRONES)} elif ACT == ActionType.PID: action = {i: np.array([0, 0, 0]) for i in range(NUM_DRONES)} else: print("[ERROR] unknown ActionType") exit() start = time.time() for i in range( 6 * int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS)): # Up to 6''
#### Create test environment ############################### test_env = PayloadCoop(num_drones=NUM_DRONES, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=OBS, act=ACT, gui=True, record=True ) for ep in range(10): #### Show, record a video, and log the model's performance # test_env.set_phase(12) obs = test_env.reset() logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ/test_env.AGGR_PHY_STEPS), num_drones=NUM_DRONES ) action = {i: action_space.sample() for i in range(NUM_DRONES)} start = time.time() policy0 = agent.get_policy("pol0") policy1 = agent.get_policy("pol1") START = time.time() for i in range(0, int(test_env.EPISODE_LEN_SEC * test_env.SIM_FREQ), test_env.AGGR_PHY_STEPS): #### Step the simulation ################################### temp = {} act0 = policy0.compute_actions(obs[0].reshape(1, -1))[0][0] act1 = policy1.compute_actions(obs[1].reshape(1, -1))[0][0] action = {0: act0, 1: act1} obs, reward, done, info = test_env.step(action)
physics=ARGS.physics, neighbourhood_radius=10, freq=ARGS.simulation_freq_hz, aggregate_phy_steps=AGGR_PHY_STEPS, gui=ARGS.gui, record=ARGS.record_video, obstacles=ARGS.obstacles, user_debug_gui=ARGS.user_debug_gui ) #### Obtain the PyBullet Client ID from the environment #### PYB_CLIENT = env.getPyBulletClient() #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz/AGGR_PHY_STEPS), num_drones=ARGS.num_drones ) #### Initialize the controllers ############################ if ARGS.drone in [DroneModel.CF2X, DroneModel.CF2P]: ctrl = [DSLPIDControl(drone_model=ARGS.drone) for i in range(ARGS.num_drones)] elif ARGS.drone in [DroneModel.HB]: ctrl = [SimplePIDControl(drone_model=ARGS.drone) for i in range(ARGS.num_drones)] #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ/ARGS.control_freq_hz)) action = {str(i): np.array([0,0,0,0]) for i in range(ARGS.num_drones)} START = time.time() for i in range(0, int(ARGS.duration_sec*env.SIM_FREQ), AGGR_PHY_STEPS): #### Make it rain rubber ducks #############################
config = pickle.load(open(ARGS.params, "rb")) agent = ppo.PPOTrainer(config, env="shoot-and-defend-v0") agent.restore(ARGS.checkpoint) print("=====================================") print(agent.get_policy("shooter").model) print("=====================================") print(agent.get_policy("defender").model) defender_policy = agent.get_policy("defender") shooter_policy = agent.get_policy("shooter") #### Obtain the PyBullet Client ID from the environment #### PYB_CLIENT = env.getPyBulletClient() #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=num_drones) #### Initialize the controllers ############################ if ARGS.drone in [DroneModel.CF2X, DroneModel.CF2P]: ctrl = [ DSLPIDControl(drone_model=ARGS.drone) for i in range(num_drones) ] elif ARGS.drone in [DroneModel.HB]: ctrl = [ SimplePIDControl(drone_model=ARGS.drone) for i in range(num_drones) ] #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) # Action of [0, 0, 0, 0] puts the drone into a hover position
#### Compute number of control steps in the simlation ###### PERIOD = ARGS.duration_sec NUM_WP = ARGS.control_freq_hz*PERIOD wp_counters = np.array([0 for i in range(4)]) #### Initialize the velocity target ######################## TARGET_VEL = np.zeros((4,NUM_WP,4)) for i in range(NUM_WP): TARGET_VEL[0, i, :] = [1, 1, 0, 0.99] #if i < (NUM_WP/8) else [3.6, -1, 0, 0.99] TARGET_VEL[1, i, :] = [0, 1, 0, 0.99] #if i < (NUM_WP/8+NUM_WP/6) else [0, -1, 0, 0.99] TARGET_VEL[2, i, :] = [0.2, 1, 0.2, 0.99] #if i < (NUM_WP/8+2*NUM_WP/6) else [-0.2, -1, -0.2, 0.99] TARGET_VEL[3, i, :] = [0, 1, 0.5, 0.99] #if i < (NUM_WP/8+3*NUM_WP/6) else [0, -1, -0.5, 0.99] #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz/AGGR_PHY_STEPS), num_drones=4 ) #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ/ARGS.control_freq_hz)) action = {str(i): np.array([0,0,0,0]) for i in range(4)} START = time.time() for i in range(0, int(ARGS.duration_sec*env.SIM_FREQ), AGGR_PHY_STEPS): ############################################################ # for j in range(3): env._showDroneLocalAxes(j) #### Step the simulation ################################### obs, reward, done, info = env.step(action) #### Compute control at the desired frequency ##############
physics=ARGS.physics, freq=SIMULATION_FREQ_HZ, gui=ARGS.gui, record=ARGS.record_video, obstacles=False ) INITIAL_STATE = env.reset() action = {"0": np.zeros(4)} pos_err = 9999. #### TRACE_FILE starts at [0,0,0], sim starts at [0,0,INITIAL_STATE[2]] TRACE_CTRL_REFERENCE[:, 2] = INITIAL_STATE["0"]["state"][2] #### Initialize the logger ################################# logger = Logger(logging_freq_hz=SIMULATION_FREQ_HZ, num_drones=2, duration_sec=DURATION_SEC ) #### Initialize the controller ############################# ctrl = DSLPIDControl(env) #### Run the comparison #################################### START = time.time() for i in range(DURATION_SEC*env.SIM_FREQ): #### Step the simulation ################################### obs, reward, done, info = env.step(action) #### Compute next action using the set points from the trace action["0"], pos_err, yaw_err = ctrl.computeControlFromState(control_timestep=env.TIMESTEP, state=obs["0"]["state"],
freq=ARGS.simulation_freq_hz, gui=ARGS.gui, record=ARGS.record_video, obstacles=True) #### Initialize the trajectories ########################### PERIOD = 10 NUM_WP = ARGS.control_freq_hz * PERIOD TARGET_POS = np.zeros((NUM_WP, 2)) for i in range(NUM_WP): TARGET_POS[i, :] = [0.5 * np.cos(2 * np.pi * (i / NUM_WP)), 0] wp_counters = np.array([0, int(NUM_WP / 2)]) #### Initialize the logger ################################# logger = Logger(logging_freq_hz=ARGS.simulation_freq_hz, num_drones=2, duration_sec=ARGS.duration_sec) #### Initialize the controllers ############################ ctrl = [DSLPIDControl(drone_model=ARGS.drone) for i in range(2)] #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(2)} START = time.time() for i in range(ARGS.duration_sec * env.SIM_FREQ): #### Step the simulation ################################### obs, reward, done, info = env.step(action) #### Compute control at the desired frequency ##############
freq=ARGS.simulation_freq_hz, gui=ARGS.gui, record=ARGS.record_video, obstacles=True) #### Initialize the trajectories ########################### PERIOD = 10 NUM_WP = ARGS.control_freq_hz * PERIOD TARGET_POS = np.zeros((NUM_WP, 2)) for i in range(NUM_WP): TARGET_POS[i, :] = [0.5 * np.cos(2 * np.pi * (i / NUM_WP)), 0] wp_counters = np.array([0, int(NUM_WP / 2)]) #### Initialize the logger ################################# logger = Logger(logging_freq_hz=ARGS.simulation_freq_hz, num_drones=2, duration_sec=ARGS.duration_sec) #### Initialize the controllers ############################ ctrl = [DSLPIDControl(env) for i in range(2)] #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(2)} START = time.time() for i in range(ARGS.duration_sec * env.SIM_FREQ): #### Step the simulation ################################### obs, reward, done, info = env.step(action) #### Compute control at the desired frequency ##############
register_env("takeoff-aviary-v0", lambda _: TakeoffAviary()) config = ppo.DEFAULT_CONFIG.copy() config["num_workers"] = 2 config["env"] = "takeoff-aviary-v0" agent = ppo.PPOTrainer(config) for i in range(10): # e.g. 100 results = agent.train() print("[INFO] {:d}: episode_reward max {:f} min {:f} mean {:f}".format(i, \ results["episode_reward_max"], results["episode_reward_min"], results["episode_reward_mean"])) policy = agent.get_policy() print(policy.model.base_model.summary()) ray.shutdown() #### Show (and record a video of) the model's performance ########################################## env = TakeoffAviary(gui=True, record=False) logger = Logger(logging_freq_hz=int(env.SIM_FREQ / env.AGGR_PHY_STEPS), num_drones=1) obs = env.reset() start = time.time() for i in range(3 * env.SIM_FREQ): if not ARGS.rllib: action, _states = model.predict(obs, deterministic=True) else: action, _states, _dict = policy.compute_single_action(obs) obs, reward, done, info = env.step(action) logger.log(drone=0, timestamp=i / env.SIM_FREQ, state=np.hstack([ obs[0:3], np.zeros(4), obs[3:15], np.resize(action, (4)) ]),
aggregate_phy_steps=AGGR_PHY_STEPS, gui=ARGS.gui, record=ARGS.record_video, obstacles=True) #### Initialize the trajectories ########################### PERIOD = 5 NUM_WP = ARGS.control_freq_hz * PERIOD TARGET_POS = np.zeros((NUM_WP, 2)) for i in range(NUM_WP): TARGET_POS[i, :] = [0.5 * np.cos(2 * np.pi * (i / NUM_WP)), 0] wp_counters = np.array([0, int(NUM_WP / 2)]) #### Initialize the logger ################################# logger = Logger(logging_freq_hz=int(ARGS.simulation_freq_hz / AGGR_PHY_STEPS), num_drones=2, duration_sec=ARGS.duration_sec) #### Initialize the controllers ############################ ctrl = [DSLPIDControl(drone_model=ARGS.drone) for i in range(2)] #### Run the simulation #################################### CTRL_EVERY_N_STEPS = int(np.floor(env.SIM_FREQ / ARGS.control_freq_hz)) action = {str(i): np.array([0, 0, 0, 0]) for i in range(2)} START = time.time() for i in range(0, int(ARGS.duration_sec * env.SIM_FREQ), AGGR_PHY_STEPS): #### Step the simulation ################################### obs, reward, done, info = env.step(action) #### Compute control at the desired frequency ##############
import numpy as np from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary from gym_pybullet_drones.utils.Logger import Logger from gym_pybullet_drones.utils.utils import sync from hw1_control import HW1Control DURATION = 10 # int: the duration of the simulation in seconds GUI = True # bool: whether to use PyBullet graphical interface if __name__ == "__main__": #### Create the ENVironment ################################ ENV = CtrlAviary(gui=GUI) #### Initialize the LOGGER ################################# LOGGER = Logger(logging_freq_hz=ENV.SIM_FREQ) #### Initialize the controller ############################# CTRL = HW1Control(ENV) #### Initialize the ACTION ################################# ACTION = {} OBS = ENV.reset() STATE = OBS["0"]["state"] ACTION["0"] = CTRL.compute_control(current_position=STATE[0:3], current_quaternion=STATE[3:7], current_velocity=STATE[10:13], current_angular_velocity=STATE[13:16], target_position=STATE[0:3], target_velocity=np.zeros(3))
#### Initialize the simulation ##################################################################### H = .1; H_STEP = .05; R = .3; INIT_XYZS = np.array([ [R*np.cos((i/6)*2*np.pi+np.pi/2), R*np.sin((i/6)*2*np.pi+np.pi/2)-R, H+i*H_STEP] for i in range(NUM_DRONES) ]) #### Create the environment with or without video capture part of each drone's state ############### if VISION: env = VisionCtrlAviary(drone_model=DRONE, num_drones=NUM_DRONES, initial_xyzs=INIT_XYZS, physics=PHYSICS, \ visibility_radius=10, freq=SIMULATION_FREQ_HZ, gui=GUI, record=RECORD_VIDEO, obstacles=True) else: env = CtrlAviary(drone_model=DRONE, num_drones=NUM_DRONES, initial_xyzs=INIT_XYZS, physics=PHYSICS, \ visibility_radius=10, freq=SIMULATION_FREQ_HZ, gui=GUI, record=RECORD_VIDEO, obstacles=True) #### Initialize a circular trajectory ############################################################## PERIOD = 10; NUM_WP = CONTROL_FREQ_HZ*PERIOD; TARGET_POS = np.zeros((NUM_WP,3)) for i in range(NUM_WP): TARGET_POS[i,:] = R*np.cos((i/NUM_WP)*(2*np.pi)+np.pi/2)+INIT_XYZS[0,0], R*np.sin((i/NUM_WP)*(2*np.pi)+np.pi/2)-R+INIT_XYZS[0,1], INIT_XYZS[0,2] wp_counters = np.array([ int((i*NUM_WP/6)%NUM_WP) for i in range(NUM_DRONES) ]) #### Initialize the logger ######################################################################### logger = Logger(logging_freq_hz=SIMULATION_FREQ_HZ, num_drones=NUM_DRONES, duration_sec=DURATION_SEC) #### Initialize the controllers #################################################################### ctrl = [DSLPIDControl(env) for i in range(NUM_DRONES)] # ctrl = [SimplePIDControl(env) for i in range(NUM_DRONES)] #### Run the simulation ############################################################################ CTRL_EVERY_N_STEPS= int(np.floor(env.SIM_FREQ/CONTROL_FREQ_HZ)) action = { str(i): np.array([0,0,0,0]) for i in range(NUM_DRONES) } START = time.time(); temp_action = {} for i in range(DURATION_SEC*env.SIM_FREQ): #### Step the simulation ########################################################################### obs, reward, done, info = env.step(action) #### Compute control at the desired frequency @@@@@#################################################
dynamics_model = DynamicsModel(10, 6, checkpoint["model_hyper_params"]) dynamics_model.model.load_state_dict(checkpoint["model_params"]) episode_max_steps = checkpoint["episode_max_steps"] env = gym.make("takeoff-aviary-v0", gui=True) num_trial = 5 policy = RandomPolicy( max_action=env.action_space.high[0], act_dim=env.action_space.high.size) for episode_idx in range(num_trial): total_rew = 0. obs = env.reset() logger = Logger(logging_freq_hz=env.SIM_FREQ, num_drones=1) for i in range(episode_max_steps): # init obs.shape=(12, ) assert len(obs.shape) == 1 and obs.shape[0] == 12 # parse obses を使うためにバッチサイズ1の様に扱う batched_obs = obs.reshape((1, 12)) # RSを使って1ステップだけ進める act = random_shooting(batched_obs) # act もバッチサイズ1の様に扱う assert len(act.shape) == 1 and act.shape[0] == 4 batched_act = act.reshape((1, 4)) next_obs, _, done, _ = env.step(act) logger.log(drone=0, timestamp=i/env.SIM_FREQ,
eval_env, n_eval_episodes=10) print("\n\n\nMean reward ", mean_reward, " +- ", std_reward, "\n\n") with np.load(ARGS.exp + '/evaluations.npz') as data: for j in range(data['timesteps'].shape[0]): print(str(data['timesteps'][j]) + "," + str(data['results'][j][0])) #### Show, record a video, and log the model's performance # test_env = gym.make(env_name, gui=True, record=True, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=OBS, act=ACT) logger = Logger(logging_freq_hz=int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS), num_drones=1) obs = test_env.reset() start = time.time() for i in range( 6 * int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS)): # Up to 6'' action, _states = model.predict( obs, deterministic=True # OPTIONAL 'deterministic=False' ) obs, reward, done, info = test_env.step(action) test_env.render() if OBS == ObservationType.KIN: logger.log(drone=0, timestamp=i / test_env.SIM_FREQ, state=np.hstack(
from stable_baselines3.common.evaluation import evaluate_policy from gym_pybullet_drones.utils.Logger import Logger from gym_pybullet_drones.envs.RLTetherAviary import RLTetherAviary ############# This script will play and record a video and the performance of a trained model if __name__ == "__main__": np.random.seed(2020) #### Initialize the environment ######################################################################### env = RLTetherAviary(gui=False, record=False) #### Initialize the logger ######################################################################### logger = Logger(logging_freq_hz=int(env.SIM_FREQ), num_drones=1) model_name = "<path-to-model>" # model = DDPG.load(model_name) # model = PPO.load(model_name) loaded = keras.models.model_from_json(model_name) obs = env.reset() done = False for i in range(10 * env.SIM_FREQ): action, _states = model.predict(obs, deterministic=True) obs, rewards, done, info = env.step(action) logger.log(drone=0, timestamp=1 / env.SIM_FREQ,