# Top-level setup for the single-agent training script: read the config,
# launch the Unreal environment, open the PyGame control window and load
# the environment's initial spawn positions.
from environments.initial_positions import *
import psutil
from os import getpid
from network.Memory import Memory
from aux_functions import *
from configs.read_cfg import read_cfg

# Suppress TensorFlow debug/info messages.
# NOTE(review): `os` is not imported explicitly here — presumably brought in
# by one of the wildcard imports above; verify.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Handle to this process, used for memory-usage reporting.
process = psutil.Process(getpid())

# Read the main config file.
cfg = read_cfg(config_filename='configs/config.cfg', verbose=True)

# Start the Unreal environment.
env_process = start_environment(env_name=cfg.env_name)

# Load the PyGame screen used for keyboard control.
screen = pygame_connect()

# Load the initial positions for the environment.
reset_array, level_name, crash_threshold, initZ = initial_positions(
    cfg.env_name)

# Generate the path where the network weights will be saved.
cfg = save_network_path(cfg=cfg)

# Replay Memory for RL
def check_user_input(active, automate, agent, client, old_posit, initZ,
                     fig_z, fig_nav, env_folder, cfg, algorithm_cfg):
    """Poll PyGame keyboard events and apply the matching command.

    Mode-dependent key bindings:
      train: L reloads the algorithm config, RETURN saves the network,
             BACKSPACE toggles automation, R reconnects the drone, and
             (when automation is off) arrow/letter keys select manual actions.
      infer: S saves the altitude/navigation figures, BACKSPACE stops the
             drone and toggles automation.

    Returns the possibly-updated (active, automate, algorithm_cfg, client).
    """
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            # Window closed: stop the main loop and shut PyGame down.
            active = False
            pygame.quit()

        # Training keys control
        if event.type == pygame.KEYDOWN and cfg.mode == 'train':
            if event.key == pygame.K_l:
                # Reload the algorithm config (epsilon, learning rate, ...)
                # and regenerate the network save path.
                path = 'configs/' + cfg.algorithm + '.cfg'
                algorithm_cfg = read_cfg(config_filename=path, verbose=False)
                cfg, algorithm_cfg = save_network_path(
                    cfg=cfg, algorithm_cfg=algorithm_cfg)
                print('Updated Parameters')

            if event.key == pygame.K_RETURN:
                # Pause automation and persist the current network weights.
                automate = False
                print('Saving Model')
                agent.save_network(algorithm_cfg.network_path)
                print('Model Saved: ', algorithm_cfg.network_path)

            if event.key == pygame.K_BACKSPACE:
                # Toggle automation on/off.
                automate = automate ^ True

            if event.key == pygame.K_r:
                # Reconnect to the simulator.
                # NOTE(review): the new old_posit/initZ returned here are
                # local only — they are not returned to the caller, so the
                # caller keeps its stale values; confirm this is intended.
                client, old_posit, initZ = connect_drone(
                    ip_address=cfg.ip_address,
                    phase=cfg.mode,
                    num_agents=cfg.num_agents)
                agent.client = client

            # Manual control keys, only honored when automation is off.
            # NOTE(review): `action` is assigned but never consumed here
            # (the take_action call below is commented out).
            if not automate:
                if event.key == pygame.K_UP:
                    action = 0
                elif event.key == pygame.K_RIGHT:
                    action = 1
                elif event.key == pygame.K_LEFT:
                    action = 2
                elif event.key == pygame.K_d:
                    action = 3
                elif event.key == pygame.K_a:
                    action = 4
                elif event.key == pygame.K_DOWN:
                    action = -2
                elif event.key == pygame.K_y:
                    # Climb to 3x the initial altitude at the current x/y.
                    pos = client.getPosition()
                    client.moveToPosition(pos.x_val, pos.y_val, 3 * initZ, 1)
                    time.sleep(0.5)
                elif event.key == pygame.K_h:
                    client.reset()
                # agent.take_action(action)

        elif event.type == pygame.KEYDOWN and cfg.mode == 'infer':
            if event.key == pygame.K_s:
                # Save the altitude and navigation figures to the results dir.
                file_path = env_folder + 'results/'
                fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
                fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
                print('Figures saved')

            if event.key == pygame.K_BACKSPACE:
                # Halt the drone, then toggle automation.
                client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1)
                automate = automate ^ True

    return active, automate, algorithm_cfg, client
# Entry point: parse the config-file argument, probe for an NVIDIA GPU and,
# if the JSON settings were generated successfully, launch the environment.
parser = argparse.ArgumentParser(description='Input config files')
# parser.add_argument('--alg_cfg', type=str, default="", help='')
parser.add_argument('--main_cfg', type=str, default="configs/config.cfg",
                    help='')
args = parser.parse_args()
print(args)

# Read the config file chosen on the command line.
cfg = read_cfg(config_filename=args.main_cfg, verbose=True)
if cfg.mode == 'infer':
    # Inference always runs with a single agent.
    cfg.num_agents = 1
can_proceed = generate_json(cfg)

# Check if an NVIDIA GPU is available; any failure to initialize NVML means
# "no GPU stats available".
# Fix: the original bare `except:` also swallowed KeyboardInterrupt and
# SystemExit; narrowed to Exception so the probe stays best-effort without
# blocking process shutdown.
try:
    nvidia_smi.nvmlInit()
    cfg.NVIDIA_GPU = True
except Exception:
    cfg.NVIDIA_GPU = False

if can_proceed:
    # Start the environment.
    env_process, env_folder = start_environment(env_name=cfg.env_name)
def check_user_input(active, automate, lr, epsilon, agent, network_path,
                     client, old_posit, initZ):
    """Poll PyGame keyboard events and apply the matching command.

    L reloads the learning rate from the config file, RETURN saves the
    network, BACKSPACE toggles automation, R reconnects to AirSim, M dumps
    the agent state, and (when automation is off) arrow/letter keys select
    manual flight actions.

    Returns the possibly-updated (active, automate, lr, client).
    """
    # Manual-mode key -> action code (the code is currently unused
    # downstream: the take_action call is disabled).
    manual_actions = {
        pygame.K_UP: 0,
        pygame.K_RIGHT: 1,
        pygame.K_LEFT: 2,
        pygame.K_d: 3,
        pygame.K_a: 4,
        pygame.K_DOWN: -2,
    }

    for ev in pygame.event.get():
        if ev.type == pygame.QUIT:
            # Window closed: stop the main loop and shut PyGame down.
            active = False
            pygame.quit()

        if ev.type != pygame.KEYDOWN:
            continue

        key = ev.key
        if key == pygame.K_l:
            # Re-read the config file and pick up the new learning rate.
            cfg = read_cfg(config_filename='configs/config.cfg',
                           verbose=False)
            lr = cfg.lr
            print('Updated Parameters')
            print('Learning Rate: ', cfg.lr)
        if key == pygame.K_RETURN:
            # Pause automation and persist the current network weights.
            automate = False
            print('Saving Model')
            agent.save_network(network_path)
            print('Model Saved: ', network_path)
        if key == pygame.K_BACKSPACE:
            # Toggle automation on/off.
            automate = automate ^ True
        if key == pygame.K_r:
            # Reconnect to AirSim and restore the previous vehicle pose.
            client = airsim.MultirotorClient()
            client.confirmConnection()
            client.simSetVehiclePose(old_posit, ignore_collison=True)
            agent.client = client
        if key == pygame.K_m:
            agent.get_state()
            print('got_state')

        if not automate:
            # Manual flight keys.
            if key in manual_actions:
                action = manual_actions[key]
            elif key == pygame.K_y:
                # Climb to 3x the initial altitude at the current x/y.
                pos = client.getPosition()
                client.moveToPosition(pos.x_val, pos.y_val, 3 * initZ, 1)
                time.sleep(0.5)
            elif key == pygame.K_h:
                client.reset()
            # agent.take_action(action)

    return active, automate, lr, client
def DeepQLearning(cfg, env_process, env_folder):
    """Run the Deep Q-Learning algorithm in train or inference mode.

    Connects to the AirSim simulator, builds one PedraAgent per drone (plus
    optional global learner/target networks for the distributed variants),
    then loops: in 'train' mode it collects transitions, trains from replay
    memory and periodically switches levels/target networks; in 'infer' mode
    it flies a single drone with a greedy policy and plots its trajectory.

    Parameters:
        cfg: main config (mode, num_agents, env_name, ip_address, ...).
        env_process: handle to the running environment process.
        env_folder: path to the environment's folder (configs, results).
    """
    algorithm_cfg = read_cfg(config_filename='configs/DeepQLearning.cfg',
                             verbose=True)
    algorithm_cfg.algorithm = cfg.algorithm

    # Single-agent distributed variant forces one drone.
    if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo:
        # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg)
        cfg.num_agents = 1

    # # Start the environment
    # env_process, env_folder = start_environment(env_name=cfg.env_name)

    # Connect to Unreal Engine and get the drone handle: client
    client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address,
                                             phase=cfg.mode,
                                             num_agents=cfg.num_agents)
    print(old_posit)
    initial_pos = old_posit.copy()

    # Load the initial positions for the environment, from the Target or
    # Source variant depending on cfg.Target.
    # if Target:
    if cfg.Target:
        print("Traget Iniitial")
        reset_array, reset_array_raw, level_name, crash_threshold = initial_positions(
            cfg.env_name, initZ, cfg.num_agents)
    else:
        print("Source Iniitial")
        reset_array, reset_array_raw, level_name, crash_threshold = initial_positions2(
            cfg.env_name + "2", initZ, cfg.num_agents)
    # ""
    print("reset_array", reset_array)
    print("reset_array_raw", reset_array_raw)
    print("level_name", level_name)
    print("crash_threshold", crash_threshold)

    # Initialize System Handlers (process handle for memory stats).
    process = psutil.Process(getpid())
    # nvidia_smi.nvmlInit()

    # Load PyGame Screen for keyboard control.
    screen = pygame_connect(phase=cfg.mode)

    fig_z = []
    fig_nav = []
    debug = False
    # Generate path where the weights will be saved
    cfg, algorithm_cfg = save_network_path(cfg=cfg,
                                           algorithm_cfg=algorithm_cfg)
    # Per-agent bookkeeping dicts, keyed by agent name ("drone0", ...).
    current_state = {}
    new_state = {}
    posit = {}
    name_agent_list = []
    agent = {}

    # Replay Memory for RL
    if cfg.mode == 'train':
        ReplayMemory = {}
        target_agent = {}
        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
            print_orderly('global', 40)
            # Multiple agent acts as data collecter and one global learner
            global_agent = PedraAgent(algorithm_cfg, client, name='DQN',
                                      vehicle_name='global',
                                      Target=cfg.Target,
                                      data_collect=cfg.data_collect)
            # In this variant ReplayMemory/target_agent are single shared
            # objects, not per-agent dicts.
            ReplayMemory = Memory(algorithm_cfg.buffer_len)
            target_agent = PedraAgent(algorithm_cfg, client, name='Target',
                                      vehicle_name='global',
                                      Target=cfg.Target)

        for drone in range(cfg.num_agents):
            name_agent = "drone" + str(drone)
            name_agent_list.append(name_agent)
            print_orderly(name_agent, 40)
            # TODO: turn the neural network off if global agent is present
            agent[name_agent] = PedraAgent(algorithm_cfg, client, name='DQN',
                                           vehicle_name=name_agent,
                                           Target=cfg.Target,
                                           data_collect=cfg.data_collect)
            if algorithm_cfg.distributed_algo != 'GlobalLearningGlobalUpdate-MA':
                # Per-agent replay memory and target network.
                ReplayMemory[name_agent] = Memory(algorithm_cfg.buffer_len)
                target_agent[name_agent] = PedraAgent(algorithm_cfg, client,
                                                      name='Target',
                                                      vehicle_name=name_agent,
                                                      Target=cfg.Target)
            current_state[name_agent] = agent[name_agent].get_state()

    elif cfg.mode == 'infer':
        # Inference: single drone with live trajectory/altitude plotting.
        name_agent = 'drone0'
        name_agent_list.append(name_agent)
        agent[name_agent] = PedraAgent(algorithm_cfg, client,
                                       name=name_agent + 'DQN',
                                       vehicle_name=name_agent,
                                       Target=cfg.Target)
        env_cfg = read_cfg(config_filename=env_folder + 'config_' +
                           ("Target" if cfg.Target else "Source") + '.cfg')
        nav_x = []
        nav_y = []
        altitude = {}
        altitude[name_agent] = []
        p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer(
            env_cfg=env_cfg, client=client, env_folder=env_folder)
        nav_text = ax_nav.text(0, 0, '')

        # Select initial position
        reset_to_initial(0, reset_array, client, vehicle_name=name_agent)
        old_posit[name_agent] = client.simGetVehiclePose(
            vehicle_name=name_agent)

    # Initialize variables
    iter = 1
    # num_collisions = 0
    episode = {}
    active = True
    print_interval = 1
    automate = True
    choose = False          # toggles which network trains (Double-DQN)
    print_qval = False
    last_crash = {}
    ret = {}
    distance = {}
    num_collisions = {}
    level = {}
    level_state = {}
    level_posit = {}
    times_switch = {}
    last_crash_array = {}
    ret_array = {}
    distance_array = {}
    epi_env_array = {}
    log_files = {}

    # If the phase is inference force the num_agents to 1
    hyphens = '-' * int((80 - len('Log files')) / 2)
    print(hyphens + ' ' + 'Log files' + ' ' + hyphens)
    # Zero out per-agent counters and open one log file per agent.
    for name_agent in name_agent_list:
        ret[name_agent] = 0
        num_collisions[name_agent] = 0
        last_crash[name_agent] = 0
        level[name_agent] = 0
        episode[name_agent] = 0
        level_state[name_agent] = [None] * len(reset_array[name_agent])
        level_posit[name_agent] = [None] * len(reset_array[name_agent])
        times_switch[name_agent] = 0
        last_crash_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]), dtype=np.int32)
        ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent]))
        distance_array[name_agent] = np.zeros(
            shape=len(reset_array[name_agent]))
        epi_env_array[name_agent] = np.zeros(shape=len(
            reset_array[name_agent]), dtype=np.int32)
        distance[name_agent] = 0
        # Log file
        log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt'
        print("Log path: ", log_path)
        log_files[name_agent] = open(log_path, 'w')

    print_orderly('Simulation begins', 80)
    # Main loop: one pass = one keyboard poll + (train) one step for every
    # agent, or (infer) one step for the single drone.
    while active:
        try:
            # NOTE(review): name_agent here is whatever value was left in
            # the loop variable above — confirm this is intended.
            active, automate, algorithm_cfg, client = check_user_input(
                active, automate, agent[name_agent], client,
                old_posit[name_agent], initZ, fig_z, fig_nav, env_folder,
                cfg, algorithm_cfg)

            if automate:
                if cfg.mode == 'train':
                    # Periodically rotate every agent to the next level.
                    if iter % algorithm_cfg.switch_env_steps == 0:
                        switch_env = True
                    else:
                        switch_env = False

                    for name_agent in name_agent_list:
                        start_time = time.time()
                        if switch_env:
                            # Save this level's progress (state, pose,
                            # counters) so it can be restored when the agent
                            # cycles back to it, then move to the next level.
                            posit1_old = client.simGetVehiclePose(
                                vehicle_name=name_agent)
                            times_switch[
                                name_agent] = times_switch[name_agent] + 1
                            level_state[name_agent][
                                level[name_agent]] = current_state[name_agent]
                            level_posit[name_agent][
                                level[name_agent]] = posit1_old
                            last_crash_array[name_agent][
                                level[name_agent]] = last_crash[name_agent]
                            ret_array[name_agent][
                                level[name_agent]] = ret[name_agent]
                            distance_array[name_agent][
                                level[name_agent]] = distance[name_agent]
                            epi_env_array[name_agent][
                                level[name_agent]] = episode[name_agent]

                            level[name_agent] = (level[name_agent] + 1) % len(
                                reset_array[name_agent])
                            print(name_agent + ' :Transferring to level: ',
                                  level[name_agent], ' - ',
                                  level_name[name_agent][level[name_agent]])

                            if times_switch[name_agent] < len(
                                    reset_array[name_agent]):
                                # First visit to this level: plain reset.
                                reset_to_initial(level[name_agent],
                                                 reset_array, client,
                                                 vehicle_name=name_agent)
                            else:
                                # Revisit: restore the saved state and pose.
                                current_state[name_agent] = level_state[
                                    name_agent][level[name_agent]]
                                posit1_old = level_posit[name_agent][
                                    level[name_agent]]
                                reset_to_initial(level[name_agent],
                                                 reset_array, client,
                                                 vehicle_name=name_agent)
                                client.simSetVehiclePose(
                                    posit1_old, ignore_collison=True,
                                    vehicle_name=name_agent)
                                time.sleep(0.1)

                            last_crash[name_agent] = last_crash_array[
                                name_agent][level[name_agent]]
                            ret[name_agent] = ret_array[name_agent][
                                level[name_agent]]
                            distance[name_agent] = distance_array[name_agent][
                                level[name_agent]]
                            # NOTE(review): index is level/3 here, unlike the
                            # plain level index used when saving — confirm.
                            episode[name_agent] = epi_env_array[name_agent][
                                int(level[name_agent] / 3)]
                            # environ = environ^True
                        else:
                            # TODO: policy from one global agent: DONE
                            # Pick the learner/replay/target trio: shared in
                            # the MA variant, per-agent otherwise.
                            if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
                                agent_this_drone = global_agent
                                ReplayMemory_this_drone = ReplayMemory
                                target_agent_this_drone = target_agent
                            else:
                                agent_this_drone = agent[name_agent]
                                ReplayMemory_this_drone = ReplayMemory[
                                    name_agent]
                                target_agent_this_drone = target_agent[
                                    name_agent]

                            # Epsilon-greedy action selection.
                            action, action_type, algorithm_cfg.epsilon, qvals = policy(
                                algorithm_cfg.epsilon,
                                current_state[name_agent], iter,
                                algorithm_cfg.epsilon_saturation,
                                algorithm_cfg.epsilon_model,
                                algorithm_cfg.wait_before_train,
                                algorithm_cfg.num_actions, agent_this_drone)
                            action_word = translate_action(
                                action, algorithm_cfg.num_actions)
                            # Take the action
                            agent[name_agent].take_action(
                                action, algorithm_cfg.num_actions,
                                Mode='static')
                            # time.sleep(0.05)
                            new_state[name_agent] = agent[
                                name_agent].get_state()
                            new_depth1, thresh = agent[
                                name_agent].get_CustomDepth(cfg)

                            # Get GPS information
                            posit[name_agent] = client.simGetVehiclePose(
                                vehicle_name=name_agent)
                            position = posit[name_agent].position
                            old_p = np.array([
                                old_posit[name_agent].position.x_val,
                                old_posit[name_agent].position.y_val
                            ])
                            new_p = np.array([position.x_val, position.y_val])
                            # calculate distance (x/y plane only)
                            distance[name_agent] = distance[
                                name_agent] + np.linalg.norm(new_p - old_p)
                            old_posit[name_agent] = posit[name_agent]

                            reward, crash = agent[name_agent].reward_gen(
                                new_depth1, action, crash_threshold, thresh,
                                debug, cfg)

                            ret[name_agent] = ret[name_agent] + reward
                            agent_state = agent[name_agent].GetAgentState()

                            # A collision, or barely moving, counts as a
                            # crash with reward -1.
                            if agent_state.has_collided or distance[
                                    name_agent] < 0.1:
                                num_collisions[name_agent] = num_collisions[
                                    name_agent] + 1
                                print('crash')
                                crash = True
                                reward = -1

                            # Store the transition in replay memory,
                            # prioritized by TD error.
                            data_tuple = []
                            data_tuple.append([
                                current_state[name_agent], action,
                                new_state[name_agent], reward, crash
                            ])
                            # TODO: one replay memory global, target_agent, agent: DONE
                            err = get_errors(
                                data_tuple, choose, ReplayMemory_this_drone,
                                algorithm_cfg.input_size, agent_this_drone,
                                target_agent_this_drone, algorithm_cfg.gamma,
                                algorithm_cfg.Q_clip)
                            ReplayMemory_this_drone.add(err, data_tuple)

                            # Train if sufficient frames have been stored
                            if iter > algorithm_cfg.wait_before_train:
                                if iter % algorithm_cfg.train_interval == 0:
                                    # Train the RL network
                                    old_states, Qvals, actions, err, idx = minibatch_double(
                                        data_tuple, algorithm_cfg.batch_size,
                                        choose, ReplayMemory_this_drone,
                                        algorithm_cfg.input_size,
                                        agent_this_drone,
                                        target_agent_this_drone,
                                        algorithm_cfg.gamma,
                                        algorithm_cfg.Q_clip)

                                    # TODO global replay memory: DONE
                                    # Refresh priorities of sampled items.
                                    for i in range(algorithm_cfg.batch_size):
                                        ReplayMemory_this_drone.update(
                                            idx[i], err[i])

                                    if print_qval:
                                        print(Qvals)

                                    # TODO global agent, target_agent: DONE
                                    if choose:
                                        # Double-DQN
                                        target_agent_this_drone.network_model.train_n(
                                            old_states, Qvals, actions,
                                            algorithm_cfg.batch_size,
                                            algorithm_cfg.dropout_rate,
                                            algorithm_cfg.learning_rate,
                                            algorithm_cfg.epsilon, iter)
                                    else:
                                        agent_this_drone.network_model.train_n(
                                            old_states, Qvals, actions,
                                            algorithm_cfg.batch_size,
                                            algorithm_cfg.dropout_rate,
                                            algorithm_cfg.learning_rate,
                                            algorithm_cfg.epsilon, iter)

                            # Log system stats (GPU/RAM) to TensorBoard.
                            time_exec = time.time() - start_time
                            gpu_memory, gpu_utilization, sys_memory = get_SystemStats(
                                process, cfg.NVIDIA_GPU)

                            for i in range(0, len(gpu_memory)):
                                tag_mem = 'GPU' + str(i) + '-Memory-GB'
                                tag_util = 'GPU' + str(i) + 'Utilization-%'
                                agent[
                                    name_agent].network_model.log_to_tensorboard(
                                        tag=tag_mem, group='SystemStats',
                                        value=gpu_memory[i], index=iter)
                                agent[
                                    name_agent].network_model.log_to_tensorboard(
                                        tag=tag_util, group='SystemStats',
                                        value=gpu_utilization[i], index=iter)
                            agent[name_agent].network_model.log_to_tensorboard(
                                tag='Memory-GB', group='SystemStats',
                                value=sys_memory, index=iter)

                            # Console + file log of this step.
                            s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f} Reward: {:<+1.4f} '.format(
                                name_agent, int(level[name_agent]), iter,
                                episode[name_agent], action_word, action_type,
                                algorithm_cfg.epsilon,
                                algorithm_cfg.learning_rate, ret[name_agent],
                                last_crash[name_agent], time_exec,
                                distance[name_agent], reward)

                            if iter % print_interval == 0:
                                print(s_log)
                            log_files[name_agent].write(s_log + '\n')

                            last_crash[name_agent] = last_crash[name_agent] + 1
                            if debug:
                                cv2.imshow(
                                    name_agent,
                                    np.hstack(
                                        (np.squeeze(current_state[name_agent],
                                                    axis=0),
                                         np.squeeze(new_state[name_agent],
                                                    axis=0))))
                                cv2.waitKey(1)

                            if crash:
                                if distance[name_agent] < 0.01:
                                    # Drone won't move, reconnect
                                    print(
                                        'Recovering from drone mobility issue')
                                    agent[
                                        name_agent].client, old_posit, initZ = connect_drone(
                                            ip_address=cfg.ip_address,
                                            phase=cfg.mode,
                                            num_agents=cfg.num_agents,
                                            client=client)
                                else:
                                    # End of episode: log return/safe-flight
                                    # distance, reset counters and the drone.
                                    agent[
                                        name_agent].network_model.log_to_tensorboard(
                                            tag='Return', group=name_agent,
                                            value=ret[name_agent],
                                            index=episode[name_agent])
                                    agent[
                                        name_agent].network_model.log_to_tensorboard(
                                            tag='Safe Flight',
                                            group=name_agent,
                                            value=distance[name_agent],
                                            index=episode[name_agent])
                                    ret[name_agent] = 0
                                    distance[name_agent] = 0
                                    episode[
                                        name_agent] = episode[name_agent] + 1
                                    last_crash[name_agent] = 0

                                    reset_to_initial(level[name_agent],
                                                     reset_array, client,
                                                     vehicle_name=name_agent)
                                    # time.sleep(0.2)
                                    current_state[name_agent] = agent[
                                        name_agent].get_state()
                                    old_posit[
                                        name_agent] = client.simGetVehiclePose(
                                            vehicle_name=name_agent)
                            else:
                                current_state[name_agent] = new_state[
                                    name_agent]

                            if iter % algorithm_cfg.max_iters == 0:
                                automate = False

                    # TODO define and state agents
                    # Periodically switch/save the target network(s).
                    if iter % algorithm_cfg.update_target_interval == 0 and iter > algorithm_cfg.wait_before_train:
                        if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA':
                            print('global' + ' - Switching Target Network')
                            global_agent.network_model.save_network(
                                algorithm_cfg.network_path,
                                episode[name_agent])
                        else:
                            for name_agent in name_agent_list:
                                agent[name_agent].take_action(
                                    [-1], algorithm_cfg.num_actions,
                                    Mode='static')
                                print(name_agent +
                                      ' - Switching Target Network')
                                agent[name_agent].network_model.save_network(
                                    algorithm_cfg.network_path,
                                    episode[name_agent])
                        choose = not choose

                    # if iter % algorithm_cfg.communication_interval == 0 and iter > algorithm_cfg.wait_before_train:
                    #     print('Communicating the weights and averaging them')
                    #     communicate_across_agents(agent, name_agent_list, algorithm_cfg)
                    #     communicate_across_agents(target_agent, name_agent_list, algorithm_cfg)

                    iter += 1

                elif cfg.mode == 'infer':
                    # Inference phase
                    agent_state = agent[name_agent].GetAgentState()
                    if agent_state.has_collided:
                        # Collision ends the run: stop the drone, mark the
                        # crash on the plot, save figures and close the env.
                        print('Drone collided')
                        print("Total distance traveled: ",
                              np.round(distance[name_agent], 2))
                        active = False
                        client.moveByVelocityAsync(
                            vx=0, vy=0, vz=0, duration=1,
                            vehicle_name=name_agent).join()

                        if nav_x:
                            # Nav_x is empty if the drone collides in first iteration
                            ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*',
                                        linewidth=20)
                        file_path = env_folder + 'results/'
                        fig_z.savefig(file_path + 'altitude_variation.png',
                                      dpi=500)
                        fig_nav.savefig(file_path + 'navigation.png', dpi=500)
                        close_env(env_process)
                        print('Figures saved')
                    else:
                        # Update distance/altitude traces and the live plots.
                        posit[name_agent] = client.simGetVehiclePose(
                            vehicle_name=name_agent)
                        distance[name_agent] = distance[
                            name_agent] + np.linalg.norm(
                                np.array([
                                    old_posit[name_agent].position.x_val -
                                    posit[name_agent].position.x_val,
                                    old_posit[name_agent].position.y_val -
                                    posit[name_agent].position.y_val
                                ]))
                        # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z)
                        altitude[name_agent].append(
                            -posit[name_agent].position.z_val - f_z)

                        quat = (posit[name_agent].orientation.w_val,
                                posit[name_agent].orientation.x_val,
                                posit[name_agent].orientation.y_val,
                                posit[name_agent].orientation.z_val)
                        yaw = euler_from_quaternion(quat)[2]

                        x_val = posit[name_agent].position.x_val
                        y_val = posit[name_agent].position.y_val
                        z_val = posit[name_agent].position.z_val

                        # Map sim coordinates to floor-plan pixels.
                        nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x)
                        nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y)
                        nav.set_data(nav_x, nav_y)
                        nav_text.remove()
                        nav_text = ax_nav.text(
                            25, 55,
                            'Distance: ' +
                            str(np.round(distance[name_agent], 2)),
                            style='italic',
                            bbox={
                                'facecolor': 'white',
                                'alpha': 0.5
                            })

                        line_z.set_data(np.arange(len(altitude[name_agent])),
                                        altitude[name_agent])
                        ax_z.set_xlim(0, len(altitude[name_agent]))
                        fig_z.canvas.draw()
                        fig_z.canvas.flush_events()

                        # Greedy policy (epsilon pinned to 1, 'inference'
                        # model) and continuous action execution.
                        current_state[name_agent] = agent[
                            name_agent].get_state()
                        action, action_type, algorithm_cfg.epsilon, qvals = policy(
                            1, current_state[name_agent], iter,
                            algorithm_cfg.epsilon_saturation, 'inference',
                            algorithm_cfg.wait_before_train,
                            algorithm_cfg.num_actions, agent[name_agent])
                        action_word = translate_action(
                            action, algorithm_cfg.num_actions)
                        # Take continuous action
                        agent[name_agent].take_action(
                            action, algorithm_cfg.num_actions, Mode='static')
                        old_posit[name_agent] = posit[name_agent]

                        # Verbose and log making
                        s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s} '.format(
                            x_val, y_val, z_val, yaw, action_word)
                        print(s_log)
                        log_files[name_agent].write(s_log + '\n')

        except Exception as e:
            # Known AirSim image-decode failure: reconnect and keep going.
            if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)':
                print('Recovering from AirSim error')
                client, old_posit, initZ = connect_drone(
                    ip_address=cfg.ip_address, phase=cfg.mode,
                    num_agents=cfg.num_agents)
                agent[name_agent].client = client
            else:
                # Anything else: report the traceback location and pause
                # automation so the user can recover manually.
                print('------------- Error -------------')
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, fname, exc_tb.tb_lineno)
                print(exc_obj)
                automate = False
                print('Hit r and then backspace to start from this point')
# Interactive tool: ask the user for a floor-plan image, read the matching
# environment config, and display the image for coordinate picking.
import matplotlib.pyplot as plt
from configs.read_cfg import read_cfg
from unreal_envs.initial_positions import *
from tkinter import filedialog
from tkinter import *
import cv2, os

# Ask the user to select the floor-plan PNG.
root = Tk()
filename = filedialog.askopenfilename(
    initialdir="/", title="Select file",
    filetypes=(("PNG files", "*.PNG"), ("png files", "*.png"),
               ("All files", "*.*")))
root.destroy()

# The environment config lives next to the selected image.
filename_split = os.path.split(filename)
folder = filename_split[0]
floor_cfg_filepath = folder + '/config.cfg'
floor_cfg = read_cfg(floor_cfg_filepath)

# Call the initial-positions function named by env_name.
# NOTE(review): eval() of a config value — safe only if config.cfg is
# trusted; consider a lookup table instead.
name = floor_cfg.env_name + '()'
orig_ip, levels, crash_threshold = eval(name)
player_x_env = orig_ip[0][0]
player_y_env = orig_ip[0][1]

# NOTE(review): this path is a bare directory name — confirm read_cfg
# resolves it to an actual config file.
env_cfg_filepath = 'environments'
env_cfg = read_cfg(env_cfg_filepath)

# Show the floor plan (converted BGR -> RGB for matplotlib).
coords = []
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
floor_image = cv2.imread(filename)
floor_image = cv2.cvtColor(floor_image, cv2.COLOR_BGR2RGB)
plt.imshow(floor_image)
# Top-level setup for the DeepNet training script: read the config and
# initialize paths, hyperparameters and bookkeeping variables.
from DeepNet.network.heat_map import heat_map
from DeepNet.util.Memory import Memory
from RL_functions import *
import numpy as np
from sys import platform
import time
import os, sys
import configparser as cp
from configs.read_cfg import read_cfg
import cv2
from dotmap import DotMap

# Read the configuration file
cfg = read_cfg(verbose=True)

# ---------- Initialize necessary variables
stat_path = 'DeepNet/models/' + cfg.run_name + '/' + cfg.env_type + '/stat.npy'
network_path_half = 'DeepNet/models/' + cfg.run_name + '/' + cfg.env_type + '/'
# NOTE(review): produces a double slash ('.../agent/agent' after '.../') —
# harmless on most filesystems but worth confirming.
network_path = network_path_half + '/agent/agent'
data_path = network_path_half + 'data_tuple.npy'
input_size = 227            # network input image side length
epsilon = 0
action_type = 'Begin'
# Optionally resume replay data / stats / iteration counter from disk.
data_tuple, stat, iteration = load_data(cfg.load_data, cfg.load_data_path)
epi = 0
old_state = []
choose = False
reverse_action = [3, 2, 1, 0]   # maps each action index to its opposite
recovering = False
def check_user_input(active, automate, lr, epsilon, agent, network_path,
                     client, old_posit, initZ, phase, fig_z, fig_nav,
                     env_folder):
    """Poll PyGame keyboard events and apply the matching command.

    Phase-dependent key bindings:
      train: L reloads the learning rate, RETURN saves the network,
             BACKSPACE toggles automation, R reconnects to AirSim, M dumps
             the agent state, and (when automation is off) arrow/letter
             keys select manual actions.
      infer: S saves the altitude/navigation figures, BACKSPACE stops the
             drone and toggles automation.

    Returns the possibly-updated (active, automate, lr, client).
    """
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            # Window closed: stop the main loop and shut PyGame down.
            active = False
            pygame.quit()

        # Training keys control
        if event.type == pygame.KEYDOWN and phase == 'train':
            if event.key == pygame.K_l:
                # Re-read the config file and pick up the new learning rate.
                cfg = read_cfg(config_filename='configs/config.cfg',
                               verbose=False)
                lr = cfg.lr
                print('Updated Parameters')
                print('Learning Rate: ', cfg.lr)

            if event.key == pygame.K_RETURN:
                # Pause automation and persist the current network weights.
                automate = False
                print('Saving Model')
                agent.save_network(network_path)
                print('Model Saved: ', network_path)

            if event.key == pygame.K_BACKSPACE:
                # Toggle automation on/off.
                automate = automate ^ True

            if event.key == pygame.K_r:
                # Reconnect to AirSim and restore the previous vehicle pose.
                client = []
                client = airsim.MultirotorClient()
                client.confirmConnection()
                # posit1_old = client.simGetVehiclePose()
                client.simSetVehiclePose(old_posit, ignore_collison=True)
                agent.client = client

            if event.key == pygame.K_m:
                agent.get_state()
                print('got_state')
                # automate = automate ^ True

            # Manual control keys, only honored when automation is off.
            # NOTE(review): `action` is assigned but never consumed here
            # (the take_action call below is commented out).
            if not automate:
                if event.key == pygame.K_UP:
                    action = 0
                elif event.key == pygame.K_RIGHT:
                    action = 1
                elif event.key == pygame.K_LEFT:
                    action = 2
                elif event.key == pygame.K_d:
                    action = 3
                elif event.key == pygame.K_a:
                    action = 4
                elif event.key == pygame.K_DOWN:
                    action = -2
                elif event.key == pygame.K_y:
                    # Climb to 3x the initial altitude at the current x/y.
                    pos = client.getPosition()
                    client.moveToPosition(pos.x_val, pos.y_val, 3 * initZ, 1)
                    time.sleep(0.5)
                elif event.key == pygame.K_h:
                    client.reset()
                # agent.take_action(action)

        elif event.type == pygame.KEYDOWN and phase == 'infer':
            if event.key == pygame.K_s:
                # Save the altitude and navigation figures to the results dir.
                file_path = env_folder + 'results/'
                fig_z.savefig(file_path + 'altitude_variation.png', dpi=1000)
                fig_nav.savefig(file_path + 'navigation.png', dpi=1000)
                print('Figures saved')

            if event.key == pygame.K_BACKSPACE:
                # Halt the drone, then toggle automation.
                client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=0.1)
                automate = automate ^ True

    return active, automate, lr, client
def DeepREINFORCE(cfg, env_process, env_folder): algorithm_cfg = read_cfg(config_filename='configs/DeepREINFORCE.cfg', verbose=True) algorithm_cfg.algorithm = cfg.algorithm if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo: # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg) cfg.num_agents = 1 client = [] # Connect to Unreal Engine and get the drone handle: client client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) initial_pos = old_posit.copy() # Load the initial positions for the environment reset_array, reset_array_raw, level_name, crash_threshold = initial_positions( cfg.env_name, initZ, cfg.num_agents) # Initialize System Handlers process = psutil.Process(getpid()) # Load PyGame Screen screen = pygame_connect(phase=cfg.mode) fig_z = [] fig_nav = [] debug = False # Generate path where the weights will be saved cfg, algorithm_cfg = save_network_path(cfg=cfg, algorithm_cfg=algorithm_cfg) current_state = {} new_state = {} posit = {} name_agent_list = [] data_tuple = {} agent = {} epi_num = {} if cfg.mode == 'train': iter = {} wait_for_others = {} if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': print_orderly('global', 40) # Multiple agent acts as data collecter and one global learner global_agent = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name='global') for drone in range(cfg.num_agents): name_agent = "drone" + str(drone) wait_for_others[name_agent] = False iter[name_agent] = 1 epi_num[name_agent] = 1 data_tuple[name_agent] = [] name_agent_list.append(name_agent) print_orderly(name_agent, 40) # TODO: turn the neural network off if global agent is present agent[name_agent] = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name=name_agent) current_state[name_agent] = agent[name_agent].get_state() elif cfg.mode == 'infer': iter = 1 name_agent = 'drone0' name_agent_list.append(name_agent) agent[name_agent] = 
PedraAgent(algorithm_cfg, client, name=name_agent + 'DQN', vehicle_name=name_agent) env_cfg = read_cfg(config_filename=env_folder + 'config.cfg') nav_x = [] nav_y = [] altitude = {} altitude[name_agent] = [] p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer( env_cfg=env_cfg, client=client, env_folder=env_folder) nav_text = ax_nav.text(0, 0, '') # Select initial position reset_to_initial(0, reset_array, client, vehicle_name=name_agent) old_posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) # Initialize variables # iter = 1 # num_collisions = 0 episode = {} active = True print_interval = 1 automate = True choose = False print_qval = False last_crash = {} ret = {} distance = {} num_collisions = {} level = {} level_state = {} level_posit = {} times_switch = {} last_crash_array = {} ret_array = {} distance_array = {} epi_env_array = {} log_files = {} # If the phase is inference force the num_agents to 1 hyphens = '-' * int((80 - len('Log files')) / 2) print(hyphens + ' ' + 'Log files' + ' ' + hyphens) for name_agent in name_agent_list: ret[name_agent] = 0 num_collisions[name_agent] = 0 last_crash[name_agent] = 0 level[name_agent] = 0 episode[name_agent] = 0 level_state[name_agent] = [None] * len(reset_array[name_agent]) level_posit[name_agent] = [None] * len(reset_array[name_agent]) times_switch[name_agent] = 0 last_crash_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent])) distance_array[name_agent] = np.zeros( shape=len(reset_array[name_agent])) epi_env_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) distance[name_agent] = 0 # Log file log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt' print("Log path: ", log_path) log_files[name_agent] = open(log_path, 'w') # switch_env = False print_orderly('Simulation begins', 80) # save_posit = old_posit # last_crash_array = 
np.zeros(shape=len(level_name), dtype=np.int32) # ret_array = np.zeros(shape=len(level_name)) # distance_array = np.zeros(shape=len(level_name)) # epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32) while active: try: active, automate, algorithm_cfg, client = check_user_input( active, automate, agent[name_agent], client, old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg, algorithm_cfg) if automate: if cfg.mode == 'train': if iter[name_agent] % algorithm_cfg.switch_env_steps == 0: switch_env = True else: switch_env = False for name_agent in name_agent_list: while not wait_for_others[name_agent]: start_time = time.time() if switch_env: posit1_old = client.simGetVehiclePose( vehicle_name=name_agent) times_switch[ name_agent] = times_switch[name_agent] + 1 level_state[name_agent][level[ name_agent]] = current_state[name_agent] level_posit[name_agent][ level[name_agent]] = posit1_old last_crash_array[name_agent][ level[name_agent]] = last_crash[name_agent] ret_array[name_agent][ level[name_agent]] = ret[name_agent] distance_array[name_agent][ level[name_agent]] = distance[name_agent] epi_env_array[name_agent][ level[name_agent]] = episode[name_agent] level[name_agent] = ( level[name_agent] + 1) % len( reset_array[name_agent]) print( name_agent + ' :Transferring to level: ', level[name_agent], ' - ', level_name[name_agent][level[name_agent]]) if times_switch[name_agent] < len( reset_array[name_agent]): reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) else: current_state[name_agent] = level_state[ name_agent][level[name_agent]] posit1_old = level_posit[name_agent][ level[name_agent]] reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) client.simSetVehiclePose( posit1_old, ignore_collison=True, vehicle_name=name_agent) time.sleep(0.1) last_crash[name_agent] = last_crash_array[ name_agent][level[name_agent]] ret[name_agent] = ret_array[name_agent][ level[name_agent]] distance[name_agent] 
= distance_array[ name_agent][level[name_agent]] episode[name_agent] = epi_env_array[ name_agent][int(level[name_agent] / 3)] # environ = environ^True else: # TODO: policy from one global agent: DONE if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': agent_this_drone = global_agent else: agent_this_drone = agent[name_agent] action, action_type = policy_REINFORCE( current_state[name_agent], agent_this_drone) # print('Evaluated Policy') action_word = translate_action( action, algorithm_cfg.num_actions) # Take the action # agent[name_agent].take_action(action, algorithm_cfg.num_actions, SimMode=cfg.SimMode) agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') # print('Took Action') new_state[name_agent] = agent[ name_agent].get_state() # print('got state') new_depth1, thresh = agent[ name_agent].get_CustomDepth(cfg) # print('Got state') # Get GPS information posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) position = posit[name_agent].position old_p = np.array([ old_posit[name_agent].position.x_val, old_posit[name_agent].position.y_val ]) new_p = np.array( [position.x_val, position.y_val]) # calculate distance distance[name_agent] = distance[ name_agent] + np.linalg.norm(new_p - old_p) old_posit[name_agent] = posit[name_agent] # print('calculated distance') reward, crash = agent[name_agent].reward_gen( new_depth1, action, crash_threshold, thresh, debug, cfg) ret[name_agent] = ret[name_agent] + reward agent_state = agent[name_agent].GetAgentState() # print('generated reward') if agent_state.has_collided or distance[ name_agent] < 0.01: num_collisions[ name_agent] = num_collisions[ name_agent] + 1 if agent_state.has_collided: print( 'Crash: Collision detected from environment' ) else: print( 'Crash: Collision detected from distance' ) crash = True reward = -1 data_tuple[name_agent].append([ current_state[name_agent], action, reward, crash ]) # Train if episode ends if crash: # Don't start another 
episode unless all agents have ended their episodes wait_for_others[name_agent] = True if distance[name_agent] < 0.01: # Drone won't move, reconnect print( 'Recovering from drone mobility issue' ) agent[ name_agent].client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) agent[ name_agent].client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) time.sleep(2) wait_for_others[name_agent] = False else: agent[ name_agent].network_model.log_to_tensorboard( tag='Return', group=name_agent, value=ret[name_agent], index=epi_num[name_agent]) agent[ name_agent].network_model.log_to_tensorboard( tag='Safe Flight', group=name_agent, value=distance[name_agent], index=epi_num[name_agent]) agent[ name_agent].network_model.log_to_tensorboard( tag='Episode Length', group=name_agent, value=len( data_tuple[name_agent]), index=epi_num[name_agent]) # Train episode train_REINFORCE( data_tuple[name_agent], algorithm_cfg.batch_size, agent_this_drone, algorithm_cfg.learning_rate, algorithm_cfg.input_size, algorithm_cfg.gamma, epi_num[name_agent]) c = agent_this_drone.network_model.get_vars( )[15] agent_this_drone.network_model.log_to_tensorboard( tag='weight', group=name_agent, value=c[0], index=epi_num[name_agent]) data_tuple[name_agent] = [] epi_num[name_agent] += 1 ret[name_agent] = 0 distance[name_agent] = 0 last_crash[name_agent] = 0 # print('train done, now reset') reset_to_initial( level[name_agent], reset_array, client, vehicle_name=name_agent) # time.sleep(0.2) current_state[name_agent] = agent[ name_agent].get_state() old_posit[ name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) if epi_num[name_agent] % 100 == 0: agent_this_drone.network_model.save_network( algorithm_cfg.network_path, epi_num[name_agent]) # if all are waiting for others, then we can reset. 
if all(wait_for_others.values()): # Now we can communicate weights print( 'Communicating the weights and averaging them' ) communicate_across_agents( agent, name_agent_list, algorithm_cfg) for n in name_agent_list: wait_for_others[n] = False else: current_state[name_agent] = new_state[ name_agent] time_exec = time.time() - start_time gpu_memory, gpu_utilization, sys_memory = get_SystemStats( process, cfg.NVIDIA_GPU) for i in range(0, len(gpu_memory)): tag_mem = 'GPU' + str(i) + '-Memory-GB' tag_util = 'GPU' + str(i) + 'Utilization-%' agent_this_drone.network_model.log_to_tensorboard( tag=tag_mem, group='SystemStats', value=gpu_memory[i], index=iter[name_agent]) agent_this_drone.network_model.log_to_tensorboard( tag=tag_util, group='SystemStats', value=gpu_utilization[i], index=iter[name_agent]) agent_this_drone.network_model.log_to_tensorboard( tag='Memory-GB', group='SystemStats', value=sys_memory, index=iter[name_agent]) s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} lr: {:>1.8f} Ret = {:>+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f} Reward: {:<+1.4f} '.format( name_agent, int(level[name_agent]), iter[name_agent], epi_num[name_agent], action_word, action_type, algorithm_cfg.learning_rate, ret[name_agent], last_crash[name_agent], time_exec, distance[name_agent], reward) if iter[name_agent] % print_interval == 0: print(s_log) log_files[name_agent].write(s_log + '\n') last_crash[ name_agent] = last_crash[name_agent] + 1 if debug: cv2.imshow( name_agent, np.hstack( (np.squeeze( current_state[name_agent], axis=0), np.squeeze(new_state[name_agent], axis=0)))) cv2.waitKey(1) if epi_num[ name_agent] % algorithm_cfg.total_episodes == 0: print(automate) automate = False iter[name_agent] += 1 # if iter % algorithm_cfg.communication_interval == 0: # print('Communicating the weights and averaging them') # communicate_across_agents(agent, name_agent_list, algorithm_cfg) # iter += 1 elif cfg.mode == 'infer': # Inference phase agent_state = 
agent[name_agent].GetAgentState() if agent_state.has_collided: print('Drone collided') print("Total distance traveled: ", np.round(distance[name_agent], 2)) active = False client.moveByVelocityAsync( vx=0, vy=0, vz=0, duration=1, vehicle_name=name_agent).join() if nav_x: # Nav_x is empty if the drone collides in first iteration ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20) file_path = env_folder + 'results/' fig_z.savefig(file_path + 'altitude_variation.png', dpi=500) fig_nav.savefig(file_path + 'navigation.png', dpi=500) close_env(env_process) print('Figures saved') else: posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) distance[name_agent] = distance[ name_agent] + np.linalg.norm( np.array([ old_posit[name_agent].position.x_val - posit[name_agent].position.x_val, old_posit[name_agent].position.y_val - posit[name_agent].position.y_val ])) # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z) altitude[name_agent].append( -posit[name_agent].position.z_val - f_z) quat = (posit[name_agent].orientation.w_val, posit[name_agent].orientation.x_val, posit[name_agent].orientation.y_val, posit[name_agent].orientation.z_val) yaw = euler_from_quaternion(quat)[2] x_val = posit[name_agent].position.x_val y_val = posit[name_agent].position.y_val z_val = posit[name_agent].position.z_val nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x) nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y) nav.set_data(nav_x, nav_y) nav_text.remove() nav_text = ax_nav.text( 25, 55, 'Distance: ' + str(np.round(distance[name_agent], 2)), style='italic', bbox={ 'facecolor': 'white', 'alpha': 0.5 }) line_z.set_data(np.arange(len(altitude[name_agent])), altitude[name_agent]) ax_z.set_xlim(0, len(altitude[name_agent])) fig_z.canvas.draw() fig_z.canvas.flush_events() current_state[name_agent] = agent[ name_agent].get_state() action, action_type = policy_REINFORCE( current_state[name_agent], agent[name_agent]) action_word = translate_action( action, 
algorithm_cfg.num_actions) # Take continuous action # agent[name_agent].take_action(action, algorithm_cfg.num_actions, Mode='static') agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') old_posit[name_agent] = posit[name_agent] # Verbose and log making s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s} '.format( x_val, y_val, z_val, yaw, action_word) print(s_log) log_files[name_agent].write(s_log + '\n') except Exception as e: if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)': print('Recovering from AirSim error') client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) time.sleep(2) agent[name_agent].client = client wait_for_others[name_agent] = False else: print('------------- Error -------------') exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] print(exc_type, fname, exc_tb.tb_lineno) print(exc_obj) automate = False print('Hit r and then backspace to start from this point')