def generate_observation_continuous(self): """ Generate observation of size N_aircraft x state_size""" minlat, maxlat = 50.75428888888889, 55. minlon, maxlon = 2., 7.216944444444445 destidx = navdb.getaptidx('EHAM') lat, lon = navdb.aptlat[destidx], navdb.aptlon[destidx] qdr, dist = qdrdist(traf.lat, traf.lon, lat*np.ones(traf.lat.shape), lon*np.ones(traf.lon.shape)) # qdr, dist = np.asarray(qdr)[0], np.asarray(dist)[0] self.qdr, self.dist = qdr, dist obs = np.array([traf.lat, traf.lon, traf.hdg, qdr, dist, traf.cas]).transpose() # Normalize input data to the range [-1, ,1] obs = np.array([normalize(traf.lat, minlat, maxlat), normalize(traf.lon, minlon, maxlon), normalize(traf.hdg, 0, 360), normalize(qdr+180, 0, 360), normalize(dist, 0, self.dist_scale), normalize(traf.cas, 80, 200)]).transpose() obs = np.array([normalize(traf.lat, minlat, maxlat), normalize(traf.lon, minlon, maxlon), degtoqdr180(traf.hdg, qdr)/180., normalize(dist, 0, self.dist_scale) ]).transpose() # print('obs', degtoqdr180(traf.hdg, qdr)/180.) return obs
def generate_shared_observation(self): """ Generate the shared observation sequences for the aircraft States are distance, heading, bearing, latitude longitude, (speed) The observation are Ntraf sequences of size (Ntraf - 1 , x) """ minlat, maxlat = 50.75428888888889, 55. minlon, maxlon = 2., 7.216944444444445 # Generate distance matrix qdr, dist = qdrdist_matrix(np.mat(traf.lat), np.mat(traf.lon), np.mat(traf.lat), np.mat(traf.lon)) qdr, dist = np.array(qdr), np.array(dist) shared_obs = np.zeros((traf.ntraf, traf.ntraf, self.shared_state_size)) destidx = navdb.getaptidx('EHAM') lat, lon = navdb.aptlat[destidx], navdb.aptlon[destidx] qdr1, dist1 = qdrdist(traf.lat, traf.lon, lat * np.ones(traf.lat.shape), lon * np.ones(traf.lon.shape)) for i in range(traf.ntraf): # shared_obs = np.zeros((traf.ntraf - 1, self.shared_state_size)) shared_obs[i, :, 0] = normalize(dist[i,:], 0, 200) shared_obs[i, :, 1] = degtoqdr180(traf.hdg, qdr1)/180. # divide by 180 shared_obs[i, :, 2] = normalize(traf.lat, minlat, maxlat) shared_obs[i, :, 3] = normalize(traf.lon, minlon, maxlon) # if traf.ntraf==1: # shared_obs = np.zeros((traf.ntraf+1, traf.ntraf, self.shared_state_size)) # print('shard_obs', shared_obs.shape) return shared_obs
def load_waypoints(self): wpts = np.loadtxt('plugins/ml/routes/testroute.txt') rows = wpts.shape[0] cols = wpts.shape[1] wpts = wpts.reshape((int(rows * cols / 2), 2)) destidx = navdb.getaptidx('EHAM') lat, lon = navdb.aptlat[destidx], navdb.aptlon[destidx] wpts = np.vstack((wpts, np.array([lat,lon]))) wpts = norm(wpts, axis=0) eham = wpts[-1,:] wpts = wpts[:-1,:].reshape((rows, cols)) wp_db = dict() i = 0 for j in range(wpts.shape[0]): for k in range(int(wpts.shape[1] / 2)): # stack.stack('DEFWPT RL{}{}{}, {}, {}, FIX'.format(i, j, k, wpts[j,k*2], wpts[j,k*2+1])) wp_db['RL{}{}{}'.format(i, j, k)] = [wpts[j, k * 2], wpts[j, k * 2 + 1]] wp_db['EHAM'] = [eham[0], eham[1]] wp_db['dummy']=[0,0] return wp_db
def act_continuous(self, state): state_copy = state.copy() # No action should be taken if there are no aircraft, otherwise the program crashes. # n_aircraft = int(np.prod(obs.shape) / self.state_size) if type(state)==list: n_aircraft = int(np.prod(state[0].shape) / self.state_size) if len(state_copy[0].shape)==2: state_copy[0] = np.expand_dims(state_copy[0], axis=0) if len(state_copy[1].shape)==2: state_copy[1] = np.expand_dims(state_copy[1], axis=0) obs = state_copy[0] state_copy[1] = state[1].reshape(1, n_aircraft * n_aircraft, state[1].shape[-1]) else: n_aircraft = int(np.prod(state.shape)/self.state_size) if len(state_copy.shape)==2: state_copy = np.expand_dims(state_copy, axis=0) obs = state_copy if n_aircraft==0: return # state[0] = state[0].reshape(1, state[0].shape[0], state[0].shape[1]) # state[0] = self.pad_zeros(state[0], self.max_agents).reshape((1, self.max_agents, self.state_size)) # state[1] = self.pad_nines(state[1], self.max_agents).reshape((1, self.max_agents, self.max_agents-1, self.shared_obs_size)) # state_copy = state.copy() # state_copy[0] = state[0].reshape(1, state[0].shape[0], state[0].shape[1]) # if n_aircraft==1: # state_copy[1] = state[1].reshape(1, n_aircraft, state[1].shape[-1]) # else: # state_copy[1] = state[1].reshape(1, n_aircraft*(n_aircraft-1), state[1].shape[-1]) # state_copy[1] = state[1].reshape(1, n_aircraft * (n_aircraft - 1), state[1].shape[-1]) self.action = self.actor.predict(state_copy) # state[0] = state[0].reshape(1, state[0].shape[0], state[0].shape[1]) # data = state # model = self.target.model # print_intermediate_layer_output(model, data, 'merged_mask') # print_intermediate_layer_output(model, data, 'pre_brnn') # print_intermediate_layer_output(model, data, 'brnn') # print_intermediate_layer_output(model, data, 'post_brnn') qdr, dist = qdrdist(traf.lat, traf.lon, traf.actwp.lat, traf.actwp.lon) # [deg][nm]) # check which aircraft have reached their destination by checkwaypoint type = 3 (destination) and the relative # heading to this waypoint is exceeding 150 dest = np.asarray([traf.ap.route[i].wptype[traf.ap.route[i].iactwp] for i in range(traf.ntraf)]) == 3 away = np.abs(degto180(traf.trk % 360. - qdr % 360.)) > 100. d = dist < 2 done_idx = np.where(away * dest * d)[0] tas = np.delete(traf.tas, done_idx, 0) bank = np.delete(traf.bank, done_idx, 0) hdg = np.delete(traf.hdg, done_idx, 0) traflat = np.delete(traf.lat, done_idx, 0) traflon = np.delete(traf.lon, done_idx, 0) # data = [state[0], state[1], self.action] # model = self.critic.model # print_intermediate_layer_output(model, data, 'input_actions') # print_intermediate_layer_output(model, data, 'max_pool') # print_intermediate_layer_output(model, data, 'concatenate_inputs') # print_intermediate_layer_output(model, data, 'input_mask') # print_intermediate_layer_output(model, data, 'pre_brnn') # print_intermediate_layer_output(model, data, 'brnn') # print_intermediate_layer_output(model, data, 'post_brnn') # Exploration noise is added only when no test episode is running # print('OU', self.OU()) annealing_factor = 1. if self.summary_counter > 1000: annealing_factor = 1 - 0.0003 * self.summary_counter annealing_factor = np.max([annealing_factor, 0.15]) noise = self.OU()[0:n_aircraft].reshape(self.action.shape) # print(not self.summary_counter % CONF.test_freq, not self.train_indicator) # print('action', self.action) # print('noise', noise) # if not self.summary_counter % CONF.test_freq == 0 or not self.train_indicator: if self.train_indicator: if not self.summary_counter % CONF.test_freq == 0: # Add exploration noise and clip to range [-1, 1] for action space self.action = self.action + noise * annealing_factor # print('noise', noise, self.action) self.action = np.maximum(-1*np.ones(self.action.shape), self.action) self.action = np.minimum(np.ones(self.action.shape), self.action) # print('action', self.action) # Apply Bell curve here to sample from to get action values # mu = 0 # sigma = 0.5 # y_offset = 0.107982 + 6.69737e-08 # action = bell_curve(self.action[0], mu=mu, sigma=sigma, y_offset=y_offset, scaled=True) dist_limit = 5 # nm # dist = state[0][:,:, 4] * 110 """ dist = (obs[:,:,4]+1)/2 * 110 # denormalize # dist = dist.reshape(action.shape) mul_factor = 80*np.ones(dist.shape) """ # Compute multiplier for angle difference in action command turnrad = np.square(tas) / (np.maximum(0.01 * np.ones(tas.shape), np.tan(bank)) * g0) # [m] max_angle = (tas * CONF.update_interval) / (2 * np.pi * turnrad) * 360 dheading = self.action.reshape(max_angle.shape) * max_angle destidx = navdb.getaptidx('EHAM') lat, lon = navdb.aptlat[destidx], navdb.aptlon[destidx] qdr, dist = qdrdist(traflat, traflon, lat*np.ones(traflat.shape), lon*np.ones(traflon.shape)) # qdr, dist = np.asarray(qdr)[0], np.asarray(dist)[0] # print('qdr', qdr) qdr = (qdr+360)%360 # qdr = degtoqdr180(traf.hdg + dheading, qdr) heading = degtoqdr180(hdg + dheading, qdr) # traf.hdg + dheading + 360 # heading = np.maximum(-90*np.ones(heading.shape), heading) # heading = np.minimum(+90*np.ones(heading.shape), heading) # print("maxa {},\n dhdg {},\n qdr {},\n, , \n hdg1 {}\n, hdg2{}, \ntarget_hdg\n{}".format(max_angle, dheading, qdr, traf.hdg, heading, qdr180todeg(heading, qdr))) heading = qdr180todeg(heading, qdr) # print('HEADING', heading) return heading.ravel()[0:n_aircraft] wheredist = np.where(dist<dist_limit) mul_factor[wheredist] = dist[wheredist] / dist_limit * 80 minus = np.where(self.action.reshape(self.action.shape[1])<0)[0] plus = np.where(self.action.reshape(self.action.shape[1])>=0)[0] # dheading = np.zeros(action.shape) # dheading[minus] = (action[minus] - 1) * mul_factor[minus] # dheading[plus] = np.abs(action[plus]-1) * mul_factor[plus] # action[minus] = -1*action[minus] # qdr = state[0][:,:,3].transpose()*360-180 # Denormalize qdr = obs[:,:,3].transpose() * 180 #- 180 dheading = self.action * mul_factor.reshape(self.action.shape) # dheading = 90*np.ones(dheading.shape) # print('heading', dheading, self.action) heading = qdr + dheading print(qdr, dheading, heading, mul_factor, wheredist, dist) # print('action', self.action.ravel()) return heading.ravel()[0:n_aircraft]
def calc_state(): # Required information: # traf.lat # traf.lon # traf.hdg # traf.id # latlong eham # multi agents obs: destination ID, lat, long, hdg, dist_wpt, hdg_wpt, dist_plane1, hdg_plane1, dist_plane2, hdg_plane2 (nm/deg) # Determine lat long for destination set # -3 = EHDL # -2 = EHEH # -1 = EHAM if settings.multi_destination: # lat and long reference list: # NAME [EHAM 18R, EHAM 09, EHAM 27, EHGR 28, EHGR 20, EHDL 02, EHDL 20] # IDX [ 0 , 1 , 2 , 3 , 4 , 5 , 6 ] # -IDX [ -7 , -6 , -5 , -4 , -3 , -2 , -1 ] # DEST translation # [ 1 , 2 , 3 , 4 , 5 , 6 , 7 ] # for dest_idx in range(apt_list): # lat, lon = zip(*cone_centre[dest_idx]) # dest_idx_list = [navdb.getaptidx('EHDL'), navdb.getaptidx('EHEH'), navdb.getaptidx('EHAM')] # lat_dest_list = navdb.aptlat[dest_idx_list] # lon_dest_list = navdb.aptlon[dest_idx_list] lat_dest_list = np.asarray(lat_cone_list) lon_dest_list = np.asarray(lon_cone_list) else: dest_idx_list = navdb.getaptidx('EHAM') lat_dest_list = navdb.aptlat[dest_idx_list] lon_dest_list = navdb.aptlon[dest_idx_list] # Retrieve latitude and longitude, and add destination lat/long to the end of the list for distance # and qdr calculation. # lat_list = np.append(traf.lat, lat_eham) # lon_list = np.append(traf.lon, lon_eham) lat_list = np.append(traf.lat, lat_dest_list) lon_list = np.append(traf.lon, lon_dest_list) traf_id_list = np.tile(traf.id, (len(traf.lon), 1)) qdr, dist = tools.geo.kwikqdrdist_matrix(np.asmatrix(lat_list), np.asmatrix(lon_list), np.asmatrix(lat_list), np.asmatrix(lon_list)) qdr = degto180(qdr) traf_idx = np.arange(len(traf.id)) dist_destination = dist[traf_idx, -traf.dest_temp] qdr_destination = qdr[traf_idx, -traf.dest_temp] # # Reshape into; Rows = each aircraft, columns are states. OLD # obs_matrix_first = np.concatenate([traf.lat.reshape(-1, 1), traf.lon.reshape(-1, 1), traf.hdg.reshape(-1, 1), # np.asarray(dist[:-1, -1]), np.asarray(qdr[:-1, -1])], axis=1) # Reshape into; Rows = each aircraft, columns are states obs_matrix_first = np.concatenate([ traf.dest_temp.reshape(-1, 1), traf.lat.reshape(-1, 1), traf.lon.reshape(-1, 1), traf.hdg.reshape(-1, 1), np.asarray(dist_destination.reshape(-1, 1)), np.asarray(qdr_destination.reshape(-1, 1)) ], axis=1) # Remove last entry, that is distance calculated to destination. if settings.multi_destination: dist = np.asarray(dist[:-7, :-7]) qdr = np.asarray(qdr[:-7, :-7]) else: dist = np.asarray(dist[:-1, :-1]) qdr = np.asarray(qdr[:-1, :-1]) # Sort value's on clostest distance. sort_idx = np.argsort(dist, axis=1) dist = np.take_along_axis(dist, sort_idx, axis=1) qdr = np.take_along_axis(qdr, sort_idx, axis=1) traf_id_list_sort = np.take_along_axis(traf_id_list, sort_idx, axis=1) traf_send_flag = True # Determine amount of current aircraft and neighbours. n_ac_neighbours = np.size(dist, axis=1) - 1 n_ac_current = np.size(dist, axis=0) if n_ac_neighbours > 0 and settings.n_neighbours > 0: # Split up values to make [dist, qdr] stacks. dist = np.split(dist[:, 1:], np.size(dist[:, 1:], axis=1), axis=1) qdr = np.split(qdr[:, 1:], np.size(qdr[:, 1:], axis=1), axis=1) # traf_id_list_sort = traf_id_list_sort[:, 1:] # When current number of neighbours is lower than the amount set in settings, fill rest with dummy variables. if n_ac_neighbours < settings.n_neighbours: nr_fill = settings.n_neighbours - n_ac_neighbours # Dummy values fill_mask_dist = np.full((n_ac_current, 1), -1) #settings.max_dist fill_mask_qdr = np.full((n_ac_current, 1), -1) fill_mask = np.concatenate([fill_mask_dist, fill_mask_qdr], axis=1) comb_ac = np.hstack( [np.hstack([dist[i], qdr[i]]) for i in range(n_ac_neighbours)]) for i in range(nr_fill): comb_ac = np.hstack((comb_ac, fill_mask)) n_ac_neighbours_send = n_ac_neighbours # If there are still enough neighbouring aircraft, the state space can be made without the use of dummies. elif n_ac_neighbours >= settings.n_neighbours and settings.n_neighbours > 0: comb_ac = np.hstack([ np.hstack([dist[i], qdr[i]]) for i in range(settings.n_neighbours) ]) n_ac_neighbours_send = settings.n_neighbours traf_id_list_sort = traf_id_list_sort[:, :settings.n_neighbours + 1] # Combine S0 (lat, long, hdg, wpt dist, hdg dist) with other agent information. obs_matrix_first = np.concatenate([obs_matrix_first, comb_ac], axis=1) elif n_ac_neighbours == 0: # Dummy values nr_fill = settings.n_neighbours fill_mask_dist = np.full((n_ac_current, 1), -1) fill_mask_qdr = np.full((n_ac_current, 1), -1) fill_mask = np.concatenate([fill_mask_dist, fill_mask_qdr], axis=1) for i in range(nr_fill): obs_matrix_first = np.concatenate([obs_matrix_first, fill_mask], axis=1) n_ac_neighbours_send = n_ac_neighbours traf_id_list_sort = [] traf_send_flag = False if settings.n_neighbours == 0: n_ac_neighbours_send = settings.n_neighbours traf_id_list_sort = [] traf_send_flag = False # Create final observation dict, with corresponding traffic ID's. obs_c = dict(zip(traf.id, obs_matrix_first)) info = dict.fromkeys(traf.id, {}) # Remove check later. if not traf.id: print('ikzithier') n_ac_neighbours_send = 0 if traf_send_flag: for idx, keys in enumerate(traf.id): if keys == traf_id_list_sort[idx, 0]: info[keys] = {'sequence': list(traf_id_list_sort[idx])} else: raise ValueError("Info dict is wrongly constructed") return obs_c, n_ac_neighbours_send, info