def walk(self, handle, position, direction):
    """Advance one step along the rail from ``position`` facing ``direction``.

    Returns a 5-tuple ``(next_position, next_direction, distance, action,
    possible_transitions)``: the cell the agent would reach, its heading
    there, the distance-map value of that cell, and the action realising
    the step (``MOVE_FORWARD`` on plain track, otherwise the chosen branch
    index + 1, i.e. LEFT/FORWARD/RIGHT).
    """
    possible_transitions = self.env.rail.get_transitions(*position, direction)
    num_transitions = fast_count_nonzero(possible_transitions)
    distance_map = self.env.distance_map.get()

    if num_transitions == 1:
        # Plain track: the single feasible transition is the move.
        new_direction = fast_argmax(possible_transitions)
        new_position = get_new_position(position, new_direction)
        dist = distance_map[handle, new_position[0], new_position[1], new_direction]
        return new_position, new_direction, dist, RailEnvActions.MOVE_FORWARD, possible_transitions

    # Switch cell: rank the three relative branches (left, forward, right)
    # and let get_action pick one. Infeasible branches are padded with
    # np.inf/None so the chosen index also encodes the action (index + 1).
    branch_dists = []
    branch_cells = []
    branch_dirs = []
    for offset in (-1, 0, 1):
        candidate_dir = (direction + offset) % 4
        if possible_transitions[candidate_dir]:
            candidate_cell = get_new_position(position, candidate_dir)
            branch_dists.append(
                distance_map[handle, candidate_cell[0], candidate_cell[1], candidate_dir])
            branch_cells.append(candidate_cell)
            branch_dirs.append(candidate_dir)
        else:
            branch_dists.append(np.inf)
            branch_cells.append(None)
            branch_dirs.append(None)

    choice = self.get_action(handle, branch_dists)
    return (branch_cells[choice], branch_dirs[choice], branch_dists[choice],
            choice + 1, possible_transitions)
def _explore(self, handle, new_position, new_direction, depth=0):
    """Scan the track ahead of agent ``handle`` and summarise what is found.

    Returns ``(has_opp_agent, has_same_agent, has_switch, visited)``. The
    first three are scores in [0, 1] — averaged over the branches explored
    at a switch. ``has_switch`` is only ever propagated up from recursive
    calls; this method never sets it directly. ``visited`` collects the
    inspected cells (nested lists per recursion branch).
    """
    has_opp_agent = 0
    has_same_agent = 0
    has_switch = 0
    visited = []

    # Recursion bound: stop once the branching depth budget is exhausted.
    if depth >= self.max_depth:
        return has_opp_agent, has_same_agent, has_switch, visited

    # Hard cap of 100 steps guarantees termination even on looping track.
    for _ in range(100):
        visited.append(new_position)

        occupant = self.env.agent_positions[new_position]
        if occupant not in (-1, handle):
            # Another agent occupies this cell; its heading decides the flag.
            if self.env.agents[occupant].direction != new_direction:
                has_opp_agent = 1
            else:
                has_same_agent = 1
            return has_opp_agent, has_same_agent, has_switch, visited

        agents_on_switch, agents_near_to_switch, _, _ = \
            self.check_agent_decision(new_position, new_direction)
        if agents_near_to_switch:
            # A decision point lies one cell ahead; stop the scan here.
            return has_opp_agent, has_same_agent, has_switch, visited

        possible_transitions = self.env.rail.get_transitions(*new_position, new_direction)
        if agents_on_switch:
            # Branch into every feasible direction and average the findings.
            branch_count = 0
            for branch_dir in range(4):
                if possible_transitions[branch_dir] == 1:
                    branch_count += 1
                    hoa, hsa, hs, v = self._explore(handle,
                                                    get_new_position(new_position, branch_dir),
                                                    branch_dir,
                                                    depth + 1)
                    visited.append(v)
                    has_opp_agent += hoa
                    has_same_agent += hsa
                    has_switch += hs
            # Divide by at least 1.0 so a switch with no feasible branch
            # still yields well-defined (zero) scores.
            branch_count = max(branch_count, 1.0)
            return (has_opp_agent / branch_count, has_same_agent / branch_count,
                    has_switch / branch_count, visited)

        # Plain track: follow the single feasible transition and keep walking.
        new_direction = fast_argmax(possible_transitions)
        new_position = get_new_position(new_position, new_direction)

    # Step budget exhausted without finding anything noteworthy.
    return has_opp_agent, has_same_agent, has_switch, visited
def _explore(self, handle, new_position, new_direction, distance_map, depth=0):
    """Scan the track ahead of agent ``handle`` starting at ``new_position``.

    Walks cell by cell in ``new_direction``, branching recursively at
    switches, and aggregates what it finds.

    Returns a 6-tuple ``(has_opp_agent, has_same_agent, has_target,
    has_opp_target, visited, min_dist)``: 0/1 flags (max-aggregated over
    branches) for an oncoming agent, a same-direction agent, this agent's
    own target and an opposing target on the path; the list of visited
    cells (nested per recursion branch); and the smallest distance-map
    value encountered.
    """
    has_opp_agent = 0
    has_same_agent = 0
    has_target = 0
    has_opp_target = 0
    visited = []
    min_dist = distance_map[handle, new_position[0], new_position[1], new_direction]

    # stop exploring (max_depth reached)
    if depth >= self.max_depth:
        return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist

    # max_explore_steps = 100 -> just to ensure that the exploration ends
    cnt = 0
    while cnt < 100:
        cnt += 1
        visited.append(new_position)

        opp_a = self.env.agent_positions[new_position]
        if opp_a != -1 and opp_a != handle:
            if self.env.agents[opp_a].direction != new_direction:
                # opp agent found -> stop exploring. This would be a strong signal.
                has_opp_agent = 1
                return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist
            else:
                # same agent found
                # the agent can follow the agent, because this agent is still moving ahead and there shouldn't
                # be any dead-lock nor other issue -> agent is just walking -> if other agent has a deadlock
                # this should be avoided by other agents -> one edge case would be when other agent has it's
                # target on this branch -> thus the agents should scan further whether there will be an opposite
                # agent walking on same track
                has_same_agent = 1
                # NOTE(review): the comment above argues for continuing the
                # scan ("!NOT stop exploring!"), yet the code returns here —
                # confirm which behaviour is intended.
                return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist

        # agents_on_switch == TRUE -> Current cell is a switch where the agent can decide (branch) in exploration
        # agent_near_to_switch == TRUE -> One cell before the switch, where the agent can decide
        #
        agents_on_switch, agents_near_to_switch, _, _ = \
            self.agent_can_choose_helper.check_agent_decision(new_position, new_direction)

        if agents_near_to_switch:
            # The exploration was walking on a path where the agent can not decide
            # Best option would be MOVE_FORWARD -> Skip exploring - just walking
            return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist

        # NOTE(review): this membership test does not depend on the cell
        # being explored — it only compares the agent's own target against
        # the global target collection. Presumably it is meant to flag a
        # target shared with / opposing another agent; verify against how
        # `self.agents_target` is built.
        if self.env.agents[handle].target in self.agents_target:
            has_opp_target = 1

        if self.env.agents[handle].target == new_position:
            # own target reached -> stop exploring this branch
            has_target = 1
            return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist

        possible_transitions = self.env.rail.get_transitions(*new_position, new_direction)
        if agents_on_switch:
            orientation = new_direction
            possible_transitions_nonzero = fast_count_nonzero(possible_transitions)
            if possible_transitions_nonzero == 1:
                # single exit (e.g. dead-end turnaround): orient along it
                orientation = fast_argmax(possible_transitions)
            for dir_loop, branch_direction in enumerate(
                    [(orientation + dir_loop) % 4 for dir_loop in range(-1, 3)]):
                # branch the exploration path and aggregate the found information
                # --- OPEN RESEARCH QUESTION ---> is this good or shall we use full detailed information as
                # we did in the TreeObservation (FLATLAND) ?
                # NOTE(review): transitions and positions below are indexed
                # with the enumerate index `dir_loop` (0..3), not with
                # `branch_direction`; all four absolute directions are still
                # covered, so `branch_direction` is effectively unused here.
                if possible_transitions[dir_loop] == 1:
                    hoa, hsa, ht, hot, v, m_dist = self._explore(handle,
                                                                 get_new_position(new_position, dir_loop),
                                                                 dir_loop,
                                                                 distance_map,
                                                                 depth + 1)
                    visited.append(v)
                    has_opp_agent = max(hoa, has_opp_agent)
                    has_same_agent = max(hsa, has_same_agent)
                    has_target = max(has_target, ht)
                    has_opp_target = max(has_opp_target, hot)
                    min_dist = min(min_dist, m_dist)
            return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist
        else:
            # plain track: follow the single feasible transition
            new_direction = fast_argmax(possible_transitions)
            new_position = get_new_position(new_position, new_direction)
            min_dist = min(min_dist, distance_map[handle, new_position[0], new_position[1], new_direction])

    # exploration step budget exhausted
    return has_opp_agent, has_same_agent, has_target, has_opp_target, visited, min_dist
def get(self, handle: int = 0):
    """Build the observation vector for agent ``handle``.

    All meaningful entries are in [0, 1]; distance-map infinities and NaNs
    are mapped to -1 at the end. Layout (``d`` = 0..3 indexes the four
    branch directions enumerated from ``orientation - 1``):

    * [0..3]   branch ``d``'s next cell is closer to the target than here
    * [4]      agent status == READY_TO_DEPART
    * [5]      agent status == ACTIVE
    * [6]      agent status == DONE or DONE_REMOVED
    * [7]      agent is on a switch where it can take a routing decision
    * [8]      agent is on a switch of any kind
    * [9]      agent is one cell before a decision switch
    * [10]     agent is one cell before a switch of any kind
    * [11+d]   exploring branch ``d`` found a cell closer to the target
    * [15+d]   branch ``d`` contains an agent heading the opposite way
    * [19+d]   branch ``d`` contains an agent heading the same way
    * [23+d]   branch ``d`` contains this agent's own target
    * [27+d]   branch ``d`` raised the opp-target flag (see ``_explore``)
    * [30..34] one-hot of the dead-lock-avoidance agent's proposed action
               (DO_NOTHING, LEFT, FORWARD, RIGHT, STOP)

    NOTE(review): index 30 is written both by ``observation[27 + dir_loop]``
    (for ``dir_loop == 3``) and by the DO_NOTHING action flag below; the
    action flag wins because it is assigned later. Confirm whether losing
    the last branch's opp-target feature is intentional — fixing it would
    change the observation layout that trained models depend on.
    """
    observation = np.zeros(self.observation_dim)
    visited = []
    agent = self.env.agents[handle]

    # Resolve the agent's effective position from its lifecycle status.
    agent_done = False
    if agent.status == RailAgentStatus.READY_TO_DEPART:
        agent_virtual_position = agent.initial_position
        observation[4] = 1
    elif agent.status == RailAgentStatus.ACTIVE:
        agent_virtual_position = agent.position
        observation[5] = 1
    else:
        observation[6] = 1
        agent_virtual_position = (-1, -1)
        agent_done = True

    if not agent_done:
        visited.append(agent_virtual_position)
        distance_map = self.env.distance_map.get()
        current_cell_dist = distance_map[handle,
                                         agent_virtual_position[0], agent_virtual_position[1],
                                         agent.direction]
        possible_transitions = self.env.rail.get_transitions(*agent_virtual_position, agent.direction)

        # On plain track re-orient along the single feasible transition so
        # the branch enumeration below is relative to where the agent goes.
        orientation = agent.direction
        if fast_count_nonzero(possible_transitions) == 1:
            orientation = fast_argmax(possible_transitions)

        for dir_loop, branch_direction in enumerate(
                [(orientation + dir_loop) % 4 for dir_loop in range(-1, 3)]):
            if possible_transitions[branch_direction]:
                new_position = get_new_position(agent_virtual_position, branch_direction)
                new_cell_dist = distance_map[handle,
                                             new_position[0], new_position[1],
                                             branch_direction]
                # np.isinf replaces np.math.isinf: the private np.math alias
                # was deprecated in NumPy 1.25 and removed in NumPy 2.0.
                if not (np.isinf(new_cell_dist) and np.isinf(current_cell_dist)):
                    observation[dir_loop] = int(new_cell_dist < current_cell_dist)

                has_opp_agent, has_same_agent, has_target, has_opp_target, v, min_dist = \
                    self._explore(handle, new_position, branch_direction, distance_map)
                visited.append(v)

                if not (np.isinf(min_dist) and np.isinf(current_cell_dist)):
                    observation[11 + dir_loop] = int(min_dist < current_cell_dist)
                observation[15 + dir_loop] = has_opp_agent
                observation[19 + dir_loop] = has_same_agent
                observation[23 + dir_loop] = has_target
                observation[27 + dir_loop] = has_opp_target

        agents_on_switch, \
        agents_near_to_switch, \
        agents_near_to_switch_all, \
        agents_on_switch_all = \
            self.agent_can_choose_helper.check_agent_decision(agent_virtual_position, agent.direction)

        observation[7] = int(agents_on_switch)
        observation[8] = int(agents_on_switch_all)
        observation[9] = int(agents_near_to_switch)
        observation[10] = int(agents_near_to_switch_all)

        # One-hot encode what the dead-lock-avoidance policy would do here.
        action = self.dead_lock_avoidance_agent.act(handle, None, eps=0)
        observation[30] = action == RailEnvActions.DO_NOTHING
        observation[31] = action == RailEnvActions.MOVE_LEFT
        observation[32] = action == RailEnvActions.MOVE_FORWARD
        observation[33] = action == RailEnvActions.MOVE_RIGHT
        observation[34] = action == RailEnvActions.STOP_MOVING

    self.env.dev_obs_dict.update({handle: visited})

    # Sanitise: distance-map infinities / NaNs must not leak into the model.
    observation[np.isinf(observation)] = -1
    observation[np.isnan(observation)] = -1
    return observation