def walk(self, handle, position, direction):
    """Take one step from ``position`` heading ``direction``.

    When the cell offers exactly one transition, that transition is followed
    and reported as ``RailEnvActions.MOVE_FORWARD``. Otherwise the headings
    ``direction - 1``, ``direction`` and ``direction + 1`` (mod 4) are
    evaluated in that order and ``self.get_action`` picks one of them from
    their distance-map values.

    :param handle: agent handle; first index into the distance map
    :param position: current cell, unpacked into ``rail.get_transitions``
    :param direction: current heading
    :return: ``(new_position, new_direction, distance, action,
        possible_transitions)``; in the multi-transition case ``action`` is
        the index returned by ``self.get_action`` plus one
    """
    rail_transitions = self.env.rail.get_transitions(*position, direction)

    if fast_count_nonzero(rail_transitions) == 1:
        heading = fast_argmax(rail_transitions)
        cell = get_new_position(position, heading)
        remaining = self.env.distance_map.get()[handle, cell[0], cell[1],
                                                heading]
        return (cell, heading, remaining, RailEnvActions.MOVE_FORWARD,
                rail_transitions)

    # Three parallel lists, one slot per relative heading. Closed headings
    # keep an infinite distance and ``None`` placeholders so that the index
    # returned by ``get_action`` lines up with all three lists.
    distances, cells, headings = [], [], []
    for turn in (-1, 0, 1):
        heading = (direction + turn) % 4
        if rail_transitions[heading]:
            cell = get_new_position(position, heading)
            distances.append(
                self.env.distance_map.get()[handle, cell[0], cell[1],
                                            heading])
            cells.append(cell)
            headings.append(heading)
        else:
            distances.append(np.inf)
            cells.append(None)
            headings.append(None)

    choice = self.get_action(handle, distances)
    return (cells[choice], headings[choice], distances[choice], choice + 1,
            rail_transitions)
def fix_inner_nodes(grid_map: GridTransitionMap, inner_node_pos: IntVector2D, rail_trans: RailEnvTransitions):
    """
    Fix an inner city node by connecting it to its neighbouring parallel track.

    The node is only modified when exactly two of its four neighbouring cells
    hold a value greater than zero in ``grid_map.grid``. In that case the
    node's cell is overwritten with a value that has two transitions set, and
    each of the two neighbours gets one additional transition set.

    :param grid_map: map whose ``grid`` is read and updated in place
    :param inner_node_pos: inner city node to fix
    :param rail_trans: supplies ``set_transition`` used to build cell values
    :return: None
    """
    occupied = [
        heading for heading in range(4)
        if grid_map.grid[get_new_position(inner_node_pos, heading)] > 0
    ]
    if len(occupied) != 2:
        return

    first_dir, second_dir = occupied

    # Build the node's own cell value from scratch (starting at 0).
    node_value = rail_trans.set_transition(0, mirror(first_dir), second_dir, 1)
    node_value = rail_trans.set_transition(node_value, mirror(second_dir),
                                           first_dir, 1)
    grid_map.grid[inner_node_pos] = node_value

    # Extend each occupied neighbour, first neighbour first.
    for heading in (first_dir, second_dir):
        neighbour = get_new_position(inner_node_pos, heading)
        grid_map.grid[neighbour] = rail_trans.set_transition(
            grid_map.grid[neighbour], heading, mirror(heading), 1)
def reset(self):
    """Rebuild the rail graph and its lookup tables from ``self.env``.

    Resets ``target_positions``, ``edge_positions`` and ``edge_paths``, walks
    from the first agent's initial position until ``is_junction`` or
    ``is_target`` holds, creates the root nodes for that cell, and lets
    ``explore_branch`` grow the graph from each of them.
    """
    self.target_positions = {a.target: 1 for a in self.env.agents}
    # (cell.position, direction) -> [(start, end, direction, distance)]
    self.edge_positions = defaultdict(list)
    # (node.position, direction) -> [(cell.position, direction)]
    self.edge_paths = defaultdict(list)

    # Find a first node: start at one of the agents and follow the rails
    # until a junction (or a target) is reached.
    seed_agent = first(self.env.agents)
    cell, heading = seed_agent.initial_position, seed_agent.direction
    while not (self.is_junction(cell) or self.is_target(cell)):
        heading = first(self.get_possible_transitions(cell, heading))
        cell = get_new_position(cell, heading)

    # Build the graph representation of the rail network from this node.
    cell_transitions = self.get_all_transitions(cell)
    roots = {
        exits: RailNode(cell, exits, self.is_target(cell))
        for exits in cell_transitions
        if exits
    }
    self.graph = {
        (*cell, entry): roots[exits]
        for entry, exits in enumerate(cell_transitions)
        if exits
    }
    for exits, root in roots.items():
        for heading in exits:
            self.explore_branch(root, get_new_position(cell, heading),
                                heading)
def conflict(self, handle, pos, movement):
    """Report whether another agent standing on ``pos`` conflicts with us.

    For every other agent whose ``position`` equals ``pos``, the first open
    transition for that agent's direction and the first open transition for
    ``movement`` are followed one cell each. The pair counts as a conflict
    when the two resulting cells differ; the other agent's handle is then
    appended to ``self._conflict_map[handle]``.

    :param handle: handle of the agent being evaluated
    :param pos: cell to inspect
    :param movement: heading with which ``pos`` is entered
    :return: ``np.any`` over the per-agent conflict flags (false when no
        other agent is on ``pos``)
    """
    others_here = [
        other.handle for other in self.env.agents
        if pos == other.position and other.handle != handle
    ]

    flags = []
    for other_handle in others_here:
        other_heading = self.env.agents[other_handle].direction
        other_exit = np.argmax(
            self.env.rail.get_transitions(*pos, other_heading))
        own_exit = np.argmax(self.env.rail.get_transitions(*pos, movement))

        own_next = get_new_position(pos, own_exit)
        other_next = get_new_position(pos, other_exit)
        diverging = own_next != other_next

        if self._asserts:
            assert np.all(np.array(own_next) > 0)
            assert np.all(np.array(other_next) > 0)

        flags.append(diverging)
        if diverging:
            self._conflict_map[handle].append(other_handle)

    return np.any(flags)
def get_valid_move_actions_(agent_direction: Grid4TransitionsEnum,
                            agent_position: Tuple[int, int],
                            rail: GridTransitionMap) -> Set[RailEnvNextAction]:
    """
    Get the valid move actions (forward, left, right) for an agent.

    TODO https://gitlab.aicrowd.com/flatland/flatland/issues/299 The implementation could probably be more efficient
    and more elegant. But given the few calls this has no priority now.

    Parameters
    ----------
    agent_direction : Grid4TransitionsEnum
    agent_position: Tuple[int,int]
    rail : GridTransitionMap

    Returns
    -------
    Set of `RailEnvNextAction` (tuples of (action,position,direction))
        Possible move actions (forward,left,right) and the next position/direction they lead to.
        It is not checked that the next cell is free.
    """
    moves: Set[RailEnvNextAction] = OrderedSet()
    transitions = rail.get_transitions(*agent_position, agent_direction)
    open_count = np.count_nonzero(transitions)

    def _register(action, heading):
        # Record `action` as leading to the neighbouring cell in `heading`.
        target = get_new_position(agent_position, heading)
        moves.add(RailEnvNextAction(action, target, heading))

    # Dead end: the only way out is to turn around, reported as "forward".
    if rail.is_dead_end(agent_position):
        back = (agent_direction + 2) % 4
        if transitions[back]:
            _register(RailEnvActions.MOVE_FORWARD, back)
        return moves

    # Candidate headings in the order [left, forward, right].
    headings = [(agent_direction + turn) % 4 for turn in range(-1, 2)]

    # Single transition: the forward action is aligned with it.
    if open_count == 1:
        for heading in headings:
            if transitions[heading]:
                _register(RailEnvActions.MOVE_FORWARD, heading)
        return moves

    for heading in headings:
        if not transitions[heading]:
            continue
        if heading == agent_direction:
            action = RailEnvActions.MOVE_FORWARD
        elif heading == (agent_direction + 1) % 4:
            action = RailEnvActions.MOVE_RIGHT
        elif heading == (agent_direction - 1) % 4:
            action = RailEnvActions.MOVE_LEFT
        else:
            raise Exception("Illegal state")
        _register(action, heading)

    return moves
def _explore(self, handle, new_position, new_direction, depth=0):
    """Follow the rail from a cell and report the agents met on the way.

    The walk advances cell by cell (at most 100 steps) and stops as soon as
    another agent occupies the current cell, ``check_agent_decision`` reports
    the cell as near a switch, or the cell is on a switch. On a switch every
    open transition is explored recursively with ``depth + 1`` and the
    accumulated counters are divided by the number of explored branches.

    :param handle: handle of the exploring agent (ignored as an occupant)
    :param new_position: cell to start from
    :param new_direction: heading at the start cell
    :param depth: recursion depth; nothing is explored once it reaches
        ``self.max_depth``
    :return: ``(has_opp_agent, has_same_agent, has_switch, visited)``.
        ``visited`` lists the walked cells; each recursive call's own list is
        appended to it as a single nested element. ``has_switch`` is never
        set to a non-zero value inside this method.
    """
    opposing = 0
    same_way = 0
    switch_seen = 0
    trail = []

    # Stop exploring once the maximum depth is reached.
    if depth >= self.max_depth:
        return opposing, same_way, switch_seen, trail

    for _ in range(100):  # hard cap on the number of explored steps
        trail.append(new_position)

        occupant = self.env.agent_positions[new_position]
        if occupant != -1 and occupant != handle:
            if self.env.agents[occupant].direction != new_direction:
                # Occupant has a different heading: counted as opposing.
                opposing = 1
            else:
                same_way = 1
            return opposing, same_way, switch_seen, trail

        (on_switch, near_switch,
         _near_switch_all, _on_switch_all) = self.check_agent_decision(
            new_position, new_direction)
        if near_switch:
            return opposing, same_way, switch_seen, trail

        transitions = self.env.rail.get_transitions(*new_position,
                                                    new_direction)
        if not on_switch:
            # Plain track: keep following the first open transition.
            new_direction = fast_argmax(transitions)
            new_position = get_new_position(new_position, new_direction)
            continue

        branches = 0
        for heading in range(4):
            if transitions[heading] == 1:
                branches += 1
                sub_opp, sub_same, sub_switch, sub_trail = self._explore(
                    handle, get_new_position(new_position, heading), heading,
                    depth + 1)
                trail.append(sub_trail)
                opposing += sub_opp
                same_way += sub_same
                switch_seen += sub_switch
        divisor = max(branches, 1.0)
        return (opposing / divisor, same_way / divisor,
                switch_seen / divisor, trail)

    return opposing, same_way, switch_seen, trail
def get(self, handle: int = 0):
    """
    Build the path observation for one agent.

    Every open transition out of the agent's (virtual) position yields one
    entry made of: the move (one-hot encoded when ``self._encode_one_hot`` is
    set), the distance-map value of the first cell on that branch divided by
    the largest finite distance in the map, and a conflict flag. The flag is
    obtained by following the branch with ``self.conflict`` for as long as
    the cells offer exactly one transition and no conflict was reported.
    Entries are sorted by distance and written into a buffer of
    ``2 * self._path_size`` values, which is then extended by a constant
    priority of ``0.`` and by a flag telling whether ``agent.status.value``
    differs from ``RailAgentStatus.READY_TO_DEPART``.

    :param handle: index of the agent in ``self.env.agents``
    :return: 1-D float array, or ``None`` when the agent's status is none of
        READY_TO_DEPART, ACTIVE or DONE
    """
    agent = self.env.agents[handle]
    if agent.status == RailAgentStatus.READY_TO_DEPART:
        agent_virtual_position = agent.initial_position
    elif agent.status == RailAgentStatus.ACTIVE:
        agent_virtual_position = agent.position
    elif agent.status == RailAgentStatus.DONE:
        agent_virtual_position = agent.target
    else:
        return None

    possible_transitions = self.env.rail.get_transitions(
        *agent_virtual_position, agent.direction)
    distance_map = self.env.distance_map.get()

    # Mask of usable distances (neither +inf nor NaN). The previous
    # ``.astype(np.bool)`` fails on NumPy >= 1.24, where the alias is gone.
    usable = (distance_map != np.inf) & ~np.isnan(distance_map)
    max_distance = np.max(distance_map[usable])
    assert not np.isnan(max_distance)
    assert max_distance != np.inf

    possible_steps = []
    # Look in all directions for possible moves.
    for movement in self._directions:
        if not possible_transitions[movement]:
            continue
        next_move = movement
        pos = get_new_position(agent_virtual_position, movement)
        distance = distance_map[agent.handle][pos + (movement,)]
        if distance == np.inf or np.isnan(distance):
            distance = max_distance

        # Follow the branch until a conflict shows up or the track stops
        # offering exactly one transition.
        heading = movement
        conflict = self.conflict(handle, pos, heading)
        next_possible_moves = self.env.rail.get_transitions(*pos, heading)
        while np.count_nonzero(next_possible_moves) == 1 and not conflict:
            heading = np.argmax(next_possible_moves)
            pos = get_new_position(pos, heading)
            conflict = self.conflict(handle, pos, heading)
            next_possible_moves = self.env.rail.get_transitions(*pos, heading)

        if self._encode_one_hot:
            next_move_one_hot = np.zeros(len(self._directions))
            next_move_one_hot[next_move] = 1
            next_move = next_move_one_hot
        possible_steps.append(
            (next_move, [distance / max_distance], [int(conflict)]))

    possible_steps = sorted(possible_steps, key=lambda step: step[1])

    # Float buffer: an integer buffer (what ``np.full(..., fill_value=0)``
    # creates) would truncate the normalised distances to 0.
    obs = np.zeros(self._path_size * 2, dtype=float)
    for i, path in enumerate(possible_steps):
        obs[i * self._path_size:self._path_size * (i + 1)] = np.concatenate(path)

    priority = 0.
    return np.concatenate([
        obs,
        [priority, agent.status.value != RailAgentStatus.READY_TO_DEPART]
    ])
def TL_detector(env, obs, actions):
    """Collect a filtered observation path for agents about to enter a cell
    whose full transition value has exactly four bits set.

    Agents without a position are skipped and get no entry. For every other
    agent the cell reached by ``actions[idx]`` is looked up; when its full
    transition value has four set bits, a deep copy of ``obs[idx]`` is run
    through the ``transformer`` pipeline, otherwise ``None`` is stored.

    :param env: environment providing ``agents``, ``check_action``, ``rail``
        and ``obs_builder``
    :param obs: per-agent observations, indexed by agent index
    :param actions: per-agent actions, indexed by agent index
    :return: dict mapping agent index to the transformed path or ``None``
    """
    paths_by_agent = {}
    for idx, agent in enumerate(env.agents):
        if agent.position is None:
            continue

        heading, _transition_valid = env.check_action(agent, actions[idx])
        target_cell = get_new_position(agent.position, heading)
        set_bits = bin(env.rail.get_full_transitions(*target_cell)).count("1")

        if set_bits != 4:
            paths_by_agent[idx] = None
            continue

        path = copy.deepcopy(obs[idx])
        transformer.clip_tree_for_shortest_path(path)
        path = transformer.transform_agent_observation(path)
        path = transformer.split_node_list(path, env.obs_builder.branches)
        path = transformer.filter_agent_obs(path)
        print("agent")
        paths_by_agent[idx] = path

    return paths_by_agent
def naive_solver(env, obs):
    """Pick, for every agent, the action that follows the shortest path.

    Agents whose ``position`` is ``None`` are evaluated at their
    ``initial_position``. When the cell offers exactly one transition the
    action is ``2``; otherwise the headings ``direction - 1``, ``direction``
    and ``direction + 1`` (mod 4) are compared by their distance-map value and
    the index of the smallest one, plus one, is used.

    :param env: environment providing ``agents``, ``rail`` and
        ``distance_map``
    :param obs: unused
    :return: dict mapping agent index to the chosen action number
    """
    actions = {}
    for idx, agent in enumerate(env.agents):
        # Explicit fallback instead of a bare ``except``: agents that are not
        # on the grid have no position, so use their initial position. The
        # same position is used for the neighbour lookup below, which used to
        # crash on ``None`` when such an agent stood at a switch.
        position = agent.position
        if position is None:
            position = agent.initial_position

        possible_transitions = env.rail.get_transitions(*position,
                                                        agent.direction)
        if np.count_nonzero(possible_transitions) == 1:
            # presumably RailEnvActions.MOVE_FORWARD -- TODO confirm
            actions[idx] = 2
            continue

        distance_map = env.distance_map.get()
        min_distances = []
        for direction in [(agent.direction + i) % 4 for i in range(-1, 2)]:
            if possible_transitions[direction]:
                new_position = get_new_position(position, direction)
                min_distances.append(distance_map[idx, new_position[0],
                                                  new_position[1], direction])
            else:
                min_distances.append(np.inf)
        actions[idx] = np.argmin(min_distances) + 1
    return actions
def find_alternative(env, possible_transitions, agent_pos, agent_dir, prediction):
    """List the actions that lead away from the predicted next cell.

    Naive approach: if the agent is not on a fork it simply has no
    alternative. Otherwise one could recompute a shortest path that ignores
    the track occupied by the conflicting train (not done here).

    :param env: environment; only ``env.rail`` is used
    :param possible_transitions: open transitions of the agent's cell,
        indexed by heading
    :param agent_pos: current cell of the agent
    :param agent_dir: current heading of the agent
    :param prediction: sequence whose first element starts with the
        coordinates of the predicted next cell
    :return: list of actions from ``get_action_for_move``, one per open
        neighbour that is not the predicted next cell
    """
    open_headings = [
        heading
        for heading in ((agent_dir + turn) % 4 for turn in range(-1, 3))
        if possible_transitions[heading]
    ]

    candidates = []
    for heading in open_headings:
        cell = get_new_position(agent_pos, heading)
        candidates.append((cell, get_direction(pos1=agent_pos, pos2=cell)))

    # Keep every possible move except the one along the shortest path.
    planned_cell = (prediction[0][0], prediction[0][1])
    return [
        get_action_for_move(agent_pos, agent_dir, cell, heading, env.rail)
        for cell, heading in candidates
        if planned_cell not in (cell, heading)
    ]
def get(self, handle: int = 0):
    """
    Build the path observation for one agent, with conflict bookkeeping.

    Every open transition out of the agent's (virtual) position yields one
    entry made of: the move (one-hot encoded when ``self._encode_one_hot`` is
    set), the normalised distance-map value of the first cell on that branch,
    a conflict flag and its negation. Conflicts come from
    ``self.detect_conflicts``; the conflicting handle is recorded in
    ``self._shortest_path_conflict_map[handle]`` when it is found on the
    first open branch, and in ``self._other_path_conflict_map[handle]``
    otherwise. Entries are sorted by distance before being written into a
    buffer of ``2 * self._path_size`` values.

    :param handle: index of the agent in ``self.env.agents``
    :return: ``(obs, flag)`` where ``flag`` is ``int(agent.status.value !=
        RailAgentStatus.READY_TO_DEPART)``, or ``None`` when the agent's
        status is none of READY_TO_DEPART, ACTIVE or DONE
    """
    agent = self.env.agents[handle]
    if agent.status == RailAgentStatus.READY_TO_DEPART:
        agent_virtual_position = agent.initial_position
    elif agent.status == RailAgentStatus.ACTIVE:
        agent_virtual_position = agent.position
    elif agent.status == RailAgentStatus.DONE:
        agent_virtual_position = agent.target
    else:
        return None

    possible_transitions = self.env.rail.get_transitions(
        *agent_virtual_position, agent.direction)
    distance_map = self.env.distance_map.get()

    # Mask of usable distances (neither +inf nor NaN). The previous
    # ``.astype(np.bool)`` fails on NumPy >= 1.24, where the alias is gone.
    usable = (distance_map != np.inf) & ~np.isnan(distance_map)
    max_distance = np.max(distance_map[usable])
    assert not np.isnan(max_distance)
    assert max_distance != np.inf

    possible_steps = []
    # Look in all directions for possible moves.
    for movement in self._directions:
        if not possible_transitions[movement]:
            continue
        next_move = movement
        pos = get_new_position(agent_virtual_position, movement)
        distance = distance_map[agent.handle][pos + (movement,)]
        if distance == np.inf or np.isnan(distance):
            distance = max_distance

        cell_transitions = self.env.rail.get_transitions(*pos, movement)
        _, ch = self.detect_conflicts(
            1, np.reciprocal(agent.speed_data["speed"]), pos,
            cell_transitions, handle, movement)
        conflict = ch is not None

        if conflict and len(possible_steps) == 0:
            self._shortest_path_conflict_map[handle].append(ch)
        elif conflict:
            self._other_path_conflict_map[handle].append(ch)

        if self._encode_one_hot:
            next_move_one_hot = np.zeros(len(self._directions))
            next_move_one_hot[next_move] = 1
            next_move = next_move_one_hot

        possible_steps.append((
            next_move,
            [distance / max_distance],
            [int(conflict)],
            [int(not conflict)],  # priority field
        ))

    possible_steps = sorted(possible_steps, key=lambda step: step[1])

    # Float buffer: an integer buffer (what ``np.full(..., fill_value=0)``
    # creates) would truncate the normalised distances to 0.
    obs = np.zeros(self._path_size * 2, dtype=float)
    for i, path in enumerate(possible_steps):
        obs[i * self._path_size:self._path_size * (i + 1)] = np.concatenate(path)

    return obs, int(agent.status.value != RailAgentStatus.READY_TO_DEPART)
def find_safe_edges(self, env):
    """Register every cell on each agent's greedy shortest path as safe.

    Starting at the agent's position (or its initial position when it has
    none), the walk repeatedly moves to the open neighbour with the smallest
    distance-map value until the agent's target is reached, calling
    ``self.add_to_safe_map`` for the start cell and after every step.

    :param env: environment providing ``agents``, ``rail`` and
        ``distance_map``
    """
    for idx, agent in enumerate(env.agents):
        cell = agent.position
        heading = agent.direction
        if cell is None:
            cell = agent.initial_position
        self.add_to_safe_map(cell, heading)

        while not cell == agent.target:
            transitions = env.rail.get_transitions(*cell, heading)
            distances = []
            best = None
            for candidate in [(heading + turn) % 4 for turn in range(-1, 2)]:
                if not transitions[candidate]:
                    distances.append(np.inf)
                    continue
                neighbour = get_new_position(cell, candidate)
                distances.append(
                    env.distance_map.get()[idx, neighbour[0], neighbour[1],
                                           candidate])
                # Strict comparison: the first of several equal minima wins.
                if best is None or best > distances[-1]:
                    best = distances[-1]
                    next_cell = neighbour

            heading = find_new_direction(heading, np.argmin(distances) + 1)
            cell = next_cell
            self.add_to_safe_map(cell, heading)
def get_shortest_path_position(self, position, direction, handle):
    """Return the neighbouring cell and heading closest to the target.

    Each open transition out of ``position``/``direction`` is rated by the
    distance-map value of the cell it leads to; infinite or NaN values are
    replaced by the largest finite distance in the map. On ties the heading
    that comes last in ``self._directions`` wins.

    :param position: current cell
    :param direction: current heading
    :param handle: agent handle; first index into the distance map
    :return: ``(next_position, next_heading)``, or ``(None, None)`` when no
        transition is open
    """
    # Fetch the map once instead of on every use.
    distance_map = self.env.distance_map.get()

    # Mask of usable distances (neither +inf nor NaN). The previous
    # ``.astype(np.bool)`` fails on NumPy >= 1.24, where the alias is gone.
    usable = (distance_map != np.inf) & ~np.isnan(distance_map)
    max_dist = np.max(distance_map[usable])

    possible_transitions = self.env.rail.get_transitions(*position, direction)
    min_dist = np.inf
    sp_move = None
    sp_pos = None
    for movement in self._directions:
        if not possible_transitions[movement]:
            continue
        pos = get_new_position(position, movement)
        distance = distance_map[handle][pos + (movement,)]
        if distance == np.inf or np.isnan(distance):
            distance = max_dist
        if distance <= min_dist:
            min_dist = distance
            sp_move = movement
            sp_pos = pos
    return sp_pos, sp_move
def find_all_cell_where_agent_can_choose(self):
    """Index all switch cells and the cells directly in front of them.

    A cell/heading pair is a switch entry when more than one transition is
    open for it. A cell/heading pair is a switch-neighbour entry when the
    cell itself is not a switch and one of its open transitions leads onto a
    switch cell; the heading is recorded once per such transition.

    Stores the results in ``self.switches`` and ``self.switches_neighbours``,
    both mapping a ``(row, col)`` cell to a list of headings.
    """
    rail = self.env.rail
    height, width = self.env.height, self.env.width

    switch_map = {}
    for row in range(height):
        for col in range(width):
            cell = (row, col)
            for heading in range(4):
                exits = rail.get_transitions(*cell, heading)
                if fast_count_nonzero(exits) > 1:
                    switch_map.setdefault(cell, []).append(heading)

    neighbour_map = {}
    for row in range(height):
        for col in range(width):
            cell = (row, col)
            # Look one step forward.
            for heading in range(4):
                exits = rail.get_transitions(*cell, heading)
                for move in range(4):
                    if exits[move] != 1:
                        continue
                    ahead = get_new_position(cell, move)
                    if ahead in switch_map and cell not in switch_map:
                        neighbour_map.setdefault(cell, []).append(heading)

    self.switches = switch_map
    self.switches_neighbours = neighbour_map
def find_decision_cells(env):
    """
    :param env: The RailEnv to inspect
    :return: A set containing decision cells, made by switches and their neighbors
    """
    headings = range(4)
    switch_cells = set()
    adjacent_cells = set()

    for row in range(env.height):
        for col in range(env.width):
            cell = (row, col)

            # A switch has more than one outgoing transition for at least
            # one orientation.
            is_switch = any(
                np.count_nonzero(env.rail.get_transitions(*cell, orientation)) > 1
                for orientation in headings
            )
            if not is_switch:
                continue
            switch_cells.add(cell)

            # Add every cell reachable from the switch in one move.
            for orientation in headings:
                exits = env.rail.get_transitions(*cell, orientation)
                adjacent_cells.update(
                    get_new_position(cell, move)
                    for move in headings
                    if exits[move]
                )

    return switch_cells | adjacent_cells
def _find_all_decision_cells(self):
    """Locate switches, their neighbouring cells, and the union of both.

    A cell is a switch when, for at least one orientation, more than one
    outgoing transition is open. Its neighbours are all cells reachable from
    it through any open transition of any orientation.

    :return: tuple ``(switches, switches_neighbors, decision_cells)`` of
        three sets of ``(row, col)`` cells
    """
    rail_env = self.unwrapped.rail_env
    headings = range(4)
    switch_cells = set()
    adjacent_cells = set()

    for row in range(rail_env.height):
        for col in range(rail_env.width):
            cell = (row, col)

            # Switch check: more than one outgoing transition.
            is_switch = any(
                np.count_nonzero(
                    self.unwrapped.rail_env.rail.get_transitions(*cell, orientation)) > 1
                for orientation in headings
            )
            if not is_switch:
                continue
            switch_cells.add(cell)

            # Add all neighbouring rails of the switch.
            for orientation in headings:
                exits = self.unwrapped.rail_env.rail.get_transitions(
                    *cell, orientation)
                adjacent_cells.update(
                    get_new_position(cell, move)
                    for move in headings
                    if exits[move]
                )

    return switch_cells, adjacent_cells, switch_cells | adjacent_cells
def check_path_exists(self, start: IntVector2DArray, direction: int, end: IntVector2DArray):
    """
    Search for a possible path from one cell with a certain orientation to a
    target cell. Pending nodes are kept on a stack, so the traversal order is
    depth-first.

    :param start: Start cell from where we want to check the path
    :param direction: Start direction for the path we are testing
    :param end: Cell that we try to reach from the start cell
    :return: True if a path exists, False otherwise
    """
    seen = OrderedSet()
    pending = [(start, direction)]

    while pending:
        node = pending.pop()
        cell, heading = node[0], node[1]

        if Vec2d.is_equal(cell, end):
            return True
        if node in seen:
            continue

        seen.add(node)
        exits = self.get_transitions(cell[0], cell[1], heading)
        for move in range(4):
            if exits[move]:
                pending.append((get_new_position(cell, move), move))

    return False
def get(self, handle: int = 0) -> List[int]:
    """One-hot ``[left, forward, right]`` hint towards the shortest path.

    The agent is evaluated at its initial position (READY_TO_DEPART), its
    current position (ACTIVE) or its target (DONE). With exactly one open
    transition the result is ``[0, 1, 0]``; otherwise the slot of the heading
    with the smallest distance-map value is set.

    :param handle: index of the agent in ``self.env.agents``
    :return: list of three 0/1 values, or ``None`` for any other status
    """
    agent = self.env.agents[handle]
    status = agent.status
    if status == RailAgentStatus.READY_TO_DEPART:
        virtual_position = agent.initial_position
    elif status == RailAgentStatus.ACTIVE:
        virtual_position = agent.position
    elif status == RailAgentStatus.DONE:
        virtual_position = agent.target
    else:
        return None

    exits = self.env.rail.get_transitions(*virtual_position, agent.direction)

    # If only one transition is possible, the forward slot is aligned with it.
    if np.count_nonzero(exits) == 1:
        return [0, 1, 0]

    # Candidates ordered as [left, forward, right] relative to the heading;
    # closed ones are rated infinitely far away.
    distances = []
    for turn in (-1, 0, 1):
        heading = (agent.direction + turn) % 4
        if exits[heading]:
            cell = get_new_position(virtual_position, heading)
            distances.append(
                self.env.distance_map.get()[handle, cell[0], cell[1], heading])
        else:
            distances.append(np.inf)

    observation = [0, 0, 0]
    observation[np.argmin(distances)] = 1
    return observation
def get(self, handle: int = 0):
    """
    Build the per-branch observation (distance, malfunction, conflicts).

    Every open transition out of the agent's (virtual) position yields four
    values: the normalised distance-map value of the first cell on that
    branch, the largest malfunction value reported by the conflict detector
    divided by ``malfunction_process_data.max_duration``, the number of
    distinct conflicting handles divided by the number of agents, and a 0/1
    flag telling whether any conflict was reported. Conflicts are only
    looked up when ``handle`` is in ``self._relevant_handles``, the cell has
    more than one open transition and the agent is not READY_TO_DEPART.

    :param handle: index of the agent in ``self.env.agents``
    :return: float32 array of ``2 * self.path_size`` values, or ``None`` when
        the agent's status is none of READY_TO_DEPART, ACTIVE or DONE
    """
    agent = self.env.agents[handle]
    if agent.status == RailAgentStatus.READY_TO_DEPART:
        agent_virtual_position = agent.initial_position
    elif agent.status == RailAgentStatus.ACTIVE:
        agent_virtual_position = agent.position
    elif agent.status == RailAgentStatus.DONE:
        agent_virtual_position = agent.target
    else:
        return None

    possible_transitions = self.env.rail.get_transitions(
        *agent_virtual_position, agent.direction)
    distance_map = self.env.distance_map.get()

    # Mask of usable distances (neither +inf nor NaN). The previous
    # ``.astype(np.bool)`` fails on NumPy >= 1.24, where the alias is gone.
    usable = (distance_map != np.inf) & ~np.isnan(distance_map)
    max_distance = np.max(distance_map[usable])

    # Loop-invariant: whether conflicts are evaluated at all for this agent.
    check_conflicts = (
        handle in self._relevant_handles
        and np.count_nonzero(possible_transitions) > 1
        and agent.status != RailAgentStatus.READY_TO_DEPART
    )

    possible_paths = []
    for movement in self._directions:
        if not possible_transitions[movement]:
            continue
        pos = get_new_position(agent_virtual_position, movement)
        distance = distance_map[agent.handle][pos + (movement,)]
        if distance == np.inf or np.isnan(distance):
            distance = max_distance

        if check_conflicts:
            conflict, malf = self.conflict_detector.detect_conflicts_multi(
                position=pos,
                direction=movement,
                handles=self._relevant_handles,
                break_after_first=False,
                agent=self.env.agents[handle])
            malf = np.max(malf) if len(malf) > 0 else 0
        else:
            conflict = []
            malf = 0

        possible_paths.append(
            np.array([
                distance / max_distance,  # normalized distance to target
                malf / self.env.malfunction_process_data.max_duration,
                len(set(conflict)) / self.env.get_num_agents(),
                int(len(conflict) > 0)
            ]))

    # NOTE(review): index 1 is the malfunction ratio, not the distance;
    # confirm that sorting by it (rather than by path[0]) is intended.
    possible_steps = sorted(possible_paths, key=lambda path: path[1])

    obs = np.full(self.path_size * 2, fill_value=0, dtype=np.float32)
    for i, path in enumerate(possible_steps):
        obs[i * self.path_size:self.path_size * (i + 1)] = path
    return obs
def explore_branch(self, node, position, direction):
    """Follow one branch leaving ``node`` and attach what it leads to.

    The rail is followed from ``position`` until ``is_junction`` or
    ``is_target`` holds. Nodes missing from ``self.graph`` for that cell are
    created, ``node`` gets an edge to the node reached, every cell walked is
    recorded in ``self.edge_positions`` and ``self.edge_paths``, and each
    newly created node is explored recursively (depth-first).

    :param node: node the branch starts from
    :param position: first cell of the branch
    :param direction: heading with which ``node`` is left
    """
    start_heading = direction
    walked = {}  # (row, col, heading) -> (distance from node, next heading)
    steps = 1

    # Explore until we find a junction (or a target).
    while not (self.is_junction(position) or self.is_target(position)):
        onward = first(self.get_possible_transitions(position, direction))
        walked[(*position, direction)] = (steps, onward)
        position = get_new_position(position, onward)
        direction = onward
        steps += 1

    # Create any nodes that aren't in the graph yet.
    exits_by_entry = self.get_all_transitions(position)
    created = {
        exits: RailNode(position, exits, self.is_target(position))
        for entry, exits in enumerate(exits_by_entry)
        if exits and (*position, entry) not in self.graph
    }
    for entry, exits in enumerate(exits_by_entry):
        if exits in created:
            self.graph[(*position, entry)] = created[exits]

    # Connect the previous node to the next one and update the edge tables.
    reached = self.graph[(*position, direction)]
    node.edges[start_heading] = (reached, steps)
    for cell_key, (cell_distance, cell_onward) in walked.items():
        self.edge_positions[cell_key].append(
            (node, reached, start_heading, cell_distance))
        self.edge_paths[node.position, start_heading].append(
            (*cell_key, cell_onward))

    # Recurse into every branch of the nodes created above.
    for exits, fresh_node in created.items():
        for heading in exits:
            self.explore_branch(fresh_node,
                                get_new_position(position, heading), heading)
def _step_along_shortest_path(env, obs_builder, rail):
    """Step ``env`` once, moving every agent towards its target.

    For each agent all four exit headings are tried. A heading qualifies when
    the neighbouring cell lies inside the grid and, for at least one
    orientation, that cell allows a transition back towards the agent's cell.
    Among the qualifying headings the one with the smallest distance-map
    value (read at the agent's own cell with that heading) determines the
    action. Progress is printed along the way.

    :param env: environment that is stepped
    :param obs_builder: provides ``env.rail`` and ``env.distance_map``
    :param rail: provides ``transitions.get_direction_enum()``
    :return: the rewards returned by ``env.step``
    """
    # Action for a relative heading, expressed as if the agent faced north.
    action_when_facing_north = {
        Grid4TransitionsEnum.NORTH: RailEnvActions.MOVE_FORWARD,
        Grid4TransitionsEnum.EAST: RailEnvActions.MOVE_RIGHT,
        Grid4TransitionsEnum.WEST: RailEnvActions.MOVE_LEFT,
        Grid4TransitionsEnum.SOUTH: RailEnvActions.DO_NOTHING,
    }

    actions = {}
    expected_next_position = {}
    for agent in env.agents:
        best_distance = np.inf
        for exit_direction in range(4):
            neighbour = get_new_position(agent.position, exit_direction)
            inside = (0 <= neighbour[0] < env.height
                      and 0 <= neighbour[1] < env.width)
            if not inside:
                continue

            movement_back = (exit_direction + 2) % 4
            # Check all possible orientations in the neighbouring cell.
            for orientation in range(4):
                is_valid = obs_builder.env.rail.get_transition(
                    (neighbour[0], neighbour[1], orientation), movement_back)
                if not is_valid:
                    continue

                distance_to_target = obs_builder.env.distance_map.get()[
                    (agent.handle, *agent.position, exit_direction)]
                print("agent {} at {} facing {} taking {} distance {}".format(
                    agent.handle, agent.position, agent.direction,
                    exit_direction, distance_to_target))

                if distance_to_target < best_distance:
                    best_distance = distance_to_target
                    print(" improved (direction) -> {}".format(exit_direction))
                    relative = (exit_direction - agent.direction) % len(
                        rail.transitions.get_direction_enum())
                    actions[agent.handle] = action_when_facing_north[relative]
                    expected_next_position[agent.handle] = neighbour
                    print(" improved (action) -> {}".format(
                        actions[agent.handle]))

    _, rewards, dones, _ = env.step(actions)
    return rewards
def _get_and_update_neighbors(self, rail: GridTransitionMap, position, target_nr, current_distance,
                              enforce_target_direction=-1):
    """
    Utility function used by _distance_map_walker to perform a BFS walk over the rail, filling in the
    minimum distances from each target cell.

    For every in-grid neighbour of ``position`` and every orientation from
    which that neighbour allows a transition into ``position``, the stored
    distance is lowered to ``current_distance + 1`` when that is smaller.

    :param rail: transition map queried through ``get_transition``
    :param position: cell whose neighbours are updated
    :param target_nr: first index into ``self.distance_map``
    :param current_distance: distance already assigned to ``position``
    :param enforce_target_direction: when >= 0, only the neighbour in the
        opposite direction is considered
    :return: list of ``(row, col, orientation, distance)`` tuples, one per
        updated neighbour state
    """
    if enforce_target_direction >= 0:
        # The agent must land in the current cell with orientation
        # `enforce_target_direction`. This is only possible if it arrived
        # from the cell in the opposite direction.
        candidate_directions = [(enforce_target_direction + 2) % 4]
    else:
        candidate_directions = [0, 1, 2, 3]

    updated = []
    for neigh_direction in candidate_directions:
        row, col = get_new_position(position, neigh_direction)[:2]
        if not (0 <= row < self.env_height and 0 <= col < self.env_width):
            continue

        movement_into_cell = (neigh_direction + 2) % 4
        # Check every orientation the agent could have in the neighbour.
        for orientation in range(4):
            # Is a transition along `movement_into_cell` to the current
            # cell possible?
            if not rail.get_transition((row, col, orientation),
                                       movement_into_cell):
                continue

            # TODO: check that it works with deadends! -- still bugged!
            # movement = desired_movement_from_new_cell
            # if isNextCellDeadEnd:
            #     movement = (desired_movement_from_new_cell+2) % 4

            shorter = min(
                self.distance_map[target_nr, row, col, orientation],
                current_distance + 1)
            updated.append((row, col, orientation, shorter))
            self.distance_map[target_nr, row, col, orientation] = shorter

    return updated
def _check_action_on_agent(self, action: RailEnvActions, agent: EnvAgent):
    """
    Evaluate what ``action`` would do for ``agent`` without applying it.

    Parameters
    ----------
    action : RailEnvActions
    agent : EnvAgent

    Returns
    -------
    tuple
        ``(cell_free, new_cell_valid, new_direction, new_position, transition_valid)``:
        1) ``transition_valid``: the transition allows ``new_direction`` in the current cell,
        2) ``new_cell_valid``: the new cell is inside the grid and not empty (case 0),
        3) ``cell_free``: the new cell is valid and ``self.cell_free`` reports it as free
           (always True for a valid cell when ``self.remove_collisions`` is set).
    """
    new_direction, transition_valid = self.check_action(agent, action)
    new_position = get_new_position(agent.position, new_direction)

    # The position is inside the grid iff clipping to the grid leaves it
    # unchanged; only then is the cell checked for having any transitions.
    lower, upper = [0, 0], [self.height - 1, self.width - 1]
    inside_grid = np.array_equal(new_position,
                                 np.clip(new_position, lower, upper))
    new_cell_valid = (inside_grid
                      and self.rail.get_full_transitions(*new_position) > 0)

    # If transition validity hasn't been checked yet.
    if transition_valid is None:
        transition_valid = self.rail.get_transition(
            (*agent.position, agent.direction), new_direction)

    if not new_cell_valid:
        # Outside the scene (or empty cell): never free.
        cell_free = False
    elif self.remove_collisions:
        cell_free = True
    else:
        # Compared against all agent positions, including the agent's own.
        cell_free = self.cell_free(new_position)

    return cell_free, new_cell_valid, new_direction, new_position, transition_valid
def get_deadlocks(self, agent: EnvAgent, seen: List[int]) -> EnvAgent:
    """Find the agents that block ``agent`` in a deadlock.

    NOTE(review): despite the annotation, every return path yields a list.

    :param agent: agent to inspect
    :param seen: handles already visited during this search; mutated in
        place and shared with the recursive calls
    :return: list with one slot per transition holding the blocking agent
        (or ``None``) when either every open transition is blocked or
        ``agent.speed_data['position_fraction'] > 1``; an empty list
        otherwise
    """
    if agent.handle in seen:
        # Already checked. The handle is recorded a second time so that the
        # caller can recognise a circular deadlock (seen[0] == seen[-1]).
        seen.append(agent.handle)
        return []
    seen.append(agent.handle)

    rail_env: RailEnv = self.unwrapped.rail_env
    exits = rail_env.rail.get_transitions(*agent.position, agent.direction)
    open_count = np.count_nonzero(exits)

    # One slot per heading; filled with the agent blocking that heading.
    blockers = [None] * len(exits)
    for heading, is_open in enumerate(exits):
        if not is_open > 0:
            continue
        ahead = get_new_position(agent.position, heading)
        occupant_handle = rail_env.agent_positions[ahead]
        if occupant_handle == -1:
            continue

        occupant = rail_env.agents[occupant_handle]
        blocked_by = self.get_deadlocks(occupant, seen)
        # The occupant counts when it is blocked by this agent, or when it
        # is part of a circular blocking structure.
        if agent in blocked_by or seen[0] == seen[-1]:
            blockers[heading] = occupant

    blocked_count = np.count_nonzero(blockers)
    if blocked_count > 0:
        # A deadlock has to be resolved only if no transition is possible.
        if blocked_count == open_count:
            return blockers
        # Workaround for an already committed agent inside the cell that is
        # blocked by at least one agent.
        if agent.speed_data['position_fraction'] > 1:
            return blockers
    return []
def _check_feasible_transitions(self, pos, env):
    '''
    Function used to collect chains of blocked agents.

    Counts the open transitions of ``pos`` and, for each of them, the agents
    standing on the cell it leads to.

    :param pos: state unpacked into ``env.rail.get_transitions``
    :param env: environment providing ``rail`` and ``agents``
    :return: ``None`` when there are more open transitions than blocking
        agents; otherwise the index of the last blocking agent found (also
        ``None`` when nothing is open)
    '''
    exits = env.rail.get_transitions(*pos)
    open_count = 0
    blocked_count = 0
    blocker = None

    for heading, _ in enumerate(MOVEMENT_ARRAY):
        if exits[heading] != 1:
            continue
        open_count += 1
        ahead = get_new_position(pos, heading)
        for index in range(env.get_num_agents()):
            if env.agents[index].position == ahead:
                blocked_count += 1
                blocker = index

    return None if open_count > blocked_count else blocker
def get_shortest_path_action(env, handle):
    """Return the action number that follows the agent's shortest path.

    The agent is evaluated at its initial position (READY_TO_DEPART), its
    current position (ACTIVE) or its target (DONE). With exactly one open
    transition the forward slot is chosen; otherwise the headings
    ``direction - 1``, ``direction`` and ``direction + 1`` (mod 4) are rated
    by their distance-map value and the closest one wins.

    :param env: environment providing ``agents``, ``rail`` and
        ``distance_map``
    :param handle: index of the agent in ``env.agents``
    :return: index of the chosen slot in ``[left, forward, right]`` plus one,
        or ``None`` for any other agent status
    """
    distance_map = env.distance_map.get()
    agent = env.agents[handle]

    if agent.status == RailAgentStatus.READY_TO_DEPART:
        agent_virtual_position = agent.initial_position
    elif agent.status == RailAgentStatus.ACTIVE:
        agent_virtual_position = agent.position
    elif agent.status == RailAgentStatus.DONE:
        agent_virtual_position = agent.target
    else:
        return None

    # Read the transitions at the same cell the neighbours are computed
    # from. Previously a DONE agent mixed its initial position (transitions)
    # with its target (neighbours).
    possible_transitions = env.rail.get_transitions(*agent_virtual_position,
                                                    agent.direction)
    num_transitions = np.count_nonzero(possible_transitions)

    if num_transitions == 1:
        observation = [0, 1, 0]
    else:
        min_distances = []
        for direction in [(agent.direction + i) % 4 for i in range(-1, 2)]:
            if possible_transitions[direction]:
                new_position = get_new_position(agent_virtual_position,
                                                direction)
                min_distances.append(distance_map[handle, new_position[0],
                                                  new_position[1], direction])
            else:
                min_distances.append(np.inf)
        # ``np.argmin`` instead of ``np.argpartition(..., 2)[0]``: the
        # latter only guarantees that the largest value ends up last, not
        # that index 0 holds the smallest. This branch also covers every
        # transition count other than 1, so ``observation`` is always bound.
        observation = [0, 0, 0]
        observation[np.argmin(min_distances)] = 1

    return np.argmax(observation) + 1
def check_deadlock(self):
    """Detect active agents that newly face an oncoming agent on plain track.

    Starting at each active agent that is not yet listed in
    ``self._deadlocked_agents``, the rail is followed for as long as the
    current cell offers exactly one transition and the next cell is
    occupied.  The agent is marked as deadlocked when the occupying agent
    also has exactly one transition and its direction differs from the
    direction the walk currently has.

    :return: list of handles appended to ``self._deadlocked_agents`` by
        this call
    """
    rail_env: RailEnv = self.unwrapped.rail_env
    new_deadlocked_agents = []

    for agent in rail_env.agents:
        if agent.status != RailAgentStatus.ACTIVE or agent.handle in self._deadlocked_agents:
            continue

        position, direction = agent.position, agent.direction
        while position is not None:
            transitions = rail_env.rail.get_transitions(*position, direction)
            if np.count_nonzero(transitions) != 1:
                # a choice (or no way out) ends the walk
                break

            next_direction = np.argmax(transitions)
            next_cell = get_new_position(position, next_direction)
            blocker_handle = rail_env.agent_positions[next_cell]
            if blocker_handle == -1:
                # the cell ahead is free
                break

            blocker_position = rail_env.agents[blocker_handle].position
            blocker_direction = rail_env.agents[blocker_handle].direction
            blocker_transitions = rail_env.rail.get_transitions(
                *blocker_position, blocker_direction)

            if np.count_nonzero(blocker_transitions) == 1 and blocker_direction != direction:
                self._deadlocked_agents.append(agent.handle)
                new_deadlocked_agents.append(agent.handle)
                break

            # keep following the track behind the agent ahead
            position, direction = next_cell, next_direction

    return new_deadlocked_agents
def check_if_all_blocked(env):
    """
    Tell whether no agent has a free neighbouring cell to move into
    (full deadlock situation). In that case it is pointless to keep
    running inference as no agent will be able to move.

    :param env: current environment
    :return: False as soon as one agent has an allowed transition into a
        cell that holds no agent, True otherwise
    """
    # Cells currently held by an active or done agent
    occupied_cells = {
        tuple(agent.position)
        for agent in env.agents
        if agent.status in [RailAgentStatus.ACTIVE, RailAgentStatus.DONE] and agent.position
    }

    # Look for any agent that can still move
    for handle in env.get_agent_handles():
        agent = env.agents[handle]
        if agent.status == RailAgentStatus.READY_TO_DEPART:
            agent_virtual_position = agent.initial_position
        elif agent.status == RailAgentStatus.ACTIVE:
            agent_virtual_position = agent.position
        elif agent.status == RailAgentStatus.DONE:
            agent_virtual_position = agent.target
        else:
            continue

        heading = agent.direction
        possible_transitions = env.rail.get_transitions(
            *agent_virtual_position, heading)

        # all four directions, starting one step before the current heading
        for offset in range(-1, 3):
            branch_direction = (heading + offset) % 4
            if not possible_transitions[branch_direction]:
                continue
            target_cell = get_new_position(agent_virtual_position, branch_direction)
            if target_cell not in occupied_cells:
                return False

    # No agent can move at all: full deadlock!
    return True
def _generate_edges(self): ''' Translate the environment grid to the unpacked cell orientation graph ''' edges = [] for i, row in enumerate(self.grid): for j, _ in enumerate(row): if self.grid[i][j] != 0: trans_int = self.grid[i][j] trans_bitmap = format(trans_int, 'b').rjust(16, '0') num_ones = trans_bitmap.count('1') if num_ones == 2: self._straight_rails.add((i, j)) elif num_ones == 1: self._dead_ends.add((i, j)) tmp_edges, tmp_actions = [], dict() for k, bit in enumerate(trans_bitmap): if bit == '1': original_dir, final_dir = self._BITMAP_TO_TRANS[k] new_position_x, new_position_y = get_new_position( [i, j], final_dir.value) tmp_action = env_utils.agent_action( original_dir, final_dir) tmp_edges.append(((i, j, original_dir.value), (new_position_x, new_position_y, final_dir.value), tmp_action)) tmp_actions.setdefault( (i, j, original_dir.value), np.full((env_utils.get_num_actions(), ), False))[tmp_action.value] = True for tmp_edge in tmp_edges: tmp_choice = self.map_action_to_choice( tmp_edge[2], tmp_actions[tmp_edge[0]]) edge = (tmp_edge[0], tmp_edge[1], { 'weight': 1, 'action': tmp_edge[2], 'choice': tmp_choice }) edges.append(edge) return edges
def check_agent_decision(self, position, direction):
    """Classify a position/direction pair against the known switches.

    :param position: cell looked up in ``self.switches`` and
        ``self.switches_neighbours``
    :param direction: direction looked up in the entries of those mappings
    :return: tuple ``(agents_on_switch, agents_near_to_switch,
        agents_near_to_switch_all, agents_on_switch_all)`` where

        * ``agents_on_switch_all`` -- ``position`` is a key of ``switches``
        * ``agents_on_switch`` -- additionally ``direction`` is listed for it
        * ``agents_near_to_switch_all`` -- ``position`` is a key of
          ``switches_neighbours`` and ``direction`` is listed for it
        * ``agents_near_to_switch`` -- as above, unless the next cell in
          ``direction`` is a switch that lists ``direction`` itself
    """
    switches = self.switches
    neighbours = self.switches_neighbours

    on_switch_all = position in switches
    on_switch = on_switch_all and direction in switches[position]

    near_switch = False
    near_switch_all = False
    if position in neighbours:
        next_cell = get_new_position(position, direction)
        direction_listed_ahead = (
            next_cell in switches and direction in switches[next_cell])
        near_switch_all = direction in neighbours[position]
        near_switch = near_switch_all and not direction_listed_ahead

    return on_switch, near_switch, near_switch_all, on_switch_all