# Use last action cache
USE_ACTION_CACHE = False

# Observation parameters (must match training parameters!)
if new:
    observation_tree_depth = 8
    max_depth = 3
else:
    observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30

####################################################

remote_client = FlatlandRemoteClient()

# Observation builder
predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
if new:
    tree_observation = TreeObsForRailEnv(max_depth=max_depth, predictor=predictor)
else:
    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth, predictor=predictor)

# Calculate state and action sizes
if new:
    n_nodes = observation_tree_depth
    state_size = (11 + 1) * n_nodes - 1
else:
    n_nodes = sum([np.power(4, i) for i in range(observation_tree_depth + 1)])
    state_size = tree_observation.observation_dim * n_nodes
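# Worked example (not part of the original script) of the two state-size formulas
# above. For the classic tree observation with branching factor 4, depth 2 gives
# 1 + 4 + 16 = 21 nodes and, assuming TreeObsForRailEnv's 11 features per node,
# a 231-dimensional state; the "new" branch with observation_tree_depth = 8
# gives (11 + 1) * 8 - 1 = 95.
for depth in (1, 2, 3):
    nodes = sum(4 ** i for i in range(depth + 1))
    print(f"tree depth {depth}: {nodes} nodes -> state_size = {11 * nodes}")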
from src.graph_observations import GraphObsForRailEnv
from flatland.evaluators.client import FlatlandRemoteClient
from flatland.envs.observations import GlobalObsForRailEnv
import numpy as np
import r2_solver
import time

remote_client = FlatlandRemoteClient()
my_observation_builder = GlobalObsForRailEnv()

evaluation_number = 0
while True:
    evaluation_number += 1
    time_start = time.time()
    observation, info = remote_client.env_create(
        obs_builder_object=my_observation_builder
    )
    env_creation_time = time.time() - time_start
    if not observation:
        break

    print("Evaluation Number : {}".format(evaluation_number))

    local_env = remote_client.env
    solver = r2_solver.Solver(evaluation_number)

    time_taken_by_controller = []
    time_taken_per_step = []
    steps = 0
    while True:
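        # Hedged sketch of the per-step loop, which is truncated in the original
        # fragment. `solver.GetMoves` is a hypothetical stand-in for whatever
        # interface r2_solver actually exposes; the env_step pattern matches the
        # other run scripts in this file.
        time_start = time.time()
        action = solver.GetMoves(local_env.agents, observation)  # hypothetical call
        time_taken_by_controller.append(time.time() - time_start)

        time_start = time.time()
        observation, all_rewards, done, info = remote_client.env_step(action)
        time_taken_per_step.append(time.time() - time_start)

        steps += 1
        if done['__all__']:
            break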
#####################################################################
# Define which device the controller should run on, if supported by
# the controller
#####################################################################
if USE_GPU and torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("🐇 Using GPU")
else:
    device = torch.device("cpu")
    print("🐢 Using CPU")

#####################################################################
# Instantiate a Remote Client
#####################################################################
remote_client = FlatlandRemoteClient()

#####################################################################
# Instantiate your custom Observation Builder
#
# You can build your own Observation Builder by following
# the example here :
# https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/observations.py#L14
#####################################################################
obs_tree_depth = 2
obs_max_path_depth = 20
predictor = ShortestPathPredictorForRailEnv(obs_max_path_depth)
obs_builder = TreeObsForRailEnv(max_depth=obs_tree_depth, predictor=predictor)

# Or if you want to use your own approach to build the observation from the env_step,
# please feel free to pass a DummyObservationBuilder() object as mentioned below,
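# As the comment above suggests, the tree observation builder can be swapped for
# a DummyObservationBuilder if you prefer to derive observations yourself from
# remote_client.env after each env_step. A minimal sketch of that alternative
# (the USE_DUMMY_OBS flag is illustrative, not from the original script):
USE_DUMMY_OBS = False
if USE_DUMMY_OBS:
    from flatland.core.env_observation_builder import DummyObservationBuilder
    obs_builder = DummyObservationBuilder()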
from flatlander.envs.utils.priorization.priorizer import NrAgentsSameStart, DistToTargetPriorizer
import numpy as np
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from collections import defaultdict
from flatlander.envs.utils.robust_gym_env import RobustFlatlandGymEnv
from flatland.evaluators.client import FlatlandRemoteClient, TimeoutException
from flatlander.envs.observations import make_obs
from flatlander.submission.helper import episode_start_info, episode_end_info, init_run, get_agent
from time import time
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
remote_client = FlatlandRemoteClient()

TIME_LIMIT = 60 * 60 * 8
EXPLORE = True


def skip(done):
    # Issue empty action dicts until the evaluator marks the episode as finished.
    print("Skipping episode")
    while not done['__all__']:
        observation, all_rewards, done, info = remote_client.env_step({})
        print('!', end='', flush=True)


def evaluate(config, run):
    start_time = time()
    obs_builder = make_obs(
import sys
import torch
from pathlib import Path
from importlib_resources import path

from flatland.evaluators.client import FlatlandRemoteClient  # For evaluation
from src.graph_observations import GraphObsForRailEnv
from src.predictions import ShortestPathPredictorForRailEnv
from src.dueling_double_dqn import Agent
import src.nets

base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))

remote_client = FlatlandRemoteClient()  # Init remote client for eval

prediction_depth = 40
observation_builder = GraphObsForRailEnv(bfs_depth=4,
                                         predictor=ShortestPathPredictorForRailEnv(max_depth=prediction_depth))

state_size = prediction_depth + 5
network_action_size = 2
controller = Agent('fc', state_size, network_action_size)
railenv_action_dict = dict()

with path(src.nets, "exp_graph_obs_4_prio100.pth") as file_in:
    controller.qnetwork_local.load_state_dict(torch.load(file_in))

evaluation_number = 0
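# Note (not in the original script): the checkpoint above is loaded with a bare
# torch.load(). If it was saved from a GPU run, loading it on the CPU-only
# evaluation image generally needs an explicit map_location. Hedged sketch,
# guarded by an illustrative flag so it does not change the script's behaviour:
FORCE_CPU_CHECKPOINT = False
if FORCE_CPU_CHECKPOINT:
    with path(src.nets, "exp_graph_obs_4_prio100.pth") as file_in:
        controller.qnetwork_local.load_state_dict(
            torch.load(file_in, map_location=torch.device("cpu")))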
def evaluate_remote():
    remote_client = FlatlandRemoteClient()
    my_observation_builder = SimpleObservation(max_depth=3, neighbours_depth=3,
                                               timetable=Judge(LinearOnAgentNumberSizeGenerator(0.03, 5),
                                                               lr=0, batch_size=0, optimization_epochs=0,
                                                               device=torch.device("cpu")),
                                               deadlock_checker=DeadlockChecker(),
                                               greedy_checker=GreedyChecker(),
                                               parallel=False, eval=True)
    params = torch.load("generated/params.torch")
    params.neighbours_depth = my_observation_builder.neighbours_depth
    controller = PPOController(params, torch.device("cpu"))
    controller.load_controller("generated/controller.torch")
    my_observation_builder.timetable.load_judge("generated/judge.torch")

    render = False
    sum_reward, sum_percent_done = 0., 0.

    for evaluation_number in itertools.count():
        time_start = time.time()
        observation, info = remote_client.env_create(obs_builder_object=my_observation_builder)
        if not observation:
            break

        local_env = FlatlandWrapper(remote_client.env, FakeRewardShaper())
        local_env.n_agents = len(local_env.agents)
        log().check_time()

        if render:
            env_renderer = RenderTool(
                local_env.env,
                agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                show_debug=True,
                screen_height=600,
                screen_width=800
            )

        env_creation_time = time.time() - time_start
        print("Evaluation Number : {}".format(evaluation_number))

        time_taken_by_controller = []
        time_taken_per_step = []
        steps = 0
        done = defaultdict(lambda: False)

        while True:
            try:
                if render:
                    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)

                # Collect the agents that still need a decision; agents in a
                # "greedy" position are assigned action 0 directly.
                time_start = time.time()
                action_dict = dict()
                handles_to_ask = list()
                observation = {k: torch.tensor(v, dtype=torch.float) for k, v in observation.items() if v is not None}
                for i in range(local_env.n_agents):
                    if not done[i]:
                        if local_env.obs_builder.greedy_checker.greedy_position(i):
                            action_dict[i] = 0
                        elif i in observation:
                            handles_to_ask.append(i)

                # Make sure an observation exists for every encountered (opposing)
                # agent the controller needs to look at.
                for handle in handles_to_ask:
                    for opp_handle in local_env.obs_builder.encountered[handle]:
                        if opp_handle != -1 and opp_handle not in observation:
                            observation[opp_handle] = torch.tensor(local_env.obs_builder._get_internal(opp_handle),
                                                                   dtype=torch.float)
                time_taken_per_step.append(time.time() - time_start)

                # Query the PPO controller and translate its outputs into RailEnv actions.
                time_start = time.time()
                controller_actions = controller.fast_select_actions(handles_to_ask, observation,
                                                                    local_env.obs_builder.encountered, train=True)
                action_dict.update(controller_actions)
                action_dict = {k: local_env.transform_action(k, v) for k, v in action_dict.items()}
                action_dict = {handle: action for handle, action in action_dict.items() if action != -1}
                time_taken = time.time() - time_start
                time_taken_by_controller.append(time_taken)

                time_start = time.time()
                observation, all_rewards, done, info = remote_client.env_step(action_dict)

                # Per-step statistics: finished, started and deadlocked agents plus a shaped reward.
                num_done = sum([1 for agent in local_env.agents if agent.status == RailAgentStatus.DONE_REMOVED])
                num_started = sum([1 for handle in range(len(local_env.agents))
                                   if local_env.obs_builder.timetable.is_ready(handle)])
                finished_handles = [handle for handle in range(len(local_env.agents))
                                    if local_env.obs_builder.timetable.ready_to_depart[handle] == 2]
                reward = torch.sum(local_env._max_episode_steps - local_env.obs_builder.timetable.end_time[finished_handles])
                reward /= len(local_env.agents) * local_env._max_episode_steps
                percent_done = float(num_done) / len(local_env.agents)
                deadlocked = int(sum(local_env.obs_builder.deadlock_checker._is_deadlocked) + 0.5)

                steps += 1
                time_taken = time.time() - time_start
                time_taken_per_step.append(time_taken)

                if done['__all__']:
                    print("Done agents {}/{}".format(num_done, len(local_env.agents)))
print("Started agents {}/{}".format(num_started, len(local_env.agents))) print("Deadlocked agents {}/{}".format(deadlocked, len(local_env.agents))) print("Reward: {} Percent done: {}".format(reward, percent_done)) sum_reward += reward sum_percent_done += percent_done print("Total reward: {} Avg percent done: {}".format(sum_reward, sum_percent_done / (evaluation_number + 1))) if render: env_renderer.close_window() break except TimeoutException as err: print("Timeout! Will skip this episode and go to the next.", err) break np_time_taken_by_controller = np.array(time_taken_by_controller) np_time_taken_per_step = np.array(time_taken_per_step) print("="*100) print("="*100) print("Evaluation Number : ", evaluation_number) print("Current Env Path : ", remote_client.current_env_path) print("Env Creation Time : ", env_creation_time) print("Number of Steps : {}/{}".format(steps, local_env._max_episode_steps)) print("Mean/Std/Sum of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std(), np_time_taken_by_controller.sum()) print("Mean/Std/Sum of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std(), np_time_taken_per_step.sum()) log().print_time_metrics() log().zero_time_metrics() print("="*100) print("\n\n") print("Evaluation of all environments complete...") print(remote_client.submit())
print(str(base_dir))
sys.path.append(str(base_dir))

from flatland.evaluators.client import FlatlandRemoteClient
from src.graph_observations import GraphObsForRailEnv
from src.predictions import ShortestPathPredictorForRailEnv
from src.state_machine import stateMachine
from src.test_battery import TestBattery

prediction_depth = 20

#####################################################################
# Instantiate a Remote Client
#####################################################################
print('starting remote_client')
remote_client = FlatlandRemoteClient()

observation_builder = GraphObsForRailEnv(predictor=ShortestPathPredictorForRailEnv(max_depth=prediction_depth),
                                         bfs_depth=4)

#####################################################################
# Main evaluation loop
#
# This iterates over an arbitrary number of env evaluations
#####################################################################
evaluation_number = 0
railenv_action_dict = {}
T_rewards = []
T_num_done_agents = []
T_all_done = []

while True:
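    # Hedged sketch of the start of the loop body, which is truncated here; it
    # follows the env_create / break-on-empty pattern used by the other run
    # scripts in this file.
    evaluation_number += 1
    observation, info = remote_client.env_create(obs_builder_object=observation_builder)
    if not observation:
        # The evaluator has no more environments to serve; leave the loop.
        break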