# Use last action cache
USE_ACTION_CACHE = False

# Observation parameters (must match training parameters!)
if new:
    observation_tree_depth = 8
    max_depth = 3
else:
    observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30

####################################################

remote_client = FlatlandRemoteClient()

# Observation builder
predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
if new:
    tree_observation = TreeObsForRailEnv(max_depth=max_depth, predictor=predictor)
else:
    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth, predictor=predictor)

# Calculates state and action sizes
if new:
    n_nodes = observation_tree_depth
    state_size = (11 + 1) * n_nodes - 1
else:
    n_nodes = sum([np.power(4, i) for i in range(observation_tree_depth + 1)])
    state_size = tree_observation.observation_dim * n_nodes
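# Quick sanity check of the classic branch above (assumption: each TreeObsForRailEnv
# node carries 11 features, i.e. tree_observation.observation_dim == 11): a depth-2
# tree expands to 1 + 4 + 16 = 21 nodes, giving a 231-dimensional state.
n_nodes_check = sum(4 ** i for i in range(2 + 1))  # 21 nodes for depth 2
state_size_check = 11 * n_nodes_check              # 231 features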
Example #2
File: run.py  Project: RaiAnant/Flatland-rl
from src.graph_observations import GraphObsForRailEnv
from flatland.envs.observations import GlobalObsForRailEnv  # used for the builder below
from flatland.evaluators.client import FlatlandRemoteClient
import numpy as np
import r2_solver
import time

remote_client = FlatlandRemoteClient()

my_observation_builder = GlobalObsForRailEnv()
evaluation_number = 0
while True:

    evaluation_number += 1

    time_start = time.time()
    observation, info = remote_client.env_create(
                    obs_builder_object=my_observation_builder
                )
    env_creation_time = time.time() - time_start
    if not observation:
        break
    
    print("Evaluation Number : {}".format(evaluation_number))

    local_env = remote_client.env
    solver = r2_solver.Solver(evaluation_number)

    time_taken_by_controller = []
    time_taken_per_step = []
    steps = 0
    while True:
Example #3
#####################################################################
# Define which device the controller should run on, if supported by
# the controller
#####################################################################
if USE_GPU and torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("🐇 Using GPU")
else:
    device = torch.device("cpu")
    print("🐢 Using CPU")

#####################################################################
# Instantiate a Remote Client
#####################################################################
remote_client = FlatlandRemoteClient()

#####################################################################
# Instantiate your custom Observation Builder
#
# You can build your own Observation Builder by following
# the example here :
# https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/observations.py#L14
#####################################################################
obs_tree_depth = 2
obs_max_path_depth = 20
predictor = ShortestPathPredictorForRailEnv(obs_max_path_depth)
obs_builder = TreeObsForRailEnv(max_depth=obs_tree_depth, predictor=predictor)

# Or, if you prefer to build the observation yourself from the env_step output,
# you can pass a DummyObservationBuilder() object instead, as sketched below.
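# A minimal sketch of that alternative, assuming the stock DummyObservationBuilder
# shipped with Flatland (flatland.core.env_observation_builder); it returns a trivial
# observation, so all feature construction happens in your own controller code.
from flatland.core.env_observation_builder import DummyObservationBuilder

dummy_obs_builder = DummyObservationBuilder()
# It would then be passed as obs_builder_object=dummy_obs_builder in
# remote_client.env_create(...).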
Example #4
import os

from flatlander.envs.utils.priorization.priorizer import NrAgentsSameStart, DistToTargetPriorizer
import numpy as np

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from collections import defaultdict
from flatlander.envs.utils.robust_gym_env import RobustFlatlandGymEnv
from flatland.evaluators.client import FlatlandRemoteClient, TimeoutException
from flatlander.envs.observations import make_obs
from flatlander.submission.helper import episode_start_info, episode_end_info, init_run, get_agent
from time import time
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
remote_client = FlatlandRemoteClient()

TIME_LIMIT = 60 * 60 * 8
EXPLORE = True


def skip(done):
    print("Skipping episode")
    while not done['__all__']:
        observation, all_rewards, done, info = remote_client.env_step({})
        print('!', end='', flush=True)

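# Hypothetical usage of skip() (not in the original file): wrap the remote step so
# that an evaluator timeout drains the rest of the episode with empty actions and
# the outer loop can move on to the next environment.
def step_or_skip(action_dict, done):
    try:
        return remote_client.env_step(action_dict)
    except TimeoutException:
        skip(done)
        done['__all__'] = True
        return None, None, done, None
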

def evaluate(config, run):
    start_time = time()
    obs_builder = make_obs(
Example #5
import sys
import torch
from pathlib import Path
from importlib_resources import path

from flatland.evaluators.client import FlatlandRemoteClient  # For evaluation

from src.graph_observations import GraphObsForRailEnv
from src.predictions import ShortestPathPredictorForRailEnv
from src.dueling_double_dqn import Agent
import src.nets

base_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(base_dir))

remote_client = FlatlandRemoteClient()  # Init remote client for eval

prediction_depth = 40
observation_builder = GraphObsForRailEnv(bfs_depth=4, predictor=ShortestPathPredictorForRailEnv(max_depth=prediction_depth))


state_size = prediction_depth + 5
network_action_size = 2
controller = Agent('fc', state_size, network_action_size)
railenv_action_dict = dict()


with path(src.nets, "exp_graph_obs_4_prio100.pth") as file_in:
    controller.qnetwork_local.load_state_dict(torch.load(file_in))
    
evaluation_number = 0
Example #6
def evaluate_remote():
    remote_client = FlatlandRemoteClient()
    my_observation_builder = SimpleObservation(max_depth=3, neighbours_depth=3,
            timetable=Judge(LinearOnAgentNumberSizeGenerator(0.03, 5), lr=0,
                    batch_size=0, optimization_epochs=0, device=torch.device("cpu")),
            deadlock_checker=DeadlockChecker(), greedy_checker=GreedyChecker(), parallel=False, eval=True)

    params = torch.load("generated/params.torch")
    params.neighbours_depth = my_observation_builder.neighbours_depth
    controller = PPOController(params, torch.device("cpu"))
    controller.load_controller("generated/controller.torch")
    my_observation_builder.timetable.load_judge("generated/judge.torch")

    render = False

    sum_reward, sum_percent_done = 0., 0.
    for evaluation_number in itertools.count():
        time_start = time.time()
        observation, info = remote_client.env_create(obs_builder_object=my_observation_builder)
        if not observation:
            break

        local_env = FlatlandWrapper(remote_client.env, FakeRewardShaper())
        local_env.n_agents = len(local_env.agents)
        log().check_time()
        if render:
            env_renderer = RenderTool(
                local_env.env,
                agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                show_debug=True,
                screen_height=600,
                screen_width=800
            )

        env_creation_time = time.time() - time_start

        print("Evaluation Number : {}".format(evaluation_number))

        time_taken_by_controller = []
        time_taken_per_step = []
        steps = 0
        done = defaultdict(lambda: False)
        while True:
            try:
                if render:
                    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
                time_start = time.time()
                action_dict = dict()
                handles_to_ask = list()
                observation = {k: torch.tensor(v, dtype=torch.float) for k, v in observation.items() if v is not None}
                for i in range(local_env.n_agents):
                    if not done[i]:
                        if local_env.obs_builder.greedy_checker.greedy_position(i):
                            action_dict[i] = 0
                        elif i in observation:
                            handles_to_ask.append(i)

                for handle in handles_to_ask:
                    for opp_handle in local_env.obs_builder.encountered[handle]:
                        if opp_handle != -1 and opp_handle not in observation:
                            observation[opp_handle] = torch.tensor(local_env.obs_builder._get_internal(opp_handle), dtype=torch.float)

                time_taken_per_step.append(time.time() - time_start)
                time_start = time.time()

                controller_actions = controller.fast_select_actions(handles_to_ask, observation,
                        local_env.obs_builder.encountered, train=True)
                action_dict.update(controller_actions)
                action_dict = {k: local_env.transform_action(k, v) for k, v in action_dict.items()}
                action_dict = {handle: action for handle, action in action_dict.items() if action != -1}

                time_taken = time.time() - time_start
                time_taken_by_controller.append(time_taken)

                time_start = time.time()
                observation, all_rewards, done, info = remote_client.env_step(action_dict)
                num_done = sum([1 for agent in local_env.agents if agent.status == RailAgentStatus.DONE_REMOVED])
                num_started = sum([1 for handle in range(len(local_env.agents)) if local_env.obs_builder.timetable.is_ready(handle)])

                finished_handles = [handle for handle in range(len(local_env.agents))
                        if local_env.obs_builder.timetable.ready_to_depart[handle] == 2]
                reward = torch.sum(local_env._max_episode_steps - local_env.obs_builder.timetable.end_time[finished_handles])
                reward /= len(local_env.agents) * local_env._max_episode_steps
                percent_done = float(num_done) / len(local_env.agents)
                deadlocked = int(sum(local_env.obs_builder.deadlock_checker._is_deadlocked) + 0.5)

                steps += 1
                time_taken = time.time() - time_start
                time_taken_per_step.append(time_taken)

                if done['__all__']:
                    print("Done agents {}/{}".format(num_done, len(local_env.agents)))
                    print("Started agents {}/{}".format(num_started, len(local_env.agents)))
                    print("Deadlocked agents {}/{}".format(deadlocked, len(local_env.agents)))
                    print("Reward: {}        Percent done: {}".format(reward, percent_done))
                    sum_reward += reward
                    sum_percent_done += percent_done
                    print("Total reward: {}        Avg percent done: {}".format(sum_reward, sum_percent_done / (evaluation_number + 1)))
                    if render:
                        env_renderer.close_window()
                    break
            except TimeoutException as err:
                print("Timeout! Will skip this episode and go to the next.", err)
                break

        
        np_time_taken_by_controller = np.array(time_taken_by_controller)
        np_time_taken_per_step = np.array(time_taken_per_step)
        print("="*100)
        print("="*100)
        print("Evaluation Number : ", evaluation_number)
        print("Current Env Path : ", remote_client.current_env_path)
        print("Env Creation Time : ", env_creation_time)
        print("Number of Steps : {}/{}".format(steps, local_env._max_episode_steps))
        print("Mean/Std/Sum of Time taken by Controller : ", np_time_taken_by_controller.mean(), np_time_taken_by_controller.std(), np_time_taken_by_controller.sum())
        print("Mean/Std/Sum of Time per Step : ", np_time_taken_per_step.mean(), np_time_taken_per_step.std(), np_time_taken_per_step.sum())
        log().print_time_metrics()
        log().zero_time_metrics()
        print("="*100)
        print("\n\n")

    print("Evaluation of all environments complete...")
    print(remote_client.submit())
Example #7
print(str(base_dir))
sys.path.append(str(base_dir))

from flatland.evaluators.client import FlatlandRemoteClient
from src.graph_observations import GraphObsForRailEnv
from src.predictions import ShortestPathPredictorForRailEnv
from src.state_machine import stateMachine
from src.test_battery import TestBattery

prediction_depth = 20

#####################################################################
# Instantiate a Remote Client
#####################################################################
print('starting remote_client')
remote_client = FlatlandRemoteClient()
observation_builder = GraphObsForRailEnv(predictor=ShortestPathPredictorForRailEnv(max_depth=prediction_depth), bfs_depth=4)

#####################################################################
# Main evaluation loop
#
# This iterates over an arbitrary number of env evaluations
#####################################################################
evaluation_number = 0
railenv_action_dict = {}
T_rewards = []
T_num_done_agents = []
T_all_done = []

while True:
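    # The original example is truncated at this point; the body below is a sketch that
    # follows the same env_create / step pattern used by the other examples in this
    # file (names match the variables initialised above).
    evaluation_number += 1
    observation, info = remote_client.env_create(obs_builder_object=observation_builder)
    if not observation:
        # No more environments to evaluate
        break
    # ... the controller/step loop for this episode would follow here ...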